From b68966eea5aa6bb5514b2f4438321363b6811e7a Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 10 Jul 2024 12:32:23 +0200
Subject: [PATCH 001/226] [wgpu-core] use the view's format not the texture's
 format

This fixes a regression introduced in 0a76c0fa84e5e8c10c62f0a19fb54b65c0a4f6e2.
---
 wgpu-core/src/command/render.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index 66abd33b60..dfeb4fb52a 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -1155,7 +1155,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
         let attachment_formats = AttachmentData {
             colors: color_attachments
                 .iter()
-                .map(|at| at.as_ref().map(|at| at.view.desc.texture_format))
+                .map(|at| at.as_ref().map(|at| at.view.desc.format))
                 .collect(),
             resolves: color_attachments
                 .iter()

From 0ace0813deb2f00de1526f8ac7771e830fdb7595 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 10 Jul 2024 13:11:37 +0200
Subject: [PATCH 002/226] update `target_pixel_byte_cost` and
 `target_component_alignment`

---
 wgpu-types/src/lib.rs | 87 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 82 insertions(+), 5 deletions(-)

diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
index d61f43496b..d60dbc2873 100644
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@@ -3655,27 +3655,35 @@ impl TextureFormat {
     /// <https://gpuweb.github.io/gpuweb/#render-target-pixel-byte-cost>
     pub fn target_pixel_byte_cost(&self) -> Option<u32> {
         match *self {
-            Self::R8Unorm | Self::R8Uint | Self::R8Sint => Some(1),
+            Self::R8Unorm | Self::R8Snorm | Self::R8Uint | Self::R8Sint => Some(1),
             Self::Rg8Unorm
+            | Self::Rg8Snorm
             | Self::Rg8Uint
             | Self::Rg8Sint
             | Self::R16Uint
             | Self::R16Sint
+            | Self::R16Unorm
+            | Self::R16Snorm
             | Self::R16Float => Some(2),
             Self::Rgba8Uint
             | Self::Rgba8Sint
             | Self::Rg16Uint
             | Self::Rg16Sint
+            | Self::Rg16Unorm
+            | Self::Rg16Snorm
             | Self::Rg16Float
             | Self::R32Uint
             | Self::R32Sint
             | Self::R32Float => Some(4),
             Self::Rgba8Unorm
             | Self::Rgba8UnormSrgb
+            | Self::Rgba8Snorm
             | Self::Bgra8Unorm
             | Self::Bgra8UnormSrgb
             | Self::Rgba16Uint
             | Self::Rgba16Sint
+            | Self::Rgba16Unorm
+            | Self::Rgba16Snorm
             | Self::Rgba16Float
             | Self::Rg32Uint
             | Self::Rg32Sint
@@ -3684,14 +3692,45 @@ impl TextureFormat {
             | Self::Rgb10a2Unorm
             | Self::Rg11b10Float => Some(8),
             Self::Rgba32Uint | Self::Rgba32Sint | Self::Rgba32Float => Some(16),
-            Self::Rgba8Snorm | Self::Rg8Snorm | Self::R8Snorm => None,
-            _ => None,
+            Self::Stencil8
+            | Self::Depth16Unorm
+            | Self::Depth24Plus
+            | Self::Depth24PlusStencil8
+            | Self::Depth32Float
+            | Self::Depth32FloatStencil8
+            | Self::NV12
+            | Self::Rgb9e5Ufloat
+            | Self::Bc1RgbaUnorm
+            | Self::Bc1RgbaUnormSrgb
+            | Self::Bc2RgbaUnorm
+            | Self::Bc2RgbaUnormSrgb
+            | Self::Bc3RgbaUnorm
+            | Self::Bc3RgbaUnormSrgb
+            | Self::Bc4RUnorm
+            | Self::Bc4RSnorm
+            | Self::Bc5RgUnorm
+            | Self::Bc5RgSnorm
+            | Self::Bc6hRgbUfloat
+            | Self::Bc6hRgbFloat
+            | Self::Bc7RgbaUnorm
+            | Self::Bc7RgbaUnormSrgb
+            | Self::Etc2Rgb8Unorm
+            | Self::Etc2Rgb8UnormSrgb
+            | Self::Etc2Rgb8A1Unorm
+            | Self::Etc2Rgb8A1UnormSrgb
+            | Self::Etc2Rgba8Unorm
+            | Self::Etc2Rgba8UnormSrgb
+            | Self::EacR11Unorm
+            | Self::EacR11Snorm
+            | Self::EacRg11Unorm
+            | Self::EacRg11Snorm
+            | Self::Astc { .. } => None,
         }
     }
 
     /// See <https://gpuweb.github.io/gpuweb/#render-target-component-alignment>
     pub fn target_component_alignment(&self) -> Option<u32> {
-        match self {
+        match *self {
             Self::R8Unorm
             | Self::R8Snorm
             | Self::R8Uint
@@ -3709,12 +3748,18 @@ impl TextureFormat {
             | Self::Bgra8UnormSrgb => Some(1),
             Self::R16Uint
             | Self::R16Sint
+            | Self::R16Unorm
+            | Self::R16Snorm
             | Self::R16Float
             | Self::Rg16Uint
             | Self::Rg16Sint
+            | Self::Rg16Unorm
+            | Self::Rg16Snorm
             | Self::Rg16Float
             | Self::Rgba16Uint
             | Self::Rgba16Sint
+            | Self::Rgba16Unorm
+            | Self::Rgba16Snorm
             | Self::Rgba16Float => Some(2),
             Self::R32Uint
             | Self::R32Sint
@@ -3728,7 +3773,39 @@ impl TextureFormat {
             | Self::Rgb10a2Uint
             | Self::Rgb10a2Unorm
             | Self::Rg11b10Float => Some(4),
-            _ => None,
+            Self::Stencil8
+            | Self::Depth16Unorm
+            | Self::Depth24Plus
+            | Self::Depth24PlusStencil8
+            | Self::Depth32Float
+            | Self::Depth32FloatStencil8
+            | Self::NV12
+            | Self::Rgb9e5Ufloat
+            | Self::Bc1RgbaUnorm
+            | Self::Bc1RgbaUnormSrgb
+            | Self::Bc2RgbaUnorm
+            | Self::Bc2RgbaUnormSrgb
+            | Self::Bc3RgbaUnorm
+            | Self::Bc3RgbaUnormSrgb
+            | Self::Bc4RUnorm
+            | Self::Bc4RSnorm
+            | Self::Bc5RgUnorm
+            | Self::Bc5RgSnorm
+            | Self::Bc6hRgbUfloat
+            | Self::Bc6hRgbFloat
+            | Self::Bc7RgbaUnorm
+            | Self::Bc7RgbaUnormSrgb
+            | Self::Etc2Rgb8Unorm
+            | Self::Etc2Rgb8UnormSrgb
+            | Self::Etc2Rgb8A1Unorm
+            | Self::Etc2Rgb8A1UnormSrgb
+            | Self::Etc2Rgba8Unorm
+            | Self::Etc2Rgba8UnormSrgb
+            | Self::EacR11Unorm
+            | Self::EacR11Snorm
+            | Self::EacRg11Unorm
+            | Self::EacRg11Snorm
+            | Self::Astc { .. } => None,
         }
     }
 

From 4349e20b8d6d085d6b870701be4d108baca7960e Mon Sep 17 00:00:00 2001
From: Xiaopeng Li <lixiaopeng.jetspark@bytedance.com>
Date: Wed, 10 Jul 2024 22:20:23 +0800
Subject: [PATCH 003/226] Expose adapter driver version for DX12 backend
 (#5927)

---
 wgpu-hal/src/dx12/adapter.rs | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs
index 6c8ed1ccad..c05d9a8b3f 100644
--- a/wgpu-hal/src/dx12/adapter.rs
+++ b/wgpu-hal/src/dx12/adapter.rs
@@ -8,7 +8,8 @@ use winapi::{
     shared::{
         dxgi, dxgi1_2, dxgiformat::DXGI_FORMAT_B8G8R8A8_UNORM, minwindef::DWORD, windef, winerror,
     },
-    um::{d3d12 as d3d12_ty, d3d12sdklayers, winuser},
+    um::{d3d12 as d3d12_ty, d3d12sdklayers, winnt, winuser},
+    Interface,
 };
 
 impl Drop for super::Adapter {
@@ -130,7 +131,24 @@ impl super::Adapter {
             } else {
                 wgt::DeviceType::DiscreteGpu
             },
-            driver: String::new(),
+            driver: {
+                let mut i: winnt::LARGE_INTEGER = unsafe { mem::zeroed() };
+                if 0 == unsafe {
+                    adapter.CheckInterfaceSupport(&dxgi::IDXGIDevice::uuidof(), &mut i)
+                } {
+                    let quad_part = unsafe { *i.QuadPart() };
+                    const MASK: i64 = 0xFFFF;
+                    format!(
+                        "{}.{}.{}.{}",
+                        quad_part >> 48,
+                        (quad_part >> 32) & MASK,
+                        (quad_part >> 16) & MASK,
+                        quad_part & MASK
+                    )
+                } else {
+                    String::new()
+                }
+            },
             driver_info: String::new(),
         };
 

From ef0ce05d3a6ffae396c3335d71ef76069a2d480d Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 10 Jul 2024 12:12:12 +0200
Subject: [PATCH 004/226] [wgpu-core] fix `.zip()` usages

---
 wgpu-core/src/command/bind.rs  |  5 +++-
 wgpu-core/src/device/global.rs | 26 +++++++++++++---
 wgpu-core/src/lib.rs           |  1 +
 wgpu-core/src/utils.rs         | 54 ++++++++++++++++++++++++++++++++++
 4 files changed, 81 insertions(+), 5 deletions(-)
 create mode 100644 wgpu-core/src/utils.rs

diff --git a/wgpu-core/src/command/bind.rs b/wgpu-core/src/command/bind.rs
index a6176ac4c9..64d534b558 100644
--- a/wgpu-core/src/command/bind.rs
+++ b/wgpu-core/src/command/bind.rs
@@ -144,7 +144,10 @@ mod compat {
 
                         let mut expected_bgl_entries = expected_bgl.entries.iter();
                         let mut assigned_bgl_entries = assigned_bgl.entries.iter();
-                        let zipped = (&mut expected_bgl_entries).zip(&mut assigned_bgl_entries);
+                        let zipped = crate::utils::ZipWithProperAdvance::new(
+                            &mut expected_bgl_entries,
+                            &mut assigned_bgl_entries,
+                        );
 
                         for ((&binding, expected_entry), (_, assigned_entry)) in zipped {
                             if assigned_entry.visibility != expected_entry.visibility {
diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index d74e34d386..499ba6ecca 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -1532,8 +1532,17 @@ impl Global {
                 let mut pipeline_layout_guard = hub.pipeline_layouts.write();
                 let mut bgl_guard = hub.bind_group_layouts.write();
                 pipeline_layout_guard.insert(ids.root_id, pipeline.layout.clone());
-                let group_ids = &mut ids.group_ids.iter();
-                for (bgl_id, bgl) in group_ids.zip(pipeline.layout.bind_group_layouts.iter()) {
+                let mut group_ids = ids.group_ids.iter();
+                // NOTE: If the first iterator is longer than the second, the `.zip()` impl will still advance
+                // the first iterator before realizing that the second iterator has finished.
+                // The `pipeline.layout.bind_group_layouts` iterator will always be shorter than `ids.group_ids`,
+                // so using it as the first iterator for `.zip()` will work properly.
+                for (bgl, bgl_id) in pipeline
+                    .layout
+                    .bind_group_layouts
+                    .iter()
+                    .zip(&mut group_ids)
+                {
                     bgl_guard.insert(*bgl_id, bgl.clone());
                 }
                 for bgl_id in group_ids {
@@ -1721,8 +1730,17 @@ impl Global {
                 let mut pipeline_layout_guard = hub.pipeline_layouts.write();
                 let mut bgl_guard = hub.bind_group_layouts.write();
                 pipeline_layout_guard.insert(ids.root_id, pipeline.layout.clone());
-                let group_ids = &mut ids.group_ids.iter();
-                for (bgl_id, bgl) in group_ids.zip(pipeline.layout.bind_group_layouts.iter()) {
+                let mut group_ids = ids.group_ids.iter();
+                // NOTE: If the first iterator is longer than the second, the `.zip()` impl will still advance
+                // the first iterator before realizing that the second iterator has finished.
+                // The `pipeline.layout.bind_group_layouts` iterator will always be shorter than `ids.group_ids`,
+                // so using it as the first iterator for `.zip()` will work properly.
+                for (bgl, bgl_id) in pipeline
+                    .layout
+                    .bind_group_layouts
+                    .iter()
+                    .zip(&mut group_ids)
+                {
                     bgl_guard.insert(*bgl_id, bgl.clone());
                 }
                 for bgl_id in group_ids {
diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs
index 7600436bc4..36105c90e6 100644
--- a/wgpu-core/src/lib.rs
+++ b/wgpu-core/src/lib.rs
@@ -71,6 +71,7 @@ pub mod resource;
 mod snatch;
 pub mod storage;
 mod track;
+mod utils;
 // This is public for users who pre-compile shaders while still wanting to
 // preserve all run-time checks that `wgpu-core` does.
 // See <https://github.com/gfx-rs/wgpu/issues/3103>, after which this can be
diff --git a/wgpu-core/src/utils.rs b/wgpu-core/src/utils.rs
new file mode 100644
index 0000000000..cf61e797e2
--- /dev/null
+++ b/wgpu-core/src/utils.rs
@@ -0,0 +1,54 @@
+/// If the first iterator is longer than the second, the zip implementation
+/// in the standard library will still advance the first iterator before
+/// realizing that the second iterator has finished.
+///
+/// This implementation will advance the shorter iterator first avoiding
+/// the issue above.
+///
+/// If you can guarantee that the first iterator is always shorter than the
+/// second, you should use the zip impl in stdlib.
+pub(crate) struct ZipWithProperAdvance<
+    A: ExactSizeIterator<Item = IA>,
+    B: ExactSizeIterator<Item = IB>,
+    IA,
+    IB,
+> {
+    a: A,
+    b: B,
+    iter_a_first: bool,
+}
+
+impl<A: ExactSizeIterator<Item = IA>, B: ExactSizeIterator<Item = IB>, IA, IB>
+    ZipWithProperAdvance<A, B, IA, IB>
+{
+    pub(crate) fn new(a: A, b: B) -> Self {
+        let iter_a_first = a.len() <= b.len();
+        Self { a, b, iter_a_first }
+    }
+}
+
+impl<A: ExactSizeIterator<Item = IA>, B: ExactSizeIterator<Item = IB>, IA, IB> Iterator
+    for ZipWithProperAdvance<A, B, IA, IB>
+{
+    type Item = (IA, IB);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.iter_a_first {
+            let a = self.a.next()?;
+            let b = self.b.next()?;
+            Some((a, b))
+        } else {
+            let b = self.b.next()?;
+            let a = self.a.next()?;
+            Some((a, b))
+        }
+    }
+}
+
+impl<A: ExactSizeIterator<Item = IA>, B: ExactSizeIterator<Item = IB>, IA, IB> ExactSizeIterator
+    for ZipWithProperAdvance<A, B, IA, IB>
+{
+    fn len(&self) -> usize {
+        self.a.len().min(self.b.len())
+    }
+}

From b5c33fc1a4704d7483bbbf9e171aec0b72b5c582 Mon Sep 17 00:00:00 2001
From: Jim Blandy <jimb@red-bean.com>
Date: Tue, 9 Jul 2024 18:52:48 -0700
Subject: [PATCH 005/226] [wgpu] Reorganize buffer mapping docs. Make example
 code work.

Consolidate the explanation of buffer mapping in the documentation for
`Buffer`. Change the example code to actually compile, given that the
`map_async` callback needs to share access to the buffer with
`map_async`'s caller. Mapping isn't pretty, but covering that up in
the docs doesn't improve matters.

For `BufferSlice`, `BufferView`, and `BufferViewMut`, consolidate
extended explanations and background in the docs for types, rather
than scattering it around in the docs for associated functions.

For `Buffer::slice`, `BufferSlice::get_mapped_range`, and
`BufferSlice::get_mapped_range_mut`, update documentation to provide
necessary details, but defer to types' docs for background.
---
 wgpu/src/lib.rs | 186 ++++++++++++++++++++++++++++++++++++------------
 1 file changed, 141 insertions(+), 45 deletions(-)

diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs
index 7da27e355b..d895b696cf 100644
--- a/wgpu/src/lib.rs
+++ b/wgpu/src/lib.rs
@@ -293,6 +293,14 @@ impl MapContext {
 ///
 /// Corresponds to [WebGPU `GPUBuffer`](https://gpuweb.github.io/gpuweb/#buffer-interface).
 ///
+/// A `Buffer`'s bytes have "interior mutability": functions like
+/// [`Queue::write_buffer`] or [mapping] a buffer for writing only require a
+/// `&Buffer`, not a `&mut Buffer`, even though they modify its contents. `wgpu`
+/// prevents simultaneous reads and writes of buffer contents using run-time
+/// checks.
+///
+/// [mapping]: Buffer#mapping-buffers
+///
 /// # Mapping buffers
 ///
 /// If a `Buffer` is created with the appropriate [`usage`], it can be *mapped*:
@@ -337,43 +345,83 @@ impl MapContext {
 /// attempt to access overlapping ranges, even for shared access only, these
 /// methods panic.
 ///
-/// For example:
+/// While a buffer is mapped, you may not submit any commands to the GPU that
+/// access it. You may record command buffers that use the buffer, but if you
+/// submit them while the buffer is mapped, submission will panic.
+///
+/// When you are done using the buffer on the CPU, you must call
+/// [`Buffer::unmap`] to make it available for use by the GPU again. All
+/// [`BufferView`] and [`BufferViewMut`] views referring to the buffer must be
+/// dropped before you unmap it; otherwise, [`Buffer::unmap`] will panic.
+///
+/// # Example
+///
+/// If `buffer` was created with [`BufferUsages::MAP_WRITE`], we could fill it
+/// with `f32` values like this:
 ///
 /// ```no_run
+/// # mod bytemuck {
+/// #     pub fn cast_slice_mut(bytes: &mut [u8]) -> &mut [f32] { todo!() }
+/// # }
+/// # let device: wgpu::Device = todo!();
 /// # let buffer: wgpu::Buffer = todo!();
-/// let slice = buffer.slice(10..20);
-/// slice.map_async(wgpu::MapMode::Read, |result| {
-///     match result {
-///         Ok(()) => {
-///             let view = slice.get_mapped_range();
-///             // read data from `view`, which dereferences to `&[u8]`
-///         }
-///         Err(e) => {
-///             // handle mapping error
-///         }
+/// let buffer = std::sync::Arc::new(buffer);
+/// let capturable = buffer.clone();
+/// buffer.slice(..).map_async(wgpu::MapMode::Write, move |result| {
+///     if result.is_ok() {
+///         let mut view = capturable.slice(..).get_mapped_range_mut();
+///         let floats: &mut [f32] = bytemuck::cast_slice_mut(&mut view);
+///         floats.fill(42.0);
+///         drop(view);
+///         capturable.unmap();
 ///     }
 /// });
 /// ```
 ///
-/// This example calls `Buffer::slice` to obtain a [`BufferSlice`] referring to
-/// the second ten bytes of `buffer`. (To obtain access to the entire buffer,
-/// you could call `buffer.slice(..)`.) The code then calls `map_async` to wait
-/// for the buffer to be available, and finally calls `get_mapped_range` on the
-/// slice to actually get at the bytes.
+/// This code takes the following steps:
+///
+/// - First, it moves `buffer` into an [`Arc`], and makes a clone for capture by
+///   the callback passed to [`map_async`]. Since a [`map_async`] callback may be
+///   invoked from another thread, interaction between the callback and the
+///   thread calling [`map_async`] generally requires some sort of shared heap
+///   data like this. In real code, the [`Arc`] would probably own some larger
+///   structure that itself owns `buffer`.
 ///
-/// If using `map_async` directly is awkward, you may find it more convenient to
+/// - Then, it calls [`Buffer::slice`] to make a [`BufferSlice`] referring to
+///   the buffer's entire contents.
+///
+/// - Next, it calls [`BufferSlice::map_async`] to request that the bytes to
+///   which the slice refers be made accessible to the CPU ("mapped"). This may
+///   entail waiting for previously enqueued operations on `buffer` to finish.
+///   Although [`map_async`] itself always returns immediately, it saves the
+///   callback function to be invoked later.
+///
+/// - When some later call to [`Device::poll`] or [`Instance::poll_all`] (not
+///   shown in this example) determines that the buffer is mapped and ready for
+///   the CPU to use, it invokes the callback function.
+///
+/// - The callback function calls [`Buffer::slice`] and then
+///   [`BufferSlice::get_mapped_range_mut`] to obtain a [`BufferViewMut`], which
+///   dereferences to a `&mut [u8]` slice referring to the buffer's bytes.
+///
+/// - It then uses the [`bytemuck`] crate to turn the `&mut [u8]` into a `&mut
+///   [f32]`, and calls the slice [`fill`] method to fill the buffer with a
+///   useful value.
+///
+/// - Finally, the callback drops the view and calls [`Buffer::unmap`] to unmap
+///   the buffer. In real code, the callback would also need to do some sort of
+///   synchronization to let the rest of the program know that it has completed
+///   its work.
+///
+/// If using [`map_async`] directly is awkward, you may find it more convenient to
 /// use [`Queue::write_buffer`] and [`util::DownloadBuffer::read_buffer`].
 /// However, those each have their own tradeoffs; the asynchronous nature of GPU
 /// execution makes it hard to avoid friction altogether.
 ///
-/// While a buffer is mapped, you must not submit any commands to the GPU that
-/// access it. You may record command buffers that use the buffer, but you must
-/// not submit such command buffers.
-///
-/// When you are done using the buffer on the CPU, you must call
-/// [`Buffer::unmap`] to make it available for use by the GPU again. All
-/// [`BufferView`] and [`BufferViewMut`] views referring to the buffer must be
-/// dropped before you unmap it; otherwise, [`Buffer::unmap`] will panic.
+/// [`Arc`]: std::sync::Arc
+/// [`map_async`]: BufferSlice::map_async
+/// [`bytemuck`]: https://crates.io/crates/bytemuck
+/// [`fill`]: slice::fill
 ///
 /// ## Mapping buffers on the web
 ///
@@ -428,15 +476,22 @@ static_assertions::assert_impl_all!(Buffer: Send, Sync);
 /// let whole_buffer_slice = buffer.slice(..);
 /// ```
 ///
-/// A [`BufferSlice`] is nothing more than a reference to the `Buffer` and a
-/// starting and ending position. To access the slice's contents on the CPU, you
-/// must first [map] the buffer, and then call [`BufferSlice::get_mapped_range`]
-/// or [`BufferSlice::get_mapped_range_mut`] to obtain a view of the slice's
-/// contents, which dereferences to a `&[u8]` or `&mut [u8]`.
+/// You can pass buffer slices to methods like [`RenderPass::set_vertex_buffer`]
+/// and [`RenderPass::set_index_buffer`] to indicate which portion of the buffer
+/// a draw call should consult.
+///
+/// To access the slice's contents on the CPU, you must first [map] the buffer,
+/// and then call [`BufferSlice::get_mapped_range`] or
+/// [`BufferSlice::get_mapped_range_mut`] to obtain a view of the slice's
+/// contents. See the documentation on [mapping][map] for more details,
+/// including example code.
 ///
-/// You can also pass buffer slices to methods like
-/// [`RenderPass::set_vertex_buffer`] and [`RenderPass::set_index_buffer`] to
-/// indicate which data a draw call should consume.
+/// Unlike a Rust shared slice `&[T]`, whose existence guarantees that
+/// nobody else is modifying the `T` values to which it refers, a
+/// [`BufferSlice`] doesn't guarantee that the buffer's contents aren't
+/// changing. You can still record and submit commands operating on the
+/// buffer while holding a [`BufferSlice`]. A [`BufferSlice`] simply
+/// represents a certain range of the buffer's bytes.
 ///
 /// The `BufferSlice` type is unique to the Rust API of `wgpu`. In the WebGPU
 /// specification, an offset and size are specified as arguments to each call
@@ -3466,7 +3521,7 @@ fn range_to_offset_size<S: RangeBounds<BufferAddress>>(
     (offset, size)
 }
 
-/// Read only view into a mapped buffer.
+/// A read-only view of a mapped buffer's bytes.
 ///
 /// To get a `BufferView`, first [map] the buffer, and then
 /// call `buffer.slice(range).get_mapped_range()`.
@@ -3475,17 +3530,20 @@ fn range_to_offset_size<S: RangeBounds<BufferAddress>>(
 /// slice methods to access the buffer's contents. It also implements
 /// `AsRef<[u8]>`, if that's more convenient.
 ///
-/// If you try to create overlapping views of a buffer, mutable or
-/// otherwise, `get_mapped_range` will panic.
+/// Before the buffer can be unmapped, all `BufferView`s observing it
+/// must be dropped. Otherwise, the call to [`Buffer::unmap`] will panic.
+///
+/// For example code, see the documentation on [mapping buffers][map].
 ///
 /// [map]: Buffer#mapping-buffers
+/// [`map_async`]: BufferSlice::map_async
 #[derive(Debug)]
 pub struct BufferView<'a> {
     slice: BufferSlice<'a>,
     data: Box<dyn crate::context::BufferMappedRange>,
 }
 
-/// Write only view into mapped buffer.
+/// A write-only view of a mapped buffer's bytes.
 ///
 /// To get a `BufferViewMut`, first [map] the buffer, and then
 /// call `buffer.slice(range).get_mapped_range_mut()`.
@@ -3497,8 +3555,10 @@ pub struct BufferView<'a> {
 /// It is possible to read the buffer using this view, but doing so is not
 /// recommended, as it is likely to be slow.
 ///
-/// If you try to create overlapping views of a buffer, mutable or
-/// otherwise, `get_mapped_range_mut` will panic.
+/// Before the buffer can be unmapped, all `BufferViewMut`s observing it
+/// must be dropped. Otherwise, the call to [`Buffer::unmap`] will panic.
+///
+/// For example code, see the documentation on [mapping buffers][map].
 ///
 /// [map]: Buffer#mapping-buffers
 #[derive(Debug)]
@@ -3608,8 +3668,20 @@ impl Buffer {
         }
     }
 
-    /// Use only a portion of this Buffer for a given operation. Choosing a range with no end
-    /// will use the rest of the buffer. Using a totally unbounded range will use the entire buffer.
+    /// Return a slice of a [`Buffer`]'s bytes.
+    ///
+    /// Return a [`BufferSlice`] referring to the portion of `self`'s contents
+    /// indicated by `bounds`. Regardless of what sort of data `self` stores,
+    /// the start and end of `bounds` are given in bytes.
+    ///
+    /// A [`BufferSlice`] can be used to supply vertex and index data, or to map
+    /// buffer contents for access from the CPU. See the [`BufferSlice`]
+    /// documentation for details.
+    ///
+    /// The `bounds` argument can be half or fully unbounded: for example,
+    /// `buffer.slice(..)` refers to the entire buffer, and `buffer.slice(n..)`
+    /// refers to the portion starting at the `n`th byte and extending to the
+    /// end of the buffer.
     pub fn slice<S: RangeBounds<BufferAddress>>(&self, bounds: S) -> BufferSlice<'_> {
         let (offset, size) = range_to_offset_size(bounds);
         BufferSlice {
@@ -3683,8 +3755,20 @@ impl<'a> BufferSlice<'a> {
         )
     }
 
-    /// Synchronously and immediately map a buffer for reading. If the buffer is not immediately mappable
-    /// through [`BufferDescriptor::mapped_at_creation`] or [`BufferSlice::map_async`], will panic.
+    /// Gain read-only access to the bytes of a [mapped] [`Buffer`].
+    ///
+    /// Return a [`BufferView`] referring to the buffer range represented by
+    /// `self`. See the documentation for [`BufferView`] for details.
+    ///
+    /// # Panics
+    ///
+    /// - This panics if the buffer to which `self` refers is not currently
+    ///   [mapped].
+    ///
+    /// - If you try to create overlapping views of a buffer, mutable or
+    ///   otherwise, `get_mapped_range` will panic.
+    ///
+    /// [mapped]: Buffer#mapping-buffers
     pub fn get_mapped_range(&self) -> BufferView<'a> {
         let end = self.buffer.map_context.lock().add(self.offset, self.size);
         let data = DynContext::buffer_get_mapped_range(
@@ -3717,8 +3801,20 @@ impl<'a> BufferSlice<'a> {
             })
     }
 
-    /// Synchronously and immediately map a buffer for writing. If the buffer is not immediately mappable
-    /// through [`BufferDescriptor::mapped_at_creation`] or [`BufferSlice::map_async`], will panic.
+    /// Gain write access to the bytes of a [mapped] [`Buffer`].
+    ///
+    /// Return a [`BufferViewMut`] referring to the buffer range represented by
+    /// `self`. See the documentation for [`BufferViewMut`] for more details.
+    ///
+    /// # Panics
+    ///
+    /// - This panics if the buffer to which `self` refers is not currently
+    ///   [mapped].
+    ///
+    /// - If you try to create overlapping views of a buffer, mutable or
+    ///   otherwise, `get_mapped_range_mut` will panic.
+    ///
+    /// [mapped]: Buffer#mapping-buffers
     pub fn get_mapped_range_mut(&self) -> BufferViewMut<'a> {
         let end = self.buffer.map_context.lock().add(self.offset, self.size);
         let data = DynContext::buffer_get_mapped_range(

From ee16de1c6337e590f2baad04638759089a7f7bb1 Mon Sep 17 00:00:00 2001
From: Bruce Mitchener <bruce.mitchener@gmail.com>
Date: Thu, 11 Jul 2024 13:44:43 +0700
Subject: [PATCH 006/226] clippy: Fix `doc_lazy_continuation` lints (#5935)

These are in nightly builds.
---
 examples/src/uniform_values/mod.rs |  6 +++---
 naga/src/front/glsl/context.rs     |  2 +-
 naga/src/valid/analyzer.rs         | 13 ++++++-------
 naga/src/valid/handles.rs          |  6 +++---
 wgpu-core/src/track/buffer.rs      |  1 +
 wgpu-core/src/track/mod.rs         |  1 +
 wgpu-core/src/track/texture.rs     |  1 +
 wgpu-hal/src/lib.rs                |  2 +-
 wgpu-types/src/lib.rs              | 12 ++++++------
 9 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/examples/src/uniform_values/mod.rs b/examples/src/uniform_values/mod.rs
index 0adbf4e466..629aba4328 100644
--- a/examples/src/uniform_values/mod.rs
+++ b/examples/src/uniform_values/mod.rs
@@ -6,10 +6,10 @@
 //! 4. the bind group layout is attached to the pipeline layout.
 //! 5. the uniform buffer and the bind group are stored alongside the pipeline.
 //! 6. an instance of `AppState` is created. This variable will be modified
-//! to change parameters in the shader and modified by app events to preform and save
-//! those changes.
+//!    to change parameters in the shader and modified by app events to perform and save
+//!    those changes.
 //! 7. (7a and 7b) the `state` variable created at (6) is modified by commands such
-//! as pressing the arrow keys or zooming in or out.
+//!    as pressing the arrow keys or zooming in or out.
 //! 8. the contents of the `AppState` are loaded into the uniform buffer in preparation.
 //! 9. the bind group with the uniform buffer is attached to the render pass.
 //!
diff --git a/naga/src/front/glsl/context.rs b/naga/src/front/glsl/context.rs
index 6ba7df593a..ee1fcc04ba 100644
--- a/naga/src/front/glsl/context.rs
+++ b/naga/src/front/glsl/context.rs
@@ -393,7 +393,7 @@ impl<'a> Context<'a> {
     /// # Panics
     ///
     /// - If more than one [`StmtContext`] are active at the same time or if the
-    /// previous call didn't use it in lowering.
+    ///   previous call didn't use it in lowering.
     #[must_use]
     pub fn stmt_ctx(&mut self) -> StmtContext {
         self.stmt_ctx.take().unwrap()
diff --git a/naga/src/valid/analyzer.rs b/naga/src/valid/analyzer.rs
index 058d91c63b..0322200493 100644
--- a/naga/src/valid/analyzer.rs
+++ b/naga/src/valid/analyzer.rs
@@ -1,10 +1,9 @@
-/*! Module analyzer.
-
-Figures out the following properties:
-  - control flow uniformity
-  - texture/sampler pairs
-  - expression reference counts
-!*/
+//! Module analyzer.
+//!
+//! Figures out the following properties:
+//! - control flow uniformity
+//! - texture/sampler pairs
+//! - expression reference counts
 
 use super::{ExpressionError, FunctionError, ModuleInfo, ShaderStages, ValidationFlags};
 use crate::span::{AddSpan as _, WithSpan};
diff --git a/naga/src/valid/handles.rs b/naga/src/valid/handles.rs
index 4d46776a71..f8be76d026 100644
--- a/naga/src/valid/handles.rs
+++ b/naga/src/valid/handles.rs
@@ -16,10 +16,10 @@ impl super::Validator {
     /// Validates that all handles within `module` are:
     ///
     /// * Valid, in the sense that they contain indices within each arena structure inside the
-    /// [`crate::Module`] type.
+    ///   [`crate::Module`] type.
     /// * No arena contents contain any items that have forward dependencies; that is, the value
-    ///     associated with a handle only may contain references to handles in the same arena that
-    ///     were constructed before it.
+    ///   associated with a handle only may contain references to handles in the same arena that
+    ///   were constructed before it.
     ///
     /// By validating the above conditions, we free up subsequent logic to assume that handle
     /// accesses are infallible.
diff --git a/wgpu-core/src/track/buffer.rs b/wgpu-core/src/track/buffer.rs
index dbc761687e..a7ec8201fc 100644
--- a/wgpu-core/src/track/buffer.rs
+++ b/wgpu-core/src/track/buffer.rs
@@ -665,6 +665,7 @@ unsafe fn insert_or_merge<A: HalApi>(
 /// - Uses the `start_state_provider` to populate `start_states`
 /// - Uses either `end_state_provider` or `start_state_provider`
 ///   to populate `current_states`.
+///
 /// If the resource is tracked
 /// - Inserts barriers from the state in `current_states`
 ///   to the state provided by `start_state_provider`.
diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs
index f3a94f135b..be3534cdfb 100644
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -141,6 +141,7 @@ impl TrackerIndex {
 /// - IDs of dead handles can be recycled while resources are internally held alive (and tracked).
 /// - The plan is to remove IDs in the long run
 ///   ([#5121](https://github.com/gfx-rs/wgpu/issues/5121)).
+///
 /// In order to produce these tracker indices, there is a shared TrackerIndexAllocator
 /// per resource type. Indices have the same lifetime as the internal resource they
 /// are associated to (alloc happens when creating the resource and free is called when
diff --git a/wgpu-core/src/track/texture.rs b/wgpu-core/src/track/texture.rs
index c3a2468633..d34f47e128 100644
--- a/wgpu-core/src/track/texture.rs
+++ b/wgpu-core/src/track/texture.rs
@@ -1038,6 +1038,7 @@ unsafe fn insert_or_merge<A: HalApi>(
 /// - Uses the `start_state_provider` to populate `start_states`
 /// - Uses either `end_state_provider` or `start_state_provider`
 ///   to populate `current_states`.
+///
 /// If the resource is tracked
 /// - Inserts barriers from the state in `current_states`
 ///   to the state provided by `start_state_provider`.
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index e63f25ab07..9cf83bc7ce 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -2195,7 +2195,7 @@ pub struct BuildAccelerationStructureDescriptor<'a, A: Api> {
 /// - All buffers, buffer addresses and offsets will be ignored.
 /// - The build mode will be ignored.
 /// - Reducing the amount of Instances, Triangle groups or AABB groups (or the number of Triangles/AABBs in corresponding groups),
-/// may result in reduced size requirements.
+///   may result in reduced size requirements.
 /// - Any other change may result in a bigger or smaller size requirement.
 #[derive(Clone, Debug)]
 pub struct GetAccelerationStructureBuildSizesDescriptor<'a, A: Api> {
diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
index d60dbc2873..59b5bf57a0 100644
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@@ -5446,13 +5446,13 @@ pub struct SurfaceConfiguration<V> {
     ///
     /// Typical values range from 3 to 1, but higher values are possible:
     /// * Choose 2 or higher for potentially smoother frame display, as it allows to be at least one frame
-    /// to be queued up. This typically avoids starving the GPU's work queue.
-    /// Higher values are useful for achieving a constant flow of frames to the display under varying load.
+    ///   to be queued up. This typically avoids starving the GPU's work queue.
+    ///   Higher values are useful for achieving a constant flow of frames to the display under varying load.
     /// * Choose 1 for low latency from frame recording to frame display.
-    /// ⚠️ If the backend does not support waiting on present, this will cause the CPU to wait for the GPU
-    /// to finish all work related to the previous frame when calling `wgpu::Surface::get_current_texture`,
-    /// causing CPU-GPU serialization (i.e. when `wgpu::Surface::get_current_texture` returns, the GPU might be idle).
-    /// It is currently not possible to query this. See <https://github.com/gfx-rs/wgpu/issues/2869>.
+    ///   ⚠️ If the backend does not support waiting on present, this will cause the CPU to wait for the GPU
+    ///   to finish all work related to the previous frame when calling `wgpu::Surface::get_current_texture`,
+    ///   causing CPU-GPU serialization (i.e. when `wgpu::Surface::get_current_texture` returns, the GPU might be idle).
+    ///   It is currently not possible to query this. See <https://github.com/gfx-rs/wgpu/issues/2869>.
     /// * A value of 0 is generally not supported and always clamped to a higher value.
     pub desired_maximum_frame_latency: u32,
     /// Specifies how the alpha channel of the textures should be handled during compositing.

From 6349250d74e272692d50307d8b27431fd64e93e6 Mon Sep 17 00:00:00 2001
From: Bruce Mitchener <bruce.mitchener@gmail.com>
Date: Thu, 11 Jul 2024 09:24:01 +0700
Subject: [PATCH 007/226] naga: Fix reference to `serde` feature.

naga doesn't have a `serde` feature, instead having separate
`serialize` and `deserialize` features, so things that want to
modify the serde handling must check for either of those, not
for `serde` itself.
---
 naga/src/back/msl/mod.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/naga/src/back/msl/mod.rs b/naga/src/back/msl/mod.rs
index 37e0b98d77..3b33ee7a71 100644
--- a/naga/src/back/msl/mod.rs
+++ b/naga/src/back/msl/mod.rs
@@ -295,7 +295,10 @@ pub enum VertexFormat {
     /// Four signed ints (i32). `vec4<i32>` in shaders.
     Sint32x4 = 29,
     /// Three unsigned 10-bit integers and one 2-bit integer, packed into a 32-bit integer (u32). [0, 1024] converted to float [0, 1] `vec4<f32>` in shaders.
-    #[cfg_attr(feature = "serde", serde(rename = "unorm10-10-10-2"))]
+    #[cfg_attr(
+        any(feature = "serialize", feature = "deserialize"),
+        serde(rename = "unorm10-10-10-2")
+    )]
     Unorm10_10_10_2 = 34,
 }
 

From 9796766e8e08bd81254b4dbfa83193f8e8a426e1 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Thu, 11 Jul 2024 09:27:47 +0200
Subject: [PATCH 008/226] Changelog cleanup round (#5936)

* Add missing changelog entry for `msl-out-if-target-apple`/`hlsl-out-if-target-windows` feature addition

* minor changelog reorganisation

* fix missing author attribution & pr link in changelog

* import patch release changelogs and remove redundant items

* move pipeline overridable constants from bugfixes to features
---
 CHANGELOG.md | 68 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 46 insertions(+), 22 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e580c550d2..0bfe4577a3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -143,14 +143,10 @@ to pass a compatible surface when targeting WebGL2, having `enumerate_adapters()
 By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
 
 ### New features
-#### Vulkan
-
-- Added a `PipelineCache` resource to allow using Vulkan pipeline caches. By @DJMcNab in [#5319](https://github.com/gfx-rs/wgpu/pull/5319)
-
 #### General
 
 - Added `as_hal` for `Buffer` to access wgpu created buffers form wgpu-hal. By @JasondeWolff in [#5724](https://github.com/gfx-rs/wgpu/pull/5724)
-- Unconsumed vertex outputs are now always allowed. Removed `StageError::InputNotConsumed`, `Features::SHADER_UNUSED_VERTEX_OUTPUT`, and associated validation. By @Imberflur in [#5531](https://github.com/gfx-rs/wgpu/pull/5531)
+- `include_wgsl!` is now callable in const contexts by @9SMTM6 in [#5872](https://github.com/gfx-rs/wgpu/pull/5872)
 - Added memory allocation hints to `DeviceDescriptor` by @nical in [#5875](https://github.com/gfx-rs/wgpu/pull/5875)
     - `MemoryHints::Performance`, the default, favors performance over memory usage and will likely cause large amounts of VRAM to be allocated up-front. This hint is typically good for games.
     - `MemoryHints::MemoryUsage` favors memory usage over performance. This hint is typically useful for smaller applications or UI libraries.
@@ -170,15 +166,21 @@ By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
   -writer.write(&module, &module_info);
   +writer.write(&module, &module_info, None);
   ```
+- HLSL & MSL output can now be added conditionally on the target via the `msl-out-if-target-apple` and `hlsl-out-if-target-windows` features. This is used in wgpu-hal to no longer compile with MSL output when `metal` is enabled & MacOS isn't targeted and no longer compile with HLSL output when `dx12` is enabled & Windows isn't targeted. By @wumpf in [#5919](https://github.com/gfx-rs/wgpu/pull/5919)
+
+#### Vulkan
+
+- Added a `PipelineCache` resource to allow using Vulkan pipeline caches. By @DJMcNab in [#5319](https://github.com/gfx-rs/wgpu/pull/5319)
 
 #### WebGPU
 
-- `include_wgsl!` is now callable in const contexts by @9SMTM6 in [#5872](https://github.com/gfx-rs/wgpu/pull/5872)
+- Added support for pipeline-overridable constants to the WebGPU backend by @DouglasDwyer in [#5688](https://github.com/gfx-rs/wgpu/pull/5688)
 
 ### Changes
 
 #### General
 
+- Unconsumed vertex outputs are now always allowed. Removed `StageError::InputNotConsumed`, `Features::SHADER_UNUSED_VERTEX_OUTPUT`, and associated validation. By @Imberflur in [#5531](https://github.com/gfx-rs/wgpu/pull/5531)
 - Avoid introducing spurious features for optional dependencies. By @bjorn3 in [#5691](https://github.com/gfx-rs/wgpu/pull/5691)
 - `wgpu::Error` is now `Sync`, making it possible to be wrapped in `anyhow::Error` or `eyre::Report`. By @nolanderc in [#5820](https://github.com/gfx-rs/wgpu/pull/5820)
 
@@ -190,6 +192,7 @@ By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
   [target.'cfg(target_vendor = "apple")']
   rustflags = ["-C", "link-args=-weak_framework Metal -weak_framework QuartzCore -weak_framework CoreGraphics"]
   ```
+  By @madsmtm in [#5752](https://github.com/gfx-rs/wgpu/pull/5752)
 
 ### Bug Fixes
 
@@ -198,11 +201,44 @@ By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
 - Ensure render pipelines have at least 1 target. By @ErichDonGubler in [#5715](https://github.com/gfx-rs/wgpu/pull/5715)
 - `wgpu::ComputePass` now internally takes ownership of `QuerySet` for both `wgpu::ComputePassTimestampWrites` as well as timestamp writes and statistics query, fixing crashes when destroying `QuerySet` before ending the pass. By @wumpf in [#5671](https://github.com/gfx-rs/wgpu/pull/5671)
 - Validate resources passed during compute pass recording for mismatching device. By @wumpf in [#5779](https://github.com/gfx-rs/wgpu/pull/5779)
-- Fix a `CommandBuffer` leak. By @cwfitzgerald and @nical in [#5141](https://github.com/gfx-rs/wgpu/pull/5141)
+
+#### GLES / OpenGL
+
+- Fix `ClearColorF`, `ClearColorU` and `ClearColorI` commands being issued before `SetDrawColorBuffers` [#5666](https://github.com/gfx-rs/wgpu/pull/5666)
+- Replace `glClear` with `glClearBufferF` because `glDrawBuffers` requires that the ith buffer must be `COLOR_ATTACHMENTi` or `NONE` [#5666](https://github.com/gfx-rs/wgpu/pull/5666)
+- Return the unmodified version in driver_info. By @Valaphee in [#5753](https://github.com/gfx-rs/wgpu/pull/5753)
+
+#### Naga
+
+- In spv-out don't decorate a `BindingArray`'s type with `Block` if the type is a struct with a runtime array by @Vecvec in [#5776](https://github.com/gfx-rs/wgpu/pull/5776)
+- Add `packed` as a keyword for GLSL by @kjarosh in [#5855](https://github.com/gfx-rs/wgpu/pull/5855)
+
+## v0.20.2 (2024-06-12)
+
+This release force-bumps transitive dependencies of `wgpu` on `wgpu-core` and `wgpu-hal` to 0.21.1, to resolve some undefined behavior observable in the DX12 backend after upgrading to Rust 1.79 or later.
+
+### Bug Fixes
+
+#### General
+
+* Fix a `CommandBuffer` leak. By @cwfitzgerald and @nical in [#5141](https://github.com/gfx-rs/wgpu/pull/5141)
 
 #### DX12
 
-- Do not feed `&""` to `D3DCompile`, by @workingjubilee in [#5812](https://github.com/gfx-rs/wgpu/issues/5812).
+* Do not feed `&""` to `D3DCompile`, by @workingjubilee in [#5812](https://github.com/gfx-rs/wgpu/issues/5812).
+
+## v0.20.1 (2024-06-12)
+
+This release included v0.21.0 of `wgpu-core` and `wgpu-hal`, due to breaking changes needed to solve vulkan validation issues.
+
+### Bug Fixes
+
+This release fixes the validation errors whenever a surface is used with the vulkan backend. By @cwfitzgerald in [#5681](https://github.com/gfx-rs/wgpu/pull/5681).
+
+#### General
+
+- Clean up weak references to texture views and bind groups to prevent memory leaks. By @xiaopengli89 in [#5595](https://github.com/gfx-rs/wgpu/pull/5595).
+- Fix segfault on exit if queue & device are dropped before surface. By @sagudev in [#5640](https://github.com/gfx-rs/wgpu/pull/5640).
 
 #### Metal
 
@@ -212,21 +248,10 @@ By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
 
 - Fix enablement of subgroup ops extension on Vulkan devices that don't support Vulkan 1.3. By @cwfitzgerald in [#5624](https://github.com/gfx-rs/wgpu/pull/5624).
 
-#### GLES / OpenGL
-
-- Fix regression on OpenGL (EGL) where non-sRGB still used sRGB [#5642](https://github.com/gfx-rs/wgpu/pull/5642)
-- Fix `ClearColorF`, `ClearColorU` and `ClearColorI` commands being issued before `SetDrawColorBuffers` [#5666](https://github.com/gfx-rs/wgpu/pull/5666)
-- Replace `glClear` with `glClearBufferF` because `glDrawBuffers` requires that the ith buffer must be `COLOR_ATTACHMENTi` or `NONE` [#5666](https://github.com/gfx-rs/wgpu/pull/5666)
-- Return the unmodified version in driver_info. By @Valaphee in [#5753](https://github.com/gfx-rs/wgpu/pull/5753)
-
-#### WebGPU
 
-- Added support for pipeline-overridable constants to the WebGPU backend by @DouglasDwyer in [#5688](https://github.com/gfx-rs/wgpu/pull/5688)
-
-#### Naga
+#### GLES / OpenGL
 
-- In spv-out don't decorate a `BindingArray`'s type with `Block` if the type is a struct with a runtime array by @Vecvec in [#5776](https://github.com/gfx-rs/wgpu/pull/5776)
-- Add `packed` as a keyword for GLSL by @kjarosh in [#5855](https://github.com/gfx-rs/wgpu/pull/5855)
+- Fix regression on OpenGL (EGL) where non-sRGB still used sRGB [#5642](https://github.com/gfx-rs/wgpu/pull/5642)
 
 ## v0.20.0 (2024-04-28)
 
@@ -379,7 +404,6 @@ By @atlv24 and @cwfitzgerald in [#5154](https://github.com/gfx-rs/wgpu/pull/5154
 - Fix deadlocks caused by recursive read-write lock acquisitions [#5426](https://github.com/gfx-rs/wgpu/pull/5426).
 - Remove exposed C symbols (`extern "C"` + [no_mangle]) from RenderPass & ComputePass recording. By @wumpf in [#5409](https://github.com/gfx-rs/wgpu/pull/5409).
 - Fix surfaces being only compatible with first backend enabled on an instance, causing failures when manually specifying an adapter. By @Wumpf in [#5535](https://github.com/gfx-rs/wgpu/pull/5535).
-- Clean up weak references to texture views and bind groups. By @xiaopengli89 [#5595](https://github.com/gfx-rs/wgpu/pull/5595).
 
 #### Naga
 

From 8fd08ac63885a3c67fb0abae759c7e81c09ae5a0 Mon Sep 17 00:00:00 2001
From: Bruce Mitchener <bruce.mitchener@gmail.com>
Date: Thu, 11 Jul 2024 16:08:11 +0700
Subject: [PATCH 009/226] Use `derive` feature on `serde` rather than
 `serde_derive`

The current code works, but `serde` documents that the feature to
use is `derive` (which then happens to use the `serde_derive`
implicit feature).
---
 wgpu-core/Cargo.toml  | 2 +-
 wgpu-types/Cargo.toml | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml
index 7a2b9ae15c..1acd24f9a3 100644
--- a/wgpu-core/Cargo.toml
+++ b/wgpu-core/Cargo.toml
@@ -114,7 +114,7 @@ profiling = { version = "1", default-features = false }
 raw-window-handle = { version = "0.6", optional = true }
 ron = { version = "0.8", optional = true }
 rustc-hash = "1.1"
-serde = { version = "1", features = ["serde_derive"], optional = true }
+serde = { version = "1", features = ["derive"], optional = true }
 smallvec = "1"
 thiserror = "1"
 
diff --git a/wgpu-types/Cargo.toml b/wgpu-types/Cargo.toml
index 7accda274a..b61ffb6328 100644
--- a/wgpu-types/Cargo.toml
+++ b/wgpu-types/Cargo.toml
@@ -35,7 +35,7 @@ counters = []
 
 [dependencies]
 bitflags = "2"
-serde = { version = "1", features = ["serde_derive"], optional = true }
+serde = { version = "1", features = ["derive"], optional = true }
 
 [target.'cfg(target_arch = "wasm32")'.dependencies]
 js-sys = "0.3.69"
@@ -47,5 +47,5 @@ web-sys = { version = "0.3.69", features = [
 ] }
 
 [dev-dependencies]
-serde = { version = "1", features = ["serde_derive"] }
+serde = { version = "1", features = ["derive"] }
 serde_json = "1.0.119"

From 750f72af8d62caa94310254b13a239838c6b9333 Mon Sep 17 00:00:00 2001
From: Bruce Mitchener <bruce.mitchener@gmail.com>
Date: Thu, 11 Jul 2024 16:33:20 +0700
Subject: [PATCH 010/226] wgc: Use explicit feature for `raw-window-handle`

This helps to prepare for the coming day when explicit features
will be required.
---
 wgpu-core/Cargo.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml
index 1acd24f9a3..2ad5e5a402 100644
--- a/wgpu-core/Cargo.toml
+++ b/wgpu-core/Cargo.toml
@@ -60,6 +60,9 @@ trace = ["dep:ron", "serde", "naga/serialize"]
 ## Enable API replaying
 replay = ["serde", "naga/deserialize"]
 
+## Enable creating instances using raw-window-handle
+raw-window-handle = ["dep:raw-window-handle"]
+
 ## Enable `ShaderModuleSource::Wgsl`
 wgsl = ["naga/wgsl-in"]
 

From 349f182966aff62e02f3ad7e29b375b9f477c93a Mon Sep 17 00:00:00 2001
From: Xiaopeng Li <lixiaopeng.jetspark@bytedance.com>
Date: Thu, 11 Jul 2024 19:07:30 +0800
Subject: [PATCH 011/226] [d3d12] Drop resource before free suballocation
 (#5943)

---
 wgpu-hal/src/dx12/device.rs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index eeb60acbf6..e8104abfbb 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -404,6 +404,9 @@ impl crate::Device for super::Device {
     unsafe fn destroy_buffer(&self, mut buffer: super::Buffer) {
         // Only happens when it's using the windows_rs feature and there's an allocation
         if let Some(alloc) = buffer.allocation.take() {
+            // The resource should be dropped before freeing its suballocation
+            drop(buffer);
+
             super::suballocation::free_buffer_allocation(
                 self,
                 alloc,
@@ -494,6 +497,9 @@ impl crate::Device for super::Device {
 
     unsafe fn destroy_texture(&self, mut texture: super::Texture) {
         if let Some(alloc) = texture.allocation.take() {
+            // The resource should be dropped before freeing its suballocation
+            drop(texture);
+
             super::suballocation::free_texture_allocation(
                 self,
                 alloc,

From a0c185a28c232ee2ab63f72d6fd3a63a3f787309 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Thu, 11 Jul 2024 12:38:16 +0200
Subject: [PATCH 012/226] [wgpu-core] fix trying to create 0 sized staging
 buffers when creating mapped_at_creation buffers

This issue was introduced by fabbca294ae6c4b271688a4d0d3456082f1cba3f.
---
 wgpu-core/src/device/queue.rs    | 52 ++++++++++++++++++--------------
 wgpu-core/src/device/resource.rs | 11 ++++---
 wgpu-core/src/resource.rs        |  2 +-
 wgpu/src/backend/wgpu_core.rs    |  2 +-
 4 files changed, 38 insertions(+), 29 deletions(-)

diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 1a4b2833c9..c118491800 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -314,19 +314,19 @@ impl<A: HalApi> PendingWrites<A> {
 
 pub(crate) fn prepare_staging_buffer<A: HalApi>(
     device: &Arc<Device<A>>,
-    size: wgt::BufferAddress,
+    size: wgt::BufferSize,
     instance_flags: wgt::InstanceFlags,
 ) -> Result<(StagingBuffer<A>, NonNull<u8>), DeviceError> {
     profiling::scope!("prepare_staging_buffer");
     let stage_desc = hal::BufferDescriptor {
         label: hal_label(Some("(wgpu internal) Staging"), instance_flags),
-        size,
+        size: size.get(),
         usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC,
         memory_flags: hal::MemoryFlags::TRANSIENT,
     };
 
     let buffer = unsafe { device.raw().create_buffer(&stage_desc)? };
-    let mapping = unsafe { device.raw().map_buffer(&buffer, 0..size) }?;
+    let mapping = unsafe { device.raw().map_buffer(&buffer, 0..size.get()) }?;
 
     let staging_buffer = StagingBuffer {
         raw: Mutex::new(rank::STAGING_BUFFER_RAW, Some(buffer)),
@@ -344,7 +344,7 @@ impl<A: HalApi> StagingBuffer<A> {
             unsafe {
                 device.flush_mapped_ranges(
                     self.raw.lock().as_ref().unwrap(),
-                    iter::once(0..self.size),
+                    iter::once(0..self.size.get()),
                 )
             };
         }
@@ -435,10 +435,12 @@ impl Global {
 
         buffer.same_device_as(queue.as_ref())?;
 
-        if data_size == 0 {
+        let data_size = if let Some(data_size) = wgt::BufferSize::new(data_size) {
+            data_size
+        } else {
             log::trace!("Ignoring write_buffer of size 0");
             return Ok(());
-        }
+        };
 
         // Platform validation requires that the staging buffer always be
         // freed, even if an error occurs. All paths from here must call
@@ -450,7 +452,11 @@ impl Global {
 
         if let Err(flush_error) = unsafe {
             profiling::scope!("copy");
-            ptr::copy_nonoverlapping(data.as_ptr(), staging_buffer_ptr.as_ptr(), data.len());
+            ptr::copy_nonoverlapping(
+                data.as_ptr(),
+                staging_buffer_ptr.as_ptr(),
+                data_size.get() as usize,
+            );
             staging_buffer.flush(device.raw())
         } {
             pending_writes.consume(staging_buffer);
@@ -487,7 +493,7 @@ impl Global {
         let device = &queue.device;
 
         let (staging_buffer, staging_buffer_ptr) =
-            prepare_staging_buffer(device, buffer_size.get(), device.instance_flags)?;
+            prepare_staging_buffer(device, buffer_size, device.instance_flags)?;
 
         let fid = hub.staging_buffers.prepare(id_in);
         let id = fid.assign(Arc::new(staging_buffer));
@@ -549,7 +555,7 @@ impl Global {
         _queue_id: QueueId,
         buffer_id: id::BufferId,
         buffer_offset: u64,
-        buffer_size: u64,
+        buffer_size: wgt::BufferSize,
     ) -> Result<(), QueueWriteError> {
         profiling::scope!("Queue::validate_write_buffer");
         let hub = A::hub(self);
@@ -568,19 +574,19 @@ impl Global {
         &self,
         buffer: &Buffer<A>,
         buffer_offset: u64,
-        buffer_size: u64,
+        buffer_size: wgt::BufferSize,
     ) -> Result<(), TransferError> {
         buffer.check_usage(wgt::BufferUsages::COPY_DST)?;
-        if buffer_size % wgt::COPY_BUFFER_ALIGNMENT != 0 {
-            return Err(TransferError::UnalignedCopySize(buffer_size));
+        if buffer_size.get() % wgt::COPY_BUFFER_ALIGNMENT != 0 {
+            return Err(TransferError::UnalignedCopySize(buffer_size.get()));
         }
         if buffer_offset % wgt::COPY_BUFFER_ALIGNMENT != 0 {
             return Err(TransferError::UnalignedBufferOffset(buffer_offset));
         }
-        if buffer_offset + buffer_size > buffer.size {
+        if buffer_offset + buffer_size.get() > buffer.size {
             return Err(TransferError::BufferOverrun {
                 start_offset: buffer_offset,
-                end_offset: buffer_offset + buffer_size,
+                end_offset: buffer_offset + buffer_size.get(),
                 buffer_size: buffer.size,
                 side: CopySide::Destination,
             });
@@ -615,16 +621,15 @@ impl Global {
 
         dst.same_device_as(queue.as_ref())?;
 
-        let src_buffer_size = staging_buffer.size;
-        self.queue_validate_write_buffer_impl(&dst, buffer_offset, src_buffer_size)?;
+        self.queue_validate_write_buffer_impl(&dst, buffer_offset, staging_buffer.size)?;
 
         dst.use_at(device.active_submission_index.load(Ordering::Relaxed) + 1);
 
-        let region = wgt::BufferSize::new(src_buffer_size).map(|size| hal::BufferCopy {
+        let region = hal::BufferCopy {
             src_offset: 0,
             dst_offset: buffer_offset,
-            size,
-        });
+            size: staging_buffer.size,
+        };
         let inner_buffer = staging_buffer.raw.lock();
         let barriers = iter::once(hal::BufferBarrier {
             buffer: inner_buffer.as_ref().unwrap(),
@@ -637,7 +642,7 @@ impl Global {
             encoder.copy_buffer_to_buffer(
                 inner_buffer.as_ref().unwrap(),
                 dst_raw,
-                region.into_iter(),
+                iter::once(region),
             );
         }
 
@@ -648,7 +653,7 @@ impl Global {
         {
             dst.initialization_status
                 .write()
-                .drain(buffer_offset..(buffer_offset + src_buffer_size));
+                .drain(buffer_offset..(buffer_offset + staging_buffer.size.get()));
         }
 
         Ok(())
@@ -760,7 +765,8 @@ impl Global {
 
         let block_rows_in_copy =
             (size.depth_or_array_layers - 1) * block_rows_per_image + height_blocks;
-        let stage_size = stage_bytes_per_row as u64 * block_rows_in_copy as u64;
+        let stage_size =
+            wgt::BufferSize::new(stage_bytes_per_row as u64 * block_rows_in_copy as u64).unwrap();
 
         let mut pending_writes = device.pending_writes.lock();
         let pending_writes = pending_writes.as_mut().unwrap();
@@ -836,7 +842,7 @@ impl Global {
                 ptr::copy_nonoverlapping(
                     data.as_ptr().offset(data_layout.offset as isize),
                     staging_buffer_ptr.as_ptr(),
-                    stage_size as usize,
+                    stage_size.get() as usize,
                 );
             }
         } else {
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index dd7bae4ceb..68af1c3426 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -591,13 +591,16 @@ impl<A: HalApi> Device<A> {
             };
             hal::BufferUses::MAP_WRITE
         } else {
-            let (staging_buffer, staging_buffer_ptr) =
-                queue::prepare_staging_buffer(self, desc.size, self.instance_flags)?;
+            let (staging_buffer, staging_buffer_ptr) = queue::prepare_staging_buffer(
+                self,
+                wgt::BufferSize::new(aligned_size).unwrap(),
+                self.instance_flags,
+            )?;
 
             // Zero initialize memory and then mark the buffer as initialized
             // (it's guaranteed that this is the case by the time the buffer is usable)
-            unsafe { std::ptr::write_bytes(staging_buffer_ptr.as_ptr(), 0, buffer.size as usize) };
-            buffer.initialization_status.write().drain(0..buffer.size);
+            unsafe { std::ptr::write_bytes(staging_buffer_ptr.as_ptr(), 0, aligned_size as usize) };
+            buffer.initialization_status.write().drain(0..aligned_size);
 
             *buffer.map_state.lock() = resource::BufferMapState::Init {
                 staging_buffer,
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 8d794e9df4..25664bdc41 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -867,7 +867,7 @@ impl<A: HalApi> Drop for DestroyedBuffer<A> {
 pub struct StagingBuffer<A: HalApi> {
     pub(crate) raw: Mutex<Option<A::Buffer>>,
     pub(crate) device: Arc<Device<A>>,
-    pub(crate) size: wgt::BufferAddress,
+    pub(crate) size: wgt::BufferSize,
     pub(crate) is_coherent: bool,
 }
 
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 961f4970b8..6485aefcde 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -2203,7 +2203,7 @@ impl crate::Context for ContextWgpuCore {
         size: wgt::BufferSize,
     ) -> Option<()> {
         match wgc::gfx_select!(
-            *queue => self.0.queue_validate_write_buffer(*queue, *buffer, offset, size.get())
+            *queue => self.0.queue_validate_write_buffer(*queue, *buffer, offset, size)
         ) {
             Ok(()) => Some(()),
             Err(err) => {

From 4d285d8b616590ebacb2c9358736de992589829c Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Thu, 11 Jul 2024 13:40:29 +0200
Subject: [PATCH 013/226] remove the `Mutex` around `StagingBuffer`'s internal
 buffer

---
 wgpu-core/src/device/queue.rs | 30 ++++++++++--------------------
 wgpu-core/src/lock/rank.rs    |  1 -
 wgpu-core/src/resource.rs     | 32 +++++++++++++++++---------------
 3 files changed, 27 insertions(+), 36 deletions(-)

diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index c118491800..de58014858 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -14,7 +14,7 @@ use crate::{
     hal_label,
     id::{self, QueueId},
     init_tracker::{has_copy_partial_init_tracker_coverage, TextureInitRange},
-    lock::{rank, Mutex, RwLockWriteGuard},
+    lock::RwLockWriteGuard,
     resource::{
         Buffer, BufferAccessError, BufferMapState, DestroyedBuffer, DestroyedResourceError,
         DestroyedTexture, Labeled, ParentDevice, ResourceErrorIdent, StagingBuffer, Texture,
@@ -29,7 +29,8 @@ use hal::{CommandEncoder as _, Device as _, Queue as _};
 use smallvec::SmallVec;
 
 use std::{
-    iter, mem,
+    iter,
+    mem::{self, ManuallyDrop},
     ptr::{self, NonNull},
     sync::{atomic::Ordering, Arc},
 };
@@ -329,7 +330,7 @@ pub(crate) fn prepare_staging_buffer<A: HalApi>(
     let mapping = unsafe { device.raw().map_buffer(&buffer, 0..size.get()) }?;
 
     let staging_buffer = StagingBuffer {
-        raw: Mutex::new(rank::STAGING_BUFFER_RAW, Some(buffer)),
+        raw: ManuallyDrop::new(buffer),
         device: device.clone(),
         size,
         is_coherent: mapping.is_coherent,
@@ -341,14 +342,9 @@ pub(crate) fn prepare_staging_buffer<A: HalApi>(
 impl<A: HalApi> StagingBuffer<A> {
     unsafe fn flush(&self, device: &A::Device) -> Result<(), DeviceError> {
         if !self.is_coherent {
-            unsafe {
-                device.flush_mapped_ranges(
-                    self.raw.lock().as_ref().unwrap(),
-                    iter::once(0..self.size.get()),
-                )
-            };
+            unsafe { device.flush_mapped_ranges(self.raw(), iter::once(0..self.size.get())) };
         }
-        unsafe { device.unmap_buffer(self.raw.lock().as_ref().unwrap())? };
+        unsafe { device.unmap_buffer(self.raw())? };
         Ok(())
     }
 }
@@ -630,20 +626,15 @@ impl Global {
             dst_offset: buffer_offset,
             size: staging_buffer.size,
         };
-        let inner_buffer = staging_buffer.raw.lock();
         let barriers = iter::once(hal::BufferBarrier {
-            buffer: inner_buffer.as_ref().unwrap(),
+            buffer: staging_buffer.raw(),
             usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC,
         })
         .chain(transition.map(|pending| pending.into_hal(&dst, &snatch_guard)));
         let encoder = pending_writes.activate();
         unsafe {
             encoder.transition_buffers(barriers);
-            encoder.copy_buffer_to_buffer(
-                inner_buffer.as_ref().unwrap(),
-                dst_raw,
-                iter::once(region),
-            );
+            encoder.copy_buffer_to_buffer(staging_buffer.raw(), dst_raw, iter::once(region));
         }
 
         pending_writes.insert_buffer(&dst);
@@ -890,9 +881,8 @@ impl Global {
         });
 
         {
-            let inner_buffer = staging_buffer.raw.lock();
             let barrier = hal::BufferBarrier {
-                buffer: inner_buffer.as_ref().unwrap(),
+                buffer: staging_buffer.raw(),
                 usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC,
             };
 
@@ -904,7 +894,7 @@ impl Global {
             unsafe {
                 encoder.transition_textures(transition.map(|pending| pending.into_hal(dst_raw)));
                 encoder.transition_buffers(iter::once(barrier));
-                encoder.copy_buffer_to_texture(inner_buffer.as_ref().unwrap(), dst_raw, regions);
+                encoder.copy_buffer_to_texture(staging_buffer.raw(), dst_raw, regions);
             }
         }
 
diff --git a/wgpu-core/src/lock/rank.rs b/wgpu-core/src/lock/rank.rs
index c01a621aa2..f960b3c028 100644
--- a/wgpu-core/src/lock/rank.rs
+++ b/wgpu-core/src/lock/rank.rs
@@ -143,7 +143,6 @@ define_lock_ranks! {
     rank RENDER_BUNDLE_SCOPE_QUERY_SETS "RenderBundleScope::query_sets" followed by { }
     rank RESOURCE_POOL_INNER "ResourcePool::inner" followed by { }
     rank SHARED_TRACKER_INDEX_ALLOCATOR_INNER "SharedTrackerIndexAllocator::inner" followed by { }
-    rank STAGING_BUFFER_RAW "StagingBuffer::raw" followed by { }
     rank STATELESS_BIND_GROUP_STATE_RESOURCES "StatelessBindGroupState::resources" followed by { }
     rank SURFACE_PRESENTATION "Surface::presentation" followed by { }
     rank TEXTURE_BIND_GROUPS "Texture::bind_groups" followed by { }
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 25664bdc41..dade39a220 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -24,7 +24,8 @@ use thiserror::Error;
 use std::{
     borrow::Borrow,
     fmt::Debug,
-    iter, mem,
+    iter,
+    mem::{self, ManuallyDrop},
     ops::Range,
     ptr::NonNull,
     sync::{
@@ -668,13 +669,11 @@ impl<A: HalApi> Buffer<A> {
                 }
                 let _ = ptr;
 
-                let raw_staging_buffer_guard = staging_buffer.raw.lock();
-                let raw_staging_buffer = raw_staging_buffer_guard.as_ref().unwrap();
                 if !staging_buffer.is_coherent {
                     unsafe {
                         device
                             .raw()
-                            .flush_mapped_ranges(raw_staging_buffer, iter::once(0..self.size));
+                            .flush_mapped_ranges(staging_buffer.raw(), iter::once(0..self.size));
                     }
                 }
 
@@ -685,7 +684,7 @@ impl<A: HalApi> Buffer<A> {
                     size,
                 });
                 let transition_src = hal::BufferBarrier {
-                    buffer: raw_staging_buffer,
+                    buffer: staging_buffer.raw(),
                     usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC,
                 };
                 let transition_dst = hal::BufferBarrier {
@@ -701,13 +700,12 @@ impl<A: HalApi> Buffer<A> {
                     );
                     if self.size > 0 {
                         encoder.copy_buffer_to_buffer(
-                            raw_staging_buffer,
+                            staging_buffer.raw(),
                             raw_buf,
                             region.into_iter(),
                         );
                     }
                 }
-                drop(raw_staging_buffer_guard);
                 pending_writes.consume_temp(queue::TempResource::StagingBuffer(staging_buffer));
                 pending_writes.insert_buffer(self);
             }
@@ -865,21 +863,25 @@ impl<A: HalApi> Drop for DestroyedBuffer<A> {
 /// [`Device::pending_writes`]: crate::device::Device
 #[derive(Debug)]
 pub struct StagingBuffer<A: HalApi> {
-    pub(crate) raw: Mutex<Option<A::Buffer>>,
+    pub(crate) raw: ManuallyDrop<A::Buffer>,
     pub(crate) device: Arc<Device<A>>,
     pub(crate) size: wgt::BufferSize,
     pub(crate) is_coherent: bool,
 }
 
+impl<A: HalApi> StagingBuffer<A> {
+    pub(crate) fn raw(&self) -> &A::Buffer {
+        &self.raw
+    }
+}
+
 impl<A: HalApi> Drop for StagingBuffer<A> {
     fn drop(&mut self) {
-        if let Some(raw) = self.raw.lock().take() {
-            resource_log!("Destroy raw {}", self.error_ident());
-            unsafe {
-                use hal::Device;
-                self.device.raw().destroy_buffer(raw);
-            }
-        }
+        use hal::Device;
+        resource_log!("Destroy raw {}", self.error_ident());
+        // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
+        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
+        unsafe { self.device.raw().destroy_buffer(raw) };
     }
 }
 

From a8b0f2f6a6abdb60cbd9ba58f41d07eaef502666 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Thu, 11 Jul 2024 13:44:45 +0200
Subject: [PATCH 014/226] remove device arg from `StagingBuffer.flush()`

---
 wgpu-core/src/device/queue.rs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index de58014858..8cb99c177c 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -340,7 +340,8 @@ pub(crate) fn prepare_staging_buffer<A: HalApi>(
 }
 
 impl<A: HalApi> StagingBuffer<A> {
-    unsafe fn flush(&self, device: &A::Device) -> Result<(), DeviceError> {
+    unsafe fn flush(&self) -> Result<(), DeviceError> {
+        let device = self.device.raw();
         if !self.is_coherent {
             unsafe { device.flush_mapped_ranges(self.raw(), iter::once(0..self.size.get())) };
         }
@@ -453,7 +454,7 @@ impl Global {
                 staging_buffer_ptr.as_ptr(),
                 data_size.get() as usize,
             );
-            staging_buffer.flush(device.raw())
+            staging_buffer.flush()
         } {
             pending_writes.consume(staging_buffer);
             return Err(flush_error.into());
@@ -528,7 +529,7 @@ impl Global {
         // user. Platform validation requires that the staging buffer always
         // be freed, even if an error occurs. All paths from here must call
         // `device.pending_writes.consume`.
-        if let Err(flush_error) = unsafe { staging_buffer.flush(device.raw()) } {
+        if let Err(flush_error) = unsafe { staging_buffer.flush() } {
             pending_writes.consume(staging_buffer);
             return Err(flush_error.into());
         }
@@ -859,7 +860,7 @@ impl Global {
             }
         }
 
-        if let Err(e) = unsafe { staging_buffer.flush(device.raw()) } {
+        if let Err(e) = unsafe { staging_buffer.flush() } {
             pending_writes.consume(staging_buffer);
             return Err(e.into());
         }

From 9a7f44bf09642cf0df0efc675b579034d877519f Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Thu, 11 Jul 2024 13:51:53 +0200
Subject: [PATCH 015/226] make `StagingBuffer` creation a method

---
 wgpu-core/src/device/queue.rs    | 45 +++-----------------------------
 wgpu-core/src/device/resource.rs |  6 ++---
 wgpu-core/src/resource.rs        | 41 +++++++++++++++++++++++++++--
 3 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 8cb99c177c..093aa571bb 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -30,7 +30,7 @@ use smallvec::SmallVec;
 
 use std::{
     iter,
-    mem::{self, ManuallyDrop},
+    mem::{self},
     ptr::{self, NonNull},
     sync::{atomic::Ordering, Arc},
 };
@@ -313,43 +313,6 @@ impl<A: HalApi> PendingWrites<A> {
     }
 }
 
-pub(crate) fn prepare_staging_buffer<A: HalApi>(
-    device: &Arc<Device<A>>,
-    size: wgt::BufferSize,
-    instance_flags: wgt::InstanceFlags,
-) -> Result<(StagingBuffer<A>, NonNull<u8>), DeviceError> {
-    profiling::scope!("prepare_staging_buffer");
-    let stage_desc = hal::BufferDescriptor {
-        label: hal_label(Some("(wgpu internal) Staging"), instance_flags),
-        size: size.get(),
-        usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC,
-        memory_flags: hal::MemoryFlags::TRANSIENT,
-    };
-
-    let buffer = unsafe { device.raw().create_buffer(&stage_desc)? };
-    let mapping = unsafe { device.raw().map_buffer(&buffer, 0..size.get()) }?;
-
-    let staging_buffer = StagingBuffer {
-        raw: ManuallyDrop::new(buffer),
-        device: device.clone(),
-        size,
-        is_coherent: mapping.is_coherent,
-    };
-
-    Ok((staging_buffer, mapping.ptr))
-}
-
-impl<A: HalApi> StagingBuffer<A> {
-    unsafe fn flush(&self) -> Result<(), DeviceError> {
-        let device = self.device.raw();
-        if !self.is_coherent {
-            unsafe { device.flush_mapped_ranges(self.raw(), iter::once(0..self.size.get())) };
-        }
-        unsafe { device.unmap_buffer(self.raw())? };
-        Ok(())
-    }
-}
-
 #[derive(Clone, Debug, Error)]
 #[error("Queue is invalid")]
 pub struct InvalidQueue;
@@ -443,7 +406,7 @@ impl Global {
         // freed, even if an error occurs. All paths from here must call
         // `device.pending_writes.consume`.
         let (staging_buffer, staging_buffer_ptr) =
-            prepare_staging_buffer(device, data_size, device.instance_flags)?;
+            StagingBuffer::new(device, data_size, device.instance_flags)?;
         let mut pending_writes = device.pending_writes.lock();
         let pending_writes = pending_writes.as_mut().unwrap();
 
@@ -490,7 +453,7 @@ impl Global {
         let device = &queue.device;
 
         let (staging_buffer, staging_buffer_ptr) =
-            prepare_staging_buffer(device, buffer_size, device.instance_flags)?;
+            StagingBuffer::new(device, buffer_size, device.instance_flags)?;
 
         let fid = hub.staging_buffers.prepare(id_in);
         let id = fid.assign(Arc::new(staging_buffer));
@@ -825,7 +788,7 @@ impl Global {
         // freed, even if an error occurs. All paths from here must call
         // `device.pending_writes.consume`.
         let (staging_buffer, staging_buffer_ptr) =
-            prepare_staging_buffer(device, stage_size, device.instance_flags)?;
+            StagingBuffer::new(device, stage_size, device.instance_flags)?;
 
         if stage_bytes_per_row == bytes_per_row {
             profiling::scope!("copy aligned");
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 68af1c3426..6a8f0a5c3b 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -22,8 +22,8 @@ use crate::{
     pipeline,
     pool::ResourcePool,
     resource::{
-        self, Buffer, Labeled, ParentDevice, QuerySet, Sampler, Texture, TextureView,
-        TextureViewNotRenderableReason, TrackingData,
+        self, Buffer, Labeled, ParentDevice, QuerySet, Sampler, StagingBuffer, Texture,
+        TextureView, TextureViewNotRenderableReason, TrackingData,
     },
     resource_log,
     snatch::{SnatchGuard, SnatchLock, Snatchable},
@@ -591,7 +591,7 @@ impl<A: HalApi> Device<A> {
             };
             hal::BufferUses::MAP_WRITE
         } else {
-            let (staging_buffer, staging_buffer_ptr) = queue::prepare_staging_buffer(
+            let (staging_buffer, staging_buffer_ptr) = StagingBuffer::new(
                 self,
                 wgt::BufferSize::new(aligned_size).unwrap(),
                 self.instance_flags,
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index dade39a220..08941d6a2e 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -863,16 +863,53 @@ impl<A: HalApi> Drop for DestroyedBuffer<A> {
 /// [`Device::pending_writes`]: crate::device::Device
 #[derive(Debug)]
 pub struct StagingBuffer<A: HalApi> {
-    pub(crate) raw: ManuallyDrop<A::Buffer>,
-    pub(crate) device: Arc<Device<A>>,
+    raw: ManuallyDrop<A::Buffer>,
+    device: Arc<Device<A>>,
     pub(crate) size: wgt::BufferSize,
     pub(crate) is_coherent: bool,
 }
 
 impl<A: HalApi> StagingBuffer<A> {
+    pub(crate) fn new(
+        device: &Arc<Device<A>>,
+        size: wgt::BufferSize,
+        instance_flags: wgt::InstanceFlags,
+    ) -> Result<(Self, NonNull<u8>), DeviceError> {
+        use hal::Device;
+        profiling::scope!("StagingBuffer::new");
+        let stage_desc = hal::BufferDescriptor {
+            label: crate::hal_label(Some("(wgpu internal) Staging"), instance_flags),
+            size: size.get(),
+            usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC,
+            memory_flags: hal::MemoryFlags::TRANSIENT,
+        };
+
+        let buffer = unsafe { device.raw().create_buffer(&stage_desc)? };
+        let mapping = unsafe { device.raw().map_buffer(&buffer, 0..size.get()) }?;
+
+        let staging_buffer = StagingBuffer {
+            raw: ManuallyDrop::new(buffer),
+            device: device.clone(),
+            size,
+            is_coherent: mapping.is_coherent,
+        };
+
+        Ok((staging_buffer, mapping.ptr))
+    }
+
     pub(crate) fn raw(&self) -> &A::Buffer {
         &self.raw
     }
+
+    pub(crate) unsafe fn flush(&self) -> Result<(), DeviceError> {
+        use hal::Device;
+        let device = self.device.raw();
+        if !self.is_coherent {
+            unsafe { device.flush_mapped_ranges(self.raw(), iter::once(0..self.size.get())) };
+        }
+        unsafe { device.unmap_buffer(self.raw())? };
+        Ok(())
+    }
 }
 
 impl<A: HalApi> Drop for StagingBuffer<A> {

From 5e2df1406d7f2880e801d843fad1eaadf95c0e9e Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Thu, 11 Jul 2024 14:03:04 +0200
Subject: [PATCH 016/226] use `StagingBuffer.flush()` in `Buffer.unmap_inner()`

The staging buffer should always have been unmapped here, since it is used on the GPU after this point; previously it was only flushed.
---
 wgpu-core/src/device/queue.rs |  2 +-
 wgpu-core/src/resource.rs     | 18 ++++++++----------
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 093aa571bb..36bff29bb3 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -256,7 +256,7 @@ impl<A: HalApi> PendingWrites<A> {
         self.temp_resources.push(resource);
     }
 
-    fn consume(&mut self, buffer: StagingBuffer<A>) {
+    pub fn consume(&mut self, buffer: StagingBuffer<A>) {
         self.temp_resources
             .push(TempResource::StagingBuffer(buffer));
     }
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 08941d6a2e..794b219a91 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -669,12 +669,12 @@ impl<A: HalApi> Buffer<A> {
                 }
                 let _ = ptr;
 
-                if !staging_buffer.is_coherent {
-                    unsafe {
-                        device
-                            .raw()
-                            .flush_mapped_ranges(staging_buffer.raw(), iter::once(0..self.size));
-                    }
+                let mut pending_writes = device.pending_writes.lock();
+                let pending_writes = pending_writes.as_mut().unwrap();
+
+                if let Err(e) = unsafe { staging_buffer.flush() } {
+                    pending_writes.consume(staging_buffer);
+                    return Err(e.into());
                 }
 
                 self.use_at(device.active_submission_index.load(Ordering::Relaxed) + 1);
@@ -691,8 +691,6 @@ impl<A: HalApi> Buffer<A> {
                     buffer: raw_buf,
                     usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST,
                 };
-                let mut pending_writes = device.pending_writes.lock();
-                let pending_writes = pending_writes.as_mut().unwrap();
                 let encoder = pending_writes.activate();
                 unsafe {
                     encoder.transition_buffers(
@@ -706,7 +704,7 @@ impl<A: HalApi> Buffer<A> {
                         );
                     }
                 }
-                pending_writes.consume_temp(queue::TempResource::StagingBuffer(staging_buffer));
+                pending_writes.consume(staging_buffer);
                 pending_writes.insert_buffer(self);
             }
             BufferMapState::Idle => {
@@ -866,7 +864,7 @@ pub struct StagingBuffer<A: HalApi> {
     raw: ManuallyDrop<A::Buffer>,
     device: Arc<Device<A>>,
     pub(crate) size: wgt::BufferSize,
-    pub(crate) is_coherent: bool,
+    is_coherent: bool,
 }
 
 impl<A: HalApi> StagingBuffer<A> {

From 26f65ddffd7b805fa787c96f373e6a898c5538f0 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Thu, 11 Jul 2024 16:45:40 +0200
Subject: [PATCH 017/226] [wgpu-hal] remove return type from
 `Device.unmap_buffer()`

It's already documented that a buffer must currently be mapped in order to be unmapped.
Vulkan was the only backend that returned an OOM error when `Buffer.block` was missing, but `Device.map_buffer()` already returns an error in that case.
---
 wgpu-core/src/device/global.rs                | 10 ++--------
 wgpu-core/src/device/queue.rs                 | 17 ++++-------------
 wgpu-core/src/resource.rs                     | 17 ++++-------------
 wgpu-hal/examples/halmark/main.rs             |  6 +++---
 wgpu-hal/examples/ray-traced-triangle/main.rs | 10 +++++-----
 wgpu-hal/src/dx12/device.rs                   |  3 +--
 wgpu-hal/src/empty.rs                         |  4 +---
 wgpu-hal/src/gles/device.rs                   |  3 +--
 wgpu-hal/src/lib.rs                           |  2 +-
 wgpu-hal/src/metal/device.rs                  |  4 +---
 wgpu-hal/src/vulkan/device.rs                 |  6 ++----
 11 files changed, 25 insertions(+), 57 deletions(-)

diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 499ba6ecca..94b59ad6cb 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -321,10 +321,7 @@ impl Global {
                     .raw()
                     .flush_mapped_ranges(raw_buf, iter::once(offset..offset + data.len() as u64));
             }
-            device
-                .raw()
-                .unmap_buffer(raw_buf)
-                .map_err(DeviceError::from)?;
+            device.raw().unmap_buffer(raw_buf);
         }
 
         Ok(())
@@ -370,10 +367,7 @@ impl Global {
                 );
             }
             ptr::copy_nonoverlapping(mapping.ptr.as_ptr(), data.as_mut_ptr(), data.len());
-            device
-                .raw()
-                .unmap_buffer(raw_buf)
-                .map_err(DeviceError::from)?;
+            device.raw().unmap_buffer(raw_buf);
         }
 
         Ok(())
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 36bff29bb3..df87ecd9c4 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -410,17 +410,14 @@ impl Global {
         let mut pending_writes = device.pending_writes.lock();
         let pending_writes = pending_writes.as_mut().unwrap();
 
-        if let Err(flush_error) = unsafe {
+        unsafe {
             profiling::scope!("copy");
             ptr::copy_nonoverlapping(
                 data.as_ptr(),
                 staging_buffer_ptr.as_ptr(),
                 data_size.get() as usize,
             );
-            staging_buffer.flush()
-        } {
-            pending_writes.consume(staging_buffer);
-            return Err(flush_error.into());
+            staging_buffer.flush();
         }
 
         let result = self.queue_write_staging_buffer_impl(
@@ -492,10 +489,7 @@ impl Global {
         // user. Platform validation requires that the staging buffer always
         // be freed, even if an error occurs. All paths from here must call
         // `device.pending_writes.consume`.
-        if let Err(flush_error) = unsafe { staging_buffer.flush() } {
-            pending_writes.consume(staging_buffer);
-            return Err(flush_error.into());
-        }
+        unsafe { staging_buffer.flush() };
 
         let result = self.queue_write_staging_buffer_impl(
             &queue,
@@ -823,10 +817,7 @@ impl Global {
             }
         }
 
-        if let Err(e) = unsafe { staging_buffer.flush() } {
-            pending_writes.consume(staging_buffer);
-            return Err(e.into());
-        }
+        unsafe { staging_buffer.flush() };
 
         let regions = (0..array_layer_count).map(|rel_array_layer| {
             let mut texture_base = dst_base.clone();
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 794b219a91..0b1f15cc49 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -672,10 +672,7 @@ impl<A: HalApi> Buffer<A> {
                 let mut pending_writes = device.pending_writes.lock();
                 let pending_writes = pending_writes.as_mut().unwrap();
 
-                if let Err(e) = unsafe { staging_buffer.flush() } {
-                    pending_writes.consume(staging_buffer);
-                    return Err(e.into());
-                }
+                unsafe { staging_buffer.flush() };
 
                 self.use_at(device.active_submission_index.load(Ordering::Relaxed) + 1);
                 let region = wgt::BufferSize::new(self.size).map(|size| hal::BufferCopy {
@@ -730,12 +727,7 @@ impl<A: HalApi> Buffer<A> {
                     }
                     let _ = (ptr, range);
                 }
-                unsafe {
-                    device
-                        .raw()
-                        .unmap_buffer(raw_buf)
-                        .map_err(DeviceError::from)?
-                };
+                unsafe { device.raw().unmap_buffer(raw_buf) };
             }
         }
         Ok(None)
@@ -899,14 +891,13 @@ impl<A: HalApi> StagingBuffer<A> {
         &self.raw
     }
 
-    pub(crate) unsafe fn flush(&self) -> Result<(), DeviceError> {
+    pub(crate) unsafe fn flush(&self) {
         use hal::Device;
         let device = self.device.raw();
         if !self.is_coherent {
             unsafe { device.flush_mapped_ranges(self.raw(), iter::once(0..self.size.get())) };
         }
-        unsafe { device.unmap_buffer(self.raw())? };
-        Ok(())
+        unsafe { device.unmap_buffer(self.raw()) };
     }
 }
 
diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs
index d61cec7380..a657b161b4 100644
--- a/wgpu-hal/examples/halmark/main.rs
+++ b/wgpu-hal/examples/halmark/main.rs
@@ -301,7 +301,7 @@ impl<A: hal::Api> Example<A> {
                 mapping.ptr.as_ptr(),
                 texture_data.len(),
             );
-            device.unmap_buffer(&staging_buffer).unwrap();
+            device.unmap_buffer(&staging_buffer);
             assert!(mapping.is_coherent);
         }
 
@@ -410,7 +410,7 @@ impl<A: hal::Api> Example<A> {
                 mapping.ptr.as_ptr(),
                 mem::size_of::<Globals>(),
             );
-            device.unmap_buffer(&buffer).unwrap();
+            device.unmap_buffer(&buffer);
             assert!(mapping.is_coherent);
             buffer
         };
@@ -647,7 +647,7 @@ impl<A: hal::Api> Example<A> {
                     size,
                 );
                 assert!(mapping.is_coherent);
-                self.device.unmap_buffer(&self.local_buffer).unwrap();
+                self.device.unmap_buffer(&self.local_buffer);
             }
         }
 
diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs
index e6481aae64..1cde9fa251 100644
--- a/wgpu-hal/examples/ray-traced-triangle/main.rs
+++ b/wgpu-hal/examples/ray-traced-triangle/main.rs
@@ -413,7 +413,7 @@ impl<A: hal::Api> Example<A> {
                 mapping.ptr.as_ptr(),
                 vertices_size_in_bytes,
             );
-            device.unmap_buffer(&vertices_buffer).unwrap();
+            device.unmap_buffer(&vertices_buffer);
             assert!(mapping.is_coherent);
 
             vertices_buffer
@@ -438,7 +438,7 @@ impl<A: hal::Api> Example<A> {
                 mapping.ptr.as_ptr(),
                 indices_size_in_bytes,
             );
-            device.unmap_buffer(&indices_buffer).unwrap();
+            device.unmap_buffer(&indices_buffer);
             assert!(mapping.is_coherent);
 
             indices_buffer
@@ -537,7 +537,7 @@ impl<A: hal::Api> Example<A> {
                 mapping.ptr.as_ptr(),
                 uniforms_size,
             );
-            device.unmap_buffer(&uniform_buffer).unwrap();
+            device.unmap_buffer(&uniform_buffer);
             assert!(mapping.is_coherent);
             uniform_buffer
         };
@@ -680,7 +680,7 @@ impl<A: hal::Api> Example<A> {
                 mapping.ptr.as_ptr(),
                 instances_buffer_size,
             );
-            device.unmap_buffer(&instances_buffer).unwrap();
+            device.unmap_buffer(&instances_buffer);
             assert!(mapping.is_coherent);
 
             instances_buffer
@@ -848,7 +848,7 @@ impl<A: hal::Api> Example<A> {
                 mapping.ptr.as_ptr(),
                 instances_buffer_size,
             );
-            self.device.unmap_buffer(&self.instances_buffer).unwrap();
+            self.device.unmap_buffer(&self.instances_buffer);
             assert!(mapping.is_coherent);
         }
 
diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index e8104abfbb..27b3002431 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -437,9 +437,8 @@ impl crate::Device for super::Device {
         })
     }
 
-    unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), DeviceError> {
+    unsafe fn unmap_buffer(&self, buffer: &super::Buffer) {
         unsafe { (*buffer.resource).Unmap(0, ptr::null()) };
-        Ok(())
     }
 
     unsafe fn flush_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {}
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 5d6c42ab85..89a04ce48b 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -151,9 +151,7 @@ impl crate::Device for Context {
     ) -> DeviceResult<crate::BufferMapping> {
         Err(crate::DeviceError::Lost)
     }
-    unsafe fn unmap_buffer(&self, buffer: &Resource) -> DeviceResult<()> {
-        Ok(())
-    }
+    unsafe fn unmap_buffer(&self, buffer: &Resource) {}
     unsafe fn flush_mapped_ranges<I>(&self, buffer: &Resource, ranges: I) {}
     unsafe fn invalidate_mapped_ranges<I>(&self, buffer: &Resource, ranges: I) {}
 
diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs
index 66b34bcd13..67d0a29713 100644
--- a/wgpu-hal/src/gles/device.rs
+++ b/wgpu-hal/src/gles/device.rs
@@ -691,7 +691,7 @@ impl crate::Device for super::Device {
             is_coherent,
         })
     }
-    unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> {
+    unsafe fn unmap_buffer(&self, buffer: &super::Buffer) {
         if let Some(raw) = buffer.raw {
             if buffer.data.is_none() {
                 let gl = &self.shared.context.lock();
@@ -700,7 +700,6 @@ impl crate::Device for super::Device {
                 unsafe { gl.bind_buffer(buffer.target, None) };
             }
         }
-        Ok(())
     }
     unsafe fn flush_mapped_ranges<I>(&self, buffer: &super::Buffer, ranges: I)
     where
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 9cf83bc7ce..36dc9b0689 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -728,7 +728,7 @@ pub trait Device: WasmNotSendSync {
     /// # Safety
     ///
     /// - The given `buffer` must be currently mapped.
-    unsafe fn unmap_buffer(&self, buffer: &<Self::A as Api>::Buffer) -> Result<(), DeviceError>;
+    unsafe fn unmap_buffer(&self, buffer: &<Self::A as Api>::Buffer);
 
     /// Indicate that CPU writes to mapped buffer memory should be made visible to the GPU.
     ///
diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs
index 6af82e1e62..efafc98e1b 100644
--- a/wgpu-hal/src/metal/device.rs
+++ b/wgpu-hal/src/metal/device.rs
@@ -370,9 +370,7 @@ impl crate::Device for super::Device {
         })
     }
 
-    unsafe fn unmap_buffer(&self, _buffer: &super::Buffer) -> DeviceResult<()> {
-        Ok(())
-    }
+    unsafe fn unmap_buffer(&self, _buffer: &super::Buffer) {}
     unsafe fn flush_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {}
     unsafe fn invalidate_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {}
 
diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs
index d088314609..86bfa56442 100644
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@@ -951,12 +951,10 @@ impl crate::Device for super::Device {
             Err(crate::DeviceError::OutOfMemory)
         }
     }
-    unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> {
+    unsafe fn unmap_buffer(&self, buffer: &super::Buffer) {
+        // We can only unmap the buffer if it was already mapped successfully.
         if let Some(ref block) = buffer.block {
             unsafe { block.lock().unmap(&*self.shared) };
-            Ok(())
-        } else {
-            Err(crate::DeviceError::OutOfMemory)
         }
     }
 

From 2f282cdd067979736dd32b84378676def8328644 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Thu, 11 Jul 2024 16:53:38 +0200
Subject: [PATCH 018/226] remove `instance_flags` arg from `StagingBuffer::new`

---
 wgpu-core/src/device/queue.rs    | 9 +++------
 wgpu-core/src/device/resource.rs | 7 ++-----
 wgpu-core/src/resource.rs        | 3 +--
 3 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index df87ecd9c4..aa4061f81b 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -405,8 +405,7 @@ impl Global {
         // Platform validation requires that the staging buffer always be
         // freed, even if an error occurs. All paths from here must call
         // `device.pending_writes.consume`.
-        let (staging_buffer, staging_buffer_ptr) =
-            StagingBuffer::new(device, data_size, device.instance_flags)?;
+        let (staging_buffer, staging_buffer_ptr) = StagingBuffer::new(device, data_size)?;
         let mut pending_writes = device.pending_writes.lock();
         let pending_writes = pending_writes.as_mut().unwrap();
 
@@ -449,8 +448,7 @@ impl Global {
 
         let device = &queue.device;
 
-        let (staging_buffer, staging_buffer_ptr) =
-            StagingBuffer::new(device, buffer_size, device.instance_flags)?;
+        let (staging_buffer, staging_buffer_ptr) = StagingBuffer::new(device, buffer_size)?;
 
         let fid = hub.staging_buffers.prepare(id_in);
         let id = fid.assign(Arc::new(staging_buffer));
@@ -781,8 +779,7 @@ impl Global {
         // Platform validation requires that the staging buffer always be
         // freed, even if an error occurs. All paths from here must call
         // `device.pending_writes.consume`.
-        let (staging_buffer, staging_buffer_ptr) =
-            StagingBuffer::new(device, stage_size, device.instance_flags)?;
+        let (staging_buffer, staging_buffer_ptr) = StagingBuffer::new(device, stage_size)?;
 
         if stage_bytes_per_row == bytes_per_row {
             profiling::scope!("copy aligned");
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 6a8f0a5c3b..3e3e5f9049 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -591,11 +591,8 @@ impl<A: HalApi> Device<A> {
             };
             hal::BufferUses::MAP_WRITE
         } else {
-            let (staging_buffer, staging_buffer_ptr) = StagingBuffer::new(
-                self,
-                wgt::BufferSize::new(aligned_size).unwrap(),
-                self.instance_flags,
-            )?;
+            let (staging_buffer, staging_buffer_ptr) =
+                StagingBuffer::new(self, wgt::BufferSize::new(aligned_size).unwrap())?;
 
             // Zero initialize memory and then mark the buffer as initialized
             // (it's guaranteed that this is the case by the time the buffer is usable)
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 0b1f15cc49..927f741b18 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -863,12 +863,11 @@ impl<A: HalApi> StagingBuffer<A> {
     pub(crate) fn new(
         device: &Arc<Device<A>>,
         size: wgt::BufferSize,
-        instance_flags: wgt::InstanceFlags,
     ) -> Result<(Self, NonNull<u8>), DeviceError> {
         use hal::Device;
         profiling::scope!("StagingBuffer::new");
         let stage_desc = hal::BufferDescriptor {
-            label: crate::hal_label(Some("(wgpu internal) Staging"), instance_flags),
+            label: crate::hal_label(Some("(wgpu internal) Staging"), device.instance_flags),
             size: size.get(),
             usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC,
             memory_flags: hal::MemoryFlags::TRANSIENT,

From 347d902bcbc8f968032dc49160c7310689ce0808 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Fri, 12 Jul 2024 11:15:50 +0200
Subject: [PATCH 019/226] introduce `FlushedStagingBuffer`

---
 wgpu-core/src/device/queue.rs | 20 ++++++------
 wgpu-core/src/resource.rs     | 60 +++++++++++++++++++++--------------
 2 files changed, 46 insertions(+), 34 deletions(-)

diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index aa4061f81b..05f58f2078 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -17,8 +17,8 @@ use crate::{
     lock::RwLockWriteGuard,
     resource::{
         Buffer, BufferAccessError, BufferMapState, DestroyedBuffer, DestroyedResourceError,
-        DestroyedTexture, Labeled, ParentDevice, ResourceErrorIdent, StagingBuffer, Texture,
-        TextureInner, Trackable,
+        DestroyedTexture, FlushedStagingBuffer, Labeled, ParentDevice, ResourceErrorIdent,
+        StagingBuffer, Texture, TextureInner, Trackable,
     },
     resource_log,
     track::{self, Tracker, TrackerIndex},
@@ -136,7 +136,7 @@ pub struct WrappedSubmissionIndex {
 ///   submission, to be freed when it completes
 #[derive(Debug)]
 pub enum TempResource<A: HalApi> {
-    StagingBuffer(StagingBuffer<A>),
+    StagingBuffer(FlushedStagingBuffer<A>),
     DestroyedBuffer(DestroyedBuffer<A>),
     DestroyedTexture(DestroyedTexture<A>),
 }
@@ -256,7 +256,7 @@ impl<A: HalApi> PendingWrites<A> {
         self.temp_resources.push(resource);
     }
 
-    pub fn consume(&mut self, buffer: StagingBuffer<A>) {
+    pub fn consume(&mut self, buffer: FlushedStagingBuffer<A>) {
         self.temp_resources
             .push(TempResource::StagingBuffer(buffer));
     }
@@ -409,15 +409,15 @@ impl Global {
         let mut pending_writes = device.pending_writes.lock();
         let pending_writes = pending_writes.as_mut().unwrap();
 
-        unsafe {
+        let staging_buffer = unsafe {
             profiling::scope!("copy");
             ptr::copy_nonoverlapping(
                 data.as_ptr(),
                 staging_buffer_ptr.as_ptr(),
                 data_size.get() as usize,
             );
-            staging_buffer.flush();
-        }
+            staging_buffer.flush()
+        };
 
         let result = self.queue_write_staging_buffer_impl(
             &queue,
@@ -487,7 +487,7 @@ impl Global {
         // user. Platform validation requires that the staging buffer always
         // be freed, even if an error occurs. All paths from here must call
         // `device.pending_writes.consume`.
-        unsafe { staging_buffer.flush() };
+        let staging_buffer = unsafe { staging_buffer.flush() };
 
         let result = self.queue_write_staging_buffer_impl(
             &queue,
@@ -552,7 +552,7 @@ impl Global {
         queue: &Arc<Queue<A>>,
         device: &Arc<Device<A>>,
         pending_writes: &mut PendingWrites<A>,
-        staging_buffer: &StagingBuffer<A>,
+        staging_buffer: &FlushedStagingBuffer<A>,
         buffer_id: id::BufferId,
         buffer_offset: u64,
     ) -> Result<(), QueueWriteError> {
@@ -814,7 +814,7 @@ impl Global {
             }
         }
 
-        unsafe { staging_buffer.flush() };
+        let staging_buffer = unsafe { staging_buffer.flush() };
 
         let regions = (0..array_layer_count).map(|rel_array_layer| {
             let mut texture_base = dst_base.clone();
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 927f741b18..612d68ff61 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -672,7 +672,7 @@ impl<A: HalApi> Buffer<A> {
                 let mut pending_writes = device.pending_writes.lock();
                 let pending_writes = pending_writes.as_mut().unwrap();
 
-                unsafe { staging_buffer.flush() };
+                let staging_buffer = unsafe { staging_buffer.flush() };
 
                 self.use_at(device.active_submission_index.load(Ordering::Relaxed) + 1);
                 let region = wgt::BufferSize::new(self.size).map(|size| hal::BufferCopy {
@@ -853,7 +853,7 @@ impl<A: HalApi> Drop for DestroyedBuffer<A> {
 /// [`Device::pending_writes`]: crate::device::Device
 #[derive(Debug)]
 pub struct StagingBuffer<A: HalApi> {
-    raw: ManuallyDrop<A::Buffer>,
+    raw: A::Buffer,
     device: Arc<Device<A>>,
     pub(crate) size: wgt::BufferSize,
     is_coherent: bool,
@@ -873,11 +873,11 @@ impl<A: HalApi> StagingBuffer<A> {
             memory_flags: hal::MemoryFlags::TRANSIENT,
         };
 
-        let buffer = unsafe { device.raw().create_buffer(&stage_desc)? };
-        let mapping = unsafe { device.raw().map_buffer(&buffer, 0..size.get()) }?;
+        let raw = unsafe { device.raw().create_buffer(&stage_desc)? };
+        let mapping = unsafe { device.raw().map_buffer(&raw, 0..size.get()) }?;
 
         let staging_buffer = StagingBuffer {
-            raw: ManuallyDrop::new(buffer),
+            raw,
             device: device.clone(),
             size,
             is_coherent: mapping.is_coherent,
@@ -886,40 +886,52 @@ impl<A: HalApi> StagingBuffer<A> {
         Ok((staging_buffer, mapping.ptr))
     }
 
-    pub(crate) fn raw(&self) -> &A::Buffer {
-        &self.raw
-    }
-
-    pub(crate) unsafe fn flush(&self) {
+    pub(crate) fn flush(self) -> FlushedStagingBuffer<A> {
         use hal::Device;
         let device = self.device.raw();
         if !self.is_coherent {
-            unsafe { device.flush_mapped_ranges(self.raw(), iter::once(0..self.size.get())) };
+            unsafe { device.flush_mapped_ranges(&self.raw, iter::once(0..self.size.get())) };
+        }
+        unsafe { device.unmap_buffer(&self.raw) };
+
+        let StagingBuffer {
+            raw, device, size, ..
+        } = self;
+
+        FlushedStagingBuffer {
+            raw: ManuallyDrop::new(raw),
+            device,
+            size,
         }
-        unsafe { device.unmap_buffer(self.raw()) };
     }
 }
 
-impl<A: HalApi> Drop for StagingBuffer<A> {
+crate::impl_resource_type!(StagingBuffer);
+crate::impl_storage_item!(StagingBuffer);
+
+#[derive(Debug)]
+pub struct FlushedStagingBuffer<A: HalApi> {
+    raw: ManuallyDrop<A::Buffer>,
+    device: Arc<Device<A>>,
+    pub(crate) size: wgt::BufferSize,
+}
+
+impl<A: HalApi> FlushedStagingBuffer<A> {
+    pub(crate) fn raw(&self) -> &A::Buffer {
+        &self.raw
+    }
+}
+
+impl<A: HalApi> Drop for FlushedStagingBuffer<A> {
     fn drop(&mut self) {
         use hal::Device;
-        resource_log!("Destroy raw {}", self.error_ident());
+        resource_log!("Destroy raw StagingBuffer");
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
         let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
         unsafe { self.device.raw().destroy_buffer(raw) };
     }
 }
 
-crate::impl_resource_type!(StagingBuffer);
-// TODO: add label
-impl<A: HalApi> Labeled for StagingBuffer<A> {
-    fn label(&self) -> &str {
-        ""
-    }
-}
-crate::impl_parent_device!(StagingBuffer);
-crate::impl_storage_item!(StagingBuffer);
-
 pub type TextureDescriptor<'a> = wgt::TextureDescriptor<Label<'a>, Vec<wgt::TextureFormat>>;
 
 #[derive(Debug)]

From 6f16ea460ab437173e14d2f5f3584ca7e1c9841d Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Fri, 12 Jul 2024 14:38:38 +0200
Subject: [PATCH 020/226] make the `StagingBuffer` implementation more robust

---
 wgpu-core/src/device/global.rs   | 11 ++---
 wgpu-core/src/device/queue.rs    | 49 ++++++++------------
 wgpu-core/src/device/resource.rs |  9 ++--
 wgpu-core/src/resource.rs        | 76 ++++++++++++++++++++++++--------
 wgpu-hal/src/lib.rs              |  6 ++-
 5 files changed, 89 insertions(+), 62 deletions(-)

diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 94b59ad6cb..e5643a3da9 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -2519,7 +2519,7 @@ impl Global {
         }
         let map_state = &*buffer.map_state.lock();
         match *map_state {
-            resource::BufferMapState::Init { ref ptr, .. } => {
+            resource::BufferMapState::Init { ref staging_buffer } => {
                 // offset (u64) can not be < 0, so no need to validate the lower bound
                 if offset + range_size > buffer.size {
                     return Err(BufferAccessError::OutOfBoundsOverrun {
@@ -2527,12 +2527,9 @@ impl Global {
                         max: buffer.size,
                     });
                 }
-                unsafe {
-                    Ok((
-                        NonNull::new_unchecked(ptr.as_ptr().offset(offset as isize)),
-                        range_size,
-                    ))
-                }
+                let ptr = unsafe { staging_buffer.ptr() };
+                let ptr = unsafe { NonNull::new_unchecked(ptr.as_ptr().offset(offset as isize)) };
+                Ok((ptr, range_size))
             }
             resource::BufferMapState::Active {
                 ref ptr, ref range, ..
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 05f58f2078..52edb528a3 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -31,7 +31,7 @@ use smallvec::SmallVec;
 use std::{
     iter,
     mem::{self},
-    ptr::{self, NonNull},
+    ptr::NonNull,
     sync::{atomic::Ordering, Arc},
 };
 use thiserror::Error;
@@ -405,17 +405,13 @@ impl Global {
         // Platform validation requires that the staging buffer always be
         // freed, even if an error occurs. All paths from here must call
         // `device.pending_writes.consume`.
-        let (staging_buffer, staging_buffer_ptr) = StagingBuffer::new(device, data_size)?;
+        let mut staging_buffer = StagingBuffer::new(device, data_size)?;
         let mut pending_writes = device.pending_writes.lock();
         let pending_writes = pending_writes.as_mut().unwrap();
 
-        let staging_buffer = unsafe {
+        let staging_buffer = {
             profiling::scope!("copy");
-            ptr::copy_nonoverlapping(
-                data.as_ptr(),
-                staging_buffer_ptr.as_ptr(),
-                data_size.get() as usize,
-            );
+            staging_buffer.write(data);
             staging_buffer.flush()
         };
 
@@ -448,13 +444,14 @@ impl Global {
 
         let device = &queue.device;
 
-        let (staging_buffer, staging_buffer_ptr) = StagingBuffer::new(device, buffer_size)?;
+        let staging_buffer = StagingBuffer::new(device, buffer_size)?;
+        let ptr = unsafe { staging_buffer.ptr() };
 
         let fid = hub.staging_buffers.prepare(id_in);
         let id = fid.assign(Arc::new(staging_buffer));
         resource_log!("Queue::create_staging_buffer {id:?}");
 
-        Ok((id, staging_buffer_ptr))
+        Ok((id, ptr))
     }
 
     pub fn queue_write_staging_buffer<A: HalApi>(
@@ -487,7 +484,7 @@ impl Global {
         // user. Platform validation requires that the staging buffer always
         // be freed, even if an error occurs. All paths from here must call
         // `device.pending_writes.consume`.
-        let staging_buffer = unsafe { staging_buffer.flush() };
+        let staging_buffer = staging_buffer.flush();
 
         let result = self.queue_write_staging_buffer_impl(
             &queue,
@@ -779,42 +776,34 @@ impl Global {
         // Platform validation requires that the staging buffer always be
         // freed, even if an error occurs. All paths from here must call
         // `device.pending_writes.consume`.
-        let (staging_buffer, staging_buffer_ptr) = StagingBuffer::new(device, stage_size)?;
+        let mut staging_buffer = StagingBuffer::new(device, stage_size)?;
 
         if stage_bytes_per_row == bytes_per_row {
             profiling::scope!("copy aligned");
             // Fast path if the data is already being aligned optimally.
-            unsafe {
-                ptr::copy_nonoverlapping(
-                    data.as_ptr().offset(data_layout.offset as isize),
-                    staging_buffer_ptr.as_ptr(),
-                    stage_size.get() as usize,
-                );
-            }
+            staging_buffer.write(&data[data_layout.offset as usize..]);
         } else {
             profiling::scope!("copy chunked");
             // Copy row by row into the optimal alignment.
             let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize;
             for layer in 0..size.depth_or_array_layers {
                 let rows_offset = layer * block_rows_per_image;
-                for row in 0..height_blocks {
+                for row in rows_offset..rows_offset + height_blocks {
+                    let src_offset = data_layout.offset as u32 + row * bytes_per_row;
+                    let dst_offset = row * stage_bytes_per_row;
                     unsafe {
-                        ptr::copy_nonoverlapping(
-                            data.as_ptr().offset(
-                                data_layout.offset as isize
-                                    + (rows_offset + row) as isize * bytes_per_row as isize,
-                            ),
-                            staging_buffer_ptr.as_ptr().offset(
-                                (rows_offset + row) as isize * stage_bytes_per_row as isize,
-                            ),
+                        staging_buffer.write_with_offset(
+                            data,
+                            src_offset as isize,
+                            dst_offset as isize,
                             copy_bytes_per_row,
-                        );
+                        )
                     }
                 }
             }
         }
 
-        let staging_buffer = unsafe { staging_buffer.flush() };
+        let staging_buffer = staging_buffer.flush();
 
         let regions = (0..array_layer_count).map(|rel_array_layer| {
             let mut texture_base = dst_base.clone();
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 3e3e5f9049..2ec3a3e9eb 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -591,18 +591,15 @@ impl<A: HalApi> Device<A> {
             };
             hal::BufferUses::MAP_WRITE
         } else {
-            let (staging_buffer, staging_buffer_ptr) =
+            let mut staging_buffer =
                 StagingBuffer::new(self, wgt::BufferSize::new(aligned_size).unwrap())?;
 
             // Zero initialize memory and then mark the buffer as initialized
             // (it's guaranteed that this is the case by the time the buffer is usable)
-            unsafe { std::ptr::write_bytes(staging_buffer_ptr.as_ptr(), 0, aligned_size as usize) };
+            staging_buffer.write_zeros();
             buffer.initialization_status.write().drain(0..aligned_size);
 
-            *buffer.map_state.lock() = resource::BufferMapState::Init {
-                staging_buffer,
-                ptr: staging_buffer_ptr,
-            };
+            *buffer.map_state.lock() = resource::BufferMapState::Init { staging_buffer };
             hal::BufferUses::COPY_DST
         };
 
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 612d68ff61..ced9edbb56 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -256,10 +256,7 @@ pub enum BufferMapAsyncStatus {
 #[derive(Debug)]
 pub(crate) enum BufferMapState<A: HalApi> {
     /// Mapped at creation.
-    Init {
-        staging_buffer: StagingBuffer<A>,
-        ptr: NonNull<u8>,
-    },
+    Init { staging_buffer: StagingBuffer<A> },
     /// Waiting for GPU to be done before mapping
     Waiting(BufferPendingMapping<A>),
     /// Mapped
@@ -651,15 +648,10 @@ impl<A: HalApi> Buffer<A> {
         let raw_buf = self.try_raw(&snatch_guard)?;
         log::debug!("{} map state -> Idle", self.error_ident());
         match mem::replace(&mut *self.map_state.lock(), BufferMapState::Idle) {
-            BufferMapState::Init {
-                staging_buffer,
-                ptr,
-            } => {
+            BufferMapState::Init { staging_buffer } => {
                 #[cfg(feature = "trace")]
                 if let Some(ref mut trace) = *device.trace.lock() {
-                    let data = trace.make_binary("bin", unsafe {
-                        std::slice::from_raw_parts(ptr.as_ptr(), self.size as usize)
-                    });
+                    let data = trace.make_binary("bin", staging_buffer.get_data());
                     trace.add(trace::Action::WriteBuffer {
                         id: buffer_id,
                         data,
@@ -667,12 +659,11 @@ impl<A: HalApi> Buffer<A> {
                         queued: true,
                     });
                 }
-                let _ = ptr;
 
                 let mut pending_writes = device.pending_writes.lock();
                 let pending_writes = pending_writes.as_mut().unwrap();
 
-                let staging_buffer = unsafe { staging_buffer.flush() };
+                let staging_buffer = staging_buffer.flush();
 
                 self.use_at(device.active_submission_index.load(Ordering::Relaxed) + 1);
                 let region = wgt::BufferSize::new(self.size).map(|size| hal::BufferCopy {
@@ -832,6 +823,11 @@ impl<A: HalApi> Drop for DestroyedBuffer<A> {
     }
 }
 
+#[cfg(send_sync)]
+unsafe impl<A: HalApi> Send for StagingBuffer<A> {}
+#[cfg(send_sync)]
+unsafe impl<A: HalApi> Sync for StagingBuffer<A> {}
+
 /// A temporary buffer, consumed by the command that uses it.
 ///
 /// A [`StagingBuffer`] is designed for one-shot uploads of data to the GPU. It
@@ -857,13 +853,11 @@ pub struct StagingBuffer<A: HalApi> {
     device: Arc<Device<A>>,
     pub(crate) size: wgt::BufferSize,
     is_coherent: bool,
+    ptr: NonNull<u8>,
 }
 
 impl<A: HalApi> StagingBuffer<A> {
-    pub(crate) fn new(
-        device: &Arc<Device<A>>,
-        size: wgt::BufferSize,
-    ) -> Result<(Self, NonNull<u8>), DeviceError> {
+    pub(crate) fn new(device: &Arc<Device<A>>, size: wgt::BufferSize) -> Result<Self, DeviceError> {
         use hal::Device;
         profiling::scope!("StagingBuffer::new");
         let stage_desc = hal::BufferDescriptor {
@@ -881,9 +875,55 @@ impl<A: HalApi> StagingBuffer<A> {
             device: device.clone(),
             size,
             is_coherent: mapping.is_coherent,
+            ptr: mapping.ptr,
         };
 
-        Ok((staging_buffer, mapping.ptr))
+        Ok(staging_buffer)
+    }
+
+    /// SAFETY: You must not call any functions of `self`
+    /// until you stopped using the returned pointer.
+    pub(crate) unsafe fn ptr(&self) -> NonNull<u8> {
+        self.ptr
+    }
+
+    #[cfg(feature = "trace")]
+    pub(crate) fn get_data(&self) -> &[u8] {
+        unsafe { std::slice::from_raw_parts(self.ptr.as_ptr(), self.size.get() as usize) }
+    }
+
+    pub(crate) fn write_zeros(&mut self) {
+        unsafe { core::ptr::write_bytes(self.ptr.as_ptr(), 0, self.size.get() as usize) };
+    }
+
+    pub(crate) fn write(&mut self, data: &[u8]) {
+        assert!(data.len() >= self.size.get() as usize);
+        // SAFETY: With the assert above, all of `copy_nonoverlapping`'s
+        // requirements are satisfied.
+        unsafe {
+            core::ptr::copy_nonoverlapping(
+                data.as_ptr(),
+                self.ptr.as_ptr(),
+                self.size.get() as usize,
+            );
+        }
+    }
+
+    /// SAFETY: The offsets and size must be in-bounds.
+    pub(crate) unsafe fn write_with_offset(
+        &mut self,
+        data: &[u8],
+        src_offset: isize,
+        dst_offset: isize,
+        size: usize,
+    ) {
+        unsafe {
+            core::ptr::copy_nonoverlapping(
+                data.as_ptr().offset(src_offset),
+                self.ptr.as_ptr().offset(dst_offset),
+                size,
+            );
+        }
     }
 
     pub(crate) fn flush(self) -> FlushedStagingBuffer<A> {
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 36dc9b0689..6f470f4ddc 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -714,9 +714,13 @@ pub trait Device: WasmNotSendSync {
     ///   be ordered, so it is meaningful to talk about what must occur
     ///   "between" them.
     ///
+    /// - Zero-sized mappings are not allowed.
+    ///
+    /// - The returned [`BufferMapping::ptr`] must not be used after a call to
+    /// [`Device::unmap_buffer`].
+    ///
     /// [`MAP_READ`]: BufferUses::MAP_READ
     /// [`MAP_WRITE`]: BufferUses::MAP_WRITE
-    //TODO: clarify if zero-sized mapping is allowed
     unsafe fn map_buffer(
         &self,
         buffer: &<Self::A as Api>::Buffer,

From 17fcb194258b05205d21001e8473762141ebda26 Mon Sep 17 00:00:00 2001
From: JMS55 <47158642+JMS55@users.noreply.github.com>
Date: Sat, 13 Jul 2024 19:17:59 -0700
Subject: [PATCH 021/226] [naga, hal] miscellaneous fixes for Atomic64 support
 (#5952)

In `naga::back::hlsl`:

- Generate calls to `Interlocked{op}64` when necessary, not
  `Interlocked{op}`.

- Make atomic operations that do not produce a value emit their
  operands properly.

In the Naga snapshot tests:

- Adapt `atomicOps-int64-min-max.wgsl` to include cases that
  cover non-trivial atomic operation operand emitting.

In `wgpu_hal::vulkan::adapter`:

- When retrieving physical device features, be sure to include
  the `PhysicalDeviceShaderAtomicInt64Features` extending struct
  in the chain whenever the `VK_KHR_shader_atomic_int64` extension
  is available.

- Request both `shader_{buffer,shared}_int64_atomics` in the
  `PhysicalDeviceShaderAtomicInt64Features` extending struct when either of
  `wgpu_types::Features::SHADER_INT64_ATOMIC_{ALL_OPS,MIN_MAX}` is requested.

---------

Co-authored-by: Jim Blandy <jimb@red-bean.com>
---
 naga/src/back/hlsl/writer.rs                  | 14 ++-
 naga/src/front/wgsl/lower/mod.rs              |  4 +
 naga/tests/in/atomicOps-int64-min-max.wgsl    | 14 +--
 .../out/hlsl/atomicOps-int64-min-max.hlsl     | 21 ++--
 naga/tests/out/hlsl/atomicOps-int64.hlsl      | 64 ++++++------
 .../tests/out/msl/atomicOps-int64-min-max.msl | 17 ++--
 .../out/spv/atomicOps-int64-min-max.spvasm    | 99 +++++++++++--------
 .../out/wgsl/atomicOps-int64-min-max.wgsl     | 18 ++--
 wgpu-hal/src/vulkan/adapter.rs                | 21 +++-
 9 files changed, 165 insertions(+), 107 deletions(-)

diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs
index d40b9b24c2..afa12cccab 100644
--- a/naga/src/back/hlsl/writer.rs
+++ b/naga/src/back/hlsl/writer.rs
@@ -9,7 +9,7 @@ use super::{
 use crate::{
     back::{self, Baked},
     proc::{self, NameKey},
-    valid, Handle, Module, ScalarKind, ShaderStage, TypeInner,
+    valid, Handle, Module, Scalar, ScalarKind, ShaderStage, TypeInner,
 };
 use std::{fmt, mem};
 
@@ -2013,7 +2013,11 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                         // ownership of our reusable access chain buffer.
                         let chain = mem::take(&mut self.temp_access_chain);
                         let var_name = &self.names[&NameKey::GlobalVariable(var_handle)];
-                        write!(self.out, "{var_name}.Interlocked{fun_str}(")?;
+                        let width = match func_ctx.resolve_type(value, &module.types) {
+                            &TypeInner::Scalar(Scalar { width: 8, .. }) => "64",
+                            _ => "",
+                        };
+                        write!(self.out, "{var_name}.Interlocked{fun_str}{width}(")?;
                         self.write_storage_address(module, &chain, func_ctx)?;
                         self.temp_access_chain = chain;
                     }
@@ -2852,7 +2856,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                 let inner = func_ctx.resolve_type(expr, &module.types);
                 let close_paren = match convert {
                     Some(dst_width) => {
-                        let scalar = crate::Scalar {
+                        let scalar = Scalar {
                             kind,
                             width: dst_width,
                         };
@@ -3213,7 +3217,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                     // as non-32bit types are DXC only.
                     Function::MissingIntOverload(fun_name) => {
                         let scalar_kind = func_ctx.resolve_type(arg, &module.types).scalar();
-                        if let Some(crate::Scalar {
+                        if let Some(Scalar {
                             kind: ScalarKind::Sint,
                             width: 4,
                         }) = scalar_kind
@@ -3231,7 +3235,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                     // as non-32bit types are DXC only.
                     Function::MissingIntReturnType(fun_name) => {
                         let scalar_kind = func_ctx.resolve_type(arg, &module.types).scalar();
-                        if let Some(crate::Scalar {
+                        if let Some(Scalar {
                             kind: ScalarKind::Sint,
                             width: 4,
                         }) = scalar_kind
diff --git a/naga/src/front/wgsl/lower/mod.rs b/naga/src/front/wgsl/lower/mod.rs
index 7c5954d065..34f8daf506 100644
--- a/naga/src/front/wgsl/lower/mod.rs
+++ b/naga/src/front/wgsl/lower/mod.rs
@@ -2482,6 +2482,10 @@ impl<'source, 'temp> Lowerer<'source, 'temp> {
                     crate::TypeInner::Scalar(crate::Scalar { width: 8, .. })
                 );
         let result = if is_64_bit_min_max && is_statement {
+            let rctx = ctx.runtime_expression_ctx(span)?;
+            rctx.block
+                .extend(rctx.emitter.finish(&rctx.function.expressions));
+            rctx.emitter.start(&rctx.function.expressions);
             None
         } else {
             let ty = ctx.register_type(value)?;
diff --git a/naga/tests/in/atomicOps-int64-min-max.wgsl b/naga/tests/in/atomicOps-int64-min-max.wgsl
index 94e6aa6862..fdedd8b4da 100644
--- a/naga/tests/in/atomicOps-int64-min-max.wgsl
+++ b/naga/tests/in/atomicOps-int64-min-max.wgsl
@@ -9,19 +9,21 @@ var<storage, read_write> storage_atomic_scalar: atomic<u64>;
 var<storage, read_write> storage_atomic_arr: array<atomic<u64>, 2>;
 @group(0) @binding(2)
 var<storage, read_write> storage_struct: Struct;
+@group(0) @binding(3)
+var<uniform> input: u64;
 
 @compute
 @workgroup_size(2)
 fn cs_main(@builtin(local_invocation_id) id: vec3<u32>) {
-    atomicMax(&storage_atomic_scalar, 1lu);
-    atomicMax(&storage_atomic_arr[1], 1lu);
+    atomicMax(&storage_atomic_scalar, input);
+    atomicMax(&storage_atomic_arr[1], 1 + input);
     atomicMax(&storage_struct.atomic_scalar, 1lu);
-    atomicMax(&storage_struct.atomic_arr[1], 1lu);
+    atomicMax(&storage_struct.atomic_arr[1], u64(id.x));
 
     workgroupBarrier();
 
-    atomicMin(&storage_atomic_scalar, 1lu);
-    atomicMin(&storage_atomic_arr[1], 1lu);
+    atomicMin(&storage_atomic_scalar, input);
+    atomicMin(&storage_atomic_arr[1], 1 + input);
     atomicMin(&storage_struct.atomic_scalar, 1lu);
-    atomicMin(&storage_struct.atomic_arr[1], 1lu);
+    atomicMin(&storage_struct.atomic_arr[1], u64(id.x));
 }
diff --git a/naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl b/naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl
index 8c52e5b3b3..989a52b78b 100644
--- a/naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl
+++ b/naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl
@@ -13,18 +13,23 @@ struct Struct {
 RWByteAddressBuffer storage_atomic_scalar : register(u0);
 RWByteAddressBuffer storage_atomic_arr : register(u1);
 RWByteAddressBuffer storage_struct : register(u2);
+cbuffer input : register(b3) { uint64_t input; }
 
 [numthreads(2, 1, 1)]
 void cs_main(uint3 id : SV_GroupThreadID)
 {
-    storage_atomic_scalar.InterlockedMax(0, 1uL);
-    storage_atomic_arr.InterlockedMax(8, 1uL);
-    storage_struct.InterlockedMax(0, 1uL);
-    storage_struct.InterlockedMax(8+8, 1uL);
+    uint64_t _e3 = input;
+    storage_atomic_scalar.InterlockedMax64(0, _e3);
+    uint64_t _e7 = input;
+    storage_atomic_arr.InterlockedMax64(8, (1uL + _e7));
+    storage_struct.InterlockedMax64(0, 1uL);
+    storage_struct.InterlockedMax64(8+8, uint64_t(id.x));
     GroupMemoryBarrierWithGroupSync();
-    storage_atomic_scalar.InterlockedMin(0, 1uL);
-    storage_atomic_arr.InterlockedMin(8, 1uL);
-    storage_struct.InterlockedMin(0, 1uL);
-    storage_struct.InterlockedMin(8+8, 1uL);
+    uint64_t _e20 = input;
+    storage_atomic_scalar.InterlockedMin64(0, _e20);
+    uint64_t _e24 = input;
+    storage_atomic_arr.InterlockedMin64(8, (1uL + _e24));
+    storage_struct.InterlockedMin64(0, 1uL);
+    storage_struct.InterlockedMin64(8+8, uint64_t(id.x));
     return;
 }
diff --git a/naga/tests/out/hlsl/atomicOps-int64.hlsl b/naga/tests/out/hlsl/atomicOps-int64.hlsl
index 973cf07309..ea88f81753 100644
--- a/naga/tests/out/hlsl/atomicOps-int64.hlsl
+++ b/naga/tests/out/hlsl/atomicOps-int64.hlsl
@@ -44,72 +44,72 @@ void cs_main(uint3 id : SV_GroupThreadID, uint3 __local_invocation_id : SV_Group
     uint64_t l6_ = workgroup_struct.atomic_scalar;
     int64_t l7_ = workgroup_struct.atomic_arr[1];
     GroupMemoryBarrierWithGroupSync();
-    uint64_t _e51; storage_atomic_scalar.InterlockedAdd(0, 1uL, _e51);
-    int64_t _e55; storage_atomic_arr.InterlockedAdd(8, 1L, _e55);
-    uint64_t _e59; storage_struct.InterlockedAdd(0, 1uL, _e59);
-    int64_t _e64; storage_struct.InterlockedAdd(8+8, 1L, _e64);
+    uint64_t _e51; storage_atomic_scalar.InterlockedAdd64(0, 1uL, _e51);
+    int64_t _e55; storage_atomic_arr.InterlockedAdd64(8, 1L, _e55);
+    uint64_t _e59; storage_struct.InterlockedAdd64(0, 1uL, _e59);
+    int64_t _e64; storage_struct.InterlockedAdd64(8+8, 1L, _e64);
     uint64_t _e67; InterlockedAdd(workgroup_atomic_scalar, 1uL, _e67);
     int64_t _e71; InterlockedAdd(workgroup_atomic_arr[1], 1L, _e71);
     uint64_t _e75; InterlockedAdd(workgroup_struct.atomic_scalar, 1uL, _e75);
     int64_t _e80; InterlockedAdd(workgroup_struct.atomic_arr[1], 1L, _e80);
     GroupMemoryBarrierWithGroupSync();
-    uint64_t _e83; storage_atomic_scalar.InterlockedAdd(0, -1uL, _e83);
-    int64_t _e87; storage_atomic_arr.InterlockedAdd(8, -1L, _e87);
-    uint64_t _e91; storage_struct.InterlockedAdd(0, -1uL, _e91);
-    int64_t _e96; storage_struct.InterlockedAdd(8+8, -1L, _e96);
+    uint64_t _e83; storage_atomic_scalar.InterlockedAdd64(0, -1uL, _e83);
+    int64_t _e87; storage_atomic_arr.InterlockedAdd64(8, -1L, _e87);
+    uint64_t _e91; storage_struct.InterlockedAdd64(0, -1uL, _e91);
+    int64_t _e96; storage_struct.InterlockedAdd64(8+8, -1L, _e96);
     uint64_t _e99; InterlockedAdd(workgroup_atomic_scalar, -1uL, _e99);
     int64_t _e103; InterlockedAdd(workgroup_atomic_arr[1], -1L, _e103);
     uint64_t _e107; InterlockedAdd(workgroup_struct.atomic_scalar, -1uL, _e107);
     int64_t _e112; InterlockedAdd(workgroup_struct.atomic_arr[1], -1L, _e112);
     GroupMemoryBarrierWithGroupSync();
-    storage_atomic_scalar.InterlockedMax(0, 1uL);
-    storage_atomic_arr.InterlockedMax(8, 1L);
-    storage_struct.InterlockedMax(0, 1uL);
-    storage_struct.InterlockedMax(8+8, 1L);
+    storage_atomic_scalar.InterlockedMax64(0, 1uL);
+    storage_atomic_arr.InterlockedMax64(8, 1L);
+    storage_struct.InterlockedMax64(0, 1uL);
+    storage_struct.InterlockedMax64(8+8, 1L);
     InterlockedMax(workgroup_atomic_scalar, 1uL);
     InterlockedMax(workgroup_atomic_arr[1], 1L);
     InterlockedMax(workgroup_struct.atomic_scalar, 1uL);
     InterlockedMax(workgroup_struct.atomic_arr[1], 1L);
     GroupMemoryBarrierWithGroupSync();
-    storage_atomic_scalar.InterlockedMin(0, 1uL);
-    storage_atomic_arr.InterlockedMin(8, 1L);
-    storage_struct.InterlockedMin(0, 1uL);
-    storage_struct.InterlockedMin(8+8, 1L);
+    storage_atomic_scalar.InterlockedMin64(0, 1uL);
+    storage_atomic_arr.InterlockedMin64(8, 1L);
+    storage_struct.InterlockedMin64(0, 1uL);
+    storage_struct.InterlockedMin64(8+8, 1L);
     InterlockedMin(workgroup_atomic_scalar, 1uL);
     InterlockedMin(workgroup_atomic_arr[1], 1L);
     InterlockedMin(workgroup_struct.atomic_scalar, 1uL);
     InterlockedMin(workgroup_struct.atomic_arr[1], 1L);
     GroupMemoryBarrierWithGroupSync();
-    uint64_t _e163; storage_atomic_scalar.InterlockedAnd(0, 1uL, _e163);
-    int64_t _e167; storage_atomic_arr.InterlockedAnd(8, 1L, _e167);
-    uint64_t _e171; storage_struct.InterlockedAnd(0, 1uL, _e171);
-    int64_t _e176; storage_struct.InterlockedAnd(8+8, 1L, _e176);
+    uint64_t _e163; storage_atomic_scalar.InterlockedAnd64(0, 1uL, _e163);
+    int64_t _e167; storage_atomic_arr.InterlockedAnd64(8, 1L, _e167);
+    uint64_t _e171; storage_struct.InterlockedAnd64(0, 1uL, _e171);
+    int64_t _e176; storage_struct.InterlockedAnd64(8+8, 1L, _e176);
     uint64_t _e179; InterlockedAnd(workgroup_atomic_scalar, 1uL, _e179);
     int64_t _e183; InterlockedAnd(workgroup_atomic_arr[1], 1L, _e183);
     uint64_t _e187; InterlockedAnd(workgroup_struct.atomic_scalar, 1uL, _e187);
     int64_t _e192; InterlockedAnd(workgroup_struct.atomic_arr[1], 1L, _e192);
     GroupMemoryBarrierWithGroupSync();
-    uint64_t _e195; storage_atomic_scalar.InterlockedOr(0, 1uL, _e195);
-    int64_t _e199; storage_atomic_arr.InterlockedOr(8, 1L, _e199);
-    uint64_t _e203; storage_struct.InterlockedOr(0, 1uL, _e203);
-    int64_t _e208; storage_struct.InterlockedOr(8+8, 1L, _e208);
+    uint64_t _e195; storage_atomic_scalar.InterlockedOr64(0, 1uL, _e195);
+    int64_t _e199; storage_atomic_arr.InterlockedOr64(8, 1L, _e199);
+    uint64_t _e203; storage_struct.InterlockedOr64(0, 1uL, _e203);
+    int64_t _e208; storage_struct.InterlockedOr64(8+8, 1L, _e208);
     uint64_t _e211; InterlockedOr(workgroup_atomic_scalar, 1uL, _e211);
     int64_t _e215; InterlockedOr(workgroup_atomic_arr[1], 1L, _e215);
     uint64_t _e219; InterlockedOr(workgroup_struct.atomic_scalar, 1uL, _e219);
     int64_t _e224; InterlockedOr(workgroup_struct.atomic_arr[1], 1L, _e224);
     GroupMemoryBarrierWithGroupSync();
-    uint64_t _e227; storage_atomic_scalar.InterlockedXor(0, 1uL, _e227);
-    int64_t _e231; storage_atomic_arr.InterlockedXor(8, 1L, _e231);
-    uint64_t _e235; storage_struct.InterlockedXor(0, 1uL, _e235);
-    int64_t _e240; storage_struct.InterlockedXor(8+8, 1L, _e240);
+    uint64_t _e227; storage_atomic_scalar.InterlockedXor64(0, 1uL, _e227);
+    int64_t _e231; storage_atomic_arr.InterlockedXor64(8, 1L, _e231);
+    uint64_t _e235; storage_struct.InterlockedXor64(0, 1uL, _e235);
+    int64_t _e240; storage_struct.InterlockedXor64(8+8, 1L, _e240);
     uint64_t _e243; InterlockedXor(workgroup_atomic_scalar, 1uL, _e243);
     int64_t _e247; InterlockedXor(workgroup_atomic_arr[1], 1L, _e247);
     uint64_t _e251; InterlockedXor(workgroup_struct.atomic_scalar, 1uL, _e251);
     int64_t _e256; InterlockedXor(workgroup_struct.atomic_arr[1], 1L, _e256);
-    uint64_t _e259; storage_atomic_scalar.InterlockedExchange(0, 1uL, _e259);
-    int64_t _e263; storage_atomic_arr.InterlockedExchange(8, 1L, _e263);
-    uint64_t _e267; storage_struct.InterlockedExchange(0, 1uL, _e267);
-    int64_t _e272; storage_struct.InterlockedExchange(8+8, 1L, _e272);
+    uint64_t _e259; storage_atomic_scalar.InterlockedExchange64(0, 1uL, _e259);
+    int64_t _e263; storage_atomic_arr.InterlockedExchange64(8, 1L, _e263);
+    uint64_t _e267; storage_struct.InterlockedExchange64(0, 1uL, _e267);
+    int64_t _e272; storage_struct.InterlockedExchange64(8+8, 1L, _e272);
     uint64_t _e275; InterlockedExchange(workgroup_atomic_scalar, 1uL, _e275);
     int64_t _e279; InterlockedExchange(workgroup_atomic_arr[1], 1L, _e279);
     uint64_t _e283; InterlockedExchange(workgroup_struct.atomic_scalar, 1uL, _e283);
diff --git a/naga/tests/out/msl/atomicOps-int64-min-max.msl b/naga/tests/out/msl/atomicOps-int64-min-max.msl
index a5dd1c97f0..f69a2a49bd 100644
--- a/naga/tests/out/msl/atomicOps-int64-min-max.msl
+++ b/naga/tests/out/msl/atomicOps-int64-min-max.msl
@@ -19,15 +19,20 @@ kernel void cs_main(
 , device metal::atomic_ulong& storage_atomic_scalar [[user(fake0)]]
 , device type_1& storage_atomic_arr [[user(fake0)]]
 , device Struct& storage_struct [[user(fake0)]]
+, constant ulong& input [[user(fake0)]]
 ) {
-    metal::atomic_max_explicit(&storage_atomic_scalar, 1uL, metal::memory_order_relaxed);
-    metal::atomic_max_explicit(&storage_atomic_arr.inner[1], 1uL, metal::memory_order_relaxed);
+    ulong _e3 = input;
+    metal::atomic_max_explicit(&storage_atomic_scalar, _e3, metal::memory_order_relaxed);
+    ulong _e7 = input;
+    metal::atomic_max_explicit(&storage_atomic_arr.inner[1], 1uL + _e7, metal::memory_order_relaxed);
     metal::atomic_max_explicit(&storage_struct.atomic_scalar, 1uL, metal::memory_order_relaxed);
-    metal::atomic_max_explicit(&storage_struct.atomic_arr.inner[1], 1uL, metal::memory_order_relaxed);
+    metal::atomic_max_explicit(&storage_struct.atomic_arr.inner[1], static_cast<ulong>(id.x), metal::memory_order_relaxed);
     metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup);
-    metal::atomic_min_explicit(&storage_atomic_scalar, 1uL, metal::memory_order_relaxed);
-    metal::atomic_min_explicit(&storage_atomic_arr.inner[1], 1uL, metal::memory_order_relaxed);
+    ulong _e20 = input;
+    metal::atomic_min_explicit(&storage_atomic_scalar, _e20, metal::memory_order_relaxed);
+    ulong _e24 = input;
+    metal::atomic_min_explicit(&storage_atomic_arr.inner[1], 1uL + _e24, metal::memory_order_relaxed);
     metal::atomic_min_explicit(&storage_struct.atomic_scalar, 1uL, metal::memory_order_relaxed);
-    metal::atomic_min_explicit(&storage_struct.atomic_arr.inner[1], 1uL, metal::memory_order_relaxed);
+    metal::atomic_min_explicit(&storage_struct.atomic_arr.inner[1], static_cast<ulong>(id.x), metal::memory_order_relaxed);
     return;
 }
diff --git a/naga/tests/out/spv/atomicOps-int64-min-max.spvasm b/naga/tests/out/spv/atomicOps-int64-min-max.spvasm
index aa798f546f..2d31197b3b 100644
--- a/naga/tests/out/spv/atomicOps-int64-min-max.spvasm
+++ b/naga/tests/out/spv/atomicOps-int64-min-max.spvasm
@@ -1,15 +1,15 @@
 ; SPIR-V
 ; Version: 1.0
 ; Generator: rspirv
-; Bound: 52
+; Bound: 67
 OpCapability Shader
 OpCapability Int64Atomics
 OpCapability Int64
 OpExtension "SPV_KHR_storage_buffer_storage_class"
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
-OpEntryPoint GLCompute %22 "cs_main" %19
-OpExecutionMode %22 LocalSize 2 1 1
+OpEntryPoint GLCompute %25 "cs_main" %22
+OpExecutionMode %25 LocalSize 2 1 1
 OpDecorate %4 ArrayStride 8
 OpMemberDecorate %7 0 Offset 0
 OpMemberDecorate %7 1 Offset 8
@@ -25,7 +25,11 @@ OpDecorate %15 DescriptorSet 0
 OpDecorate %15 Binding 2
 OpDecorate %16 Block
 OpMemberDecorate %16 0 Offset 0
-OpDecorate %19 BuiltIn LocalInvocationId
+OpDecorate %18 DescriptorSet 0
+OpDecorate %18 Binding 3
+OpDecorate %19 Block
+OpMemberDecorate %19 0 Offset 0
+OpDecorate %22 BuiltIn LocalInvocationId
 %2 = OpTypeVoid
 %3 = OpTypeInt 64 0
 %6 = OpTypeInt 32 0
@@ -42,41 +46,56 @@ OpDecorate %19 BuiltIn LocalInvocationId
 %16 = OpTypeStruct %7
 %17 = OpTypePointer StorageBuffer %16
 %15 = OpVariable  %17  StorageBuffer
-%20 = OpTypePointer Input %8
-%19 = OpVariable  %20  Input
-%23 = OpTypeFunction %2
-%24 = OpTypePointer StorageBuffer %3
-%25 = OpConstant  %6  0
-%27 = OpTypePointer StorageBuffer %4
-%29 = OpTypePointer StorageBuffer %7
-%31 = OpConstant  %3  1
-%35 = OpTypeInt 32 1
-%34 = OpConstant  %35  1
-%36 = OpConstant  %6  64
-%38 = OpConstant  %6  1
-%44 = OpConstant  %6  264
-%22 = OpFunction  %2  None %23
-%18 = OpLabel
-%21 = OpLoad  %8  %19
-%26 = OpAccessChain  %24  %9 %25
-%28 = OpAccessChain  %27  %12 %25
-%30 = OpAccessChain  %29  %15 %25
-OpBranch %32
-%32 = OpLabel
-%33 = OpAtomicUMax  %3  %26 %34 %36 %31
-%39 = OpAccessChain  %24  %28 %38
-%37 = OpAtomicUMax  %3  %39 %34 %36 %31
-%41 = OpAccessChain  %24  %30 %25
-%40 = OpAtomicUMax  %3  %41 %34 %36 %31
-%43 = OpAccessChain  %24  %30 %38 %38
-%42 = OpAtomicUMax  %3  %43 %34 %36 %31
-OpControlBarrier %5 %5 %44
-%45 = OpAtomicUMin  %3  %26 %34 %36 %31
-%47 = OpAccessChain  %24  %28 %38
-%46 = OpAtomicUMin  %3  %47 %34 %36 %31
-%49 = OpAccessChain  %24  %30 %25
-%48 = OpAtomicUMin  %3  %49 %34 %36 %31
-%51 = OpAccessChain  %24  %30 %38 %38
-%50 = OpAtomicUMin  %3  %51 %34 %36 %31
+%19 = OpTypeStruct %3
+%20 = OpTypePointer Uniform %19
+%18 = OpVariable  %20  Uniform
+%23 = OpTypePointer Input %8
+%22 = OpVariable  %23  Input
+%26 = OpTypeFunction %2
+%27 = OpTypePointer StorageBuffer %3
+%28 = OpConstant  %6  0
+%30 = OpTypePointer StorageBuffer %4
+%32 = OpTypePointer StorageBuffer %7
+%34 = OpTypePointer Uniform %3
+%36 = OpConstant  %3  1
+%41 = OpTypeInt 32 1
+%40 = OpConstant  %41  1
+%42 = OpConstant  %6  64
+%46 = OpConstant  %6  1
+%54 = OpConstant  %6  264
+%25 = OpFunction  %2  None %26
+%21 = OpLabel
+%24 = OpLoad  %8  %22
+%29 = OpAccessChain  %27  %9 %28
+%31 = OpAccessChain  %30  %12 %28
+%33 = OpAccessChain  %32  %15 %28
+%35 = OpAccessChain  %34  %18 %28
+OpBranch %37
+%37 = OpLabel
+%38 = OpLoad  %3  %35
+%39 = OpAtomicUMax  %3  %29 %40 %42 %38
+%43 = OpLoad  %3  %35
+%44 = OpIAdd  %3  %36 %43
+%47 = OpAccessChain  %27  %31 %46
+%45 = OpAtomicUMax  %3  %47 %40 %42 %44
+%49 = OpAccessChain  %27  %33 %28
+%48 = OpAtomicUMax  %3  %49 %40 %42 %36
+%50 = OpCompositeExtract  %6  %24 0
+%51 = OpUConvert  %3  %50
+%53 = OpAccessChain  %27  %33 %46 %46
+%52 = OpAtomicUMax  %3  %53 %40 %42 %51
+OpControlBarrier %5 %5 %54
+%55 = OpLoad  %3  %35
+%56 = OpAtomicUMin  %3  %29 %40 %42 %55
+%57 = OpLoad  %3  %35
+%58 = OpIAdd  %3  %36 %57
+%60 = OpAccessChain  %27  %31 %46
+%59 = OpAtomicUMin  %3  %60 %40 %42 %58
+%62 = OpAccessChain  %27  %33 %28
+%61 = OpAtomicUMin  %3  %62 %40 %42 %36
+%63 = OpCompositeExtract  %6  %24 0
+%64 = OpUConvert  %3  %63
+%66 = OpAccessChain  %27  %33 %46 %46
+%65 = OpAtomicUMin  %3  %66 %40 %42 %64
 OpReturn
 OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl b/naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl
index 37bbb680f5..126758b0b5 100644
--- a/naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl
+++ b/naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl
@@ -9,17 +9,23 @@ var<storage, read_write> storage_atomic_scalar: atomic<u64>;
 var<storage, read_write> storage_atomic_arr: array<atomic<u64>, 2>;
 @group(0) @binding(2) 
 var<storage, read_write> storage_struct: Struct;
+@group(0) @binding(3) 
+var<uniform> input: u64;
 
 @compute @workgroup_size(2, 1, 1) 
 fn cs_main(@builtin(local_invocation_id) id: vec3<u32>) {
-    atomicMax((&storage_atomic_scalar), 1lu);
-    atomicMax((&storage_atomic_arr[1]), 1lu);
+    let _e3 = input;
+    atomicMax((&storage_atomic_scalar), _e3);
+    let _e7 = input;
+    atomicMax((&storage_atomic_arr[1]), (1lu + _e7));
     atomicMax((&storage_struct.atomic_scalar), 1lu);
-    atomicMax((&storage_struct.atomic_arr[1]), 1lu);
+    atomicMax((&storage_struct.atomic_arr[1]), u64(id.x));
     workgroupBarrier();
-    atomicMin((&storage_atomic_scalar), 1lu);
-    atomicMin((&storage_atomic_arr[1]), 1lu);
+    let _e20 = input;
+    atomicMin((&storage_atomic_scalar), _e20);
+    let _e24 = input;
+    atomicMin((&storage_atomic_arr[1]), (1lu + _e24));
     atomicMin((&storage_struct.atomic_scalar), 1lu);
-    atomicMin((&storage_struct.atomic_arr[1]), 1lu);
+    atomicMin((&storage_struct.atomic_arr[1]), u64(id.x));
     return;
 }
diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index 81205c6293..1a89aa807a 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -428,12 +428,14 @@ impl PhysicalDeviceFeatures {
             shader_atomic_int64: if device_api_version >= vk::API_VERSION_1_2
                 || enabled_extensions.contains(&khr::shader_atomic_int64::NAME)
             {
+                let needed = requested_features.intersects(
+                    wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS
+                        | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX,
+                );
                 Some(
                     vk::PhysicalDeviceShaderAtomicInt64Features::default()
-                        .shader_buffer_int64_atomics(requested_features.intersects(
-                            wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS
-                                | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX,
-                        )),
+                        .shader_buffer_int64_atomics(needed)
+                        .shader_shared_int64_atomics(needed),
                 )
             } else {
                 None
@@ -1231,6 +1233,17 @@ impl super::InstanceShared {
                 features2 = features2.push_next(next);
             }
 
+            // `VK_KHR_shader_atomic_int64` is promoted to 1.2, but has no
+            // changes, so we can keep using the extension unconditionally.
+            if capabilities.device_api_version >= vk::API_VERSION_1_2
+                || capabilities.supports_extension(khr::shader_atomic_int64::NAME)
+            {
+                let next = features
+                    .shader_atomic_int64
+                    .insert(vk::PhysicalDeviceShaderAtomicInt64Features::default());
+                features2 = features2.push_next(next);
+            }
+
             if capabilities.supports_extension(ext::image_robustness::NAME) {
                 let next = features
                     .image_robustness

From 586215ab2e4b33fffa8a53ac4c77ed00144303c3 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sun, 14 Jul 2024 22:13:25 +0200
Subject: [PATCH 022/226] build(deps): bump crate-ci/typos from 1.22.9 to
 1.23.1 (#5922)

* build(deps): bump crate-ci/typos from 1.22.9 to 1.23.1

Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.22.9 to 1.23.1.
- [Release notes](https://github.com/crate-ci/typos/releases)
- [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md)
- [Commits](https://github.com/crate-ci/typos/compare/v1.22.9...v1.23.1)

---
updated-dependencies:
- dependency-name: crate-ci/typos
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* typo fixes

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Andreas Reich <r_andreas2@web.de>
---
 .github/workflows/ci.yml  | 2 +-
 d3d12/src/query.rs        | 2 +-
 tests/src/expectations.rs | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fd0102cf4d..3cbd5858a0 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -628,7 +628,7 @@ jobs:
           cargo fmt --manifest-path xtask/Cargo.toml -- --check
 
       - name: Check for typos
-        uses: crate-ci/typos@v1.22.9
+        uses: crate-ci/typos@v1.23.1
 
   check-cts-runner:
     # runtime is normally 2 minutes
diff --git a/d3d12/src/query.rs b/d3d12/src/query.rs
index a9dca262bc..68901de942 100644
--- a/d3d12/src/query.rs
+++ b/d3d12/src/query.rs
@@ -8,7 +8,7 @@ pub enum QueryHeapType {
     Timestamp = d3d12::D3D12_QUERY_HEAP_TYPE_TIMESTAMP,
     PipelineStatistics = d3d12::D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS,
     SOStatistics = d3d12::D3D12_QUERY_HEAP_TYPE_SO_STATISTICS,
-    // VideoDecodeStatistcs = d3d12::D3D12_QUERY_HEAP_TYPE_VIDEO_DECODE_STATISTICS,
+    // VideoDecodeStatistics = d3d12::D3D12_QUERY_HEAP_TYPE_VIDEO_DECODE_STATISTICS,
     // CopyQueueTimestamp = d3d12::D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP,
 }
 
diff --git a/tests/src/expectations.rs b/tests/src/expectations.rs
index eb5523905d..a3c90eac0b 100644
--- a/tests/src/expectations.rs
+++ b/tests/src/expectations.rs
@@ -53,7 +53,7 @@ pub struct FailureCase {
     /// [`AdapterInfo::device`]: wgt::AdapterInfo::device
     pub vendor: Option<u32>,
 
-    /// Name of adaper expected to fail, or `None` for any adapter name.
+    /// Name of adapter expected to fail, or `None` for any adapter name.
     ///
     /// If this is `Some(s)` and `s` is a substring of
     /// [`AdapterInfo::name`], then this `FailureCase` applies. If

From d3edbc57a9df91816dfe4915c380a311c19c2106 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 14 Jul 2024 22:13:50 +0200
Subject: [PATCH 023/226] Compute pass benchmark (#5767)

Adds a benchmark for compute pass recording, very similar to what we have for render passes.
---
 CHANGELOG.md                              |   1 +
 benches/Cargo.toml                        |   2 +-
 benches/README.md                         |  15 +
 benches/benches/computepass-bindless.wgsl |  26 +
 benches/benches/computepass.rs            | 574 ++++++++++++++++++++++
 benches/benches/computepass.wgsl          |  26 +
 benches/benches/renderpass.rs             |   5 +
 benches/benches/root.rs                   |   4 +-
 wgpu-core/src/binding_model.rs            |   2 +-
 9 files changed, 652 insertions(+), 3 deletions(-)
 create mode 100644 benches/benches/computepass-bindless.wgsl
 create mode 100644 benches/benches/computepass.rs
 create mode 100644 benches/benches/computepass.wgsl

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0bfe4577a3..c52dbac34c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -183,6 +183,7 @@ By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
 - Unconsumed vertex outputs are now always allowed. Removed `StageError::InputNotConsumed`, `Features::SHADER_UNUSED_VERTEX_OUTPUT`, and associated validation. By @Imberflur in [#5531](https://github.com/gfx-rs/wgpu/pull/5531)
 - Avoid introducing spurious features for optional dependencies. By @bjorn3 in [#5691](https://github.com/gfx-rs/wgpu/pull/5691)
 - `wgpu::Error` is now `Sync`, making it possible to be wrapped in `anyhow::Error` or `eyre::Report`. By @nolanderc in [#5820](https://github.com/gfx-rs/wgpu/pull/5820)
+- Added benchmark suite. By @cwfitzgerald in [#5694](https://github.com/gfx-rs/wgpu/pull/5694), compute passes by @wumpf in [#5767](https://github.com/gfx-rs/wgpu/pull/5767)
 
 #### Metal
 - Removed the `link` Cargo feature.
diff --git a/benches/Cargo.toml b/benches/Cargo.toml
index 65ac0eefdb..1dba81434b 100644
--- a/benches/Cargo.toml
+++ b/benches/Cargo.toml
@@ -43,4 +43,4 @@ pollster.workspace = true
 profiling.workspace = true
 rayon.workspace = true
 tracy-client = { workspace = true, optional = true }
-wgpu.workspace = true
+wgpu = { workspace = true, features = ["wgsl"] }
diff --git a/benches/README.md b/benches/README.md
index 3f20cbba7d..55af5fe18e 100644
--- a/benches/README.md
+++ b/benches/README.md
@@ -24,6 +24,21 @@ By default it measures 10k draw calls, with 90k total resources.
 
 Within this benchmark, both single threaded and multi-threaded recording are tested, as well as splitting
 the render pass into multiple passes over multiple command buffers.
+If available, it also tests a bindless approach, binding all textures at once instead of switching
+the bind group for every draw call.
+
+#### `Computepass`
+
+This benchmark measures the performance of recording and submitting a compute pass with a large
+number of dispatches and resources.
+By default it measures 10k dispatch calls, with 60k total resources, emulating an unusually complex and sequential compute workload.
+
+Within this benchmark, both single threaded and multi-threaded recording are tested, as well as splitting
+the compute pass into multiple passes over multiple command buffers.
+If available, it also tests a bindless approach, binding all resources at once instead of switching
+the bind group for every dispatch.
+TODO(https://github.com/gfx-rs/wgpu/issues/5766): The bindless version uses only 1k dispatches with 6k resources since it would be too slow for a reasonable benchmarking time otherwise.
+
 
 #### `Resource Creation`
 
diff --git a/benches/benches/computepass-bindless.wgsl b/benches/benches/computepass-bindless.wgsl
new file mode 100644
index 0000000000..402ff94489
--- /dev/null
+++ b/benches/benches/computepass-bindless.wgsl
@@ -0,0 +1,26 @@
+@group(0) @binding(0)
+var tex: binding_array<texture_2d<f32>>;
+
+@group(0) @binding(1)
+// TODO(https://github.com/gfx-rs/wgpu/issues/5765): The extra whitespace between the angle brackets is needed to workaround a parsing bug.
+var images: binding_array<texture_storage_2d<r32float, read_write> >;
+struct BufferElement {
+    element: vec4f,
+}
+
+@group(0) @binding(2)
+var<storage, read_write> buffers: binding_array<BufferElement>;
+
+@compute
+@workgroup_size(16)
+fn cs_main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
+    let offset = global_invocation_id.x; // Would be nice to offset this dynamically (it's just 0 always in the current setup)
+    // Each invocation works on two consecutive slots (idx0, idx1) of every binding array.
+    let idx0 = offset * 2 + 0;
+    let idx1 = offset * 2 + 1;
+    // Sum texel (0, 0) of both slots, once for the sampled textures and once for the storage images.
+    let tex = textureLoad(tex[idx0], vec2u(0), 0) + textureLoad(tex[idx1], vec2u(0), 0);
+    let image = textureLoad(images[idx0], vec2u(0)) + textureLoad(images[idx1], vec2u(0));
+    buffers[idx0].element = tex.rrrr;
+    buffers[idx1].element = image.rrrr;
+}
\ No newline at end of file
diff --git a/benches/benches/computepass.rs b/benches/benches/computepass.rs
new file mode 100644
index 0000000000..6ddbf55620
--- /dev/null
+++ b/benches/benches/computepass.rs
@@ -0,0 +1,574 @@
+use std::{
+    num::{NonZeroU32, NonZeroU64},
+    time::{Duration, Instant},
+};
+
+use criterion::{criterion_group, Criterion, Throughput};
+use nanorand::{Rng, WyRand};
+use once_cell::sync::Lazy;
+use rayon::iter::{IntoParallelIterator, ParallelIterator};
+
+use crate::DeviceState;
+
+#[cfg(not(test))]
+const DISPATCH_COUNT: usize = 10_000;
+#[cfg(test)]
+const DISPATCH_COUNT: usize = 8; // Running with up to 8 threads.
+
+// Currently bindless is _much_ slower than with regular resources,
+// since wgpu needs to issue barriers for all resources between each dispatch for all read/write textures & buffers.
+// This is in fact so slow that it makes the benchmark unusable when we use the same amount of
+// resources as the regular benchmark.
+// For details see https://github.com/gfx-rs/wgpu/issues/5766
+#[cfg(not(test))]
+const DISPATCH_COUNT_BINDLESS: usize = 1_000;
+#[cfg(test)]
+const DISPATCH_COUNT_BINDLESS: usize = 8; // Running with up to 8 threads.
+
+// Must match the number of textures in the computepass.wgsl shader
+const TEXTURES_PER_DISPATCH: usize = 2;
+const STORAGE_TEXTURES_PER_DISPATCH: usize = 2;
+const STORAGE_BUFFERS_PER_DISPATCH: usize = 2;
+
+const TEXTURE_COUNT: usize = DISPATCH_COUNT * TEXTURES_PER_DISPATCH;
+const STORAGE_TEXTURE_COUNT: usize = DISPATCH_COUNT * STORAGE_TEXTURES_PER_DISPATCH;
+const STORAGE_BUFFER_COUNT: usize = DISPATCH_COUNT * STORAGE_BUFFERS_PER_DISPATCH;
+
+const BUFFER_SIZE: u64 = 16;
+
+struct ComputepassState {
+    device_state: DeviceState,
+    pipeline: wgpu::ComputePipeline,
+    bind_groups: Vec<wgpu::BindGroup>,
+
+    // Bindless resources
+    bindless_bind_group: Option<wgpu::BindGroup>,
+    bindless_pipeline: Option<wgpu::ComputePipeline>,
+}
+
+impl ComputepassState {
+    /// Create and prepare all the resources needed for the computepass benchmark.
+    fn new() -> Self {
+        let device_state = DeviceState::new();
+
+        let supports_bindless = device_state.device.features().contains(
+            wgpu::Features::BUFFER_BINDING_ARRAY
+                | wgpu::Features::TEXTURE_BINDING_ARRAY
+                | wgpu::Features::STORAGE_RESOURCE_BINDING_ARRAY
+                | wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING,
+        )
+        // TODO: as of writing llvmpipe segfaults the bindless benchmark on ci
+        && device_state.adapter_info.driver != "llvmpipe";
+
+        // Performance gets considerably worse if the resources are shuffled.
+        //
+        // This more closely matches the real-world use case where resources have no
+        // well defined usage order.
+        let mut random = WyRand::new_seed(0x8BADF00D);
+
+        let mut bind_group_layout_entries = Vec::with_capacity(TEXTURES_PER_DISPATCH);
+        for i in 0..TEXTURES_PER_DISPATCH {
+            bind_group_layout_entries.push(wgpu::BindGroupLayoutEntry {
+                binding: i as u32,
+                visibility: wgpu::ShaderStages::COMPUTE,
+                ty: wgpu::BindingType::Texture {
+                    sample_type: wgpu::TextureSampleType::Float { filterable: true },
+                    view_dimension: wgpu::TextureViewDimension::D2,
+                    multisampled: false,
+                },
+                count: None,
+            });
+        }
+        for i in 0..STORAGE_TEXTURES_PER_DISPATCH {
+            bind_group_layout_entries.push(wgpu::BindGroupLayoutEntry {
+                binding: (TEXTURES_PER_DISPATCH + i) as u32,
+                visibility: wgpu::ShaderStages::COMPUTE,
+                ty: wgpu::BindingType::StorageTexture {
+                    access: wgpu::StorageTextureAccess::ReadWrite,
+                    format: wgpu::TextureFormat::R32Float,
+                    view_dimension: wgpu::TextureViewDimension::D2,
+                },
+                count: None,
+            });
+        }
+        for i in 0..STORAGE_BUFFERS_PER_DISPATCH {
+            bind_group_layout_entries.push(wgpu::BindGroupLayoutEntry {
+                binding: (TEXTURES_PER_DISPATCH + STORAGE_BUFFERS_PER_DISPATCH + i) as u32,
+                visibility: wgpu::ShaderStages::COMPUTE,
+                ty: wgpu::BindingType::Buffer {
+                    ty: wgpu::BufferBindingType::Storage { read_only: false },
+                    has_dynamic_offset: false,
+                    min_binding_size: NonZeroU64::new(BUFFER_SIZE),
+                },
+                count: None,
+            });
+        }
+
+        let bind_group_layout =
+            device_state
+                .device
+                .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+                    label: None,
+                    entries: &bind_group_layout_entries,
+                });
+
+        let mut texture_views = Vec::with_capacity(TEXTURE_COUNT);
+        for i in 0..TEXTURE_COUNT {
+            let texture = device_state
+                .device
+                .create_texture(&wgpu::TextureDescriptor {
+                    label: Some(&format!("Texture {i}")),
+                    size: wgpu::Extent3d {
+                        width: 1,
+                        height: 1,
+                        depth_or_array_layers: 1,
+                    },
+                    mip_level_count: 1,
+                    sample_count: 1,
+                    dimension: wgpu::TextureDimension::D2,
+                    format: wgpu::TextureFormat::Rgba8UnormSrgb,
+                    usage: wgpu::TextureUsages::TEXTURE_BINDING,
+                    view_formats: &[],
+                });
+            texture_views.push(texture.create_view(&wgpu::TextureViewDescriptor {
+                label: Some(&format!("Texture View {i}")),
+                ..Default::default()
+            }));
+        }
+        random.shuffle(&mut texture_views);
+        let texture_view_refs: Vec<_> = texture_views.iter().collect();
+
+        let mut storage_texture_views = Vec::with_capacity(STORAGE_TEXTURE_COUNT);
+        for i in 0..TEXTURE_COUNT {
+            let texture = device_state
+                .device
+                .create_texture(&wgpu::TextureDescriptor {
+                    label: Some(&format!("StorageTexture {i}")),
+                    size: wgpu::Extent3d {
+                        width: 1,
+                        height: 1,
+                        depth_or_array_layers: 1,
+                    },
+                    mip_level_count: 1,
+                    sample_count: 1,
+                    dimension: wgpu::TextureDimension::D2,
+                    format: wgpu::TextureFormat::R32Float,
+                    usage: wgpu::TextureUsages::STORAGE_BINDING,
+                    view_formats: &[],
+                });
+            storage_texture_views.push(texture.create_view(&wgpu::TextureViewDescriptor {
+                label: Some(&format!("StorageTexture View {i}")),
+                ..Default::default()
+            }));
+        }
+        random.shuffle(&mut storage_texture_views);
+        let storage_texture_view_refs: Vec<_> = storage_texture_views.iter().collect();
+
+        let mut storage_buffers = Vec::with_capacity(STORAGE_BUFFER_COUNT);
+        for i in 0..STORAGE_BUFFER_COUNT {
+            storage_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor {
+                label: Some(&format!("Buffer {i}")),
+                size: BUFFER_SIZE,
+                usage: wgpu::BufferUsages::STORAGE,
+                mapped_at_creation: false,
+            }));
+        }
+        random.shuffle(&mut storage_buffers);
+        let storage_buffer_bindings: Vec<_> = storage_buffers
+            .iter()
+            .map(|b| b.as_entire_buffer_binding())
+            .collect();
+
+        let mut bind_groups = Vec::with_capacity(DISPATCH_COUNT);
+        for dispatch_idx in 0..DISPATCH_COUNT {
+            let mut entries = Vec::with_capacity(TEXTURES_PER_DISPATCH);
+            for tex_idx in 0..TEXTURES_PER_DISPATCH {
+                entries.push(wgpu::BindGroupEntry {
+                    binding: tex_idx as u32,
+                    resource: wgpu::BindingResource::TextureView(
+                        &texture_views[dispatch_idx * TEXTURES_PER_DISPATCH + tex_idx],
+                    ),
+                });
+            }
+            for tex_idx in 0..STORAGE_TEXTURES_PER_DISPATCH {
+                entries.push(wgpu::BindGroupEntry {
+                    binding: (TEXTURES_PER_DISPATCH + tex_idx) as u32,
+                    resource: wgpu::BindingResource::TextureView(
+                        &storage_texture_views
+                            [dispatch_idx * STORAGE_TEXTURES_PER_DISPATCH + tex_idx],
+                    ),
+                });
+            }
+            for buffer_idx in 0..STORAGE_BUFFERS_PER_DISPATCH {
+                entries.push(wgpu::BindGroupEntry {
+                    binding: (TEXTURES_PER_DISPATCH + STORAGE_TEXTURES_PER_DISPATCH + buffer_idx)
+                        as u32,
+                    resource: wgpu::BindingResource::Buffer(
+                        storage_buffers[dispatch_idx * STORAGE_BUFFERS_PER_DISPATCH + buffer_idx]
+                            .as_entire_buffer_binding(),
+                    ),
+                });
+            }
+
+            bind_groups.push(
+                device_state
+                    .device
+                    .create_bind_group(&wgpu::BindGroupDescriptor {
+                        label: None,
+                        layout: &bind_group_layout,
+                        entries: &entries,
+                    }),
+            );
+        }
+        random.shuffle(&mut bind_groups);
+
+        let sm = device_state
+            .device
+            .create_shader_module(wgpu::include_wgsl!("computepass.wgsl"));
+
+        let pipeline_layout =
+            device_state
+                .device
+                .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+                    label: None,
+                    bind_group_layouts: &[&bind_group_layout],
+                    push_constant_ranges: &[],
+                });
+
+        let pipeline =
+            device_state
+                .device
+                .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
+                    label: Some("Compute Pipeline"),
+                    layout: Some(&pipeline_layout),
+                    module: &sm,
+                    entry_point: "cs_main",
+                    compilation_options: wgpu::PipelineCompilationOptions::default(),
+                    cache: None,
+                });
+
+        let (bindless_bind_group, bindless_pipeline) = if supports_bindless {
+            let bindless_bind_group_layout =
+                device_state
+                    .device
+                    .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+                        label: None,
+                        entries: &[
+                            wgpu::BindGroupLayoutEntry {
+                                binding: 0,
+                                visibility: wgpu::ShaderStages::COMPUTE,
+                                ty: wgpu::BindingType::Texture {
+                                    sample_type: wgpu::TextureSampleType::Float {
+                                        filterable: true,
+                                    },
+                                    view_dimension: wgpu::TextureViewDimension::D2,
+                                    multisampled: false,
+                                },
+                                count: Some(NonZeroU32::new(TEXTURE_COUNT as u32).unwrap()),
+                            },
+                            wgpu::BindGroupLayoutEntry {
+                                binding: 1,
+                                visibility: wgpu::ShaderStages::COMPUTE,
+                                ty: wgpu::BindingType::StorageTexture {
+                                    access: wgpu::StorageTextureAccess::ReadWrite,
+                                    format: wgpu::TextureFormat::R32Float,
+                                    view_dimension: wgpu::TextureViewDimension::D2,
+                                },
+                                count: Some(NonZeroU32::new(STORAGE_TEXTURE_COUNT as u32).unwrap()),
+                            },
+                            wgpu::BindGroupLayoutEntry {
+                                binding: 2,
+                                visibility: wgpu::ShaderStages::COMPUTE,
+                                ty: wgpu::BindingType::Buffer {
+                                    ty: wgpu::BufferBindingType::Storage { read_only: false },
+                                    has_dynamic_offset: false,
+                                    min_binding_size: std::num::NonZeroU64::new(BUFFER_SIZE),
+                                },
+                                count: Some(NonZeroU32::new(STORAGE_BUFFER_COUNT as u32).unwrap()),
+                            },
+                        ],
+                    });
+
+            let bindless_bind_group =
+                device_state
+                    .device
+                    .create_bind_group(&wgpu::BindGroupDescriptor {
+                        label: None,
+                        layout: &bindless_bind_group_layout,
+                        entries: &[
+                            wgpu::BindGroupEntry {
+                                binding: 0,
+                                resource: wgpu::BindingResource::TextureViewArray(
+                                    &texture_view_refs[..DISPATCH_COUNT_BINDLESS],
+                                ),
+                            },
+                            wgpu::BindGroupEntry {
+                                binding: 1,
+                                resource: wgpu::BindingResource::TextureViewArray(
+                                    &storage_texture_view_refs[..DISPATCH_COUNT_BINDLESS],
+                                ),
+                            },
+                            wgpu::BindGroupEntry {
+                                binding: 2,
+                                resource: wgpu::BindingResource::BufferArray(
+                                    &storage_buffer_bindings[..DISPATCH_COUNT_BINDLESS],
+                                ),
+                            },
+                        ],
+                    });
+
+            let bindless_sm = device_state
+                .device
+                .create_shader_module(wgpu::include_wgsl!("computepass-bindless.wgsl"));
+
+            let bindless_pipeline_layout =
+                device_state
+                    .device
+                    .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+                        label: None,
+                        bind_group_layouts: &[&bindless_bind_group_layout],
+                        push_constant_ranges: &[],
+                    });
+
+            let bindless_pipeline =
+                device_state
+                    .device
+                    .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
+                        label: Some("Compute Pipeline bindless"),
+                        layout: Some(&bindless_pipeline_layout),
+                        module: &bindless_sm,
+                        entry_point: "cs_main",
+                        compilation_options: wgpu::PipelineCompilationOptions::default(),
+                        cache: None,
+                    });
+
+            (Some(bindless_bind_group), Some(bindless_pipeline))
+        } else {
+            (None, None)
+        };
+
+        Self {
+            device_state,
+            pipeline,
+            bind_groups,
+
+            bindless_bind_group,
+            bindless_pipeline,
+        }
+    }
+
+    /// Records one compute pass containing this subpass's contiguous share of dispatches
+    /// (`DISPATCH_COUNT / total_passes` of them) and returns the finished command buffer.
+    fn run_subpass(&self, pass_number: usize, total_passes: usize) -> wgpu::CommandBuffer {
+        profiling::scope!("Computepass", &format!("Pass {pass_number}/{total_passes}"));
+
+        let per_pass = DISPATCH_COUNT / total_passes;
+        let first = pass_number * per_pass;
+        let last = first + per_pass;
+        let device = &self.device_state.device;
+        let mut encoder =
+            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
+
+        // Scope the pass so it is dropped before the encoder is finished.
+        {
+            let pass_desc = wgpu::ComputePassDescriptor {
+                label: None,
+                timestamp_writes: None,
+            };
+            let mut pass = encoder.begin_compute_pass(&pass_desc);
+            for idx in first..last {
+                pass.set_pipeline(&self.pipeline);
+                pass.set_bind_group(0, &self.bind_groups[idx], &[]);
+                pass.dispatch_workgroups(1, 1, 1);
+            }
+        }
+        encoder.finish()
+    }
+
+    /// Records `DISPATCH_COUNT_BINDLESS` dispatches that all share the bindless bind group.
+    /// Panics if the bindless pipeline or bind group was not created.
+    fn run_bindless_pass(&self) -> wgpu::CommandBuffer {
+        profiling::scope!("Bindless Computepass");
+
+        let device = &self.device_state.device;
+        let mut encoder =
+            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
+
+        // Scope the pass so it is dropped before the encoder is finished.
+        {
+            let pass_desc = wgpu::ComputePassDescriptor {
+                label: None,
+                timestamp_writes: None,
+            };
+            let mut pass = encoder.begin_compute_pass(&pass_desc);
+            pass.set_pipeline(self.bindless_pipeline.as_ref().unwrap());
+            pass.set_bind_group(0, self.bindless_bind_group.as_ref().unwrap(), &[]);
+            (0..DISPATCH_COUNT_BINDLESS).for_each(|_| pass.dispatch_workgroups(1, 1, 1));
+        }
+
+        encoder.finish()
+    }
+}
+
+fn run_bench(ctx: &mut Criterion) {
+    let state = Lazy::new(ComputepassState::new);
+
+    // Test 10k dispatch calls split up into 1, 2, 4, and 8 computepasses
+    let mut group = ctx.benchmark_group("Computepass: Single Threaded");
+    group.throughput(Throughput::Elements(DISPATCH_COUNT as _));
+
+    for time_submit in [false, true] {
+        for cpasses in [1, 2, 4, 8] {
+            let dispatch_per_pass = DISPATCH_COUNT / cpasses;
+
+            let label = if time_submit {
+                "Submit Time"
+            } else {
+                "Computepass Time"
+            };
+
+            group.bench_function(
+                &format!("{cpasses} computepasses x {dispatch_per_pass} dispatches ({label})"),
+                |b| {
+                    Lazy::force(&state);
+
+                    b.iter_custom(|iters| {
+                        profiling::scope!("benchmark invocation");
+
+                        let mut duration = Duration::ZERO;
+
+                        for _ in 0..iters {
+                            profiling::scope!("benchmark iteration");
+
+                            let mut start = Instant::now();
+
+                            let mut buffers: Vec<wgpu::CommandBuffer> = Vec::with_capacity(cpasses);
+                            for i in 0..cpasses {
+                                buffers.push(state.run_subpass(i, cpasses));
+                            }
+
+                            if time_submit {
+                                start = Instant::now();
+                            } else {
+                                duration += start.elapsed();
+                            }
+
+                            state.device_state.queue.submit(buffers);
+
+                            if time_submit {
+                                duration += start.elapsed();
+                            }
+
+                            state.device_state.device.poll(wgpu::Maintain::Wait);
+                        }
+
+                        duration
+                    })
+                },
+            );
+        }
+    }
+    group.finish();
+
+    // Test 10k dispatch calls split up over 2, 4, and 8 threads.
+    let mut group = ctx.benchmark_group("Computepass: Multi Threaded");
+    group.throughput(Throughput::Elements(DISPATCH_COUNT as _));
+
+    for threads in [2, 4, 8] {
+        let dispatch_per_pass = DISPATCH_COUNT / threads;
+        group.bench_function(
+            &format!("{threads} threads x {dispatch_per_pass} dispatch"),
+            |b| {
+                Lazy::force(&state);
+
+                b.iter_custom(|iters| {
+                    profiling::scope!("benchmark invocation");
+
+                    // This benchmark hangs on Apple Paravirtualized GPUs. No idea why.
+                    if state.device_state.adapter_info.name.contains("Paravirtual") {
+                        return Duration::from_secs_f32(1.0);
+                    }
+
+                    let mut duration = Duration::ZERO;
+
+                    for _ in 0..iters {
+                        profiling::scope!("benchmark iteration");
+
+                        let start = Instant::now();
+
+                        let buffers = (0..threads)
+                            .into_par_iter()
+                            .map(|i| state.run_subpass(i, threads))
+                            .collect::<Vec<_>>();
+
+                        duration += start.elapsed();
+
+                        state.device_state.queue.submit(buffers);
+                        state.device_state.device.poll(wgpu::Maintain::Wait);
+                    }
+
+                    duration
+                })
+            },
+        );
+    }
+    group.finish();
+
+    // Test DISPATCH_COUNT_BINDLESS dispatch calls in one computepass using a single bindless bind group.
+    let mut group = ctx.benchmark_group("Computepass: Bindless");
+    group.throughput(Throughput::Elements(DISPATCH_COUNT_BINDLESS as _));
+
+    group.bench_function(&format!("{DISPATCH_COUNT_BINDLESS} dispatch"), |b| {
+        Lazy::force(&state);
+
+        b.iter_custom(|iters| {
+            profiling::scope!("benchmark invocation");
+
+            // This benchmark hangs on Apple Paravirtualized GPUs. No idea why.
+            if state.device_state.adapter_info.name.contains("Paravirtual") {
+                return Duration::from_secs_f32(1.0);
+            }
+
+            // Need bindless to run this benchmark
+            if state.bindless_bind_group.is_none() {
+                return Duration::from_secs_f32(1.0);
+            }
+
+            let mut duration = Duration::ZERO;
+
+            for _ in 0..iters {
+                profiling::scope!("benchmark iteration");
+
+                let start = Instant::now();
+
+                let buffer = state.run_bindless_pass();
+
+                duration += start.elapsed();
+
+                state.device_state.queue.submit([buffer]);
+                state.device_state.device.poll(wgpu::Maintain::Wait);
+            }
+
+            duration
+        })
+    });
+    group.finish();
+
+    ctx.bench_function(
+        &format!(
+            "Computepass: Empty Submit with {} Resources",
+            TEXTURE_COUNT + STORAGE_TEXTURE_COUNT + STORAGE_BUFFER_COUNT
+        ),
+        |b| {
+            Lazy::force(&state);
+
+            b.iter(|| state.device_state.queue.submit([]));
+        },
+    );
+}
+
+criterion_group! {
+    name = computepass;
+    config = Criterion::default().measurement_time(Duration::from_secs(10));
+    targets = run_bench,
+}
diff --git a/benches/benches/computepass.wgsl b/benches/benches/computepass.wgsl
new file mode 100644
index 0000000000..83d7d49785
--- /dev/null
+++ b/benches/benches/computepass.wgsl
@@ -0,0 +1,26 @@
+@group(0) @binding(0)
+var tex_0: texture_2d<f32>;
+
+@group(0) @binding(1)
+var tex_1: texture_2d<f32>;
+
+@group(0) @binding(2)
+var image_0: texture_storage_2d<r32float, read_write>;
+
+@group(0) @binding(3)
+var image_1: texture_storage_2d<r32float, read_write>;
+
+@group(0) @binding(4)
+var<storage, read_write> buffer0 : array<vec4f>;
+
+@group(0) @binding(5)
+var<storage, read_write> buffer1 : array<vec4f>;
+
+@compute
+@workgroup_size(16)
+fn cs_main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
+    let tex = textureLoad(tex_0, vec2u(0), 0) + textureLoad(tex_1, vec2u(0), 0);
+    let image = textureLoad(image_0, vec2u(0)) + textureLoad(image_1, vec2u(0));
+    buffer0[0] = tex.rrrr;
+    buffer1[0] = image.rrrr;
+}
diff --git a/benches/benches/renderpass.rs b/benches/benches/renderpass.rs
index fcb35c3864..9a204c0f79 100644
--- a/benches/benches/renderpass.rs
+++ b/benches/benches/renderpass.rs
@@ -10,7 +10,12 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator};
 
 use crate::DeviceState;
 
+#[cfg(test)]
+const DRAW_COUNT: usize = 8; // Running with up to 8 threads.
+
+#[cfg(not(test))]
 const DRAW_COUNT: usize = 10_000;
+
 // Must match the number of textures in the renderpass.wgsl shader
 const TEXTURES_PER_DRAW: usize = 7;
 const VERTEX_BUFFERS_PER_DRAW: usize = 2;
diff --git a/benches/benches/root.rs b/benches/benches/root.rs
index 6ef2efabc2..064617783d 100644
--- a/benches/benches/root.rs
+++ b/benches/benches/root.rs
@@ -1,6 +1,7 @@
 use criterion::criterion_main;
 use pollster::block_on;
 
+mod computepass;
 mod renderpass;
 mod resource_creation;
 mod shader;
@@ -45,7 +46,7 @@ impl DeviceState {
                 required_features: adapter.features(),
                 required_limits: adapter.limits(),
                 memory_hints: wgpu::MemoryHints::Performance,
-                label: Some("RenderPass Device"),
+                label: Some("Compute/RenderPass Device"),
             },
             None,
         ))
@@ -61,6 +62,7 @@ impl DeviceState {
 
 criterion_main!(
     renderpass::renderpass,
+    computepass::computepass,
     resource_creation::resource_creation,
     shader::shader
 );
diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs
index 729618995d..91952a8f8a 100644
--- a/wgpu-core/src/binding_model.rs
+++ b/wgpu-core/src/binding_model.rs
@@ -66,7 +66,7 @@ pub enum CreateBindGroupLayoutError {
     },
     #[error(transparent)]
     TooManyBindings(BindingTypeMaxCountError),
-    #[error("Binding index {binding} is greater than the maximum index {maximum}")]
+    #[error("Binding index {binding} is greater than the maximum number {maximum}")]
     InvalidBindingIndex { binding: u32, maximum: u32 },
     #[error("Invalid visibility {0:?}")]
     InvalidVisibility(wgt::ShaderStages),

From 12e07eb1bf96f1fccde004717e6b186d8067f925 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sun, 14 Jul 2024 22:14:40 +0200
Subject: [PATCH 024/226] build(deps): bump the patch-updates group across 1
 directory with 23 updates (#5944)

* build(deps): bump the patch-updates group across 1 directory with 23 updates

Bumps the patch-updates group with 18 updates in the / directory:

| Package | From | To |
| --- | --- | --- |
| [document-features](https://github.com/slint-ui/document-features) | `0.2.8` | `0.2.9` |
| [glam](https://github.com/bitshifter/glam-rs) | `0.27.0` | `0.28.0` |
| [serde](https://github.com/serde-rs/serde) | `1.0.203` | `1.0.204` |
| [serde_json](https://github.com/serde-rs/json) | `1.0.119` | `1.0.120` |
| [metal](https://github.com/gfx-rs/metal-rs) | `0.28.0` | `0.29.0` |
| [syn](https://github.com/dtolnay/syn) | `2.0.68` | `2.0.70` |
| [ab_glyph](https://github.com/alexheretic/ab-glyph) | `0.2.27` | `0.2.28` |
| [async-trait](https://github.com/dtolnay/async-trait) | `0.1.80` | `0.1.81` |
| [cc](https://github.com/rust-lang/cc-rs) | `1.0.103` | `1.1.0` |
| [clap](https://github.com/clap-rs/clap) | `4.5.8` | `4.5.9` |
| [deno_unsync](https://github.com/denoland/deno_unsync) | `0.3.5` | `0.3.10` |
| oorandom | `11.1.3` | `11.1.4` |
| [tinyvec](https://github.com/Lokathor/tinyvec) | `1.6.1` | `1.8.0` |
| [unicode-id-start](https://github.com/Boshen/unicode-id-start) | `1.1.2` | `1.2.0` |
| [uuid](https://github.com/uuid-rs/uuid) | `1.9.1` | `1.10.0` |
| [wayland-backend](https://github.com/smithay/wayland-rs) | `0.3.4` | `0.3.5` |
| [windows_i686_gnullvm](https://github.com/microsoft/windows-rs) | `0.52.5` | `0.52.6` |
| [zerocopy](https://github.com/google/zerocopy) | `0.7.34` | `0.7.35` |



Updates `document-features` from 0.2.8 to 0.2.9
- [Release notes](https://github.com/slint-ui/document-features/releases)
- [Changelog](https://github.com/slint-ui/document-features/blob/master/CHANGELOG.md)
- [Commits](https://github.com/slint-ui/document-features/compare/v0.2.8...v0.2.9)

Updates `glam` from 0.27.0 to 0.28.0
- [Changelog](https://github.com/bitshifter/glam-rs/blob/main/CHANGELOG.md)
- [Commits](https://github.com/bitshifter/glam-rs/compare/0.27.0...0.28.0)

Updates `serde` from 1.0.203 to 1.0.204
- [Release notes](https://github.com/serde-rs/serde/releases)
- [Commits](https://github.com/serde-rs/serde/compare/v1.0.203...v1.0.204)

Updates `serde_json` from 1.0.119 to 1.0.120
- [Release notes](https://github.com/serde-rs/json/releases)
- [Commits](https://github.com/serde-rs/json/compare/v1.0.119...v1.0.120)

Updates `metal` from 0.28.0 to 0.29.0
- [Release notes](https://github.com/gfx-rs/metal-rs/releases)
- [Commits](https://github.com/gfx-rs/metal-rs/commits)

Updates `syn` from 2.0.68 to 2.0.70
- [Release notes](https://github.com/dtolnay/syn/releases)
- [Commits](https://github.com/dtolnay/syn/compare/2.0.68...2.0.70)

Updates `ab_glyph` from 0.2.27 to 0.2.28
- [Release notes](https://github.com/alexheretic/ab-glyph/releases)
- [Commits](https://github.com/alexheretic/ab-glyph/compare/ab-glyph-0.2.27...ab-glyph-0.2.28)

Updates `async-trait` from 0.1.80 to 0.1.81
- [Release notes](https://github.com/dtolnay/async-trait/releases)
- [Commits](https://github.com/dtolnay/async-trait/compare/0.1.80...0.1.81)

Updates `cc` from 1.0.103 to 1.1.0
- [Release notes](https://github.com/rust-lang/cc-rs/releases)
- [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md)
- [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.0.103...cc-v1.1.0)

Updates `clap` from 4.5.8 to 4.5.9
- [Release notes](https://github.com/clap-rs/clap/releases)
- [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md)
- [Commits](https://github.com/clap-rs/clap/compare/clap_complete-v4.5.8...v4.5.9)

Updates `clap_builder` from 4.5.8 to 4.5.9
- [Release notes](https://github.com/clap-rs/clap/releases)
- [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md)
- [Commits](https://github.com/clap-rs/clap/compare/v4.5.8...v4.5.9)

Updates `deno_unsync` from 0.3.5 to 0.3.10
- [Commits](https://github.com/denoland/deno_unsync/commits)

Updates `oorandom` from 11.1.3 to 11.1.4

Updates `owned_ttf_parser` from 0.21.0 to 0.24.0
- [Release notes](https://github.com/alexheretic/owned-ttf-parser/releases)
- [Changelog](https://github.com/alexheretic/owned-ttf-parser/blob/main/CHANGELOG.md)
- [Commits](https://github.com/alexheretic/owned-ttf-parser/compare/0.21.0...0.24.0)

Updates `serde_derive` from 1.0.203 to 1.0.204
- [Release notes](https://github.com/serde-rs/serde/releases)
- [Commits](https://github.com/serde-rs/serde/compare/v1.0.203...v1.0.204)

Updates `tinyvec` from 1.6.1 to 1.8.0
- [Changelog](https://github.com/Lokathor/tinyvec/blob/main/CHANGELOG.md)
- [Commits](https://github.com/Lokathor/tinyvec/compare/v1.6.1...v1.8.0)

Updates `ttf-parser` from 0.21.1 to 0.24.0
- [Changelog](https://github.com/RazrFalcon/ttf-parser/blob/master/CHANGELOG.md)
- [Commits](https://github.com/RazrFalcon/ttf-parser/compare/v0.21.1...v0.24.0)

Updates `unicode-id-start` from 1.1.2 to 1.2.0
- [Commits](https://github.com/Boshen/unicode-id-start/commits)

Updates `uuid` from 1.9.1 to 1.10.0
- [Release notes](https://github.com/uuid-rs/uuid/releases)
- [Commits](https://github.com/uuid-rs/uuid/compare/1.9.1...1.10.0)

Updates `wayland-backend` from 0.3.4 to 0.3.5
- [Release notes](https://github.com/smithay/wayland-rs/releases)
- [Changelog](https://github.com/Smithay/wayland-rs/blob/master/historical_changelog.md)
- [Commits](https://github.com/smithay/wayland-rs/commits)

Updates `windows_i686_gnullvm` from 0.52.5 to 0.52.6
- [Release notes](https://github.com/microsoft/windows-rs/releases)
- [Commits](https://github.com/microsoft/windows-rs/commits)

Updates `zerocopy` from 0.7.34 to 0.7.35
- [Release notes](https://github.com/google/zerocopy/releases)
- [Changelog](https://github.com/google/zerocopy/blob/main/CHANGELOG.md)
- [Commits](https://github.com/google/zerocopy/commits)

Updates `zerocopy-derive` from 0.7.34 to 0.7.35
- [Release notes](https://github.com/google/zerocopy/releases)
- [Changelog](https://github.com/google/zerocopy/blob/main/CHANGELOG.md)
- [Commits](https://github.com/google/zerocopy/commits)

---
updated-dependencies:
- dependency-name: document-features
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: glam
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: patch-updates
- dependency-name: serde
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: serde_json
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: metal
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: patch-updates
- dependency-name: syn
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: ab_glyph
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: async-trait
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: cc
  dependency-type: indirect
  update-type: version-update:semver-minor
  dependency-group: patch-updates
- dependency-name: clap
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: clap_builder
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: deno_unsync
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: oorandom
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: owned_ttf_parser
  dependency-type: indirect
  update-type: version-update:semver-minor
  dependency-group: patch-updates
- dependency-name: serde_derive
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: tinyvec
  dependency-type: indirect
  update-type: version-update:semver-minor
  dependency-group: patch-updates
- dependency-name: ttf-parser
  dependency-type: indirect
  update-type: version-update:semver-minor
  dependency-group: patch-updates
- dependency-name: unicode-id-start
  dependency-type: indirect
  update-type: version-update:semver-minor
  dependency-group: patch-updates
- dependency-name: uuid
  dependency-type: indirect
  update-type: version-update:semver-minor
  dependency-group: patch-updates
- dependency-name: wayland-backend
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: windows_i686_gnullvm
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: zerocopy
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: zerocopy-derive
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update encase to resolve glam dependency issue

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Andreas Reich <r_andreas2@web.de>
---
 Cargo.lock            | 161 +++++++++++++++++++++---------------------
 Cargo.toml            |  10 +--
 naga/Cargo.toml       |   2 +-
 wgpu-hal/Cargo.toml   |   4 +-
 wgpu-types/Cargo.toml |   2 +-
 5 files changed, 90 insertions(+), 89 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 245273bb62..86391670e3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4,9 +4,9 @@ version = 3
 
 [[package]]
 name = "ab_glyph"
-version = "0.2.27"
+version = "0.2.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1c3a1cbc201cc13ed06cf875efb781f2249b3677f5c74571b67d817877f9d697"
+checksum = "79faae4620f45232f599d9bc7b290f88247a0834162c4495ab2f02d60004adfb"
 dependencies = [
  "ab_glyph_rasterizer",
  "owned_ttf_parser",
@@ -186,7 +186,7 @@ dependencies = [
  "argh_shared",
  "proc-macro2",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
@@ -236,13 +236,13 @@ dependencies = [
 
 [[package]]
 name = "async-trait"
-version = "0.1.80"
+version = "0.1.81"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca"
+checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
@@ -385,7 +385,7 @@ checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
@@ -448,9 +448,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
 
 [[package]]
 name = "cc"
-version = "1.0.103"
+version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2755ff20a1d93490d26ba33a6f092a38a508398a5320df5d4b3014fcccce9410"
+checksum = "eaff6f8ce506b9773fa786672d63fc7a191ffea1be33f72bbd4aeacefca9ffc8"
 dependencies = [
  "jobserver",
  "libc",
@@ -513,9 +513,9 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.5.8"
+version = "4.5.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "84b3edb18336f4df585bc9aa31dd99c036dfa5dc5e9a2939a722a188f3a8970d"
+checksum = "64acc1846d54c1fe936a78dc189c34e28d3f5afc348403f28ecf53660b9b8462"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -523,9 +523,9 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.5.8"
+version = "4.5.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c1c09dd5ada6c6c78075d6fd0da3f90d8080651e2d6cc8eb2f1aaa4034ced708"
+checksum = "6fb8393d67ba2e7bfaf28a23458e4e2b543cc73a99595511eb207fdb8aede942"
 dependencies = [
  "anstream",
  "anstyle",
@@ -542,7 +542,7 @@ dependencies = [
  "heck 0.5.0",
  "proc-macro2",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
@@ -887,7 +887,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f"
 dependencies = [
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
@@ -1034,16 +1034,17 @@ dependencies = [
  "quote",
  "strum",
  "strum_macros",
- "syn 2.0.68",
+ "syn 2.0.70",
  "thiserror",
 ]
 
 [[package]]
 name = "deno_unsync"
-version = "0.3.5"
+version = "0.3.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6cfb230b6e1965cd2695f7c4082adb278e0b999175a0fbb0852c7e67d26654b1"
+checksum = "c3c8b95582c2023dbb66fccc37421b374026f5915fa507d437cb566904db9a3a"
 dependencies = [
+ "parking_lot",
  "tokio",
 ]
 
@@ -1106,7 +1107,7 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
@@ -1119,7 +1120,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustc_version 0.4.0",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
@@ -1145,9 +1146,9 @@ dependencies = [
 
 [[package]]
 name = "document-features"
-version = "0.2.8"
+version = "0.2.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef5282ad69563b5fc40319526ba27e0e7363d552a896f0297d54f767717f9b95"
+checksum = "4a344f0a78e998787823fe12c8245b9e1fcdb1da9eca625082c2e3d641297fa3"
 dependencies = [
  "litrs",
 ]
@@ -1180,9 +1181,9 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
 
 [[package]]
 name = "encase"
-version = "0.8.0"
+version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a9299a95fa5671ddf29ecc22b00e121843a65cb9ff24911e394b4ae556baf36"
+checksum = "0265fa0e7bcdb058128cdf7597cdacea42e33911713663a04d971a39cad16afa"
 dependencies = [
  "const_panic",
  "encase_derive",
@@ -1192,22 +1193,22 @@ dependencies = [
 
 [[package]]
 name = "encase_derive"
-version = "0.8.0"
+version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "07e09decb3beb1fe2db6940f598957b2e1f7df6206a804d438ff6cb2a9cddc10"
+checksum = "e3b6f7502bafc52a60b5582560a2aaee16921eef79a742ae48dd411fe7a9263b"
 dependencies = [
  "encase_derive_impl",
 ]
 
 [[package]]
 name = "encase_derive_impl"
-version = "0.8.0"
+version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd31dbbd9743684d339f907a87fe212cb7b51d75b9e8e74181fe363199ee9b47"
+checksum = "b36f2ddfca91251bed7f931f24b192e4eaf0a0e0fa70cf81cfb1416a1973620e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
@@ -1353,7 +1354,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
@@ -1478,7 +1479,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
@@ -1567,9 +1568,9 @@ dependencies = [
 
 [[package]]
 name = "glam"
-version = "0.27.0"
+version = "0.28.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e05e7e6723e3455f4818c7b26e855439f7546cf617ef669d1adedb8669e5cb9"
+checksum = "779ae4bf7e8421cf91c0b3b64e7e8b40b862fba4d393f59150042de7c4965a94"
 
 [[package]]
 name = "glow"
@@ -2014,7 +2015,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d"
 dependencies = [
  "cfg-if",
- "windows-targets 0.48.5",
+ "windows-targets 0.52.5",
 ]
 
 [[package]]
@@ -2142,9 +2143,9 @@ dependencies = [
 
 [[package]]
 name = "metal"
-version = "0.28.0"
+version = "0.29.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5637e166ea14be6063a3f8ba5ccb9a4159df7d8f6d61c02fc3d480b1f90dcfcb"
+checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21"
 dependencies = [
  "bitflags 2.6.0",
  "block",
@@ -2455,7 +2456,7 @@ dependencies = [
  "proc-macro-crate 3.1.0",
  "proc-macro2",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
@@ -2530,9 +2531,9 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
 
 [[package]]
 name = "oorandom"
-version = "11.1.3"
+version = "11.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
+checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9"
 
 [[package]]
 name = "orbclient"
@@ -2572,9 +2573,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
 
 [[package]]
 name = "owned_ttf_parser"
-version = "0.21.0"
+version = "0.24.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6b41438d2fc63c46c74a2203bf5ccd82c41ba04347b2fcf5754f230b167067d5"
+checksum = "490d3a563d3122bf7c911a59b0add9389e5ec0f5f0c3ac6b91ff235a0e6a7f90"
 dependencies = [
  "ttf-parser",
 ]
@@ -2656,7 +2657,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
@@ -2795,7 +2796,7 @@ checksum = "07c277e4e643ef00c1233393c673f655e3672cf7eb3ba08a00bdd0ea59139b5f"
 dependencies = [
  "proc-macro-rules-macros",
  "proc-macro2",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
@@ -2807,7 +2808,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
@@ -3145,29 +3146,29 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
 
 [[package]]
 name = "serde"
-version = "1.0.203"
+version = "1.0.204"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094"
+checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.203"
+version = "1.0.204"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba"
+checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
 name = "serde_json"
-version = "1.0.119"
+version = "1.0.120"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e8eddb61f0697cc3989c5d64b452f5488e2b8a60fd7d5076a3045076ffef8cb0"
+checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5"
 dependencies = [
  "indexmap",
  "itoa",
@@ -3427,7 +3428,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustversion",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
@@ -3443,9 +3444,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.68"
+version = "2.0.70"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9"
+checksum = "2f0209b68b3613b093e0ec905354eccaedcfe83b8cb37cbdeae64026c3064c16"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -3478,7 +3479,7 @@ checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
@@ -3572,9 +3573,9 @@ dependencies = [
 
 [[package]]
 name = "tinyvec"
-version = "1.6.1"
+version = "1.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c55115c6fbe2d2bef26eb09ad74bde02d8255476fc0c7b515ef09fbb35742d82"
+checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938"
 dependencies = [
  "tinyvec_macros",
 ]
@@ -3612,7 +3613,7 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
@@ -3714,9 +3715,9 @@ dependencies = [
 
 [[package]]
 name = "ttf-parser"
-version = "0.21.1"
+version = "0.24.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2c591d83f69777866b9126b24c6dd9a18351f177e49d625920d19f989fd31cf8"
+checksum = "8686b91785aff82828ed725225925b33b4fde44c4bb15876e5f7c832724c420a"
 
 [[package]]
 name = "unic-char-property"
@@ -3767,9 +3768,9 @@ checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75"
 
 [[package]]
 name = "unicode-id-start"
-version = "1.1.2"
+version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b8f73150333cb58412db36f2aca8f2875b013049705cc77b94ded70a1ab1f5da"
+checksum = "bc3882f69607a2ac8cc4de3ee7993d8f68bb06f2974271195065b3bd07f2edea"
 
 [[package]]
 name = "unicode-ident"
@@ -3837,9 +3838,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
 
 [[package]]
 name = "uuid"
-version = "1.9.1"
+version = "1.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5de17fd2f7da591098415cff336e12965a28061ddace43b59cb3c430179c9439"
+checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314"
 dependencies = [
  "getrandom",
  "serde",
@@ -3921,7 +3922,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
  "wasm-bindgen-shared",
 ]
 
@@ -3955,7 +3956,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
@@ -3988,21 +3989,21 @@ checksum = "b7f89739351a2e03cb94beb799d47fb2cac01759b40ec441f7de39b00cbf7ef0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
 name = "wayland-backend"
-version = "0.3.4"
+version = "0.3.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "34e9e6b6d4a2bb4e7e69433e0b35c7923b95d4dc8503a84d25ec917a4bbfdf07"
+checksum = "269c04f203640d0da2092d1b8d89a2d081714ae3ac2f1b53e99f205740517198"
 dependencies = [
  "cc",
  "downcast-rs",
  "rustix",
  "scoped-tls",
  "smallvec",
- "wayland-sys 0.31.2",
+ "wayland-sys 0.31.3",
 ]
 
 [[package]]
@@ -4173,9 +4174,9 @@ dependencies = [
 
 [[package]]
 name = "wayland-sys"
-version = "0.31.2"
+version = "0.31.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "105b1842da6554f91526c14a2a2172897b7f745a805d62af4ce698706be79c12"
+checksum = "4a6754825230fa5b27bafaa28c30b3c9e72c55530581220cef401fa422c0fae7"
 dependencies = [
  "dlib",
  "log",
@@ -4368,7 +4369,7 @@ version = "0.20.0"
 dependencies = [
  "heck 0.5.0",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
 
 [[package]]
@@ -4669,9 +4670,9 @@ checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
 
 [[package]]
 name = "windows_i686_gnullvm"
-version = "0.52.5"
+version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
 
 [[package]]
 name = "windows_i686_msvc"
@@ -4927,20 +4928,20 @@ checksum = "791978798f0597cfc70478424c2b4fdc2b7a8024aaff78497ef00f24ef674193"
 
 [[package]]
 name = "zerocopy"
-version = "0.7.34"
+version = "0.7.35"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087"
+checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
 dependencies = [
  "zerocopy-derive",
 ]
 
 [[package]]
 name = "zerocopy-derive"
-version = "0.7.34"
+version = "0.7.35"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b"
+checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.68",
+ "syn 2.0.70",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index ce5ebcce1d..a9bb351a2b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -81,14 +81,14 @@ cfg-if = "1"
 criterion = "0.5"
 codespan-reporting = "0.11"
 ctor = "0.2"
-document-features = "0.2.8"
-encase = "0.8"
+document-features = "0.2.9"
+encase = "0.9"
 env_logger = "0.11"
 fern = "0.6"
 flume = "0.11"
 futures-lite = "2"
 getrandom = "0.2"
-glam = "0.27"
+glam = "0.28"
 heck = "0.5.0"
 image = { version = "0.24", default-features = false, features = ["png"] }
 itertools = { version = "0.10.5" }
@@ -119,7 +119,7 @@ renderdoc-sys = "1.1.0"
 ron = "0.8"
 rustc-hash = "1.1.0"
 serde = "1"
-serde_json = "1.0.119"
+serde_json = "1.0.120"
 smallvec = "1"
 static_assertions = "1.1.0"
 strum = { version = "0.25.0", features = ["derive"] }
@@ -136,7 +136,7 @@ winit = { version = "0.29", features = ["android-native-activity"] }
 # Metal dependencies
 block = "0.1"
 core-graphics-types = "0.1"
-metal = { version = "0.28.0" }
+metal = { version = "0.29.0" }
 objc = "0.2.5"
 
 # Vulkan dependencies
diff --git a/naga/Cargo.toml b/naga/Cargo.toml
index 8478cc6f7b..f9e7f766fa 100644
--- a/naga/Cargo.toml
+++ b/naga/Cargo.toml
@@ -71,7 +71,7 @@ indexmap = { version = "2", features = ["std"] }
 log = "0.4"
 spirv = { version = "0.3", optional = true }
 thiserror = "1.0.61"
-serde = { version = "1.0.203", features = ["derive"], optional = true }
+serde = { version = "1.0.204", features = ["derive"], optional = true }
 petgraph = { version = "0.6", optional = true }
 pp-rs = { version = "0.2.1", optional = true }
 hexf-parse = { version = "0.2.1", optional = true }
diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml
index 5b1fcb7261..b079fef630 100644
--- a/wgpu-hal/Cargo.toml
+++ b/wgpu-hal/Cargo.toml
@@ -169,7 +169,7 @@ d3d12 = { path = "../d3d12/", version = "0.20.0", optional = true, features = [
 # backend: Metal
 block = { version = "0.1", optional = true }
 
-metal = { version = "0.28.0" }
+metal = { version = "0.29.0" }
 objc = "0.2.5"
 core-graphics-types = "0.1"
 
@@ -206,7 +206,7 @@ features = ["wgsl-in"]
 [dev-dependencies]
 cfg-if = "1"
 env_logger = "0.11"
-glam = "0.27.0" # for ray-traced-triangle example
+glam.workspace = true # for ray-traced-triangle example
 winit = { version = "0.29", features = [
     "android-native-activity",
 ] } # for "halmark" example
diff --git a/wgpu-types/Cargo.toml b/wgpu-types/Cargo.toml
index b61ffb6328..3c2e6e68bd 100644
--- a/wgpu-types/Cargo.toml
+++ b/wgpu-types/Cargo.toml
@@ -48,4 +48,4 @@ web-sys = { version = "0.3.69", features = [
 
 [dev-dependencies]
 serde = { version = "1", features = ["derive"] }
-serde_json = "1.0.119"
+serde_json = "1.0.120"

From 05c0656fa4234ce43ddbc60bdfa04f03a4b5861e Mon Sep 17 00:00:00 2001
From: Jim Blandy <jimb@red-bean.com>
Date: Sun, 14 Jul 2024 13:03:36 -0700
Subject: [PATCH 025/226] [core] Correct docs for `LifetimeTracker` and its
 `mapped` field.

---
 wgpu-core/src/device/life.rs | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index 4ef57e4d16..118e1498b4 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -95,15 +95,14 @@ pub enum WaitIdleError {
 ///         submission index.
 ///
 ///     3)  `handle_mapping` drains `self.ready_to_map` and actually maps the
-///         buffers, collecting a list of notification closures to call. But any
-///         buffers that were dropped by the user get moved to
-///         `self.free_resources`.
+///         buffers, collecting a list of notification closures to call.
 ///
 /// Only calling `Global::buffer_map_async` clones a new `Arc` for the
 /// buffer. This new `Arc` is only dropped by `handle_mapping`.
 pub(crate) struct LifetimeTracker<A: HalApi> {
-    /// Resources that the user has requested be mapped, but which are used by
-    /// queue submissions still in flight.
+    /// Buffers for which a call to [`Buffer::map_async`] has succeeded, but
+    /// which haven't been examined by `triage_mapped` yet to decide when they
+    /// can be mapped.
     mapped: Vec<Arc<Buffer<A>>>,
 
     /// Resources used by queue submissions still in flight. One entry per

From d02e2949b22edbc4b4371de6a8f577cd4fc91ef2 Mon Sep 17 00:00:00 2001
From: Jim Blandy <jimb@red-bean.com>
Date: Sat, 13 Jul 2024 20:33:36 -0700
Subject: [PATCH 026/226] [core] Correctly check mipmap-filtering samplers
 against the layout.

Ensure that samplers using non-`Nearest` mipmap filtering are
considered "filtering samplers" when deciding bind group layout
compatibility.

Add tests for layout `NonFiltering` validation.

Fixes #5948.
---
 tests/tests/bind_groups.rs       | 116 +++++++++++++++++++++++++++++++
 tests/tests/root.rs              |   1 +
 wgpu-core/src/device/resource.rs |   3 +-
 3 files changed, 119 insertions(+), 1 deletion(-)
 create mode 100644 tests/tests/bind_groups.rs

diff --git a/tests/tests/bind_groups.rs b/tests/tests/bind_groups.rs
new file mode 100644
index 0000000000..fab1c065f0
--- /dev/null
+++ b/tests/tests/bind_groups.rs
@@ -0,0 +1,116 @@
+use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters, TestingContext};
+
+/// Test `descriptor` against a bind group layout that requires a non-filtering sampler.
+fn try_sampler_nonfiltering_layout(
+    ctx: TestingContext,
+    descriptor: &wgpu::SamplerDescriptor,
+    good: bool,
+) {
+    let label = descriptor.label;
+    let bind_group_layout = ctx
+        .device
+        .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+            label,
+            entries: &[wgpu::BindGroupLayoutEntry {
+                binding: 0,
+                visibility: wgpu::ShaderStages::FRAGMENT,
+                ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
+                count: None,
+            }],
+        });
+
+    let sampler = ctx.device.create_sampler(descriptor);
+
+    let create_bind_group = || {
+        ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
+            label,
+            layout: &bind_group_layout,
+            entries: &[wgpu::BindGroupEntry {
+                binding: 0,
+                resource: wgpu::BindingResource::Sampler(&sampler),
+            }],
+        });
+    };
+
+    if good {
+        wgpu_test::valid(&ctx.device, create_bind_group);
+    } else {
+        wgpu_test::fail(
+            &ctx.device,
+            create_bind_group,
+            Some("but given a sampler with filtering"),
+        );
+    }
+}
+
+#[gpu_test]
+static BIND_GROUP_NONFILTERING_LAYOUT_NONFILTERING_SAMPLER: GpuTestConfiguration =
+    GpuTestConfiguration::new()
+        .parameters(TestParameters::default())
+        .run_sync(|ctx| {
+            try_sampler_nonfiltering_layout(
+                ctx,
+                &wgpu::SamplerDescriptor {
+                    label: Some("bind_group_non_filtering_layout_nonfiltering_sampler"),
+                    min_filter: wgpu::FilterMode::Nearest,
+                    mag_filter: wgpu::FilterMode::Nearest,
+                    mipmap_filter: wgpu::FilterMode::Nearest,
+                    ..wgpu::SamplerDescriptor::default()
+                },
+                true,
+            );
+        });
+
+#[gpu_test]
+static BIND_GROUP_NONFILTERING_LAYOUT_MIN_SAMPLER: GpuTestConfiguration =
+    GpuTestConfiguration::new()
+        .parameters(TestParameters::default())
+        .run_sync(|ctx| {
+            try_sampler_nonfiltering_layout(
+                ctx,
+                &wgpu::SamplerDescriptor {
+                    label: Some("bind_group_non_filtering_layout_min_sampler"),
+                    min_filter: wgpu::FilterMode::Linear,
+                    mag_filter: wgpu::FilterMode::Nearest,
+                    mipmap_filter: wgpu::FilterMode::Nearest,
+                    ..wgpu::SamplerDescriptor::default()
+                },
+                false,
+            );
+        });
+
+#[gpu_test]
+static BIND_GROUP_NONFILTERING_LAYOUT_MAG_SAMPLER: GpuTestConfiguration =
+    GpuTestConfiguration::new()
+        .parameters(TestParameters::default())
+        .run_sync(|ctx| {
+            try_sampler_nonfiltering_layout(
+                ctx,
+                &wgpu::SamplerDescriptor {
+                    label: Some("bind_group_non_filtering_layout_mag_sampler"),
+                    min_filter: wgpu::FilterMode::Nearest,
+                    mag_filter: wgpu::FilterMode::Linear,
+                    mipmap_filter: wgpu::FilterMode::Nearest,
+                    ..wgpu::SamplerDescriptor::default()
+                },
+                false,
+            );
+        });
+
+#[gpu_test]
+static BIND_GROUP_NONFILTERING_LAYOUT_MIPMAP_SAMPLER: GpuTestConfiguration =
+    GpuTestConfiguration::new()
+        .parameters(TestParameters::default())
+        .run_sync(|ctx| {
+            try_sampler_nonfiltering_layout(
+                ctx,
+                &wgpu::SamplerDescriptor {
+                    label: Some("bind_group_non_filtering_layout_mipmap_sampler"),
+                    min_filter: wgpu::FilterMode::Nearest,
+                    mag_filter: wgpu::FilterMode::Nearest,
+                    mipmap_filter: wgpu::FilterMode::Linear,
+                    ..wgpu::SamplerDescriptor::default()
+                },
+                false,
+            );
+        });
diff --git a/tests/tests/root.rs b/tests/tests/root.rs
index 088d663a12..6ceb3818df 100644
--- a/tests/tests/root.rs
+++ b/tests/tests/root.rs
@@ -8,6 +8,7 @@ mod regression {
 
 mod bgra8unorm_storage;
 mod bind_group_layout_dedup;
+mod bind_groups;
 mod buffer;
 mod buffer_copy;
 mod buffer_usages;
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 2ec3a3e9eb..c364711f5d 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -1380,7 +1380,8 @@ impl<A: HalApi> Device<A> {
             tracking_data: TrackingData::new(self.tracker_indices.samplers.clone()),
             comparison: desc.compare.is_some(),
             filtering: desc.min_filter == wgt::FilterMode::Linear
-                || desc.mag_filter == wgt::FilterMode::Linear,
+                || desc.mag_filter == wgt::FilterMode::Linear
+                || desc.mipmap_filter == wgt::FilterMode::Linear,
         };
 
         let sampler = Arc::new(sampler);

From 1b4e8ada630a2e54e51c981d2b4b47bc631ce911 Mon Sep 17 00:00:00 2001
From: Dzmitry Malyshau <kvark@fastmail.com>
Date: Sun, 14 Jul 2024 22:16:50 -0700
Subject: [PATCH 027/226] spv-out: fix acceleration structure in a function
 argument

---
 CHANGELOG.md                        |   1 +
 naga/src/back/mod.rs                |   4 +-
 naga/tests/in/ray-query.wgsl        |  25 ++-
 naga/tests/out/msl/ray-query.msl    |  59 +++---
 naga/tests/out/spv/ray-query.spvasm | 266 +++++++++++++++-------------
 5 files changed, 193 insertions(+), 162 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c52dbac34c..9ce370d808 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -160,6 +160,7 @@ By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
 - Implement `WGSL`'s `unpack4xI8`,`unpack4xU8`,`pack4xI8` and `pack4xU8`. By @VlaDexa in [#5424](https://github.com/gfx-rs/wgpu/pull/5424)
 - Began work adding support for atomics to the SPIR-V frontend. Tracking issue is [here](https://github.com/gfx-rs/wgpu/issues/4489). By @schell in [#5702](https://github.com/gfx-rs/wgpu/pull/5702).
 - In hlsl-out, allow passing information about the fragment entry point to omit vertex outputs that are not in the fragment inputs. By @Imberflur in [#5531](https://github.com/gfx-rs/wgpu/pull/5531)
+- In spv-out, allow passing `acceleration_structure` as a function argument. By @kvark in [#5961](https://github.com/gfx-rs/wgpu/pull/5961)
 
   ```diff
   let writer: naga::back::hlsl::Writer = /* ... */;
diff --git a/naga/src/back/mod.rs b/naga/src/back/mod.rs
index 364d0f2506..cd9496e3ff 100644
--- a/naga/src/back/mod.rs
+++ b/naga/src/back/mod.rs
@@ -254,7 +254,9 @@ impl crate::TypeInner {
     /// Returns true if this is a handle to a type rather than the type directly.
     pub const fn is_handle(&self) -> bool {
         match *self {
-            crate::TypeInner::Image { .. } | crate::TypeInner::Sampler { .. } => true,
+            crate::TypeInner::Image { .. }
+            | crate::TypeInner::Sampler { .. }
+            | crate::TypeInner::AccelerationStructure { .. } => true,
             _ => false,
         }
     }
diff --git a/naga/tests/in/ray-query.wgsl b/naga/tests/in/ray-query.wgsl
index 4826547ded..0af8c7c95f 100644
--- a/naga/tests/in/ray-query.wgsl
+++ b/naga/tests/in/ray-query.wgsl
@@ -1,6 +1,3 @@
-@group(0) @binding(0)
-var acc_struct: acceleration_structure;
-
 /*
 let RAY_FLAG_NONE = 0x00u;
 let RAY_FLAG_OPAQUE = 0x01u;
@@ -43,6 +40,18 @@ struct RayIntersection {
 }
 */
 
+fn query_loop(pos: vec3<f32>, dir: vec3<f32>, acs: acceleration_structure) -> RayIntersection {
+    var rq: ray_query;
+    rayQueryInitialize(&rq, acs, RayDesc(RAY_FLAG_TERMINATE_ON_FIRST_HIT, 0xFFu, 0.1, 100.0, pos, dir));
+
+    while (rayQueryProceed(&rq)) {}
+
+    return rayQueryGetCommittedIntersection(&rq);
+}
+
+@group(0) @binding(0)
+var acc_struct: acceleration_structure;
+
 struct Output {
     visible: u32,
     normal: vec3<f32>,
@@ -58,16 +67,14 @@ fn get_torus_normal(world_point: vec3<f32>, intersection: RayIntersection) -> ve
     return normalize(world_point - world_point_on_guiding_line);
 }
 
+
+
 @compute @workgroup_size(1)
 fn main() {
-    var rq: ray_query;
-
+    let pos = vec3<f32>(0.0);
     let dir = vec3<f32>(0.0, 1.0, 0.0);
-    rayQueryInitialize(&rq, acc_struct, RayDesc(RAY_FLAG_TERMINATE_ON_FIRST_HIT, 0xFFu, 0.1, 100.0, vec3<f32>(0.0), dir));
-
-    while (rayQueryProceed(&rq)) {}
+    let intersection = query_loop(pos, dir, acc_struct);
 
-    let intersection = rayQueryGetCommittedIntersection(&rq);
     output.visible = u32(intersection.kind == RAY_QUERY_INTERSECTION_NONE);
     output.normal = get_torus_normal(dir * intersection.t, intersection);
 }
diff --git a/naga/tests/out/msl/ray-query.msl b/naga/tests/out/msl/ray-query.msl
index 17b856427f..fbdaef5484 100644
--- a/naga/tests/out/msl/ray-query.msl
+++ b/naga/tests/out/msl/ray-query.msl
@@ -13,11 +13,6 @@ constexpr metal::uint _map_intersection_type(const metal::raytracing::intersecti
         ty==metal::raytracing::intersection_type::bounding_box ? 4 : 0;
 }
 
-struct Output {
-    uint visible;
-    char _pad1[12];
-    metal::float3 normal;
-};
 struct RayIntersection {
     uint kind;
     float t;
@@ -40,6 +35,34 @@ struct RayDesc {
     metal::float3 origin;
     metal::float3 dir;
 };
+struct Output {
+    uint visible;
+    char _pad1[12];
+    metal::float3 normal;
+};
+
+RayIntersection query_loop(
+    metal::float3 pos,
+    metal::float3 dir,
+    metal::raytracing::instance_acceleration_structure acs
+) {
+    _RayQuery rq = {};
+    RayDesc _e8 = RayDesc {4u, 255u, 0.1, 100.0, pos, dir};
+    rq.intersector.assume_geometry_type(metal::raytracing::geometry_type::triangle);
+    rq.intersector.set_opacity_cull_mode((_e8.flags & 64) != 0 ? metal::raytracing::opacity_cull_mode::opaque : (_e8.flags & 128) != 0 ? metal::raytracing::opacity_cull_mode::non_opaque : metal::raytracing::opacity_cull_mode::none);
+    rq.intersector.force_opacity((_e8.flags & 1) != 0 ? metal::raytracing::forced_opacity::opaque : (_e8.flags & 2) != 0 ? metal::raytracing::forced_opacity::non_opaque : metal::raytracing::forced_opacity::none);
+    rq.intersector.accept_any_intersection((_e8.flags & 4) != 0);
+    rq.intersection = rq.intersector.intersect(metal::raytracing::ray(_e8.origin, _e8.dir, _e8.tmin, _e8.tmax), acs, _e8.cull_mask);    rq.ready = true;
+    while(true) {
+        bool _e9 = rq.ready;
+        rq.ready = false;
+        if (_e9) {
+        } else {
+            break;
+        }
+    }
+    return RayIntersection {_map_intersection_type(rq.intersection.type), rq.intersection.distance, rq.intersection.user_instance_id, rq.intersection.instance_id, {}, rq.intersection.geometry_id, rq.intersection.primitive_id, rq.intersection.triangle_barycentric_coord, rq.intersection.triangle_front_facing, {}, rq.intersection.object_to_world_transform, rq.intersection.world_to_object_transform};
+}
 
 metal::float3 get_torus_normal(
     metal::float3 world_point,
@@ -55,25 +78,11 @@ kernel void main_(
   metal::raytracing::instance_acceleration_structure acc_struct [[user(fake0)]]
 , device Output& output [[user(fake0)]]
 ) {
-    _RayQuery rq = {};
-    metal::float3 dir = metal::float3(0.0, 1.0, 0.0);
-    RayDesc _e12 = RayDesc {4u, 255u, 0.1, 100.0, metal::float3(0.0), dir};
-    rq.intersector.assume_geometry_type(metal::raytracing::geometry_type::triangle);
-    rq.intersector.set_opacity_cull_mode((_e12.flags & 64) != 0 ? metal::raytracing::opacity_cull_mode::opaque : (_e12.flags & 128) != 0 ? metal::raytracing::opacity_cull_mode::non_opaque : metal::raytracing::opacity_cull_mode::none);
-    rq.intersector.force_opacity((_e12.flags & 1) != 0 ? metal::raytracing::forced_opacity::opaque : (_e12.flags & 2) != 0 ? metal::raytracing::forced_opacity::non_opaque : metal::raytracing::forced_opacity::none);
-    rq.intersector.accept_any_intersection((_e12.flags & 4) != 0);
-    rq.intersection = rq.intersector.intersect(metal::raytracing::ray(_e12.origin, _e12.dir, _e12.tmin, _e12.tmax), acc_struct, _e12.cull_mask);    rq.ready = true;
-    while(true) {
-        bool _e13 = rq.ready;
-        rq.ready = false;
-        if (_e13) {
-        } else {
-            break;
-        }
-    }
-    RayIntersection intersection_1 = RayIntersection {_map_intersection_type(rq.intersection.type), rq.intersection.distance, rq.intersection.user_instance_id, rq.intersection.instance_id, {}, rq.intersection.geometry_id, rq.intersection.primitive_id, rq.intersection.triangle_barycentric_coord, rq.intersection.triangle_front_facing, {}, rq.intersection.object_to_world_transform, rq.intersection.world_to_object_transform};
-    output.visible = static_cast<uint>(intersection_1.kind == 0u);
-    metal::float3 _e25 = get_torus_normal(dir * intersection_1.t, intersection_1);
-    output.normal = _e25;
+    metal::float3 pos_1 = metal::float3(0.0);
+    metal::float3 dir_1 = metal::float3(0.0, 1.0, 0.0);
+    RayIntersection _e7 = query_loop(pos_1, dir_1, acc_struct);
+    output.visible = static_cast<uint>(_e7.kind == 0u);
+    metal::float3 _e18 = get_torus_normal(dir_1 * _e7.t, _e7);
+    output.normal = _e18;
     return;
 }
diff --git a/naga/tests/out/spv/ray-query.spvasm b/naga/tests/out/spv/ray-query.spvasm
index 23d5dd1baa..328c820fea 100644
--- a/naga/tests/out/spv/ray-query.spvasm
+++ b/naga/tests/out/spv/ray-query.spvasm
@@ -1,37 +1,37 @@
 ; SPIR-V
 ; Version: 1.4
 ; Generator: rspirv
-; Bound: 95
+; Bound: 104
 OpCapability Shader
 OpCapability RayQueryKHR
 OpExtension "SPV_KHR_ray_query"
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
-OpEntryPoint GLCompute %41 "main" %15 %17
-OpExecutionMode %41 LocalSize 1 1 1
-OpMemberDecorate %7 0 Offset 0
-OpMemberDecorate %7 1 Offset 16
-OpMemberDecorate %11 0 Offset 0
-OpMemberDecorate %11 1 Offset 4
-OpMemberDecorate %11 2 Offset 8
-OpMemberDecorate %11 3 Offset 12
-OpMemberDecorate %11 4 Offset 16
-OpMemberDecorate %11 5 Offset 20
-OpMemberDecorate %11 6 Offset 24
-OpMemberDecorate %11 7 Offset 28
-OpMemberDecorate %11 8 Offset 36
-OpMemberDecorate %11 9 Offset 48
-OpMemberDecorate %11 9 ColMajor
-OpMemberDecorate %11 9 MatrixStride 16
-OpMemberDecorate %11 10 Offset 112
-OpMemberDecorate %11 10 ColMajor
-OpMemberDecorate %11 10 MatrixStride 16
-OpMemberDecorate %14 0 Offset 0
-OpMemberDecorate %14 1 Offset 4
-OpMemberDecorate %14 2 Offset 8
-OpMemberDecorate %14 3 Offset 12
-OpMemberDecorate %14 4 Offset 16
-OpMemberDecorate %14 5 Offset 32
+OpEntryPoint GLCompute %84 "main" %15 %17
+OpExecutionMode %84 LocalSize 1 1 1
+OpMemberDecorate %10 0 Offset 0
+OpMemberDecorate %10 1 Offset 4
+OpMemberDecorate %10 2 Offset 8
+OpMemberDecorate %10 3 Offset 12
+OpMemberDecorate %10 4 Offset 16
+OpMemberDecorate %10 5 Offset 20
+OpMemberDecorate %10 6 Offset 24
+OpMemberDecorate %10 7 Offset 28
+OpMemberDecorate %10 8 Offset 36
+OpMemberDecorate %10 9 Offset 48
+OpMemberDecorate %10 9 ColMajor
+OpMemberDecorate %10 9 MatrixStride 16
+OpMemberDecorate %10 10 Offset 112
+OpMemberDecorate %10 10 ColMajor
+OpMemberDecorate %10 10 MatrixStride 16
+OpMemberDecorate %12 0 Offset 0
+OpMemberDecorate %12 1 Offset 4
+OpMemberDecorate %12 2 Offset 8
+OpMemberDecorate %12 3 Offset 12
+OpMemberDecorate %12 4 Offset 16
+OpMemberDecorate %12 5 Offset 32
+OpMemberDecorate %13 0 Offset 0
+OpMemberDecorate %13 1 Offset 16
 OpDecorate %15 DescriptorSet 0
 OpDecorate %15 Binding 0
 OpDecorate %17 DescriptorSet 0
@@ -39,114 +39,126 @@ OpDecorate %17 Binding 1
 OpDecorate %18 Block
 OpMemberDecorate %18 0 Offset 0
 %2 = OpTypeVoid
-%3 = OpTypeAccelerationStructureNV
-%4 = OpTypeInt 32 0
-%6 = OpTypeFloat 32
-%5 = OpTypeVector %6 3
-%7 = OpTypeStruct %4 %5
-%8 = OpTypeVector %6 2
-%9 = OpTypeBool
-%10 = OpTypeMatrix %5 4
-%11 = OpTypeStruct %4 %6 %4 %4 %4 %4 %4 %8 %9 %10 %10
-%12 = OpTypeVector %6 4
-%13 = OpTypeRayQueryKHR
-%14 = OpTypeStruct %4 %4 %6 %6 %5 %5
-%16 = OpTypePointer UniformConstant %3
+%4 = OpTypeFloat 32
+%3 = OpTypeVector %4 3
+%5 = OpTypeAccelerationStructureNV
+%6 = OpTypeInt 32 0
+%7 = OpTypeVector %4 2
+%8 = OpTypeBool
+%9 = OpTypeMatrix %3 4
+%10 = OpTypeStruct %6 %4 %6 %6 %6 %6 %6 %7 %8 %9 %9
+%11 = OpTypeRayQueryKHR
+%12 = OpTypeStruct %6 %6 %4 %4 %3 %3
+%13 = OpTypeStruct %6 %3
+%14 = OpTypeVector %4 4
+%16 = OpTypePointer UniformConstant %5
 %15 = OpVariable  %16  UniformConstant
-%18 = OpTypeStruct %7
+%18 = OpTypeStruct %13
 %19 = OpTypePointer StorageBuffer %18
 %17 = OpVariable  %19  StorageBuffer
-%24 = OpTypeFunction %5 %5 %11
-%25 = OpConstant  %6  1.0
-%26 = OpConstant  %6  2.4
-%27 = OpConstant  %6  0.0
-%42 = OpTypeFunction %2
-%44 = OpTypePointer StorageBuffer %7
-%45 = OpConstant  %4  0
-%47 = OpConstantComposite  %5  %27 %25 %27
-%48 = OpConstant  %4  4
-%49 = OpConstant  %4  255
-%50 = OpConstantComposite  %5  %27 %27 %27
-%51 = OpConstant  %6  0.1
-%52 = OpConstant  %6  100.0
-%53 = OpConstantComposite  %14  %48 %49 %51 %52 %50 %47
-%55 = OpTypePointer Function %13
-%72 = OpConstant  %4  1
-%85 = OpTypePointer StorageBuffer %4
-%90 = OpTypePointer StorageBuffer %5
-%23 = OpFunction  %5  None %24
-%21 = OpFunctionParameter  %5
-%22 = OpFunctionParameter  %11
+%26 = OpTypeFunction %10 %3 %3 %16
+%27 = OpConstant  %6  4
+%28 = OpConstant  %6  255
+%29 = OpConstant  %4  0.1
+%30 = OpConstant  %4  100.0
+%32 = OpTypePointer Function %11
+%50 = OpConstant  %6  1
+%67 = OpTypeFunction %3 %3 %10
+%68 = OpConstant  %4  1.0
+%69 = OpConstant  %4  2.4
+%70 = OpConstant  %4  0.0
+%85 = OpTypeFunction %2
+%87 = OpTypePointer StorageBuffer %13
+%88 = OpConstant  %6  0
+%90 = OpConstantComposite  %3  %70 %70 %70
+%91 = OpConstantComposite  %3  %70 %68 %70
+%94 = OpTypePointer StorageBuffer %6
+%99 = OpTypePointer StorageBuffer %3
+%25 = OpFunction  %10  None %26
+%21 = OpFunctionParameter  %3
+%22 = OpFunctionParameter  %3
+%23 = OpFunctionParameter  %16
 %20 = OpLabel
-OpBranch %28
-%28 = OpLabel
-%29 = OpCompositeExtract  %10  %22 10
-%30 = OpCompositeConstruct  %12  %21 %25
-%31 = OpMatrixTimesVector  %5  %29 %30
-%32 = OpVectorShuffle  %8  %31 %31 0 1
-%33 = OpExtInst  %8  %1 Normalize %32
-%34 = OpVectorTimesScalar  %8  %33 %26
-%35 = OpCompositeExtract  %10  %22 9
-%36 = OpCompositeConstruct  %12  %34 %27 %25
-%37 = OpMatrixTimesVector  %5  %35 %36
-%38 = OpFSub  %5  %21 %37
-%39 = OpExtInst  %5  %1 Normalize %38
-OpReturnValue %39
+%31 = OpVariable  %32  Function
+%24 = OpLoad  %5  %23
+OpBranch %33
+%33 = OpLabel
+%34 = OpCompositeConstruct  %12  %27 %28 %29 %30 %21 %22
+%35 = OpCompositeExtract  %6  %34 0
+%36 = OpCompositeExtract  %6  %34 1
+%37 = OpCompositeExtract  %4  %34 2
+%38 = OpCompositeExtract  %4  %34 3
+%39 = OpCompositeExtract  %3  %34 4
+%40 = OpCompositeExtract  %3  %34 5
+OpRayQueryInitializeKHR %31 %24 %35 %36 %39 %37 %40 %38
+OpBranch %41
+%41 = OpLabel
+OpLoopMerge %42 %44 None
+OpBranch %43
+%43 = OpLabel
+%45 = OpRayQueryProceedKHR  %8  %31
+OpSelectionMerge %46 None
+OpBranchConditional %45 %46 %47
+%47 = OpLabel
+OpBranch %42
+%46 = OpLabel
+OpBranch %48
+%48 = OpLabel
+OpBranch %49
+%49 = OpLabel
+OpBranch %44
+%44 = OpLabel
+OpBranch %41
+%42 = OpLabel
+%51 = OpRayQueryGetIntersectionTypeKHR  %6  %31 %50
+%52 = OpRayQueryGetIntersectionInstanceCustomIndexKHR  %6  %31 %50
+%53 = OpRayQueryGetIntersectionInstanceIdKHR  %6  %31 %50
+%54 = OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR  %6  %31 %50
+%55 = OpRayQueryGetIntersectionGeometryIndexKHR  %6  %31 %50
+%56 = OpRayQueryGetIntersectionPrimitiveIndexKHR  %6  %31 %50
+%57 = OpRayQueryGetIntersectionTKHR  %4  %31 %50
+%58 = OpRayQueryGetIntersectionBarycentricsKHR  %7  %31 %50
+%59 = OpRayQueryGetIntersectionFrontFaceKHR  %8  %31 %50
+%60 = OpRayQueryGetIntersectionObjectToWorldKHR  %9  %31 %50
+%61 = OpRayQueryGetIntersectionWorldToObjectKHR  %9  %31 %50
+%62 = OpCompositeConstruct  %10  %51 %57 %52 %53 %54 %55 %56 %58 %59 %60 %61
+OpReturnValue %62
 OpFunctionEnd
-%41 = OpFunction  %2  None %42
-%40 = OpLabel
-%54 = OpVariable  %55  Function
-%43 = OpLoad  %3  %15
-%46 = OpAccessChain  %44  %17 %45
-OpBranch %56
-%56 = OpLabel
-%57 = OpCompositeExtract  %4  %53 0
-%58 = OpCompositeExtract  %4  %53 1
-%59 = OpCompositeExtract  %6  %53 2
-%60 = OpCompositeExtract  %6  %53 3
-%61 = OpCompositeExtract  %5  %53 4
-%62 = OpCompositeExtract  %5  %53 5
-OpRayQueryInitializeKHR %54 %43 %57 %58 %61 %59 %62 %60
-OpBranch %63
+%66 = OpFunction  %3  None %67
+%64 = OpFunctionParameter  %3
+%65 = OpFunctionParameter  %10
 %63 = OpLabel
-OpLoopMerge %64 %66 None
-OpBranch %65
-%65 = OpLabel
-%67 = OpRayQueryProceedKHR  %9  %54
-OpSelectionMerge %68 None
-OpBranchConditional %67 %68 %69
-%69 = OpLabel
-OpBranch %64
-%68 = OpLabel
-OpBranch %70
-%70 = OpLabel
 OpBranch %71
 %71 = OpLabel
-OpBranch %66
-%66 = OpLabel
-OpBranch %63
-%64 = OpLabel
-%73 = OpRayQueryGetIntersectionTypeKHR  %4  %54 %72
-%74 = OpRayQueryGetIntersectionInstanceCustomIndexKHR  %4  %54 %72
-%75 = OpRayQueryGetIntersectionInstanceIdKHR  %4  %54 %72
-%76 = OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR  %4  %54 %72
-%77 = OpRayQueryGetIntersectionGeometryIndexKHR  %4  %54 %72
-%78 = OpRayQueryGetIntersectionPrimitiveIndexKHR  %4  %54 %72
-%79 = OpRayQueryGetIntersectionTKHR  %6  %54 %72
-%80 = OpRayQueryGetIntersectionBarycentricsKHR  %8  %54 %72
-%81 = OpRayQueryGetIntersectionFrontFaceKHR  %9  %54 %72
-%82 = OpRayQueryGetIntersectionObjectToWorldKHR  %10  %54 %72
-%83 = OpRayQueryGetIntersectionWorldToObjectKHR  %10  %54 %72
-%84 = OpCompositeConstruct  %11  %73 %79 %74 %75 %76 %77 %78 %80 %81 %82 %83
-%86 = OpCompositeExtract  %4  %84 0
-%87 = OpIEqual  %9  %86 %45
-%88 = OpSelect  %4  %87 %72 %45
-%89 = OpAccessChain  %85  %46 %45
-OpStore %89 %88
-%91 = OpCompositeExtract  %6  %84 1
-%92 = OpVectorTimesScalar  %5  %47 %91
-%93 = OpFunctionCall  %5  %23 %92 %84
-%94 = OpAccessChain  %90  %46 %72
-OpStore %94 %93
+%72 = OpCompositeExtract  %9  %65 10
+%73 = OpCompositeConstruct  %14  %64 %68
+%74 = OpMatrixTimesVector  %3  %72 %73
+%75 = OpVectorShuffle  %7  %74 %74 0 1
+%76 = OpExtInst  %7  %1 Normalize %75
+%77 = OpVectorTimesScalar  %7  %76 %69
+%78 = OpCompositeExtract  %9  %65 9
+%79 = OpCompositeConstruct  %14  %77 %70 %68
+%80 = OpMatrixTimesVector  %3  %78 %79
+%81 = OpFSub  %3  %64 %80
+%82 = OpExtInst  %3  %1 Normalize %81
+OpReturnValue %82
+OpFunctionEnd
+%84 = OpFunction  %2  None %85
+%83 = OpLabel
+%86 = OpLoad  %5  %15
+%89 = OpAccessChain  %87  %17 %88
+OpBranch %92
+%92 = OpLabel
+%93 = OpFunctionCall  %10  %25 %90 %91 %15
+%95 = OpCompositeExtract  %6  %93 0
+%96 = OpIEqual  %8  %95 %88
+%97 = OpSelect  %6  %96 %50 %88
+%98 = OpAccessChain  %94  %89 %88
+OpStore %98 %97
+%100 = OpCompositeExtract  %4  %93 1
+%101 = OpVectorTimesScalar  %3  %91 %100
+%102 = OpFunctionCall  %3  %66 %101 %93
+%103 = OpAccessChain  %99  %89 %50
+OpStore %103 %102
 OpReturn
 OpFunctionEnd
\ No newline at end of file

From 95c604e4419b1c4409a9816fbe6dc487e1490469 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 15 Jul 2024 12:08:51 +0200
Subject: [PATCH 028/226] build(deps): bump the patch-updates group with 6
 updates (#5959)

Bumps the patch-updates group with 6 updates:

| Package | From | To |
| --- | --- | --- |
| [document-features](https://github.com/slint-ui/document-features) | `0.2.9` | `0.2.10` |
| [thiserror](https://github.com/dtolnay/thiserror) | `1.0.61` | `1.0.62` |
| [syn](https://github.com/dtolnay/syn) | `2.0.70` | `2.0.71` |
| [bytes](https://github.com/tokio-rs/bytes) | `1.6.0` | `1.6.1` |
| [cc](https://github.com/rust-lang/cc-rs) | `1.1.0` | `1.1.5` |
| [thiserror-impl](https://github.com/dtolnay/thiserror) | `1.0.61` | `1.0.62` |


Updates `document-features` from 0.2.9 to 0.2.10
- [Release notes](https://github.com/slint-ui/document-features/releases)
- [Changelog](https://github.com/slint-ui/document-features/blob/master/CHANGELOG.md)
- [Commits](https://github.com/slint-ui/document-features/commits)

Updates `thiserror` from 1.0.61 to 1.0.62
- [Release notes](https://github.com/dtolnay/thiserror/releases)
- [Commits](https://github.com/dtolnay/thiserror/compare/1.0.61...1.0.62)

Updates `syn` from 2.0.70 to 2.0.71
- [Release notes](https://github.com/dtolnay/syn/releases)
- [Commits](https://github.com/dtolnay/syn/compare/2.0.70...2.0.71)

Updates `bytes` from 1.6.0 to 1.6.1
- [Release notes](https://github.com/tokio-rs/bytes/releases)
- [Changelog](https://github.com/tokio-rs/bytes/blob/master/CHANGELOG.md)
- [Commits](https://github.com/tokio-rs/bytes/compare/v1.6.0...v1.6.1)

Updates `cc` from 1.1.0 to 1.1.5
- [Release notes](https://github.com/rust-lang/cc-rs/releases)
- [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md)
- [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.1.0...cc-v1.1.5)

Updates `thiserror-impl` from 1.0.61 to 1.0.62
- [Release notes](https://github.com/dtolnay/thiserror/releases)
- [Commits](https://github.com/dtolnay/thiserror/compare/1.0.61...1.0.62)

---
updated-dependencies:
- dependency-name: document-features
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: thiserror
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: syn
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: bytes
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: cc
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: thiserror-impl
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 Cargo.lock      | 73 ++++++++++++++++++++++++-------------------------
 Cargo.toml      |  2 +-
 naga/Cargo.toml |  2 +-
 3 files changed, 38 insertions(+), 39 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 86391670e3..5b2f904534 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -186,7 +186,7 @@ dependencies = [
  "argh_shared",
  "proc-macro2",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -242,7 +242,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -385,7 +385,7 @@ checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -396,9 +396,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
 
 [[package]]
 name = "bytes"
-version = "1.6.0"
+version = "1.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
+checksum = "a12916984aab3fa6e39d655a33e09c0071eb36d6ab3aea5c2d78551f1df6d952"
 
 [[package]]
 name = "calloop"
@@ -448,13 +448,12 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
 
 [[package]]
 name = "cc"
-version = "1.1.0"
+version = "1.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eaff6f8ce506b9773fa786672d63fc7a191ffea1be33f72bbd4aeacefca9ffc8"
+checksum = "324c74f2155653c90b04f25b2a47a8a631360cb908f92a772695f430c7e31052"
 dependencies = [
  "jobserver",
  "libc",
- "once_cell",
 ]
 
 [[package]]
@@ -542,7 +541,7 @@ dependencies = [
  "heck 0.5.0",
  "proc-macro2",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -887,7 +886,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f"
 dependencies = [
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -1034,7 +1033,7 @@ dependencies = [
  "quote",
  "strum",
  "strum_macros",
- "syn 2.0.70",
+ "syn 2.0.71",
  "thiserror",
 ]
 
@@ -1107,7 +1106,7 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -1120,7 +1119,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustc_version 0.4.0",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -1146,9 +1145,9 @@ dependencies = [
 
 [[package]]
 name = "document-features"
-version = "0.2.9"
+version = "0.2.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a344f0a78e998787823fe12c8245b9e1fcdb1da9eca625082c2e3d641297fa3"
+checksum = "cb6969eaabd2421f8a2775cfd2471a2b634372b4a25d41e3bd647b79912850a0"
 dependencies = [
  "litrs",
 ]
@@ -1208,7 +1207,7 @@ checksum = "b36f2ddfca91251bed7f931f24b192e4eaf0a0e0fa70cf81cfb1416a1973620e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -1354,7 +1353,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -1479,7 +1478,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -2456,7 +2455,7 @@ dependencies = [
  "proc-macro-crate 3.1.0",
  "proc-macro2",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -2657,7 +2656,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -2796,7 +2795,7 @@ checksum = "07c277e4e643ef00c1233393c673f655e3672cf7eb3ba08a00bdd0ea59139b5f"
 dependencies = [
  "proc-macro-rules-macros",
  "proc-macro2",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -2808,7 +2807,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -3161,7 +3160,7 @@ checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -3428,7 +3427,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustversion",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -3444,9 +3443,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.70"
+version = "2.0.71"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2f0209b68b3613b093e0ec905354eccaedcfe83b8cb37cbdeae64026c3064c16"
+checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -3464,22 +3463,22 @@ dependencies = [
 
 [[package]]
 name = "thiserror"
-version = "1.0.61"
+version = "1.0.62"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709"
+checksum = "f2675633b1499176c2dff06b0856a27976a8f9d436737b4cf4f312d4d91d8bbb"
 dependencies = [
  "thiserror-impl",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "1.0.61"
+version = "1.0.62"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
+checksum = "d20468752b09f49e909e55a5d338caa8bedf615594e9d80bc4c565d30faf798c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -3613,7 +3612,7 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -3922,7 +3921,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
  "wasm-bindgen-shared",
 ]
 
@@ -3956,7 +3955,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
@@ -3989,7 +3988,7 @@ checksum = "b7f89739351a2e03cb94beb799d47fb2cac01759b40ec441f7de39b00cbf7ef0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -4369,7 +4368,7 @@ version = "0.20.0"
 dependencies = [
  "heck 0.5.0",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
 
 [[package]]
@@ -4943,5 +4942,5 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.70",
+ "syn 2.0.71",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index a9bb351a2b..654ed1660d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -81,7 +81,7 @@ cfg-if = "1"
 criterion = "0.5"
 codespan-reporting = "0.11"
 ctor = "0.2"
-document-features = "0.2.9"
+document-features = "0.2.10"
 encase = "0.9"
 env_logger = "0.11"
 fern = "0.6"
diff --git a/naga/Cargo.toml b/naga/Cargo.toml
index f9e7f766fa..255d93f32d 100644
--- a/naga/Cargo.toml
+++ b/naga/Cargo.toml
@@ -70,7 +70,7 @@ rustc-hash = "1.1.0"
 indexmap = { version = "2", features = ["std"] }
 log = "0.4"
 spirv = { version = "0.3", optional = true }
-thiserror = "1.0.61"
+thiserror = "1.0.62"
 serde = { version = "1.0.204", features = ["derive"], optional = true }
 petgraph = { version = "0.6", optional = true }
 pp-rs = { version = "0.2.1", optional = true }

From f44f52a85ddb3e7b93fe195fb98a8990b05575d8 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 15 Jul 2024 12:09:11 +0200
Subject: [PATCH 029/226] build(deps): bump crate-ci/typos from 1.23.1 to
 1.23.2 (#5958)

Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.23.1 to 1.23.2.
- [Release notes](https://github.com/crate-ci/typos/releases)
- [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md)
- [Commits](https://github.com/crate-ci/typos/compare/v1.23.1...v1.23.2)

---
updated-dependencies:
- dependency-name: crate-ci/typos
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3cbd5858a0..a8ffaf1dfd 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -628,7 +628,7 @@ jobs:
           cargo fmt --manifest-path xtask/Cargo.toml -- --check
 
       - name: Check for typos
-        uses: crate-ci/typos@v1.23.1
+        uses: crate-ci/typos@v1.23.2
 
   check-cts-runner:
     # runtime is normally 2 minutes

From 32acb207fa8bf2f8150eec10bb9102eb587483ec Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Tue, 16 Jul 2024 11:11:43 -0400
Subject: [PATCH 030/226] docs(CHANGELOG): backport 0.19.5 entries (#5966)

---
 CHANGELOG.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9ce370d808..cb893260dd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -437,6 +437,17 @@ By @atlv24 and @cwfitzgerald in [#5154](https://github.com/gfx-rs/wgpu/pull/5154
 - Refactor tests to read feature flags by name instead of a hardcoded hexadecimal u64. By @atlv24 in [#5155](https://github.com/gfx-rs/wgpu/pull/5155).
 - Add test that verifies that we can drop the queue before using the device to create a command encoder. By @Davidster in [#5211](https://github.com/gfx-rs/wgpu/pull/5211)
 
+## 0.19.5 (2024-07-16)
+
+This release only releases `wgpu-hal` 0.19.5, which contains an important fix
+for DX12.
+
+### Bug Fixes
+
+#### DX12
+
+- Do not feed `&""` to `D3DCompile`, by @workingjubilee in [#5812](https://github.com/gfx-rs/wgpu/issues/5812), backported by @Elabajaba in [#5833](https://github.com/gfx-rs/wgpu/pull/5833).
+
 ## v0.19.4 (2024-04-17)
 
 ### Bug Fixes

From 167f005c1759cbf053e6760396fd23e3485335b5 Mon Sep 17 00:00:00 2001
From: Teodor Tanasoaia <28601907+teoxoy@users.noreply.github.com>
Date: Tue, 16 Jul 2024 18:30:35 +0200
Subject: [PATCH 031/226] [tests] delete outdated comment on
 `DEVICE_DESTROY_THEN_MORE` test (#5967)

This was fixed by 6e21f7a9291db4395192d6b510d906978ae2d251.
---
 tests/tests/device.rs | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/tests/tests/device.rs b/tests/tests/device.rs
index e2ed9f5b60..f932faa2f1 100644
--- a/tests/tests/device.rs
+++ b/tests/tests/device.rs
@@ -147,14 +147,6 @@ async fn request_device_error_message() {
 
 // This is a test of device behavior after device.destroy. Specifically, all operations
 // should trigger errors since the device is lost.
-//
-// On DX12 this test fails with a validation error in the very artificial actions taken
-// after lose the device. The error is "ID3D12CommandAllocator::Reset: The command
-// allocator cannot be reset because a command list is currently being recorded with the
-// allocator." That may indicate that DX12 doesn't like opened command buffers staying
-// open even after they return an error. For now, this test is skipped on DX12.
-//
-// The DX12 issue may be related to https://github.com/gfx-rs/wgpu/issues/3193.
 #[gpu_test]
 static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::new()
     .parameters(TestParameters::default().features(wgpu::Features::CLEAR_TEXTURE))

From a3d2d31d3d9db6098d7842d2d76cd70e54fd12b0 Mon Sep 17 00:00:00 2001
From: Teodor Tanasoaia <28601907+teoxoy@users.noreply.github.com>
Date: Tue, 16 Jul 2024 18:30:53 +0200
Subject: [PATCH 032/226] [test] allow WARP to run the zero-init workgroup
 memory test (#5968)

I pinpointed this to 438d6394efd5a41d62c07d18c8fff58dd0243a74 (https://github.com/gfx-rs/wgpu/pull/3512).
I'm not sure why I didn't remove this one in 30064ead9fd7639f9fd842b0a3188baf9dc8bee3 (https://github.com/gfx-rs/wgpu/pull/3515) as well; maybe I thought it was still failing due to early frees.
---
 tests/tests/shader/zero_init_workgroup_mem.rs | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/tests/tests/shader/zero_init_workgroup_mem.rs b/tests/tests/shader/zero_init_workgroup_mem.rs
index 0dcb81959b..eb774f7b35 100644
--- a/tests/tests/shader/zero_init_workgroup_mem.rs
+++ b/tests/tests/shader/zero_init_workgroup_mem.rs
@@ -1,28 +1,21 @@
 use std::num::NonZeroU64;
 
 use wgpu::{
-    include_wgsl, Backends, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor,
+    include_wgsl, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor,
     BindGroupLayoutEntry, BindingResource, BindingType, BufferBinding, BufferBindingType,
     BufferDescriptor, BufferUsages, CommandEncoderDescriptor, ComputePassDescriptor,
     ComputePipelineDescriptor, DownlevelFlags, Limits, Maintain, MapMode, PipelineLayoutDescriptor,
     ShaderStages,
 };
 
-use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters};
+use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters};
 
 #[gpu_test]
 static ZERO_INIT_WORKGROUP_MEMORY: GpuTestConfiguration = GpuTestConfiguration::new()
     .parameters(
         TestParameters::default()
             .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
-            .limits(Limits::downlevel_defaults())
-            // remove once we get to https://github.com/gfx-rs/wgpu/issues/3193
-            .skip(FailureCase {
-                backends: Some(Backends::DX12),
-                vendor: Some(5140),
-                adapter: Some("Microsoft Basic Render Driver"),
-                ..FailureCase::default()
-            }),
+            .limits(Limits::downlevel_defaults()),
     )
     .run_async(|ctx| async move {
         let bgl = ctx

From 241b52f7eadbc51c2ad331933febdffdc4545fd2 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 17 Jul 2024 12:25:59 +0200
Subject: [PATCH 033/226] [example] add instructions to halmark

---
 wgpu-hal/examples/halmark/main.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs
index a657b161b4..daed0c1d35 100644
--- a/wgpu-hal/examples/halmark/main.rs
+++ b/wgpu-hal/examples/halmark/main.rs
@@ -814,6 +814,8 @@ fn main() {
     let example_result = Example::<Api>::init(&window);
     let mut example = Some(example_result.expect("Selected backend is not supported"));
 
+    println!("Press space to spawn bunnies.");
+
     let mut last_frame_inst = Instant::now();
     let (mut frame_count, mut accum_time) = (0, 0.0);
 

From 7e112ca4c0686c9626be4c8ddd113e954a2dab21 Mon Sep 17 00:00:00 2001
From: Teodor Tanasoaia <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 17 Jul 2024 16:33:48 +0200
Subject: [PATCH 034/226] [wgpu-core] fix length of copy in
 `queue_write_texture` (#5973)

The size of the given `data` might be less than the size of the staging buffer.
This issue became apparent with the refactor in 6f16ea460ab437173e14d2f5f3584ca7e1c9841d (https://github.com/gfx-rs/wgpu/pull/5946) since there is now an assert in `StagingBuffer.write()`.

Ruffle ran into this in https://github.com/gfx-rs/wgpu/issues/3193#issuecomment-2231209711.
---
 wgpu-core/src/device/queue.rs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 52edb528a3..291fb6456f 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -781,7 +781,14 @@ impl Global {
         if stage_bytes_per_row == bytes_per_row {
             profiling::scope!("copy aligned");
             // Fast path if the data is already being aligned optimally.
-            staging_buffer.write(&data[data_layout.offset as usize..]);
+            unsafe {
+                staging_buffer.write_with_offset(
+                    data,
+                    data_layout.offset as isize,
+                    0,
+                    (data.len() as u64 - data_layout.offset) as usize,
+                );
+            }
         } else {
             profiling::scope!("copy chunked");
             // Copy row by row into the optimal alignment.

From 91924fb6034408c14a7e75691fd9bedf24bf03e1 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 17 Jul 2024 11:29:18 +0200
Subject: [PATCH 035/226] [wgpu-core] make `implicit_pipeline_ids` arg optional
 for users that don't provide IDs

---
 deno_webgpu/pipeline.rs        |  25 +-------
 player/src/lib.rs              |   8 +--
 wgpu-core/src/device/global.rs | 114 ++++++++++++++++-----------------
 wgpu-core/src/device/mod.rs    |   8 +--
 wgpu-core/src/id.rs            |  12 ----
 wgpu-core/src/pipeline.rs      |   2 +
 wgpu/src/backend/wgpu_core.rs  |  18 +-----
 7 files changed, 70 insertions(+), 117 deletions(-)

diff --git a/deno_webgpu/pipeline.rs b/deno_webgpu/pipeline.rs
index 75bd9b3ef2..f925705119 100644
--- a/deno_webgpu/pipeline.rs
+++ b/deno_webgpu/pipeline.rs
@@ -14,8 +14,6 @@ use std::rc::Rc;
 use super::error::WebGpuError;
 use super::error::WebGpuResult;
 
-const MAX_BIND_GROUPS: usize = 8;
-
 pub(crate) struct WebGpuPipelineLayout(
     pub(crate) crate::Instance,
     pub(crate) wgpu_core::id::PipelineLayoutId,
@@ -118,21 +116,12 @@ pub fn op_webgpu_create_compute_pipeline(
         },
         cache: None,
     };
-    let implicit_pipelines = match layout {
-        GPUPipelineLayoutOrGPUAutoLayoutMode::Layout(_) => None,
-        GPUPipelineLayoutOrGPUAutoLayoutMode::Auto(GPUAutoLayoutMode::Auto) => {
-            Some(wgpu_core::device::ImplicitPipelineIds {
-                root_id: None,
-                group_ids: &[None; MAX_BIND_GROUPS],
-            })
-        }
-    };
 
     let (compute_pipeline, maybe_err) = gfx_select!(device => instance.device_create_compute_pipeline(
       device,
       &descriptor,
       None,
-      implicit_pipelines
+      None,
     ));
 
     let rid = state
@@ -397,21 +386,11 @@ pub fn op_webgpu_create_render_pipeline(
         cache: None,
     };
 
-    let implicit_pipelines = match args.layout {
-        GPUPipelineLayoutOrGPUAutoLayoutMode::Layout(_) => None,
-        GPUPipelineLayoutOrGPUAutoLayoutMode::Auto(GPUAutoLayoutMode::Auto) => {
-            Some(wgpu_core::device::ImplicitPipelineIds {
-                root_id: None,
-                group_ids: &[None; MAX_BIND_GROUPS],
-            })
-        }
-    };
-
     let (render_pipeline, maybe_err) = gfx_select!(device => instance.device_create_render_pipeline(
       device,
       &descriptor,
       None,
-      implicit_pipelines
+      None,
     ));
 
     let rid = state
diff --git a/player/src/lib.rs b/player/src/lib.rs
index 8acdcd043e..de56b16888 100644
--- a/player/src/lib.rs
+++ b/player/src/lib.rs
@@ -256,8 +256,8 @@ impl GlobalPlay for wgc::global::Global {
                     implicit_context
                         .as_ref()
                         .map(|ic| wgc::device::ImplicitPipelineIds {
-                            root_id: Some(ic.root_id),
-                            group_ids: wgc::id::as_option_slice(&ic.group_ids),
+                            root_id: ic.root_id,
+                            group_ids: &ic.group_ids,
                         });
                 let (_, error) =
                     self.device_create_compute_pipeline::<A>(device, &desc, Some(id), implicit_ids);
@@ -277,8 +277,8 @@ impl GlobalPlay for wgc::global::Global {
                     implicit_context
                         .as_ref()
                         .map(|ic| wgc::device::ImplicitPipelineIds {
-                            root_id: Some(ic.root_id),
-                            group_ids: wgc::id::as_option_slice(&ic.group_ids),
+                            root_id: ic.root_id,
+                            group_ids: &ic.group_ids,
                         });
                 let (_, error) =
                     self.device_create_render_pipeline::<A>(device, &desc, Some(id), implicit_ids);
diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index e5643a3da9..e6a9251299 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -1385,12 +1385,18 @@ impl Global {
 
         let hub = A::hub(self);
 
+        let missing_implicit_pipeline_ids =
+            desc.layout.is_none() && id_in.is_some() && implicit_pipeline_ids.is_none();
+
         let fid = hub.render_pipelines.prepare(id_in);
         let implicit_context = implicit_pipeline_ids.map(|ipi| ipi.prepare(hub));
 
-        let is_auto_layout = desc.layout.is_none();
-
         let error = 'error: {
+            if missing_implicit_pipeline_ids {
+                // TODO: categorize this error as API misuse
+                break 'error pipeline::ImplicitLayoutError::MissingImplicitPipelineIds.into();
+            }
+
             let device = match hub.devices.get(device_id) {
                 Ok(device) => device,
                 Err(_) => break 'error DeviceError::InvalidDeviceId.into(),
@@ -1505,23 +1511,18 @@ impl Global {
                 Err(e) => break 'error e,
             };
 
-            if is_auto_layout {
-                // TODO: categorize the errors below as API misuse
-                let ids = if let Some(ids) = implicit_context.as_ref() {
-                    let group_count = pipeline.layout.bind_group_layouts.len();
-                    if ids.group_ids.len() < group_count {
-                        log::error!(
-                            "Not enough bind group IDs ({}) specified for the implicit layout ({})",
-                            ids.group_ids.len(),
-                            group_count
-                        );
-                        break 'error pipeline::ImplicitLayoutError::MissingIds(group_count as _)
-                            .into();
-                    }
-                    ids
-                } else {
-                    break 'error pipeline::ImplicitLayoutError::MissingIds(0).into();
-                };
+            if let Some(ids) = implicit_context.as_ref() {
+                let group_count = pipeline.layout.bind_group_layouts.len();
+                if ids.group_ids.len() < group_count {
+                    log::error!(
+                        "Not enough bind group IDs ({}) specified for the implicit layout ({})",
+                        ids.group_ids.len(),
+                        group_count
+                    );
+                    // TODO: categorize this error as API misuse
+                    break 'error pipeline::ImplicitLayoutError::MissingIds(group_count as _)
+                        .into();
+                }
 
                 let mut pipeline_layout_guard = hub.pipeline_layouts.write();
                 let mut bgl_guard = hub.bind_group_layouts.write();
@@ -1552,16 +1553,14 @@ impl Global {
 
         let id = fid.assign_error();
 
-        if is_auto_layout {
-            // We also need to assign errors to the implicit pipeline layout and the
-            // implicit bind group layouts.
-            if let Some(ids) = implicit_context {
-                let mut pipeline_layout_guard = hub.pipeline_layouts.write();
-                let mut bgl_guard = hub.bind_group_layouts.write();
-                pipeline_layout_guard.insert_error(ids.root_id);
-                for bgl_id in ids.group_ids {
-                    bgl_guard.insert_error(bgl_id);
-                }
+        // We also need to assign errors to the implicit pipeline layout and the
+        // implicit bind group layouts.
+        if let Some(ids) = implicit_context {
+            let mut pipeline_layout_guard = hub.pipeline_layouts.write();
+            let mut bgl_guard = hub.bind_group_layouts.write();
+            pipeline_layout_guard.insert_error(ids.root_id);
+            for bgl_id in ids.group_ids {
+                bgl_guard.insert_error(bgl_id);
             }
         }
 
@@ -1629,12 +1628,18 @@ impl Global {
 
         let hub = A::hub(self);
 
+        let missing_implicit_pipeline_ids =
+            desc.layout.is_none() && id_in.is_some() && implicit_pipeline_ids.is_none();
+
         let fid = hub.compute_pipelines.prepare(id_in);
         let implicit_context = implicit_pipeline_ids.map(|ipi| ipi.prepare(hub));
 
-        let is_auto_layout = desc.layout.is_none();
-
         let error = 'error: {
+            if missing_implicit_pipeline_ids {
+                // TODO: categorize this error as API misuse
+                break 'error pipeline::ImplicitLayoutError::MissingImplicitPipelineIds.into();
+            }
+
             let device = match hub.devices.get(device_id) {
                 Ok(device) => device,
                 Err(_) => break 'error DeviceError::InvalidDeviceId.into(),
@@ -1703,23 +1708,18 @@ impl Global {
                 Err(e) => break 'error e,
             };
 
-            if is_auto_layout {
-                // TODO: categorize the errors below as API misuse
-                let ids = if let Some(ids) = implicit_context.as_ref() {
-                    let group_count = pipeline.layout.bind_group_layouts.len();
-                    if ids.group_ids.len() < group_count {
-                        log::error!(
-                            "Not enough bind group IDs ({}) specified for the implicit layout ({})",
-                            ids.group_ids.len(),
-                            group_count
-                        );
-                        break 'error pipeline::ImplicitLayoutError::MissingIds(group_count as _)
-                            .into();
-                    }
-                    ids
-                } else {
-                    break 'error pipeline::ImplicitLayoutError::MissingIds(0).into();
-                };
+            if let Some(ids) = implicit_context.as_ref() {
+                let group_count = pipeline.layout.bind_group_layouts.len();
+                if ids.group_ids.len() < group_count {
+                    log::error!(
+                        "Not enough bind group IDs ({}) specified for the implicit layout ({})",
+                        ids.group_ids.len(),
+                        group_count
+                    );
+                    // TODO: categorize this error as API misuse
+                    break 'error pipeline::ImplicitLayoutError::MissingIds(group_count as _)
+                        .into();
+                }
 
                 let mut pipeline_layout_guard = hub.pipeline_layouts.write();
                 let mut bgl_guard = hub.bind_group_layouts.write();
@@ -1750,16 +1750,14 @@ impl Global {
 
         let id = fid.assign_error();
 
-        if is_auto_layout {
-            // We also need to assign errors to the implicit pipeline layout and the
-            // implicit bind group layouts.
-            if let Some(ids) = implicit_context {
-                let mut pipeline_layout_guard = hub.pipeline_layouts.write();
-                let mut bgl_guard = hub.bind_group_layouts.write();
-                pipeline_layout_guard.insert_error(ids.root_id);
-                for bgl_id in ids.group_ids {
-                    bgl_guard.insert_error(bgl_id);
-                }
+        // We also need to assign errors to the implicit pipeline layout and the
+        // implicit bind group layouts.
+        if let Some(ids) = implicit_context {
+            let mut pipeline_layout_guard = hub.pipeline_layouts.write();
+            let mut bgl_guard = hub.bind_group_layouts.write();
+            pipeline_layout_guard.insert_error(ids.root_id);
+            for bgl_id in ids.group_ids {
+                bgl_guard.insert_error(bgl_id);
             }
         }
 
diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index 03d9adcc60..e37291ef20 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -433,18 +433,18 @@ pub struct ImplicitPipelineContext {
 }
 
 pub struct ImplicitPipelineIds<'a> {
-    pub root_id: Option<PipelineLayoutId>,
-    pub group_ids: &'a [Option<BindGroupLayoutId>],
+    pub root_id: PipelineLayoutId,
+    pub group_ids: &'a [BindGroupLayoutId],
 }
 
 impl ImplicitPipelineIds<'_> {
     fn prepare<A: HalApi>(self, hub: &Hub<A>) -> ImplicitPipelineContext {
         ImplicitPipelineContext {
-            root_id: hub.pipeline_layouts.prepare(self.root_id).into_id(),
+            root_id: hub.pipeline_layouts.prepare(Some(self.root_id)).into_id(),
             group_ids: self
                 .group_ids
                 .iter()
-                .map(|id_in| hub.bind_group_layouts.prepare(*id_in).into_id())
+                .map(|id_in| hub.bind_group_layouts.prepare(Some(*id_in)).into_id())
                 .collect(),
         }
     }
diff --git a/wgpu-core/src/id.rs b/wgpu-core/src/id.rs
index 05efbd2e44..c795063da5 100644
--- a/wgpu-core/src/id.rs
+++ b/wgpu-core/src/id.rs
@@ -77,18 +77,6 @@ impl RawId {
     }
 }
 
-/// Coerce a slice of identifiers into a slice of optional raw identifiers.
-///
-/// There's two reasons why we know this is correct:
-/// * `Option<T>` is guaranteed to be niche-filled to 0's.
-/// * The `T` in `Option<T>` can inhabit any representation except 0's, since
-///   its underlying representation is `NonZero*`.
-pub fn as_option_slice<T: Marker>(ids: &[Id<T>]) -> &[Option<Id<T>>] {
-    // SAFETY: Any Id<T> is repr(transparent) over `Option<RawId>`, since both
-    // are backed by non-zero types.
-    unsafe { std::slice::from_raw_parts(ids.as_ptr().cast(), ids.len()) }
-}
-
 /// An identifier for a wgpu object.
 ///
 /// An `Id<T>` value identifies a value stored in a [`Global`]'s [`Hub`].
diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs
index b422ced5eb..6366279eff 100644
--- a/wgpu-core/src/pipeline.rs
+++ b/wgpu-core/src/pipeline.rs
@@ -186,6 +186,8 @@ pub type ImplicitBindGroupCount = u8;
 #[derive(Clone, Debug, Error)]
 #[non_exhaustive]
 pub enum ImplicitLayoutError {
+    #[error("The implicit_pipeline_ids arg is required")]
+    MissingImplicitPipelineIds,
     #[error("Missing IDs for deriving {0} bind groups")]
     MissingIds(ImplicitBindGroupCount),
     #[error("Unable to reflect the shader {0:?} interface")]
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 6485aefcde..5e12823793 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -1162,13 +1162,6 @@ impl crate::Context for ContextWgpuCore {
             })
             .collect();
 
-        let implicit_pipeline_ids = match desc.layout {
-            Some(_) => None,
-            None => Some(wgc::device::ImplicitPipelineIds {
-                root_id: None,
-                group_ids: &[None; wgc::MAX_BIND_GROUPS],
-            }),
-        };
         let descriptor = pipe::RenderPipelineDescriptor {
             label: desc.label.map(Borrowed),
             layout: desc.layout.map(|l| l.id.into()),
@@ -1211,7 +1204,7 @@ impl crate::Context for ContextWgpuCore {
             *device,
             &descriptor,
             None,
-            implicit_pipeline_ids
+            None,
         ));
         if let Some(cause) = error {
             if let wgc::pipeline::CreateRenderPipelineError::Internal { stage, ref error } = cause {
@@ -1235,13 +1228,6 @@ impl crate::Context for ContextWgpuCore {
     ) -> (Self::ComputePipelineId, Self::ComputePipelineData) {
         use wgc::pipeline as pipe;
 
-        let implicit_pipeline_ids = match desc.layout {
-            Some(_) => None,
-            None => Some(wgc::device::ImplicitPipelineIds {
-                root_id: None,
-                group_ids: &[None; wgc::MAX_BIND_GROUPS],
-            }),
-        };
         let descriptor = pipe::ComputePipelineDescriptor {
             label: desc.label.map(Borrowed),
             layout: desc.layout.map(|l| l.id.into()),
@@ -1261,7 +1247,7 @@ impl crate::Context for ContextWgpuCore {
             *device,
             &descriptor,
             None,
-            implicit_pipeline_ids
+            None,
         ));
         if let Some(cause) = error {
             if let wgc::pipeline::CreateComputePipelineError::Internal(ref error) = cause {

From 69a1134e02498ea3bbfaa966561201f8b6b4a172 Mon Sep 17 00:00:00 2001
From: Teodor Tanasoaia <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 17 Jul 2024 20:10:49 +0200
Subject: [PATCH 036/226] [wgpu] remove `trace` feature temporarily (#5975)

---
 CHANGELOG.md                  |  1 +
 wgpu/Cargo.toml               |  5 +++--
 wgpu/src/backend/wgpu_core.rs | 10 ++++++++--
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cb893260dd..191be5c03c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -185,6 +185,7 @@ By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
 - Avoid introducing spurious features for optional dependencies. By @bjorn3 in [#5691](https://github.com/gfx-rs/wgpu/pull/5691)
 - `wgpu::Error` is now `Sync`, making it possible to be wrapped in `anyhow::Error` or `eyre::Report`. By @nolanderc in [#5820](https://github.com/gfx-rs/wgpu/pull/5820)
 - Added benchmark suite. By @cwfitzgerald in [#5694](https://github.com/gfx-rs/wgpu/pull/5694), compute passes by @wumpf in [#5767](https://github.com/gfx-rs/wgpu/pull/5767)
+- The `trace` wgpu feature has been temporarily removed. By @teoxoy in [#5975](https://github.com/gfx-rs/wgpu/pull/5975)
 
 #### Metal
 - Removed the `link` Cargo feature.
diff --git a/wgpu/Cargo.toml b/wgpu/Cargo.toml
index d8538a6ed9..cd73f5dc9e 100644
--- a/wgpu/Cargo.toml
+++ b/wgpu/Cargo.toml
@@ -87,8 +87,9 @@ strict_asserts = ["wgc?/strict_asserts", "wgt/strict_asserts"]
 ## Enables serialization via `serde` on common wgpu types.
 serde = ["dep:serde", "wgc/serde"]
 
-## Allow writing of trace capture files. See [`Adapter::request_device`].
-trace = ["serde", "wgc/trace"]
+# Uncomment once we get to https://github.com/gfx-rs/wgpu/issues/5974
+# ## Allow writing of trace capture files. See [`Adapter::request_device`].
+# trace = ["serde", "wgc/trace"]
 
 ## Allow deserializing of trace capture files that were written with the `trace` feature.
 ## To replay a trace file use the [wgpu player](https://github.com/gfx-rs/wgpu/tree/trunk/player).
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 5e12823793..91629d638c 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -105,12 +105,15 @@ impl ContextWgpuCore {
         desc: &crate::DeviceDescriptor<'_>,
         trace_dir: Option<&std::path::Path>,
     ) -> Result<(Device, Queue), crate::RequestDeviceError> {
+        if trace_dir.is_some() {
+            log::error!("Feature 'trace' has been removed temporarily, see https://github.com/gfx-rs/wgpu/issues/5974");
+        }
         let (device_id, queue_id, error) = unsafe {
             self.0.create_device_from_hal(
                 *adapter,
                 hal_device,
                 &desc.map_label(|l| l.map(Borrowed)),
-                trace_dir,
+                None,
                 None,
                 None,
             )
@@ -640,10 +643,13 @@ impl crate::Context for ContextWgpuCore {
         desc: &crate::DeviceDescriptor<'_>,
         trace_dir: Option<&std::path::Path>,
     ) -> Self::RequestDeviceFuture {
+        if trace_dir.is_some() {
+            log::error!("Feature 'trace' has been removed temporarily, see https://github.com/gfx-rs/wgpu/issues/5974");
+        }
         let (device_id, queue_id, error) = wgc::gfx_select!(*adapter => self.0.adapter_request_device(
             *adapter,
             &desc.map_label(|l| l.map(Borrowed)),
-            trace_dir,
+            None,
             None,
             None
         ));

From a47ed5dc1ef7fb0591b89b5983bdde233503aa5d Mon Sep 17 00:00:00 2001
From: Jim Blandy <jimb@red-bean.com>
Date: Tue, 16 Jul 2024 18:01:44 -0700
Subject: [PATCH 037/226] [hal doc] Note `wgpu_hal::Queue::submit`'s
 expectations for `Fence`.

---
 wgpu-hal/src/lib.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 6f470f4ddc..2dd09934df 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -955,6 +955,9 @@ pub trait Queue: WasmNotSendSync {
     /// - All calls to this function that include a given [`SurfaceTexture`][st]
     ///   in `surface_textures` must use the same [`Fence`].
     ///
+    /// - The [`Fence`] passed as `signal_fence.0` must remain alive until
+    ///   all submissions that will signal it have completed.
+    ///
     /// [`Fence`]: Api::Fence
     /// [cb]: Api::CommandBuffer
     /// [ce]: Api::CommandEncoder

From 2bc328c46f3f9e1205769b5e180863ac51d80ec4 Mon Sep 17 00:00:00 2001
From: Jim Blandy <jimb@red-bean.com>
Date: Tue, 16 Jul 2024 18:06:22 -0700
Subject: [PATCH 038/226] [hal, core] Introduce `wgpu_hal::AtomicFenceValue`,
 and use it.

Introduce the new type alias `wgpu_hal::AtomicFenceValue`, which is
the atomic version of `wgpu_hal::FenceValue`. Use this type alias in
`wgpu_core`. Remove `as` conversions made unnecessary since we're not
conflating `usize` with `u64` any more.
---
 wgpu-core/src/device/resource.rs |  3 +--
 wgpu-core/src/resource.rs        | 14 +++++---------
 wgpu-hal/src/lib.rs              |  1 +
 3 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index c364711f5d..7030f3c6fe 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -88,8 +88,7 @@ pub struct Device<A: HalApi> {
     label: String,
 
     pub(crate) command_allocator: command::CommandAllocator<A>,
-    //Note: The submission index here corresponds to the last submission that is done.
-    pub(crate) active_submission_index: AtomicU64, //SubmissionIndex,
+    pub(crate) active_submission_index: hal::AtomicFenceValue,
     // NOTE: if both are needed, the `snatchable_lock` must be consistently acquired before the
     // `fence` lock to avoid deadlocks.
     pub(crate) fence: RwLock<Option<A::Fence>>,
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index ced9edbb56..1929b9f8f7 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -28,10 +28,7 @@ use std::{
     mem::{self, ManuallyDrop},
     ops::Range,
     ptr::NonNull,
-    sync::{
-        atomic::{AtomicUsize, Ordering},
-        Arc, Weak,
-    },
+    sync::{atomic::Ordering, Arc, Weak},
 };
 
 /// Information about the wgpu-core resource.
@@ -64,7 +61,7 @@ pub(crate) struct TrackingData {
     /// sequentially. Thus, when a queue submission completes, we know any
     /// resources used in that submission and any lower-numbered submissions are
     /// no longer in use by the GPU.
-    submission_index: AtomicUsize,
+    submission_index: hal::AtomicFenceValue,
 }
 
 impl Drop for TrackingData {
@@ -78,7 +75,7 @@ impl TrackingData {
         Self {
             tracker_index: tracker_indices.alloc(),
             tracker_indices,
-            submission_index: AtomicUsize::new(0),
+            submission_index: hal::AtomicFenceValue::new(0),
         }
     }
 
@@ -89,12 +86,11 @@ impl TrackingData {
     /// Record that this resource will be used by the queue submission with the
     /// given index.
     pub(crate) fn use_at(&self, submit_index: SubmissionIndex) {
-        self.submission_index
-            .store(submit_index as _, Ordering::Release);
+        self.submission_index.store(submit_index, Ordering::Release);
     }
 
     pub(crate) fn submission_index(&self) -> SubmissionIndex {
-        self.submission_index.load(Ordering::Acquire) as _
+        self.submission_index.load(Ordering::Acquire)
     }
 }
 
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 2dd09934df..b28a005a7a 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -294,6 +294,7 @@ pub const QUERY_SIZE: wgt::BufferAddress = 8;
 pub type Label<'a> = Option<&'a str>;
 pub type MemoryRange = Range<wgt::BufferAddress>;
 pub type FenceValue = u64;
+pub type AtomicFenceValue = std::sync::atomic::AtomicU64;
 
 /// Drop guard to signal wgpu-hal is no longer using an externally created object.
 pub type DropGuard = Box<dyn std::any::Any + Send + Sync>;

From aeb2067e8120c1ff480625c00b9571db8d01d5a4 Mon Sep 17 00:00:00 2001
From: Jim Blandy <jimb@red-bean.com>
Date: Tue, 16 Jul 2024 20:43:33 -0700
Subject: [PATCH 039/226] [core] Make `poll(Wait)` not hang after bad command
 submission.

Add `wgpu_core::device::Device::last_successful_submission_index`,
which records the fence value that `Maintain::Wait` should actually
wait for. See comments for details.

Fixes #5969.
---
 tests/tests/poll.rs              | 34 ++++++++++++++++
 wgpu-core/src/device/queue.rs    |  7 +++-
 wgpu-core/src/device/resource.rs | 67 ++++++++++++++++++++++----------
 3 files changed, 87 insertions(+), 21 deletions(-)

diff --git a/tests/tests/poll.rs b/tests/tests/poll.rs
index 740618f23c..6b86436f7a 100644
--- a/tests/tests/poll.rs
+++ b/tests/tests/poll.rs
@@ -125,3 +125,37 @@ static WAIT_OUT_OF_ORDER: GpuTestConfiguration =
             .await
             .panic_on_timeout();
     });
+
+/// Submit a command buffer to the wrong device. A wait poll shouldn't hang.
+///
+/// We can't catch panics on Wasm, since they get reported directly to the
+/// console.
+#[gpu_test]
+static WAIT_AFTER_BAD_SUBMISSION: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(wgpu_test::TestParameters::default().skip(wgpu_test::FailureCase::webgl2()))
+    .run_async(wait_after_bad_submission);
+
+async fn wait_after_bad_submission(ctx: TestingContext) {
+    let (device2, queue2) =
+        wgpu_test::initialize_device(&ctx.adapter, ctx.device_features, ctx.device_limits.clone())
+            .await;
+
+    let command_buffer1 = ctx
+        .device
+        .create_command_encoder(&CommandEncoderDescriptor::default())
+        .finish();
+
+    // This should panic, since the command buffer belongs to the wrong
+    // device, and queue submission errors seem to be fatal errors?
+    let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+        queue2.submit([command_buffer1]);
+    }));
+    assert!(result.is_err());
+
+    // This should not hang.
+    //
+    // Specifically, the failed submission should not cause a new fence value to
+    // be allocated that will not be signalled until further work is
+    // successfully submitted, causing a greater fence value to be signalled.
+    device2.poll(wgpu::Maintain::Wait);
+}
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 291fb6456f..1c7b787428 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -1069,7 +1069,7 @@ impl Global {
             let fence = fence_guard.as_mut().unwrap();
             let submit_index = device
                 .active_submission_index
-                .fetch_add(1, Ordering::Relaxed)
+                .fetch_add(1, Ordering::SeqCst)
                 + 1;
             let mut active_executions = Vec::new();
 
@@ -1392,6 +1392,11 @@ impl Global {
                         )
                         .map_err(DeviceError::from)?;
                 }
+
+                // Advance the successful submission index.
+                device
+                    .last_successful_submission_index
+                    .fetch_max(submit_index, Ordering::SeqCst);
             }
 
             profiling::scope!("cleanup");
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 7030f3c6fe..195c6c7e6a 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -88,7 +88,27 @@ pub struct Device<A: HalApi> {
     label: String,
 
     pub(crate) command_allocator: command::CommandAllocator<A>,
+
+    /// The index of the last command submission that was attempted.
+    ///
+    /// Note that `fence` may never be signalled with this value, if the command
+    /// submission failed. If you need to wait for everything running on a
+    /// `Queue` to complete, wait for [`last_successful_submission_index`].
+    ///
+    /// [`last_successful_submission_index`]: Device::last_successful_submission_index
     pub(crate) active_submission_index: hal::AtomicFenceValue,
+
+    /// The index of the last successful submission to this device's
+    /// [`hal::Queue`].
+    ///
+    /// Unlike [`active_submission_index`], which is incremented each time
+    /// submission is attempted, this is updated only when submission succeeds,
+    /// so waiting for this value won't hang waiting for work that was never
+    /// submitted.
+    ///
+    /// [`active_submission_index`]: Device::active_submission_index
+    pub(crate) last_successful_submission_index: hal::AtomicFenceValue,
+
     // NOTE: if both are needed, the `snatchable_lock` must be consistently acquired before the
     // `fence` lock to avoid deadlocks.
     pub(crate) fence: RwLock<Option<A::Fence>>,
@@ -257,6 +277,7 @@ impl<A: HalApi> Device<A> {
             label: desc.label.to_string(),
             command_allocator,
             active_submission_index: AtomicU64::new(0),
+            last_successful_submission_index: AtomicU64::new(0),
             fence: RwLock::new(rank::DEVICE_FENCE, Some(fence)),
             snatchable_lock: unsafe { SnatchLock::new(rank::DEVICE_SNATCHABLE_LOCK) },
             valid: AtomicBool::new(true),
@@ -387,37 +408,41 @@ impl<A: HalApi> Device<A> {
         profiling::scope!("Device::maintain");
 
         let fence = fence_guard.as_ref().unwrap();
-        let last_done_index = if maintain.is_wait() {
-            let index_to_wait_for = match maintain {
-                wgt::Maintain::WaitForSubmissionIndex(submission_index) => {
-                    // We don't need to check to see if the queue id matches
-                    // as we already checked this from inside the poll call.
-                    submission_index.index
-                }
-                _ => self.active_submission_index.load(Ordering::Relaxed),
-            };
-            unsafe {
+
+        // Determine which submission index `maintain` represents.
+        let submission_index = match maintain {
+            wgt::Maintain::WaitForSubmissionIndex(submission_index) => {
+                // We don't need to check to see if the queue id matches
+                // as we already checked this from inside the poll call.
+                submission_index.index
+            }
+            wgt::Maintain::Wait => self
+                .last_successful_submission_index
+                .load(Ordering::Acquire),
+            wgt::Maintain::Poll => unsafe {
                 self.raw
                     .as_ref()
                     .unwrap()
-                    .wait(fence, index_to_wait_for, CLEANUP_WAIT_MS)
+                    .get_fence_value(fence)
                     .map_err(DeviceError::from)?
-            };
-            index_to_wait_for
-        } else {
+            },
+        };
+
+        // If necessary, wait for that submission to complete.
+        if maintain.is_wait() {
             unsafe {
                 self.raw
                     .as_ref()
                     .unwrap()
-                    .get_fence_value(fence)
+                    .wait(fence, submission_index, CLEANUP_WAIT_MS)
                     .map_err(DeviceError::from)?
-            }
-        };
-        log::info!("Device::maintain: last done index {last_done_index}");
+            };
+        }
+        log::info!("Device::maintain: waiting for submission index {submission_index}");
 
         let mut life_tracker = self.lock_life();
         let submission_closures =
-            life_tracker.triage_submissions(last_done_index, &self.command_allocator);
+            life_tracker.triage_submissions(submission_index, &self.command_allocator);
 
         life_tracker.triage_mapped();
 
@@ -3586,7 +3611,9 @@ impl<A: HalApi> Device<A> {
     /// Wait for idle and remove resources that we can, before we die.
     pub(crate) fn prepare_to_die(&self) {
         self.pending_writes.lock().as_mut().unwrap().deactivate();
-        let current_index = self.active_submission_index.load(Ordering::Relaxed);
+        let current_index = self
+            .last_successful_submission_index
+            .load(Ordering::Acquire);
         if let Err(error) = unsafe {
             let fence = self.fence.read();
             let fence = fence.as_ref().unwrap();

From 911d28fd1ae591426fa171ad02d3c364a222741f Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Thu, 18 Jul 2024 11:52:10 +0200
Subject: [PATCH 040/226] add a few missing changelog entries

---
 CHANGELOG.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 191be5c03c..6bd103a32a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -185,6 +185,7 @@ By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
 - Avoid introducing spurious features for optional dependencies. By @bjorn3 in [#5691](https://github.com/gfx-rs/wgpu/pull/5691)
 - `wgpu::Error` is now `Sync`, making it possible to be wrapped in `anyhow::Error` or `eyre::Report`. By @nolanderc in [#5820](https://github.com/gfx-rs/wgpu/pull/5820)
 - Added benchmark suite. By @cwfitzgerald in [#5694](https://github.com/gfx-rs/wgpu/pull/5694), compute passes by @wumpf in [#5767](https://github.com/gfx-rs/wgpu/pull/5767)
+- Improve performance of `.submit()` by 39-64% (`.submit()` + `.poll()` by 22-32%). By @teoxoy in [#5910](https://github.com/gfx-rs/wgpu/pull/5910)
 - The `trace` wgpu feature has been temporarily removed. By @teoxoy in [#5975](https://github.com/gfx-rs/wgpu/pull/5975)
 
 #### Metal
@@ -204,6 +205,11 @@ By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
 - Ensure render pipelines have at least 1 target. By @ErichDonGubler in [#5715](https://github.com/gfx-rs/wgpu/pull/5715)
 - `wgpu::ComputePass` now internally takes ownership of `QuerySet` for both `wgpu::ComputePassTimestampWrites` as well as timestamp writes and statistics query, fixing crashes when destroying `QuerySet` before ending the pass. By @wumpf in [#5671](https://github.com/gfx-rs/wgpu/pull/5671)
 - Validate resources passed during compute pass recording for mismatching device. By @wumpf in [#5779](https://github.com/gfx-rs/wgpu/pull/5779)
+- Fix staging buffers being destroyed too early. By @teoxoy in [#5910](https://github.com/gfx-rs/wgpu/pull/5910)
+- Fix attachment byte cost validation panicking with native only formats. By @teoxoy in [#5934](https://github.com/gfx-rs/wgpu/pull/5934)
+- [wgpu] Fix leaks from auto layout pipelines. By @teoxoy in [#5971](https://github.com/gfx-rs/wgpu/pull/5971)
+- [wgpu-core] Fix length of copy in `queue_write_texture` (causing UB). By @teoxoy in [#5973](https://github.com/gfx-rs/wgpu/pull/5973)
+- Add missing same device checks. By @teoxoy in [#5980](https://github.com/gfx-rs/wgpu/pull/5980)
 
 #### GLES / OpenGL
 

From 7761b5723da85c51b1a91ab93f5a75d52b0365f4 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 17 Jul 2024 16:10:17 +0200
Subject: [PATCH 041/226] move same device checks in `render_pass_end`

---
 wgpu-core/src/command/render.rs | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index dfeb4fb52a..8c65f98fa5 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -817,7 +817,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
     }
 
     fn start(
-        device: &'d Device<A>,
+        device: &'d Arc<Device<A>>,
         hal_label: Option<&str>,
         color_attachments: ArrayVec<
             Option<ArcRenderPassColorAttachment<A>>,
@@ -919,6 +919,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
 
         if let Some(at) = depth_stencil_attachment.as_ref() {
             let view = &at.view;
+            view.same_device(device)?;
             check_multiview(view)?;
             add_view(view, AttachmentErrorLocation::Depth)?;
 
@@ -1049,6 +1050,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
                 continue;
             };
             let color_view: &TextureView<A> = &at.view;
+            color_view.same_device(device)?;
             check_multiview(color_view)?;
             add_view(
                 color_view,
@@ -1079,6 +1081,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
 
             let mut hal_resolve_target = None;
             if let Some(resolve_view) = &at.resolve_target {
+                resolve_view.same_device(device)?;
                 check_multiview(resolve_view)?;
 
                 let resolve_location = AttachmentErrorLocation::Color {
@@ -1178,8 +1181,9 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
             multiview,
         };
 
-        let timestamp_writes_hal = timestamp_writes.as_ref().map(|tw| {
+        let timestamp_writes_hal = if let Some(tw) = timestamp_writes.as_ref() {
             let query_set = &tw.query_set;
+            query_set.same_device(device)?;
 
             if let Some(index) = tw.beginning_of_pass_write_index {
                 pending_query_resets.use_query_set(query_set, index);
@@ -1188,16 +1192,21 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
                 pending_query_resets.use_query_set(query_set, index);
             }
 
-            hal::RenderPassTimestampWrites {
+            Some(hal::RenderPassTimestampWrites {
                 query_set: query_set.raw.as_ref().unwrap(),
                 beginning_of_pass_write_index: tw.beginning_of_pass_write_index,
                 end_of_pass_write_index: tw.end_of_pass_write_index,
-            }
-        });
+            })
+        } else {
+            None
+        };
 
-        let occlusion_query_set_hal = occlusion_query_set
-            .as_ref()
-            .map(|query_set| query_set.raw.as_ref().unwrap());
+        let occlusion_query_set_hal = if let Some(query_set) = occlusion_query_set.as_ref() {
+            query_set.same_device(device)?;
+            Some(query_set.raw.as_ref().unwrap())
+        } else {
+            None
+        };
 
         let hal_desc = hal::RenderPassDescriptor {
             label: hal_label,
@@ -1331,7 +1340,6 @@ impl Global {
     ) -> (RenderPass<A>, Option<CommandEncoderError>) {
         fn fill_arc_desc<A: HalApi>(
             hub: &crate::hub::Hub<A>,
-            device: &Arc<Device<A>>,
             desc: &RenderPassDescriptor<'_>,
             arc_desc: &mut ArcRenderPassDescriptor<A>,
         ) -> Result<(), CommandEncoderError> {
@@ -1348,13 +1356,11 @@ impl Global {
                     let view = texture_views
                         .get_owned(*view_id)
                         .map_err(|_| CommandEncoderError::InvalidAttachmentId(*view_id))?;
-                    view.same_device(device)?;
 
                     let resolve_target = if let Some(resolve_target_id) = resolve_target {
                         let rt_arc = texture_views.get_owned(*resolve_target_id).map_err(|_| {
                             CommandEncoderError::InvalidResolveTargetId(*resolve_target_id)
                         })?;
-                        rt_arc.same_device(device)?;
 
                         Some(rt_arc)
                     } else {
@@ -1382,7 +1388,6 @@ impl Global {
                                 depth_stencil_attachment.view,
                             )
                         })?;
-                    view.same_device(device)?;
 
                     Some(ArcRenderPassDepthStencilAttachment {
                         view,
@@ -1397,7 +1402,6 @@ impl Global {
                 let query_set = query_sets.get_owned(tw.query_set).map_err(|_| {
                     CommandEncoderError::InvalidTimestampWritesQuerySetId(tw.query_set)
                 })?;
-                query_set.same_device(device)?;
 
                 Some(ArcPassTimestampWrites {
                     query_set,
@@ -1413,7 +1417,6 @@ impl Global {
                     let query_set = query_sets.get_owned(occlusion_query_set).map_err(|_| {
                         CommandEncoderError::InvalidOcclusionQuerySetId(occlusion_query_set)
                     })?;
-                    query_set.same_device(device)?;
 
                     Some(query_set)
                 } else {
@@ -1444,7 +1447,7 @@ impl Global {
             Err(e) => return make_err(e, arc_desc),
         };
 
-        let err = fill_arc_desc(hub, &cmd_buf.device, desc, &mut arc_desc).err();
+        let err = fill_arc_desc(hub, desc, &mut arc_desc).err();
 
         (RenderPass::new(Some(cmd_buf), arc_desc), err)
     }

From 2f7860b6e40357d9d183bf4768e0329cdbc7a608 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 17 Jul 2024 16:18:59 +0200
Subject: [PATCH 042/226] move same device check into `compute_pass_end_impl`

---
 wgpu-core/src/command/compute.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs
index ff2bdf37e7..c92b08e72f 100644
--- a/wgpu-core/src/command/compute.rs
+++ b/wgpu-core/src/command/compute.rs
@@ -321,10 +321,6 @@ impl Global {
                 );
             };
 
-            if let Err(e) = query_set.same_device_as(cmd_buf.as_ref()) {
-                return make_err(e.into(), arc_desc);
-            }
-
             Some(ArcPassTimestampWrites {
                 query_set,
                 beginning_of_pass_write_index: tw.beginning_of_pass_write_index,
@@ -501,6 +497,10 @@ impl Global {
         state.tracker.query_sets.set_size(indices.query_sets.size());
 
         let timestamp_writes = if let Some(tw) = timestamp_writes.take() {
+            tw.query_set
+                .same_device_as(cmd_buf)
+                .map_pass_err(pass_scope)?;
+
             let query_set = state.tracker.query_sets.insert_single(tw.query_set);
 
             // Unlike in render passes we can't delay resetting the query sets since

From 63303d4b4e8fe93a2c4d389489c33c4b0d00e547 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 17 Jul 2024 16:54:36 +0200
Subject: [PATCH 043/226] add missing same device checks

---
 wgpu-core/src/command/render.rs  | 14 +++++++++++++-
 wgpu-core/src/device/resource.rs |  1 +
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index 8c65f98fa5..b9c760b67d 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -1791,7 +1791,7 @@ impl Global {
                             },
                             indexed,
                         };
-                        multi_draw_indirect(&mut state, buffer, offset, count, indexed)
+                        multi_draw_indirect(&mut state, cmd_buf, buffer, offset, count, indexed)
                             .map_pass_err(scope)?;
                     }
                     ArcRenderCommand::MultiDrawIndirectCount {
@@ -1808,6 +1808,7 @@ impl Global {
                         };
                         multi_draw_indirect_count(
                             &mut state,
+                            cmd_buf,
                             buffer,
                             offset,
                             count_buffer,
@@ -1834,6 +1835,7 @@ impl Global {
                         let scope = PassErrorScope::WriteTimestamp;
                         write_timestamp(
                             &mut state,
+                            cmd_buf,
                             &mut cmd_buf_data.pending_query_resets,
                             query_set,
                             query_index,
@@ -2448,6 +2450,7 @@ fn draw_indexed<A: HalApi>(
 
 fn multi_draw_indirect<A: HalApi>(
     state: &mut State<A>,
+    cmd_buf: &Arc<CommandBuffer<A>>,
     indirect_buffer: Arc<crate::resource::Buffer<A>>,
     offset: u64,
     count: Option<NonZeroU32>,
@@ -2474,6 +2477,8 @@ fn multi_draw_indirect<A: HalApi>(
         .device
         .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION)?;
 
+    indirect_buffer.same_device_as(cmd_buf.as_ref())?;
+
     state
         .info
         .usage_scope
@@ -2520,6 +2525,7 @@ fn multi_draw_indirect<A: HalApi>(
 
 fn multi_draw_indirect_count<A: HalApi>(
     state: &mut State<A>,
+    cmd_buf: &Arc<CommandBuffer<A>>,
     indirect_buffer: Arc<crate::resource::Buffer<A>>,
     offset: u64,
     count_buffer: Arc<crate::resource::Buffer<A>>,
@@ -2547,6 +2553,9 @@ fn multi_draw_indirect_count<A: HalApi>(
         .device
         .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION)?;
 
+    indirect_buffer.same_device_as(cmd_buf.as_ref())?;
+    count_buffer.same_device_as(cmd_buf.as_ref())?;
+
     state
         .info
         .usage_scope
@@ -2677,6 +2686,7 @@ fn insert_debug_marker<A: HalApi>(state: &mut State<A>, string_data: &[u8], len:
 
 fn write_timestamp<A: HalApi>(
     state: &mut State<A>,
+    cmd_buf: &CommandBuffer<A>,
     pending_query_resets: &mut QueryResetMap<A>,
     query_set: Arc<QuerySet<A>>,
     query_index: u32,
@@ -2686,6 +2696,8 @@ fn write_timestamp<A: HalApi>(
         query_set.error_ident()
     );
 
+    query_set.same_device_as(cmd_buf)?;
+
     state
         .device
         .require_features(wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES)?;
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 195c6c7e6a..f434a89039 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -3119,6 +3119,7 @@ impl<A: HalApi> Device<A> {
                 let stage = wgt::ShaderStages::FRAGMENT;
 
                 let shader_module = &fragment_state.stage.module;
+                shader_module.same_device(self)?;
 
                 let stage_err = |error| pipeline::CreateRenderPipelineError::Stage { stage, error };
 

From ed67ff289cfdc2aa622bf4e1c2606305a4df7ef6 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 17 Jul 2024 16:54:48 +0200
Subject: [PATCH 044/226] [deno] remove `assertDeviceMatch`

---
 deno_webgpu/01_webgpu.js | 255 ---------------------------------------
 1 file changed, 255 deletions(-)

diff --git a/deno_webgpu/01_webgpu.js b/deno_webgpu/01_webgpu.js
index 719a0f4860..f226c8ab5f 100644
--- a/deno_webgpu/01_webgpu.js
+++ b/deno_webgpu/01_webgpu.js
@@ -180,27 +180,6 @@ function assertDevice(self, prefix, context) {
   return device;
 }
 
-/**
- * @param {InnerGPUDevice} self
- * @param {any} resource
- * @param {{prefix: string, resourceContext: string, selfContext: string}} opts
- * @returns {InnerGPUDevice & {rid: number}}
- */
-function assertDeviceMatch(
-  self,
-  resource,
-  { prefix, resourceContext, selfContext },
-) {
-  const resourceDevice = assertDevice(resource, prefix, resourceContext);
-  if (resourceDevice.rid !== self.rid) {
-    throw new DOMException(
-      `${prefix}: ${resourceContext} belongs to a different device than ${selfContext}.`,
-      "OperationError",
-    );
-  }
-  return { ...resourceDevice, rid: resourceDevice.rid };
-}
-
 /**
  * @param {any} self
  * @param {string} prefix
@@ -1262,11 +1241,6 @@ class GPUDevice extends EventTarget {
       (layout, i) => {
         const context = `bind group layout ${i + 1}`;
         const rid = assertResource(layout, prefix, context);
-        assertDeviceMatch(device, layout, {
-          prefix,
-          selfContext: "this",
-          resourceContext: context,
-        });
         return rid;
       },
     );
@@ -1301,11 +1275,6 @@ class GPUDevice extends EventTarget {
     );
     const device = assertDevice(this, prefix, "this");
     const layout = assertResource(descriptor.layout, prefix, "layout");
-    assertDeviceMatch(device, descriptor.layout, {
-      prefix,
-      resourceContext: "layout",
-      selfContext: "this",
-    });
     const entries = ArrayPrototypeMap(descriptor.entries, (entry, i) => {
       const context = `entry ${i + 1}`;
       const resource = entry.resource;
@@ -1403,22 +1372,12 @@ class GPUDevice extends EventTarget {
     if (typeof descriptor.layout !== "string") {
       const context = "layout";
       layout = assertResource(descriptor.layout, prefix, context);
-      assertDeviceMatch(device, descriptor.layout, {
-        prefix,
-        resourceContext: context,
-        selfContext: "this",
-      });
     }
     const module = assertResource(
       descriptor.compute.module,
       prefix,
       "compute shader module",
     );
-    assertDeviceMatch(device, descriptor.compute.module, {
-      prefix,
-      resourceContext: "compute shader module",
-      selfContext: "this",
-    });
 
     const { rid, err } = op_webgpu_create_compute_pipeline(
       device.rid,
@@ -1459,22 +1418,12 @@ class GPUDevice extends EventTarget {
     if (typeof descriptor.layout !== "string") {
       const context = "layout";
       layout = assertResource(descriptor.layout, prefix, context);
-      assertDeviceMatch(device, descriptor.layout, {
-        prefix,
-        resourceContext: context,
-        selfContext: "this",
-      });
     }
     const module = assertResource(
       descriptor.vertex.module,
       prefix,
       "vertex shader module",
     );
-    assertDeviceMatch(device, descriptor.vertex.module, {
-      prefix,
-      resourceContext: "vertex shader module",
-      selfContext: "this",
-    });
     let fragment = undefined;
     if (descriptor.fragment) {
       const module = assertResource(
@@ -1482,11 +1431,6 @@ class GPUDevice extends EventTarget {
         prefix,
         "fragment shader module",
       );
-      assertDeviceMatch(device, descriptor.fragment.module, {
-        prefix,
-        resourceContext: "fragment shader module",
-        selfContext: "this",
-      });
       fragment = {
         module,
         entryPoint: descriptor.fragment.entryPoint,
@@ -1536,22 +1480,12 @@ class GPUDevice extends EventTarget {
     if (typeof descriptor.layout !== "string") {
       const context = "layout";
       layout = assertResource(descriptor.layout, prefix, context);
-      assertDeviceMatch(device, descriptor.layout, {
-        prefix,
-        resourceContext: context,
-        selfContext: "this",
-      });
     }
     const module = assertResource(
       descriptor.compute.module,
       prefix,
       "compute shader module",
     );
-    assertDeviceMatch(device, descriptor.compute.module, {
-      prefix,
-      resourceContext: "compute shader module",
-      selfContext: "this",
-    });
 
     const { rid, err } = op_webgpu_create_compute_pipeline(
       device.rid,
@@ -1607,22 +1541,12 @@ class GPUDevice extends EventTarget {
     if (typeof descriptor.layout !== "string") {
       const context = "layout";
       layout = assertResource(descriptor.layout, prefix, context);
-      assertDeviceMatch(device, descriptor.layout, {
-        prefix,
-        resourceContext: context,
-        selfContext: "this",
-      });
     }
     const module = assertResource(
       descriptor.vertex.module,
       prefix,
       "vertex shader module",
     );
-    assertDeviceMatch(device, descriptor.vertex.module, {
-      prefix,
-      resourceContext: "vertex shader module",
-      selfContext: "this",
-    });
     let fragment = undefined;
     if (descriptor.fragment) {
       const module = assertResource(
@@ -1630,11 +1554,6 @@ class GPUDevice extends EventTarget {
         prefix,
         "fragment shader module",
       );
-      assertDeviceMatch(device, descriptor.fragment.module, {
-        prefix,
-        resourceContext: "fragment shader module",
-        selfContext: "this",
-      });
       fragment = {
         module,
         entryPoint: descriptor.fragment.entryPoint,
@@ -1916,11 +1835,6 @@ class GPUQueue {
       (buffer, i) => {
         const context = `command buffer ${i + 1}`;
         const rid = assertResource(buffer, prefix, context);
-        assertDeviceMatch(device, buffer, {
-          prefix,
-          selfContext: "this",
-          resourceContext: context,
-        });
         return rid;
       },
     );
@@ -1964,11 +1878,6 @@ class GPUQueue {
       : webidl.converters.GPUSize64(size, prefix, "Argument 5");
     const device = assertDevice(this, prefix, "this");
     const bufferRid = assertResource(buffer, prefix, "Argument 1");
-    assertDeviceMatch(device, buffer, {
-      prefix,
-      selfContext: "this",
-      resourceContext: "Argument 1",
-    });
     /** @type {ArrayBufferLike} */
     let abLike = data;
     if (isTypedArray(data)) {
@@ -2014,11 +1923,6 @@ class GPUQueue {
     size = webidl.converters.GPUExtent3D(size, prefix, "Argument 4");
     const device = assertDevice(this, prefix, "this");
     const textureRid = assertResource(destination.texture, prefix, "texture");
-    assertDeviceMatch(device, destination.texture, {
-      prefix,
-      selfContext: "this",
-      resourceContext: "texture",
-    });
 
     /** @type {ArrayBufferLike} */
     let abLike = data;
@@ -3189,15 +3093,6 @@ class GPUCommandEncoder {
         prefix,
         "texture view for depth stencil attachment",
       );
-      assertDeviceMatch(
-        device,
-        descriptor.depthStencilAttachment.view[_texture],
-        {
-          prefix,
-          resourceContext: "texture view for depth stencil attachment",
-          selfContext: "this",
-        },
-      );
 
       depthStencilAttachment = {
         ...descriptor.depthStencilAttachment,
@@ -3218,15 +3113,6 @@ class GPUCommandEncoder {
           prefix,
           `texture backing texture view for ${context}`,
         );
-        assertDeviceMatch(
-          device,
-          colorAttachment.view[_texture],
-          {
-            prefix,
-            resourceContext: `texture view for ${context}`,
-            selfContext: "this",
-          },
-        );
         let resolveTarget;
         if (colorAttachment.resolveTarget) {
           resolveTarget = assertResource(
@@ -3239,15 +3125,6 @@ class GPUCommandEncoder {
             prefix,
             `texture backing resolve target texture view for ${context}`,
           );
-          assertDeviceMatch(
-            device,
-            colorAttachment.resolveTarget[_texture],
-            {
-              prefix,
-              resourceContext: `resolve target texture view for ${context}`,
-              selfContext: "this",
-            },
-          );
         }
         return {
           view: view,
@@ -3388,17 +3265,7 @@ class GPUCommandEncoder {
     const device = assertDevice(this, prefix, "this");
     const commandEncoderRid = assertResource(this, prefix, "this");
     const sourceRid = assertResource(source, prefix, "Argument 1");
-    assertDeviceMatch(device, source, {
-      prefix,
-      resourceContext: "Argument 1",
-      selfContext: "this",
-    });
     const destinationRid = assertResource(destination, prefix, "Argument 3");
-    assertDeviceMatch(device, destination, {
-      prefix,
-      resourceContext: "Argument 3",
-      selfContext: "this",
-    });
 
     const { err } = op_webgpu_command_encoder_copy_buffer_to_buffer(
       commandEncoderRid,
@@ -3436,22 +3303,11 @@ class GPUCommandEncoder {
       prefix,
       "source in Argument 1",
     );
-    // deno-lint-ignore prefer-primordials
-    assertDeviceMatch(device, source.buffer, {
-      prefix,
-      resourceContext: "source in Argument 1",
-      selfContext: "this",
-    });
     const destinationTextureRid = assertResource(
       destination.texture,
       prefix,
       "texture in Argument 2",
     );
-    assertDeviceMatch(device, destination.texture, {
-      prefix,
-      resourceContext: "texture in Argument 2",
-      selfContext: "this",
-    });
 
     const { err } = op_webgpu_command_encoder_copy_buffer_to_texture(
       commandEncoderRid,
@@ -3500,23 +3356,12 @@ class GPUCommandEncoder {
       prefix,
       "texture in Argument 1",
     );
-    assertDeviceMatch(device, source.texture, {
-      prefix,
-      resourceContext: "texture in Argument 1",
-      selfContext: "this",
-    });
     const destinationBufferRid = assertResource(
       // deno-lint-ignore prefer-primordials
       destination.buffer,
       prefix,
       "buffer in Argument 2",
     );
-    // deno-lint-ignore prefer-primordials
-    assertDeviceMatch(device, destination.buffer, {
-      prefix,
-      resourceContext: "buffer in Argument 2",
-      selfContext: "this",
-    });
     const { err } = op_webgpu_command_encoder_copy_texture_to_buffer(
       commandEncoderRid,
       {
@@ -3562,21 +3407,11 @@ class GPUCommandEncoder {
       prefix,
       "texture in Argument 1",
     );
-    assertDeviceMatch(device, source.texture, {
-      prefix,
-      resourceContext: "texture in Argument 1",
-      selfContext: "this",
-    });
     const destinationTextureRid = assertResource(
       destination.texture,
       prefix,
       "texture in Argument 2",
     );
-    assertDeviceMatch(device, destination.texture, {
-      prefix,
-      resourceContext: "texture in Argument 2",
-      selfContext: "this",
-    });
     const { err } = op_webgpu_command_encoder_copy_texture_to_texture(
       commandEncoderRid,
       {
@@ -3685,11 +3520,6 @@ class GPUCommandEncoder {
     const device = assertDevice(this, prefix, "this");
     const commandEncoderRid = assertResource(this, prefix, "this");
     const querySetRid = assertResource(querySet, prefix, "Argument 1");
-    assertDeviceMatch(device, querySet, {
-      prefix,
-      resourceContext: "Argument 1",
-      selfContext: "this",
-    });
     const { err } = op_webgpu_command_encoder_write_timestamp(
       commandEncoderRid,
       querySetRid,
@@ -3731,17 +3561,7 @@ class GPUCommandEncoder {
     const device = assertDevice(this, prefix, "this");
     const commandEncoderRid = assertResource(this, prefix, "this");
     const querySetRid = assertResource(querySet, prefix, "Argument 1");
-    assertDeviceMatch(device, querySet, {
-      prefix,
-      resourceContext: "Argument 1",
-      selfContext: "this",
-    });
     const destinationRid = assertResource(destination, prefix, "Argument 3");
-    assertDeviceMatch(device, destination, {
-      prefix,
-      resourceContext: "Argument 3",
-      selfContext: "this",
-    });
     const { err } = op_webgpu_command_encoder_resolve_query_set(
       commandEncoderRid,
       querySetRid,
@@ -3991,11 +3811,6 @@ class GPURenderPassEncoder {
     const bundleRids = ArrayPrototypeMap(bundles, (bundle, i) => {
       const context = `bundle ${i + 1}`;
       const rid = assertResource(bundle, prefix, context);
-      assertDeviceMatch(device, bundle, {
-        prefix,
-        resourceContext: context,
-        selfContext: "this",
-      });
       return rid;
     });
     op_webgpu_render_pass_execute_bundles(renderPassRid, bundleRids);
@@ -4041,11 +3856,6 @@ class GPURenderPassEncoder {
     assertResource(this[_encoder], prefix, "encoder referenced by this");
     const renderPassRid = assertResource(this, prefix, "this");
     const bindGroupRid = assertResource(bindGroup, prefix, "Argument 2");
-    assertDeviceMatch(device, bindGroup, {
-      prefix,
-      resourceContext: "Argument 2",
-      selfContext: "this",
-    });
     if (
       TypedArrayPrototypeGetSymbolToStringTag(dynamicOffsetsData) !==
       "Uint32Array"
@@ -4128,11 +3938,6 @@ class GPURenderPassEncoder {
     assertResource(this[_encoder], prefix, "encoder referenced by this");
     const renderPassRid = assertResource(this, prefix, "this");
     const pipelineRid = assertResource(pipeline, prefix, "Argument 1");
-    assertDeviceMatch(device, pipeline, {
-      prefix,
-      resourceContext: "Argument 1",
-      selfContext: "this",
-    });
     op_webgpu_render_pass_set_pipeline(renderPassRid, pipelineRid);
   }
 
@@ -4165,11 +3970,6 @@ class GPURenderPassEncoder {
     assertResource(this[_encoder], prefix, "encoder referenced by this");
     const renderPassRid = assertResource(this, prefix, "this");
     const bufferRid = assertResource(buffer, prefix, "Argument 1");
-    assertDeviceMatch(device, buffer, {
-      prefix,
-      resourceContext: "Argument 1",
-      selfContext: "this",
-    });
     op_webgpu_render_pass_set_index_buffer(
       renderPassRid,
       bufferRid,
@@ -4204,11 +4004,6 @@ class GPURenderPassEncoder {
     assertResource(this[_encoder], prefix, "encoder referenced by this");
     const renderPassRid = assertResource(this, prefix, "this");
     const bufferRid = assertResource(buffer, prefix, "Argument 2");
-    assertDeviceMatch(device, buffer, {
-      prefix,
-      resourceContext: "Argument 2",
-      selfContext: "this",
-    });
     op_webgpu_render_pass_set_vertex_buffer(
       renderPassRid,
       slot,
@@ -4337,11 +4132,6 @@ class GPURenderPassEncoder {
       prefix,
       "Argument 1",
     );
-    assertDeviceMatch(device, indirectBuffer, {
-      prefix,
-      resourceContext: "Argument 1",
-      selfContext: "this",
-    });
     op_webgpu_render_pass_draw_indirect(
       renderPassRid,
       indirectBufferRid,
@@ -4380,11 +4170,6 @@ class GPURenderPassEncoder {
       prefix,
       "Argument 1",
     );
-    assertDeviceMatch(device, indirectBuffer, {
-      prefix,
-      resourceContext: "Argument 1",
-      selfContext: "this",
-    });
     op_webgpu_render_pass_draw_indexed_indirect(
       renderPassRid,
       indirectBufferRid,
@@ -4466,11 +4251,6 @@ class GPUComputePassEncoder {
     assertResource(this[_encoder], prefix, "encoder referenced by this");
     const computePassRid = assertResource(this, prefix, "this");
     const pipelineRid = assertResource(pipeline, prefix, "Argument 1");
-    assertDeviceMatch(device, pipeline, {
-      prefix,
-      resourceContext: "Argument 1",
-      selfContext: "this",
-    });
     op_webgpu_compute_pass_set_pipeline(computePassRid, pipelineRid);
   }
 
@@ -4545,11 +4325,6 @@ class GPUComputePassEncoder {
       prefix,
       "Argument 1",
     );
-    assertDeviceMatch(device, indirectBuffer, {
-      prefix,
-      resourceContext: "Argument 1",
-      selfContext: "this",
-    });
     op_webgpu_compute_pass_dispatch_workgroups_indirect(
       computePassRid,
       indirectBufferRid,
@@ -4598,11 +4373,6 @@ class GPUComputePassEncoder {
     assertResource(this[_encoder], prefix, "encoder referenced by this");
     const computePassRid = assertResource(this, prefix, "this");
     const bindGroupRid = assertResource(bindGroup, prefix, "Argument 2");
-    assertDeviceMatch(device, bindGroup, {
-      prefix,
-      resourceContext: "Argument 2",
-      selfContext: "this",
-    });
     if (
       TypedArrayPrototypeGetSymbolToStringTag(dynamicOffsetsData) !==
       "Uint32Array"
@@ -4814,11 +4584,6 @@ class GPURenderBundleEncoder {
     const device = assertDevice(this, prefix, "this");
     const renderBundleEncoderRid = assertResource(this, prefix, "this");
     const bindGroupRid = assertResource(bindGroup, prefix, "Argument 2");
-    assertDeviceMatch(device, bindGroup, {
-      prefix,
-      resourceContext: "Argument 2",
-      selfContext: "this",
-    });
     if (
       TypedArrayPrototypeGetSymbolToStringTag(dynamicOffsetsData) !==
       "Uint32Array"
@@ -4902,11 +4667,6 @@ class GPURenderBundleEncoder {
     const device = assertDevice(this, prefix, "this");
     const renderBundleEncoderRid = assertResource(this, prefix, "this");
     const pipelineRid = assertResource(pipeline, prefix, "Argument 1");
-    assertDeviceMatch(device, pipeline, {
-      prefix,
-      resourceContext: "Argument 1",
-      selfContext: "this",
-    });
     op_webgpu_render_bundle_encoder_set_pipeline(
       renderBundleEncoderRid,
       pipelineRid,
@@ -4935,11 +4695,6 @@ class GPURenderBundleEncoder {
     const device = assertDevice(this, prefix, "this");
     const renderBundleEncoderRid = assertResource(this, prefix, "this");
     const bufferRid = assertResource(buffer, prefix, "Argument 1");
-    assertDeviceMatch(device, buffer, {
-      prefix,
-      resourceContext: "Argument 1",
-      selfContext: "this",
-    });
     op_webgpu_render_bundle_encoder_set_index_buffer(
       renderBundleEncoderRid,
       bufferRid,
@@ -4969,11 +4724,6 @@ class GPURenderBundleEncoder {
     const device = assertDevice(this, prefix, "this");
     const renderBundleEncoderRid = assertResource(this, prefix, "this");
     const bufferRid = assertResource(buffer, prefix, "Argument 2");
-    assertDeviceMatch(device, buffer, {
-      prefix,
-      resourceContext: "Argument 2",
-      selfContext: "this",
-    });
     op_webgpu_render_bundle_encoder_set_vertex_buffer(
       renderBundleEncoderRid,
       slot,
@@ -5097,11 +4847,6 @@ class GPURenderBundleEncoder {
       prefix,
       "Argument 1",
     );
-    assertDeviceMatch(device, indirectBuffer, {
-      prefix,
-      resourceContext: "Argument 1",
-      selfContext: "this",
-    });
     op_webgpu_render_bundle_encoder_draw_indirect(
       renderBundleEncoderRid,
       indirectBufferRid,

From f767220399fb0289ed20524cd544f982c1058c16 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Thu, 18 Jul 2024 12:07:05 +0200
Subject: [PATCH 045/226] remove same device check from
 `create_texture_binding`

`view.parent` will have the same `device` as the `view` itself
---
 wgpu-core/src/device/resource.rs | 2 --
 1 file changed, 2 deletions(-)

diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index f434a89039..8caea3ac6f 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -2069,8 +2069,6 @@ impl<A: HalApi> Device<A> {
         used.textures
             .add_single(texture, Some(view.selector.clone()), internal_use);
 
-        texture.same_device_as(view.as_ref())?;
-
         texture.check_usage(pub_usage)?;
 
         used_texture_ranges.push(TextureInitTrackerAction {

From 77e45d46df8829bdd49ca4336fe29e294a579831 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Thu, 18 Jul 2024 12:21:26 +0200
Subject: [PATCH 046/226] add missing device valid check in
 `create_texture_view`

---
 wgpu-core/src/device/resource.rs | 2 ++
 wgpu-core/src/resource.rs        | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 8caea3ac6f..0af4516bbb 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -991,6 +991,8 @@ impl<A: HalApi> Device<A> {
         texture: &Arc<Texture<A>>,
         desc: &resource::TextureViewDescriptor,
     ) -> Result<Arc<TextureView<A>>, resource::CreateTextureViewError> {
+        self.check_is_valid()?;
+
         let snatch_guard = texture.device.snatchable_lock.read();
 
         let texture_raw = texture.try_raw(&snatch_guard)?;
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 1929b9f8f7..590de4747f 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -1632,6 +1632,8 @@ impl<A: HalApi> TextureView<A> {
 #[derive(Clone, Debug, Error)]
 #[non_exhaustive]
 pub enum CreateTextureViewError {
+    #[error(transparent)]
+    Device(#[from] DeviceError),
     #[error("TextureId {0:?} is invalid")]
     InvalidTextureId(TextureId),
     #[error(transparent)]

From 9a0adefe88b2f15d44bd66995c35a8ac61592b36 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Thu, 18 Jul 2024 14:53:17 +0200
Subject: [PATCH 047/226] use `ManuallyDrop` instead of `Option` for
 `PendingWrites`

---
 wgpu-core/src/device/global.rs   |  6 +-----
 wgpu-core/src/device/queue.rs    | 21 ++++++---------------
 wgpu-core/src/device/resource.rs | 13 +++++++++----
 wgpu-core/src/resource.rs        |  3 ---
 4 files changed, 16 insertions(+), 27 deletions(-)

diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index e6a9251299..b0003e0352 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -2347,11 +2347,7 @@ impl Global {
             // need to wait for submissions or triage them. We know we were
             // just polled, so `life_tracker.free_resources` is empty.
             debug_assert!(device.lock_life().queue_empty());
-            {
-                let mut pending_writes = device.pending_writes.lock();
-                let pending_writes = pending_writes.as_mut().unwrap();
-                pending_writes.deactivate();
-            }
+            device.pending_writes.lock().deactivate();
 
             drop(device);
         }
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 1c7b787428..9f138594d8 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -407,7 +407,6 @@ impl Global {
         // `device.pending_writes.consume`.
         let mut staging_buffer = StagingBuffer::new(device, data_size)?;
         let mut pending_writes = device.pending_writes.lock();
-        let pending_writes = pending_writes.as_mut().unwrap();
 
         let staging_buffer = {
             profiling::scope!("copy");
@@ -418,7 +417,7 @@ impl Global {
         let result = self.queue_write_staging_buffer_impl(
             &queue,
             device,
-            pending_writes,
+            &mut pending_writes,
             &staging_buffer,
             buffer_id,
             buffer_offset,
@@ -478,7 +477,6 @@ impl Global {
             .ok_or_else(|| QueueWriteError::Transfer(TransferError::InvalidBufferId(buffer_id)))?;
 
         let mut pending_writes = device.pending_writes.lock();
-        let pending_writes = pending_writes.as_mut().unwrap();
 
         // At this point, we have taken ownership of the staging_buffer from the
         // user. Platform validation requires that the staging buffer always
@@ -489,7 +487,7 @@ impl Global {
         let result = self.queue_write_staging_buffer_impl(
             &queue,
             device,
-            pending_writes,
+            &mut pending_writes,
             &staging_buffer,
             buffer_id,
             buffer_offset,
@@ -713,7 +711,6 @@ impl Global {
             wgt::BufferSize::new(stage_bytes_per_row as u64 * block_rows_in_copy as u64).unwrap();
 
         let mut pending_writes = device.pending_writes.lock();
-        let pending_writes = pending_writes.as_mut().unwrap();
         let encoder = pending_writes.activate();
 
         // If the copy does not fully cover the layers, we need to initialize to
@@ -967,7 +964,7 @@ impl Global {
             extract_texture_selector(&destination.to_untagged(), &size, &dst)?;
 
         let mut pending_writes = device.pending_writes.lock();
-        let encoder = pending_writes.as_mut().unwrap().activate();
+        let encoder = pending_writes.activate();
 
         // If the copy does not fully cover the layers, we need to initialize to
         // zero *first* as we don't keep track of partial texture layer inits.
@@ -1315,8 +1312,7 @@ impl Global {
                 }
             }
 
-            let mut pending_writes_guard = device.pending_writes.lock();
-            let pending_writes = pending_writes_guard.as_mut().unwrap();
+            let mut pending_writes = device.pending_writes.lock();
 
             {
                 used_surface_textures.set_size(hub.textures.read().len());
@@ -1402,17 +1398,12 @@ impl Global {
             profiling::scope!("cleanup");
 
             // this will register the new submission to the life time tracker
-            let mut pending_write_resources = mem::take(&mut pending_writes.temp_resources);
             device.lock_life().track_submission(
                 submit_index,
-                pending_write_resources.drain(..),
+                pending_writes.temp_resources.drain(..),
                 active_executions,
             );
-
-            // pending_write_resources has been drained, so it's empty, but we
-            // want to retain its heap allocation.
-            pending_writes.temp_resources = pending_write_resources;
-            drop(pending_writes_guard);
+            drop(pending_writes);
 
             // This will schedule destruction of all resources that are no longer needed
             // by the user but used in the command stream, among other things.
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 0af4516bbb..ee943d7fdc 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -46,6 +46,7 @@ use wgt::{DeviceLostReason, TextureFormat, TextureSampleType, TextureViewDimensi
 use std::{
     borrow::Cow,
     iter,
+    mem::ManuallyDrop,
     num::NonZeroU32,
     sync::{
         atomic::{AtomicBool, AtomicU64, Ordering},
@@ -142,7 +143,7 @@ pub struct Device<A: HalApi> {
     pub(crate) features: wgt::Features,
     pub(crate) downlevel: wgt::DownlevelCapabilities,
     pub(crate) instance_flags: wgt::InstanceFlags,
-    pub(crate) pending_writes: Mutex<Option<PendingWrites<A>>>,
+    pub(crate) pending_writes: Mutex<ManuallyDrop<PendingWrites<A>>>,
     pub(crate) deferred_destroy: Mutex<Vec<DeferredDestroy<A>>>,
     #[cfg(feature = "trace")]
     pub(crate) trace: Mutex<Option<trace::Trace>>,
@@ -169,7 +170,8 @@ impl<A: HalApi> Drop for Device<A> {
     fn drop(&mut self) {
         resource_log!("Drop {}", self.error_ident());
         let raw = self.raw.take().unwrap();
-        let pending_writes = self.pending_writes.lock().take().unwrap();
+        // SAFETY: We are in the Drop impl and we don't use self.pending_writes anymore after this point.
+        let pending_writes = unsafe { ManuallyDrop::take(&mut self.pending_writes.lock()) };
         pending_writes.dispose(&raw);
         self.command_allocator.dispose(&raw);
         unsafe {
@@ -307,7 +309,10 @@ impl<A: HalApi> Device<A> {
             features: desc.required_features,
             downlevel,
             instance_flags,
-            pending_writes: Mutex::new(rank::DEVICE_PENDING_WRITES, Some(pending_writes)),
+            pending_writes: Mutex::new(
+                rank::DEVICE_PENDING_WRITES,
+                ManuallyDrop::new(pending_writes),
+            ),
             deferred_destroy: Mutex::new(rank::DEVICE_DEFERRED_DESTROY, Vec::new()),
             usage_scopes: Mutex::new(rank::DEVICE_USAGE_SCOPES, Default::default()),
         })
@@ -3611,7 +3616,7 @@ impl<A: HalApi> Device<A> {
 
     /// Wait for idle and remove resources that we can, before we die.
     pub(crate) fn prepare_to_die(&self) {
-        self.pending_writes.lock().as_mut().unwrap().deactivate();
+        self.pending_writes.lock().deactivate();
         let current_index = self
             .last_successful_submission_index
             .load(Ordering::Acquire);
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 590de4747f..6070089e2a 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -657,7 +657,6 @@ impl<A: HalApi> Buffer<A> {
                 }
 
                 let mut pending_writes = device.pending_writes.lock();
-                let pending_writes = pending_writes.as_mut().unwrap();
 
                 let staging_buffer = staging_buffer.flush();
 
@@ -746,7 +745,6 @@ impl<A: HalApi> Buffer<A> {
         };
 
         let mut pending_writes = device.pending_writes.lock();
-        let pending_writes = pending_writes.as_mut().unwrap();
         if pending_writes.contains_buffer(self) {
             pending_writes.consume_temp(temp);
         } else {
@@ -1210,7 +1208,6 @@ impl<A: HalApi> Texture<A> {
         };
 
         let mut pending_writes = device.pending_writes.lock();
-        let pending_writes = pending_writes.as_mut().unwrap();
         if pending_writes.contains_texture(self) {
             pending_writes.consume_temp(temp);
         } else {

From 6a1432c132e321526fa7fb00119174f50c1f7251 Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Wed, 17 Jul 2024 17:05:46 -0400
Subject: [PATCH 048/226] chore: release 22.0.0

---
 CHANGELOG.md          | 25 ++++++++++++++++++++++++-
 Cargo.lock            | 28 ++++++++++++++--------------
 Cargo.toml            | 22 +++++++++++-----------
 d3d12/Cargo.toml      |  2 +-
 naga-cli/Cargo.toml   |  4 ++--
 naga/Cargo.toml       |  2 +-
 naga/fuzz/Cargo.toml  |  2 +-
 wgpu-core/Cargo.toml  |  8 ++++----
 wgpu-hal/Cargo.toml   | 10 +++++-----
 wgpu-types/Cargo.toml |  2 +-
 10 files changed, 64 insertions(+), 41 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6bd103a32a..113bbb0efc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -37,7 +37,27 @@ Bottom level categories:
 - Hal
 -->
 
-## Unreleased
+## 22.0.0 (2024-07-17)
+
+### Overview
+
+### Our first major version release!
+
+For the first time ever, WGPU is being released with a major version (i.e., 22.* instead of 0.22.*)! Maintainership has decided to fully adhere to [Semantic Versioning](https://semver.org/)'s recommendations for versioning production software. According to [SemVer 2.0.0's Q&A about when to use 1.0.0 versions (and beyond)](https://semver.org/spec/v2.0.0.html#how-do-i-know-when-to-release-100):
+
+> ### How do I know when to release 1.0.0?
+> 
+> If your software is being used in production, it should probably already be 1.0.0. If you have a stable API on which users have come to depend, you should be 1.0.0. If you’re worrying a lot about backward compatibility, you should probably already be 1.0.0.
+
+It is a well-known fact that WGPU has been used for applications and platforms already in production for years, at this point. We are often concerned with tracking breaking changes, and affecting these consumers' ability to ship. By releasing our first major version, we publicly acknowledge that this is the case. We encourage other projects in the Rust ecosystem to follow suit.
+
+Note that while we start to use the major version number, WGPU is _not_ "going stable", as many Rust projects do. We anticipate many breaking changes before we fully comply with the WebGPU specification, which we expect to take a small number of years.
+
+### Overview
+
+A major ([pun intended](#our-first-major-version-release)) theme of this release is incremental improvement. Among the typically large set of bug fixes, new features, and other adjustments to WGPU by the many contributors listed below, @wumpf and @teoxoy have merged a series of many simplifications to WGPU's internals and, in one case, to the render and compute pass recording APIs. Many of these change WGPU to use atomically reference-counted resource tracking (i.e., `Arc<…>`), rather than using IDs to manage the lifetimes of platform-specific graphics resources in a registry of separate reference counts. This has led us to diagnose and fix many long-standing bugs, and net some neat performance improvements on the order of 40% or more for some workloads.
+
+While the above is exciting, we acknowledge already finding and fixing some (easy-to-fix) regressions from the above work. If you migrate to WGPU 22 and encounter such bugs, please engage us in the issue tracker right away!
 
 ### Major Changes
 
@@ -46,6 +66,7 @@ Bottom level categories:
 `wgpu::RenderPass` & `wgpu::ComputePass` recording methods (e.g. `wgpu::RenderPass:set_render_pipeline`) no longer impose a lifetime constraint to objects passed to a pass (like pipelines/buffers/bindgroups/query-sets etc.).
 
 This means the following pattern works now as expected:
+
 ```rust
 let mut pipelines: Vec<wgpu::RenderPipeline> = ...;
 // ...
@@ -79,6 +100,7 @@ By @wumpf in [#5569](https://github.com/gfx-rs/wgpu/pull/5569), [#5575](https://
 Wgpu now supports querying [shader compilation info](https://www.w3.org/TR/webgpu/#dom-gpushadermodule-getcompilationinfo).
 
 This allows you to get more structured information about compilation errors, warnings and info:
+
 ```rust
 ...
 let lighting_shader = ctx.device.create_shader_module(include_wgsl!("lighting.wgsl"));
@@ -143,6 +165,7 @@ to pass a compatible surface when targeting WebGL2, having `enumerate_adapters()
 By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
 
 ### New features
+
 #### General
 
 - Added `as_hal` for `Buffer` to access wgpu created buffers form wgpu-hal. By @JasondeWolff in [#5724](https://github.com/gfx-rs/wgpu/pull/5724)
diff --git a/Cargo.lock b/Cargo.lock
index 5b2f904534..4f9cb57869 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -917,7 +917,7 @@ checksum = "96a6ac251f4a2aca6b3f91340350eab87ae57c3f127ffeb585e92bd336717991"
 
 [[package]]
 name = "d3d12"
-version = "0.20.0"
+version = "22.0.0"
 dependencies = [
  "bitflags 2.6.0",
  "libloading 0.8.4",
@@ -2179,7 +2179,7 @@ dependencies = [
 
 [[package]]
 name = "naga"
-version = "0.20.0"
+version = "22.0.0"
 dependencies = [
  "arbitrary",
  "arrayvec 0.7.4",
@@ -2207,7 +2207,7 @@ dependencies = [
 
 [[package]]
 name = "naga-cli"
-version = "0.20.0"
+version = "22.0.0"
 dependencies = [
  "anyhow",
  "argh",
@@ -2679,7 +2679,7 @@ checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
 
 [[package]]
 name = "player"
-version = "0.20.0"
+version = "22.0.0"
 dependencies = [
  "env_logger",
  "log",
@@ -4205,7 +4205,7 @@ dependencies = [
 
 [[package]]
 name = "wgpu"
-version = "0.20.0"
+version = "22.0.0"
 dependencies = [
  "arrayvec 0.7.4",
  "cfg_aliases",
@@ -4229,7 +4229,7 @@ dependencies = [
 
 [[package]]
 name = "wgpu-benchmark"
-version = "0.20.0"
+version = "22.0.0"
 dependencies = [
  "bincode",
  "bytemuck",
@@ -4246,7 +4246,7 @@ dependencies = [
 
 [[package]]
 name = "wgpu-core"
-version = "0.20.0"
+version = "22.0.0"
 dependencies = [
  "arrayvec 0.7.4",
  "bit-vec",
@@ -4272,7 +4272,7 @@ dependencies = [
 
 [[package]]
 name = "wgpu-examples"
-version = "0.20.0"
+version = "22.0.0"
 dependencies = [
  "bytemuck",
  "cfg-if",
@@ -4303,7 +4303,7 @@ dependencies = [
 
 [[package]]
 name = "wgpu-hal"
-version = "0.20.0"
+version = "22.0.0"
 dependencies = [
  "android_system_properties",
  "arrayvec 0.7.4",
@@ -4327,7 +4327,7 @@ dependencies = [
  "js-sys",
  "khronos-egl",
  "libc",
- "libloading 0.8.4",
+ "libloading 0.7.4",
  "log",
  "metal",
  "naga",
@@ -4351,7 +4351,7 @@ dependencies = [
 
 [[package]]
 name = "wgpu-info"
-version = "0.20.0"
+version = "22.0.0"
 dependencies = [
  "anyhow",
  "bitflags 2.6.0",
@@ -4364,7 +4364,7 @@ dependencies = [
 
 [[package]]
 name = "wgpu-macros"
-version = "0.20.0"
+version = "22.0.0"
 dependencies = [
  "heck 0.5.0",
  "quote",
@@ -4373,7 +4373,7 @@ dependencies = [
 
 [[package]]
 name = "wgpu-test"
-version = "0.20.0"
+version = "22.0.0"
 dependencies = [
  "anyhow",
  "arrayvec 0.7.4",
@@ -4408,7 +4408,7 @@ dependencies = [
 
 [[package]]
 name = "wgpu-types"
-version = "0.20.0"
+version = "22.0.0"
 dependencies = [
  "bitflags 2.6.0",
  "js-sys",
diff --git a/Cargo.toml b/Cargo.toml
index 654ed1660d..78ced6d6e5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -47,27 +47,27 @@ keywords = ["graphics"]
 license = "MIT OR Apache-2.0"
 homepage = "https://wgpu.rs/"
 repository = "https://github.com/gfx-rs/wgpu"
-version = "0.20.0"
+version = "22.0.0"
 authors = ["gfx-rs developers"]
 
 [workspace.dependencies.wgc]
 package = "wgpu-core"
 path = "./wgpu-core"
-version = "0.20.0"
+version = "22.0.0"
 
 [workspace.dependencies.wgt]
 package = "wgpu-types"
 path = "./wgpu-types"
-version = "0.20.0"
+version = "22.0.0"
 
 [workspace.dependencies.hal]
 package = "wgpu-hal"
 path = "./wgpu-hal"
-version = "0.20.0"
+version = "22.0.0"
 
 [workspace.dependencies.naga]
 path = "./naga"
-version = "0.20.0"
+version = "22.0.0"
 
 [workspace.dependencies]
 anyhow = "1.0.86"
@@ -125,12 +125,12 @@ static_assertions = "1.1.0"
 strum = { version = "0.25.0", features = ["derive"] }
 tracy-client = "0.17"
 thiserror = "1"
-wgpu = { version = "0.20.0", path = "./wgpu", default-features = false }
-wgpu-core = { version = "0.20.0", path = "./wgpu-core" }
+wgpu = { version = "22.0.0", path = "./wgpu", default-features = false }
+wgpu-core = { version = "22.0.0", path = "./wgpu-core" }
 wgpu-example = { version = "0.20.0", path = "./examples/common" }
-wgpu-macros = { version = "0.20.0", path = "./wgpu-macros" }
-wgpu-test = { version = "0.20.0", path = "./tests" }
-wgpu-types = { version = "0.20.0", path = "./wgpu-types" }
+wgpu-macros = { version = "22.0.0", path = "./wgpu-macros" }
+wgpu-test = { version = "22.0.0", path = "./tests" }
+wgpu-types = { version = "22.0.0", path = "./wgpu-types" }
 winit = { version = "0.29", features = ["android-native-activity"] }
 
 # Metal dependencies
@@ -151,7 +151,7 @@ gpu-allocator = { version = "0.26", default-features = false, features = [
     "d3d12",
     "public-winapi",
 ] }
-d3d12 = { version = "0.20.0", path = "./d3d12/" }
+d3d12 = { version = "22.0.0", path = "./d3d12/" }
 range-alloc = "0.1"
 winapi = "0.3"
 hassle-rs = "0.11.0"
diff --git a/d3d12/Cargo.toml b/d3d12/Cargo.toml
index 2c3f721525..a792aeab69 100644
--- a/d3d12/Cargo.toml
+++ b/d3d12/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "d3d12"
-version = "0.20.0"
+version = "22.0.0"
 authors = ["gfx-rs developers"]
 description = "Low level D3D12 API wrapper"
 repository = "https://github.com/gfx-rs/wgpu/tree/trunk/d3d12"
diff --git a/naga-cli/Cargo.toml b/naga-cli/Cargo.toml
index 9ffe6e937b..fb999c495a 100644
--- a/naga-cli/Cargo.toml
+++ b/naga-cli/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "naga-cli"
-version = "0.20.0"
+version = "22.0.0"
 authors = ["gfx-rs developers"]
 edition = "2021"
 description = "Shader translation command line tool"
@@ -25,7 +25,7 @@ argh = "0.1.5"
 anyhow.workspace = true
 
 [dependencies.naga]
-version = "0.20.0"
+version = "22.0.0"
 path = "../naga"
 features = [
     "compact",
diff --git a/naga/Cargo.toml b/naga/Cargo.toml
index 255d93f32d..1bd14e9ee8 100644
--- a/naga/Cargo.toml
+++ b/naga/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "naga"
-version = "0.20.0"
+version = "22.0.0"
 authors = ["gfx-rs developers"]
 edition = "2021"
 description = "Shader translation infrastructure"
diff --git a/naga/fuzz/Cargo.toml b/naga/fuzz/Cargo.toml
index 196919e441..c4dd8cd1c1 100644
--- a/naga/fuzz/Cargo.toml
+++ b/naga/fuzz/Cargo.toml
@@ -15,7 +15,7 @@ libfuzzer-sys = "0.4"
 
 [target.'cfg(not(any(target_arch = "wasm32", target_os = "ios")))'.dependencies.naga]
 path = ".."
-version = "0.20.0"
+version = "22.0.0"
 features = ["arbitrary", "spv-in", "wgsl-in", "glsl-in"]
 
 [[bin]]
diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml
index 2ad5e5a402..2e645a5406 100644
--- a/wgpu-core/Cargo.toml
+++ b/wgpu-core/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "wgpu-core"
-version = "0.20.0"
+version = "22.0.0"
 authors = ["gfx-rs developers"]
 edition = "2021"
 description = "WebGPU core logic on wgpu-hal"
@@ -123,17 +123,17 @@ thiserror = "1"
 
 [dependencies.naga]
 path = "../naga"
-version = "0.20.0"
+version = "22.0.0"
 
 [dependencies.wgt]
 package = "wgpu-types"
 path = "../wgpu-types"
-version = "0.20.0"
+version = "22.0.0"
 
 [dependencies.hal]
 package = "wgpu-hal"
 path = "../wgpu-hal"
-version = "0.20.0"
+version = "22.0.0"
 default-features = false
 
 [build-dependencies]
diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml
index b079fef630..d0c9ea6d99 100644
--- a/wgpu-hal/Cargo.toml
+++ b/wgpu-hal/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "wgpu-hal"
-version = "0.20.0"
+version = "22.0.0"
 authors = ["gfx-rs developers"]
 edition = "2021"
 description = "WebGPU hardware abstraction layer"
@@ -125,7 +125,7 @@ glow = { version = "0.13.1", optional = true }
 [dependencies.wgt]
 package = "wgpu-types"
 path = "../wgpu-types"
-version = "0.20.0"
+version = "22.0.0"
 
 [target.'cfg(not(target_arch = "wasm32"))'.dependencies]
 # backend: Vulkan
@@ -161,7 +161,7 @@ winapi = { version = "0.3", features = [
     "winuser",
     "dcomp",
 ] }
-d3d12 = { path = "../d3d12/", version = "0.20.0", optional = true, features = [
+d3d12 = { path = "../d3d12/", version = "22.0.0", optional = true, features = [
     "libloading",
 ] }
 
@@ -192,7 +192,7 @@ ndk-sys = { version = "0.5.0", optional = true }
 
 [dependencies.naga]
 path = "../naga"
-version = "0.20.0"
+version = "22.0.0"
 
 [build-dependencies]
 cfg_aliases.workspace = true
@@ -200,7 +200,7 @@ cfg_aliases.workspace = true
 # DEV dependencies
 [dev-dependencies.naga]
 path = "../naga"
-version = "0.20.0"
+version = "22.0.0"
 features = ["wgsl-in"]
 
 [dev-dependencies]
diff --git a/wgpu-types/Cargo.toml b/wgpu-types/Cargo.toml
index 3c2e6e68bd..915cdde6f0 100644
--- a/wgpu-types/Cargo.toml
+++ b/wgpu-types/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "wgpu-types"
-version = "0.20.0"
+version = "22.0.0"
 authors = ["gfx-rs developers"]
 edition = "2021"
 description = "WebGPU types"

From e883fa7b8b7bdfea0a9cd79a610c726eacebb05e Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Wed, 17 Jul 2024 18:43:41 -0400
Subject: [PATCH 049/226] docs(CHANGELOG): add `Unreleased` section

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 113bbb0efc..25a47dd456 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -37,6 +37,8 @@ Bottom level categories:
 - Hal
 -->
 
+## Unreleased
+
 ## 22.0.0 (2024-07-17)
 
 ### Overview

From 278d278b28ff7b424dc7dde0c0d7519e8d839ec1 Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Wed, 17 Jul 2024 18:49:06 -0400
Subject: [PATCH 050/226] chore: remove non-existent `wgpu-example` workspace
 dep.

---
 Cargo.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 78ced6d6e5..2b2ca766a4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -127,7 +127,6 @@ tracy-client = "0.17"
 thiserror = "1"
 wgpu = { version = "22.0.0", path = "./wgpu", default-features = false }
 wgpu-core = { version = "22.0.0", path = "./wgpu-core" }
-wgpu-example = { version = "0.20.0", path = "./examples/common" }
 wgpu-macros = { version = "22.0.0", path = "./wgpu-macros" }
 wgpu-test = { version = "22.0.0", path = "./tests" }
 wgpu-types = { version = "22.0.0", path = "./wgpu-types" }

From 3c3b532cf3fec481ed18c5be4f2fc3d942bbc613 Mon Sep 17 00:00:00 2001
From: Connor Fitzgerald <connorwadefitzgerald@gmail.com>
Date: Thu, 18 Jul 2024 12:43:39 -0400
Subject: [PATCH 051/226] Remove latest_submission_index (#5976)

* Remove latest_submission_index

* CI

* Comments
---
 benches/Cargo.toml               |   2 +-
 wgpu-core/src/device/global.rs   |  47 +++++++++++---
 wgpu-core/src/device/life.rs     | 106 ++++++++++++++++++++++++++++---
 wgpu-core/src/device/queue.rs    |  85 +++----------------------
 wgpu-core/src/resource.rs        |  52 ++++-----------
 wgpu-core/src/track/buffer.rs    |   5 ++
 wgpu-core/src/track/metadata.rs  |   2 +-
 wgpu-core/src/track/stateless.rs |  11 ----
 wgpu-core/src/track/texture.rs   |   5 ++
 9 files changed, 165 insertions(+), 150 deletions(-)

diff --git a/benches/Cargo.toml b/benches/Cargo.toml
index 1dba81434b..82207d5105 100644
--- a/benches/Cargo.toml
+++ b/benches/Cargo.toml
@@ -43,4 +43,4 @@ pollster.workspace = true
 profiling.workspace = true
 rayon.workspace = true
 tracy-client = { workspace = true, optional = true }
-wgpu = { workspace = true, features = ["wgsl"] }
+wgpu = { workspace = true, features = ["wgsl", "metal", "dx12"] }
diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index b0003e0352..5ebd7c7de7 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -19,7 +19,6 @@ use crate::{
     present,
     resource::{
         self, BufferAccessError, BufferAccessResult, BufferMapOperation, CreateBufferError,
-        Trackable,
     },
     storage::Storage,
     Label,
@@ -260,15 +259,25 @@ impl Global {
     ) -> Result<(), WaitIdleError> {
         let hub = A::hub(self);
 
-        let last_submission = match hub.buffers.read().get(buffer_id) {
-            Ok(buffer) => buffer.submission_index(),
+        let device = hub
+            .devices
+            .get(device_id)
+            .map_err(|_| DeviceError::InvalidDeviceId)?;
+
+        let buffer = match hub.buffers.get(buffer_id) {
+            Ok(buffer) => buffer,
             Err(_) => return Ok(()),
         };
 
-        hub.devices
-            .get(device_id)
-            .map_err(|_| DeviceError::InvalidDeviceId)?
-            .wait_for_submit(last_submission)
+        let last_submission = device
+            .lock_life()
+            .get_buffer_latest_submission_index(&buffer);
+
+        if let Some(last_submission) = last_submission {
+            device.wait_for_submit(last_submission)
+        } else {
+            Ok(())
+        }
     }
 
     #[doc(hidden)]
@@ -424,7 +433,13 @@ impl Global {
         );
 
         if wait {
-            let last_submit_index = buffer.submission_index();
+            let Some(last_submit_index) = buffer
+                .device
+                .lock_life()
+                .get_buffer_latest_submission_index(&buffer)
+            else {
+                return;
+            };
             match buffer.device.wait_for_submit(last_submit_index) {
                 Ok(()) => (),
                 Err(e) => log::error!("Failed to wait for buffer {:?}: {}", buffer_id, e),
@@ -599,7 +614,13 @@ impl Global {
             }
 
             if wait {
-                let last_submit_index = texture.submission_index();
+                let Some(last_submit_index) = texture
+                    .device
+                    .lock_life()
+                    .get_texture_latest_submission_index(&texture)
+                else {
+                    return;
+                };
                 match texture.device.wait_for_submit(last_submit_index) {
                     Ok(()) => (),
                     Err(e) => log::error!("Failed to wait for texture {texture_id:?}: {e}"),
@@ -672,7 +693,13 @@ impl Global {
             }
 
             if wait {
-                let last_submit_index = view.submission_index();
+                let Some(last_submit_index) = view
+                    .device
+                    .lock_life()
+                    .get_texture_latest_submission_index(&view.parent)
+                else {
+                    return Ok(());
+                };
                 match view.device.wait_for_submit(last_submit_index) {
                     Ok(()) => (),
                     Err(e) => {
diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index 118e1498b4..3696d8abe4 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -5,7 +5,7 @@ use crate::{
     },
     hal_api::HalApi,
     id,
-    resource::{self, Buffer, Labeled, Trackable},
+    resource::{self, Buffer, Labeled, Texture, Trackable},
     snatch::SnatchGuard,
     SubmissionIndex,
 };
@@ -55,6 +55,58 @@ struct ActiveSubmission<A: HalApi> {
     work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>,
 }
 
+impl<A: HalApi> ActiveSubmission<A> {
+    /// Returns true if this submission contains the given buffer.
+    ///
+    /// This only uses constant-time operations.
+    pub fn contains_buffer(&self, buffer: &Buffer<A>) -> bool {
+        for encoder in &self.encoders {
+            // The ownership location of buffers depends on where the command encoder
+            // came from. If it is the staging command encoder on the queue, it is
+            // in the pending buffer list. If it came from a user command encoder,
+            // it is in the tracker.
+
+            if encoder.trackers.buffers.contains(buffer) {
+                return true;
+            }
+
+            if encoder
+                .pending_buffers
+                .contains_key(&buffer.tracker_index())
+            {
+                return true;
+            }
+        }
+
+        false
+    }
+
+    /// Returns true if this submission contains the given texture.
+    ///
+    /// This only uses constant-time operations.
+    pub fn contains_texture(&self, texture: &Texture<A>) -> bool {
+        for encoder in &self.encoders {
+            // The ownership location of textures depends on where the command encoder
+            // came from. If it is the staging command encoder on the queue, it is
+            // in the pending texture list. If it came from a user command encoder,
+            // it is in the tracker.
+
+            if encoder.trackers.textures.contains(texture) {
+                return true;
+            }
+
+            if encoder
+                .pending_textures
+                .contains_key(&texture.tracker_index())
+            {
+                return true;
+            }
+        }
+
+        false
+    }
+}
+
 #[derive(Clone, Debug, Error)]
 #[non_exhaustive]
 pub enum WaitIdleError {
@@ -165,6 +217,40 @@ impl<A: HalApi> LifetimeTracker<A> {
         self.mapped.push(value.clone());
     }
 
+    /// Returns the submission index of the most recent submission that uses the
+    /// given buffer.
+    pub fn get_buffer_latest_submission_index(
+        &self,
+        buffer: &Buffer<A>,
+    ) -> Option<SubmissionIndex> {
+        // We iterate in reverse order, so that we can bail out early as soon
+        // as we find a hit.
+        self.active.iter().rev().find_map(|submission| {
+            if submission.contains_buffer(buffer) {
+                Some(submission.index)
+            } else {
+                None
+            }
+        })
+    }
+
+    /// Returns the submission index of the most recent submission that uses the
+    /// given texture.
+    pub fn get_texture_latest_submission_index(
+        &self,
+        texture: &Texture<A>,
+    ) -> Option<SubmissionIndex> {
+        // We iterate in reverse order, so that we can bail out early as soon
+        // as we find a hit.
+        self.active.iter().rev().find_map(|submission| {
+            if submission.contains_texture(texture) {
+                Some(submission.index)
+            } else {
+                None
+            }
+        })
+    }
+
     /// Sort out the consequences of completed submissions.
     ///
     /// Assume that all submissions up through `last_done` have completed.
@@ -236,9 +322,7 @@ impl<A: HalApi> LifetimeTracker<A> {
             }
         }
     }
-}
 
-impl<A: HalApi> LifetimeTracker<A> {
     /// Determine which buffers are ready to map, and which must wait for the
     /// GPU.
     ///
@@ -249,17 +333,19 @@ impl<A: HalApi> LifetimeTracker<A> {
         }
 
         for buffer in self.mapped.drain(..) {
-            let submit_index = buffer.submission_index();
+            let submission = self
+                .active
+                .iter_mut()
+                .rev()
+                .find(|a| a.contains_buffer(&buffer));
+
             log::trace!(
-                "Mapping of {} at submission {:?} gets assigned to active {:?}",
+                "Mapping of {} at submission {:?}",
                 buffer.error_ident(),
-                submit_index,
-                self.active.iter().position(|a| a.index == submit_index)
+                submission.as_deref().map(|s| s.index)
             );
 
-            self.active
-                .iter_mut()
-                .find(|a| a.index == submit_index)
+            submission
                 .map_or(&mut self.ready_to_map, |a| &mut a.mapped)
                 .push(buffer);
         }
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 9f138594d8..220085f8f7 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -149,12 +149,12 @@ pub enum TempResource<A: HalApi> {
 pub(crate) struct EncoderInFlight<A: HalApi> {
     raw: A::CommandEncoder,
     cmd_buffers: Vec<A::CommandBuffer>,
-    trackers: Tracker<A>,
+    pub(crate) trackers: Tracker<A>,
 
     /// These are the buffers that have been tracked by `PendingWrites`.
-    pending_buffers: Vec<Arc<Buffer<A>>>,
+    pub(crate) pending_buffers: FastHashMap<TrackerIndex, Arc<Buffer<A>>>,
     /// These are the textures that have been tracked by `PendingWrites`.
-    pending_textures: Vec<Arc<Texture<A>>>,
+    pub(crate) pending_textures: FastHashMap<TrackerIndex, Arc<Texture<A>>>,
 }
 
 impl<A: HalApi> EncoderInFlight<A> {
@@ -268,8 +268,8 @@ impl<A: HalApi> PendingWrites<A> {
         queue: &A::Queue,
     ) -> Result<Option<EncoderInFlight<A>>, DeviceError> {
         if self.is_recording {
-            let pending_buffers = self.dst_buffers.drain().map(|(_, b)| b).collect();
-            let pending_textures = self.dst_textures.drain().map(|(_, t)| t).collect();
+            let pending_buffers = mem::take(&mut self.dst_buffers);
+            let pending_textures = mem::take(&mut self.dst_textures);
 
             let cmd_buf = unsafe { self.command_encoder.end_encoding()? };
             self.is_recording = false;
@@ -570,8 +570,6 @@ impl Global {
 
         self.queue_validate_write_buffer_impl(&dst, buffer_offset, staging_buffer.size)?;
 
-        dst.use_at(device.active_submission_index.load(Ordering::Relaxed) + 1);
-
         let region = hal::BufferCopy {
             src_offset: 0,
             dst_offset: buffer_offset,
@@ -762,7 +760,6 @@ impl Global {
         // call above. Since we've held `texture_guard` the whole time, we know
         // the texture hasn't gone away in the mean time, so we can unwrap.
         let dst = hub.textures.get(destination.texture).unwrap();
-        dst.use_at(device.active_submission_index.load(Ordering::Relaxed) + 1);
 
         let dst_raw = dst.try_raw(&snatch_guard)?;
 
@@ -1007,7 +1004,6 @@ impl Global {
                     .drain(init_layer_range);
             }
         }
-        dst.use_at(device.active_submission_index.load(Ordering::Relaxed) + 1);
 
         let snatch_guard = device.snatchable_lock.read();
         let dst_raw = dst.try_raw(&snatch_guard)?;
@@ -1126,7 +1122,7 @@ impl Global {
                         }
 
                         {
-                            profiling::scope!("update submission ids");
+                            profiling::scope!("check resource state");
 
                             let cmd_buf_data = cmdbuf.data.lock();
                             let cmd_buf_trackers = &cmd_buf_data.as_ref().unwrap().trackers;
@@ -1136,7 +1132,6 @@ impl Global {
                                 profiling::scope!("buffers");
                                 for buffer in cmd_buf_trackers.buffers.used_resources() {
                                     buffer.check_destroyed(&snatch_guard)?;
-                                    buffer.use_at(submit_index);
 
                                     match *buffer.map_state.lock() {
                                         BufferMapState::Idle => (),
@@ -1163,7 +1158,6 @@ impl Global {
                                             true
                                         }
                                     };
-                                    texture.use_at(submit_index);
                                     if should_extend {
                                         unsafe {
                                             used_surface_textures
@@ -1177,69 +1171,6 @@ impl Global {
                                     }
                                 }
                             }
-                            {
-                                profiling::scope!("views");
-                                for texture_view in cmd_buf_trackers.views.used_resources() {
-                                    texture_view.use_at(submit_index);
-                                }
-                            }
-                            {
-                                profiling::scope!("bind groups (+ referenced views/samplers)");
-                                for bg in cmd_buf_trackers.bind_groups.used_resources() {
-                                    bg.use_at(submit_index);
-                                    // We need to update the submission indices for the contained
-                                    // state-less (!) resources as well, so that they don't get
-                                    // deleted too early if the parent bind group goes out of scope.
-                                    for view in bg.used.views.used_resources() {
-                                        view.use_at(submit_index);
-                                    }
-                                    for sampler in bg.used.samplers.used_resources() {
-                                        sampler.use_at(submit_index);
-                                    }
-                                }
-                            }
-                            {
-                                profiling::scope!("compute pipelines");
-                                for compute_pipeline in
-                                    cmd_buf_trackers.compute_pipelines.used_resources()
-                                {
-                                    compute_pipeline.use_at(submit_index);
-                                }
-                            }
-                            {
-                                profiling::scope!("render pipelines");
-                                for render_pipeline in
-                                    cmd_buf_trackers.render_pipelines.used_resources()
-                                {
-                                    render_pipeline.use_at(submit_index);
-                                }
-                            }
-                            {
-                                profiling::scope!("query sets");
-                                for query_set in cmd_buf_trackers.query_sets.used_resources() {
-                                    query_set.use_at(submit_index);
-                                }
-                            }
-                            {
-                                profiling::scope!(
-                                    "render bundles (+ referenced pipelines/query sets)"
-                                );
-                                for bundle in cmd_buf_trackers.bundles.used_resources() {
-                                    bundle.use_at(submit_index);
-                                    // We need to update the submission indices for the contained
-                                    // state-less (!) resources as well, excluding the bind groups.
-                                    // They don't get deleted too early if the bundle goes out of scope.
-                                    for render_pipeline in
-                                        bundle.used.render_pipelines.read().used_resources()
-                                    {
-                                        render_pipeline.use_at(submit_index);
-                                    }
-                                    for query_set in bundle.used.query_sets.read().used_resources()
-                                    {
-                                        query_set.use_at(submit_index);
-                                    }
-                                }
-                            }
                         }
                         let mut baked = cmdbuf.from_arc_into_baked();
 
@@ -1303,8 +1234,8 @@ impl Global {
                             raw: baked.encoder,
                             cmd_buffers: baked.list,
                             trackers: baked.trackers,
-                            pending_buffers: Vec::new(),
-                            pending_textures: Vec::new(),
+                            pending_buffers: FastHashMap::default(),
+                            pending_textures: FastHashMap::default(),
                         });
                     }
 
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 6070089e2a..4e94f1731c 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -14,7 +14,7 @@ use crate::{
     resource_log,
     snatch::{ExclusiveSnatchGuard, SnatchGuard, Snatchable},
     track::{SharedTrackerIndexAllocator, TextureSelector, TrackerIndex},
-    Label, LabelHelpers, SubmissionIndex,
+    Label, LabelHelpers,
 };
 
 use hal::CommandEncoder;
@@ -28,7 +28,7 @@ use std::{
     mem::{self, ManuallyDrop},
     ops::Range,
     ptr::NonNull,
-    sync::{atomic::Ordering, Arc, Weak},
+    sync::{Arc, Weak},
 };
 
 /// Information about the wgpu-core resource.
@@ -54,14 +54,6 @@ use std::{
 pub(crate) struct TrackingData {
     tracker_index: TrackerIndex,
     tracker_indices: Arc<SharedTrackerIndexAllocator>,
-    /// The index of the last queue submission in which the resource
-    /// was used.
-    ///
-    /// Each queue submission is fenced and assigned an index number
-    /// sequentially. Thus, when a queue submission completes, we know any
-    /// resources used in that submission and any lower-numbered submissions are
-    /// no longer in use by the GPU.
-    submission_index: hal::AtomicFenceValue,
 }
 
 impl Drop for TrackingData {
@@ -75,23 +67,12 @@ impl TrackingData {
         Self {
             tracker_index: tracker_indices.alloc(),
             tracker_indices,
-            submission_index: hal::AtomicFenceValue::new(0),
         }
     }
 
     pub(crate) fn tracker_index(&self) -> TrackerIndex {
         self.tracker_index
     }
-
-    /// Record that this resource will be used by the queue submission with the
-    /// given index.
-    pub(crate) fn use_at(&self, submit_index: SubmissionIndex) {
-        self.submission_index.store(submit_index, Ordering::Release);
-    }
-
-    pub(crate) fn submission_index(&self) -> SubmissionIndex {
-        self.submission_index.load(Ordering::Acquire)
-    }
 }
 
 #[derive(Clone, Debug)]
@@ -193,10 +174,6 @@ macro_rules! impl_labeled {
 
 pub(crate) trait Trackable: Labeled {
     fn tracker_index(&self) -> TrackerIndex;
-    /// Record that this resource will be used by the queue submission with the
-    /// given index.
-    fn use_at(&self, submit_index: SubmissionIndex);
-    fn submission_index(&self) -> SubmissionIndex;
 }
 
 #[macro_export]
@@ -206,12 +183,6 @@ macro_rules! impl_trackable {
             fn tracker_index(&self) -> $crate::track::TrackerIndex {
                 self.tracking_data.tracker_index()
             }
-            fn use_at(&self, submit_index: $crate::SubmissionIndex) {
-                self.tracking_data.use_at(submit_index)
-            }
-            fn submission_index(&self) -> $crate::SubmissionIndex {
-                self.tracking_data.submission_index()
-            }
         }
     };
 }
@@ -660,7 +631,6 @@ impl<A: HalApi> Buffer<A> {
 
                 let staging_buffer = staging_buffer.flush();
 
-                self.use_at(device.active_submission_index.load(Ordering::Relaxed) + 1);
                 let region = wgt::BufferSize::new(self.size).map(|size| hal::BufferCopy {
                     src_offset: 0,
                     dst_offset: 0,
@@ -748,10 +718,11 @@ impl<A: HalApi> Buffer<A> {
         if pending_writes.contains_buffer(self) {
             pending_writes.consume_temp(temp);
         } else {
-            let last_submit_index = self.submission_index();
-            device
-                .lock_life()
-                .schedule_resource_destruction(temp, last_submit_index);
+            let mut life_lock = device.lock_life();
+            let last_submit_index = life_lock.get_buffer_latest_submission_index(self);
+            if let Some(last_submit_index) = last_submit_index {
+                life_lock.schedule_resource_destruction(temp, last_submit_index);
+            }
         }
 
         Ok(())
@@ -1211,10 +1182,11 @@ impl<A: HalApi> Texture<A> {
         if pending_writes.contains_texture(self) {
             pending_writes.consume_temp(temp);
         } else {
-            let last_submit_index = self.submission_index();
-            device
-                .lock_life()
-                .schedule_resource_destruction(temp, last_submit_index);
+            let mut life_lock = device.lock_life();
+            let last_submit_index = life_lock.get_texture_latest_submission_index(self);
+            if let Some(last_submit_index) = last_submit_index {
+                life_lock.schedule_resource_destruction(temp, last_submit_index);
+            }
         }
 
         Ok(())
diff --git a/wgpu-core/src/track/buffer.rs b/wgpu-core/src/track/buffer.rs
index a7ec8201fc..afb20e149d 100644
--- a/wgpu-core/src/track/buffer.rs
+++ b/wgpu-core/src/track/buffer.rs
@@ -277,6 +277,11 @@ impl<A: HalApi> BufferTracker<A> {
         }
     }
 
+    /// Returns true if the given buffer is tracked.
+    pub fn contains(&self, buffer: &Buffer<A>) -> bool {
+        self.metadata.contains(buffer.tracker_index().as_usize())
+    }
+
     /// Returns a list of all buffers tracked.
     pub fn used_resources(&self) -> impl Iterator<Item = Arc<Buffer<A>>> + '_ {
         self.metadata.owned_resources()
diff --git a/wgpu-core/src/track/metadata.rs b/wgpu-core/src/track/metadata.rs
index d7d63f04fa..855282d72c 100644
--- a/wgpu-core/src/track/metadata.rs
+++ b/wgpu-core/src/track/metadata.rs
@@ -67,7 +67,7 @@ impl<T: Clone> ResourceMetadata<T> {
 
     /// Returns true if the set contains the resource with the given index.
     pub(super) fn contains(&self, index: usize) -> bool {
-        self.owned[index]
+        self.owned.get(index).unwrap_or(false)
     }
 
     /// Returns true if the set contains the resource with the given index.
diff --git a/wgpu-core/src/track/stateless.rs b/wgpu-core/src/track/stateless.rs
index 06779540d7..7d8d904d2a 100644
--- a/wgpu-core/src/track/stateless.rs
+++ b/wgpu-core/src/track/stateless.rs
@@ -34,12 +34,6 @@ impl<T: Trackable> StatelessBindGroupState<T> {
         resources.sort_unstable_by_key(|resource| resource.tracker_index());
     }
 
-    /// Returns a list of all resources tracked. May contain duplicates.
-    pub fn used_resources(&self) -> impl Iterator<Item = Arc<T>> + '_ {
-        let resources = self.resources.lock();
-        resources.iter().cloned().collect::<Vec<_>>().into_iter()
-    }
-
     /// Adds the given resource.
     pub fn add_single(&self, resource: &Arc<T>) {
         let mut resources = self.resources.lock();
@@ -79,11 +73,6 @@ impl<T: Trackable> StatelessTracker<T> {
         }
     }
 
-    /// Returns a list of all resources tracked.
-    pub fn used_resources(&self) -> impl Iterator<Item = Arc<T>> + '_ {
-        self.metadata.owned_resources()
-    }
-
     /// Inserts a single resource into the resource tracker.
     ///
     /// If the resource already exists in the tracker, it will be overwritten.
diff --git a/wgpu-core/src/track/texture.rs b/wgpu-core/src/track/texture.rs
index d34f47e128..bad216db19 100644
--- a/wgpu-core/src/track/texture.rs
+++ b/wgpu-core/src/track/texture.rs
@@ -446,6 +446,11 @@ impl<A: HalApi> TextureTracker<A> {
         }
     }
 
+    /// Returns true if the tracker owns the given texture.
+    pub fn contains(&self, texture: &Texture<A>) -> bool {
+        self.metadata.contains(texture.tracker_index().as_usize())
+    }
+
     /// Returns a list of all textures tracked.
     pub fn used_resources(&self) -> impl Iterator<Item = Arc<Texture<A>>> + '_ {
         self.metadata.owned_resources()

From 80921f4720dd495040d17bfedf0e3a86d9d6ddb1 Mon Sep 17 00:00:00 2001
From: Connor Fitzgerald <connorwadefitzgerald@gmail.com>
Date: Thu, 18 Jul 2024 16:56:30 -0400
Subject: [PATCH 052/226] Fix Codecov Timeout Issues (#5985)

---
 .github/workflows/ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a8ffaf1dfd..cd254fda6a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -568,6 +568,7 @@ jobs:
         if: steps.coverage.outcome == 'success'
         with:
           files: lcov.info
+          token: ${{ secrets.CODECOV_TOKEN }}
 
   doctest:
     # runtime is normally 2 minutes

From 8aed6ed2206ceadd55ed7c2feafd6b76b4dab7d0 Mon Sep 17 00:00:00 2001
From: Nicolas Silva <nical@fastmail.com>
Date: Fri, 19 Jul 2024 01:18:32 +0200
Subject: [PATCH 053/226] Update gpu-allocator dependency to 0.27 (#5982)

---
 Cargo.lock          | 29 +++++------------------------
 wgpu-hal/Cargo.toml |  2 +-
 2 files changed, 6 insertions(+), 25 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 4f9cb57869..b36c3efc98 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1522,7 +1522,7 @@ dependencies = [
  "libc",
  "log",
  "rustversion",
- "windows 0.54.0",
+ "windows",
 ]
 
 [[package]]
@@ -1678,15 +1678,15 @@ dependencies = [
 
 [[package]]
 name = "gpu-allocator"
-version = "0.26.0"
+version = "0.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fdd4240fc91d3433d5e5b0fc5b67672d771850dc19bbee03c1381e19322803d7"
+checksum = "c151a2a5ef800297b4e79efa4f4bec035c5f51d5ae587287c9b952bdf734cacd"
 dependencies = [
  "log",
  "presser",
  "thiserror",
  "winapi",
- "windows 0.52.0",
+ "windows",
 ]
 
 [[package]]
@@ -4467,32 +4467,13 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
 
-[[package]]
-name = "windows"
-version = "0.52.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be"
-dependencies = [
- "windows-core 0.52.0",
- "windows-targets 0.52.5",
-]
-
 [[package]]
 name = "windows"
 version = "0.54.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9252e5725dbed82865af151df558e754e4a3c2c30818359eb17465f1346a1b49"
 dependencies = [
- "windows-core 0.54.0",
- "windows-targets 0.52.5",
-]
-
-[[package]]
-name = "windows-core"
-version = "0.52.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
-dependencies = [
+ "windows-core",
  "windows-targets 0.52.5",
 ]
 
diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml
index d0c9ea6d99..b8834dc705 100644
--- a/wgpu-hal/Cargo.toml
+++ b/wgpu-hal/Cargo.toml
@@ -147,7 +147,7 @@ libloading = { version = ">=0.7, <0.9", optional = true }
 # backend: Dx12
 bit-set = { version = "0.6", optional = true }
 range-alloc = { version = "0.1", optional = true }
-gpu-allocator = { version = "0.26", default-features = false, features = [
+gpu-allocator = { version = "0.27", default-features = false, features = [
     "d3d12",
     "public-winapi",
 ], optional = true }

From 20973d1cdcfb35dedf71e0889bb9f1944e464110 Mon Sep 17 00:00:00 2001
From: Bruce Mitchener <bruce.mitchener@gmail.com>
Date: Fri, 19 Jul 2024 11:56:16 +0700
Subject: [PATCH 054/226] Fix profiling with tracy. (#5988)

The profiling APIs require a `&str`, but since the label here
is now an `Option<String>`, we must get a `&str` from it.
---
 CHANGELOG.md                    | 6 ++++++
 wgpu-core/src/command/render.rs | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 25a47dd456..149c58cde9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -39,6 +39,12 @@ Bottom level categories:
 
 ## Unreleased
 
+### Bug Fixes
+
+#### General
+
+- Fix profiling with `tracy`. By @waywardmonkeys in [#5988](https://github.com/gfx-rs/wgpu/pull/5988)
+
 ## 22.0.0 (2024-07-17)
 
 ### Overview
diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index b9c760b67d..130c04704c 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -1566,7 +1566,7 @@ impl Global {
 
         profiling::scope!(
             "CommandEncoder::run_render_pass {}",
-            base.label.unwrap_or("")
+            base.label.as_deref().unwrap_or("")
         );
 
         let Some(cmd_buf) = pass.parent.as_ref() else {

From bc7622f641fc75b5f96cafd895d111cac4f022f5 Mon Sep 17 00:00:00 2001
From: Nicolas Silva <nical@fastmail.com>
Date: Thu, 18 Jul 2024 17:35:26 +0200
Subject: [PATCH 055/226] Expose GPU allocation reports in wgpu, wgpu-core and
 wgpu-hal

---
 wgpu-core/src/device/global.rs   | 11 ++++
 wgpu-core/src/device/resource.rs |  7 +++
 wgpu-hal/src/dx12/device.rs      | 37 +++++++++++++
 wgpu-hal/src/lib.rs              |  4 ++
 wgpu-types/src/counters.rs       | 90 +++++++++++++++++++++++++++++++-
 wgpu/src/backend/webgpu.rs       |  8 +++
 wgpu/src/backend/wgpu_core.rs    |  8 +++
 wgpu/src/context.rs              | 22 ++++++++
 wgpu/src/lib.rs                  |  9 ++++
 9 files changed, 195 insertions(+), 1 deletion(-)

diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 5ebd7c7de7..0942fda46c 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -2458,6 +2458,17 @@ impl Global {
         }
     }
 
+    pub fn device_generate_allocator_report<A: HalApi>(
+        &self,
+        device_id: DeviceId,
+    ) -> Option<wgt::AllocatorReport> {
+        let hub = A::hub(self);
+        hub.devices
+            .get(device_id)
+            .ok()
+            .and_then(|device| device.generate_allocator_report())
+    }
+
     pub fn queue_drop<A: HalApi>(&self, queue_id: QueueId) {
         profiling::scope!("Queue::drop");
         api_log!("Queue::drop {queue_id:?}");
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index ee943d7fdc..25f95f8a2a 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -3598,6 +3598,13 @@ impl<A: HalApi> Device<A> {
             .map(|raw| raw.get_internal_counters())
             .unwrap_or_default()
     }
+
+    pub fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
+        self.raw
+            .as_ref()
+            .map(|raw| raw.generate_allocator_report())
+            .unwrap_or_default()
+    }
 }
 
 impl<A: HalApi> Device<A> {
diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index 27b3002431..e886e2fd04 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -1801,4 +1801,41 @@ impl crate::Device for super::Device {
     fn get_internal_counters(&self) -> wgt::HalCounters {
         self.counters.clone()
     }
+
+    #[cfg(feature = "windows_rs")]
+    fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
+        let mut upstream = {
+            self.mem_allocator
+                .as_ref()?
+                .lock()
+                .allocator
+                .generate_report()
+        };
+
+        let allocations = upstream
+            .allocations
+            .iter_mut()
+            .map(|alloc| wgt::AllocationReport {
+                name: std::mem::take(&mut alloc.name),
+                offset: alloc.offset,
+                size: alloc.size,
+            })
+            .collect();
+
+        let blocks = upstream
+            .blocks
+            .iter()
+            .map(|block| wgt::MemoryBlockReport {
+                size: block.size,
+                allocations: block.allocations.clone(),
+            })
+            .collect();
+
+        Some(wgt::AllocatorReport {
+            allocations,
+            blocks,
+            total_allocated_bytes: upstream.total_allocated_bytes,
+            total_reserved_bytes: upstream.total_reserved_bytes,
+        })
+    }
 }
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index b28a005a7a..bd047b5ff6 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -894,6 +894,10 @@ pub trait Device: WasmNotSendSync {
     );
 
     fn get_internal_counters(&self) -> wgt::HalCounters;
+
+    fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
+        None
+    }
 }
 
 pub trait Queue: WasmNotSendSync {
diff --git a/wgpu-types/src/counters.rs b/wgpu-types/src/counters.rs
index 9dfa739f8b..d0f9a5ea18 100644
--- a/wgpu-types/src/counters.rs
+++ b/wgpu-types/src/counters.rs
@@ -1,5 +1,6 @@
 #[cfg(feature = "counters")]
 use std::sync::atomic::{AtomicIsize, Ordering};
+use std::{fmt, ops::Range};
 
 /// An internal counter for debugging purposes
 ///
@@ -128,7 +129,7 @@ pub struct HalCounters {
 /// `wgpu-core`'s internal counters.
 #[derive(Clone, Default)]
 pub struct CoreCounters {
-    // TODO
+    // TODO    #[cfg(features=)]
 }
 
 /// All internal counters, exposed for debugging purposes.
@@ -139,3 +140,90 @@ pub struct InternalCounters {
     /// `wgpu-hal` counters.
     pub hal: HalCounters,
 }
+
+/// Describes an allocation in the [`AllocatorReport`].
+#[derive(Clone)]
+pub struct AllocationReport {
+    /// The name provided to the `allocate()` function.
+    pub name: String,
+    /// The offset in bytes of the allocation in its memory block.
+    pub offset: u64,
+    /// The size in bytes of the allocation.
+    pub size: u64,
+}
+
+/// Describes a memory block in the [`AllocatorReport`].
+#[derive(Clone)]
+pub struct MemoryBlockReport {
+    /// The size in bytes of this memory block.
+    pub size: u64,
+    /// The range of allocations in [`AllocatorReport::allocations`] that are associated
+    /// to this memory block.
+    pub allocations: Range<usize>,
+}
+
+/// A report that can be generated for informational purposes using `Allocator::generate_report()`.
+#[derive(Clone)]
+pub struct AllocatorReport {
+    /// All live allocations, sub-allocated from memory blocks.
+    pub allocations: Vec<AllocationReport>,
+    /// All memory blocks.
+    pub blocks: Vec<MemoryBlockReport>,
+    /// Sum of the memory used by all allocations, in bytes.
+    pub total_allocated_bytes: u64,
+    /// Sum of the memory reserved by all memory blocks including unallocated regions, in bytes.
+    pub total_reserved_bytes: u64,
+}
+
+impl fmt::Debug for AllocationReport {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let name = if !self.name.is_empty() {
+            self.name.as_str()
+        } else {
+            "--"
+        };
+        write!(f, "{name:?}: {}", FmtBytes(self.size))
+    }
+}
+
+impl fmt::Debug for AllocatorReport {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let mut allocations = self.allocations.clone();
+        allocations.sort_by_key(|alloc| std::cmp::Reverse(alloc.size));
+
+        let max_num_allocations_to_print = f.precision().unwrap_or(usize::MAX);
+        allocations.truncate(max_num_allocations_to_print);
+
+        f.debug_struct("AllocatorReport")
+            .field(
+                "summary",
+                &std::format_args!(
+                    "{} / {}",
+                    FmtBytes(self.total_allocated_bytes),
+                    FmtBytes(self.total_reserved_bytes)
+                ),
+            )
+            .field("blocks", &self.blocks.len())
+            .field("allocations", &self.allocations.len())
+            .field("largest", &allocations.as_slice())
+            .finish()
+    }
+}
+
+struct FmtBytes(u64);
+
+impl fmt::Display for FmtBytes {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        const SUFFIX: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];
+        let mut idx = 0;
+        let mut amount = self.0 as f64;
+        loop {
+            if amount < 1024.0 || idx == SUFFIX.len() - 1 {
+                return write!(f, "{:.2} {}", amount, SUFFIX[idx]);
+            }
+
+            amount /= 1024.0;
+            idx += 1;
+        }
+    }
+}
diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs
index 8e158359c2..be3d9b42cd 100644
--- a/wgpu/src/backend/webgpu.rs
+++ b/wgpu/src/backend/webgpu.rs
@@ -2986,6 +2986,14 @@ impl crate::context::Context for ContextWebGpu {
         Default::default()
     }
 
+    fn device_generate_allocator_report(
+        &self,
+        _device: &Self::DeviceId,
+        _device_data: &Self::DeviceData,
+    ) -> Option<wgt::AllocatorReport> {
+        None
+    }
+
     fn pipeline_cache_get_data(
         &self,
         _: &Self::PipelineCacheId,
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 91629d638c..88e0a9f503 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -2367,6 +2367,14 @@ impl crate::Context for ContextWgpuCore {
         wgc::gfx_select!(device => self.0.device_get_internal_counters(*device))
     }
 
+    fn device_generate_allocator_report(
+        &self,
+        device: &Self::DeviceId,
+        _device_data: &Self::DeviceData,
+    ) -> Option<wgt::AllocatorReport> {
+        wgc::gfx_select!(device => self.0.device_generate_allocator_report(*device))
+    }
+
     fn pipeline_cache_get_data(
         &self,
         cache: &Self::PipelineCacheId,
diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs
index 7ff2adbaf7..2c2c82c4bc 100644
--- a/wgpu/src/context.rs
+++ b/wgpu/src/context.rs
@@ -618,6 +618,12 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
         _device_data: &Self::DeviceData,
     ) -> wgt::InternalCounters;
 
+    fn device_generate_allocator_report(
+        &self,
+        device: &Self::DeviceId,
+        _device_data: &Self::DeviceData,
+    ) -> Option<wgt::AllocatorReport>;
+
     fn pipeline_cache_get_data(
         &self,
         cache: &Self::PipelineCacheId,
@@ -1617,6 +1623,12 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync {
         device_data: &crate::Data,
     ) -> wgt::InternalCounters;
 
+    fn generate_allocator_report(
+        &self,
+        device: &ObjectId,
+        device_data: &crate::Data,
+    ) -> Option<wgt::AllocatorReport>;
+
     fn pipeline_cache_get_data(
         &self,
         cache: &ObjectId,
@@ -3101,6 +3113,16 @@ where
         Context::device_get_internal_counters(self, &device, device_data)
     }
 
+    fn generate_allocator_report(
+        &self,
+        device: &ObjectId,
+        device_data: &crate::Data,
+    ) -> Option<wgt::AllocatorReport> {
+        let device = <T::DeviceId>::from(*device);
+        let device_data = downcast_ref(device_data);
+        Context::device_generate_allocator_report(self, &device, device_data)
+    }
+
     fn pipeline_cache_get_data(
         &self,
         cache: &ObjectId,
diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs
index d895b696cf..04ce09aa7d 100644
--- a/wgpu/src/lib.rs
+++ b/wgpu/src/lib.rs
@@ -3238,6 +3238,15 @@ impl Device {
         DynContext::device_get_internal_counters(&*self.context, &self.id, self.data.as_ref())
     }
 
+    /// Generate a GPU memory allocation report if the underlying backend supports it.
+    ///
+    /// Backends that do not support producing these reports return `None`. A backend may
+    /// support it and still return `None` if it is not performing sub-allocation,
+    /// for example as a workaround for driver issues.
+    pub fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
+        DynContext::generate_allocator_report(&*self.context, &self.id, self.data.as_ref())
+    }
+
     /// Apply a callback to this `Device`'s underlying backend device.
     ///
     /// If this `Device` is implemented by the backend API given by `A` (Vulkan,

From 6cd387412f049ccbd1c2467181caa1250b587e6a Mon Sep 17 00:00:00 2001
From: Brad Werth <werth@efn.org>
Date: Tue, 4 Jun 2024 10:11:03 -0700
Subject: [PATCH 056/226] Remove vertex_pulling_transform from
 PipelineCompilationOptions.

This option was only evaluated for Metal backends, and now it's required
there so the option is going away. It is still configurable for tests
via the PipelineOptions struct, deserialized from .ron files.

This also fixes some type problems with the unpack functions in
writer.rs. Metal << operator extends operand to int-sized, which then
has to be cast back down to the real size before as_type bit conversion.
The math for the snorm values is corrected, in some cases using the
metal unpack_snorm2x16_to_float function because we can't directly
cast a bit-shifted ushort value to half.
---
 deno_webgpu/pipeline.rs                       |   3 -
 naga/CHANGELOG.md                             |   1 +
 naga/src/back/msl/mod.rs                      |   4 +-
 naga/src/back/msl/writer.rs                   |  68 +--
 tests/tests/root.rs                           |   1 +
 tests/tests/vertex_formats/draw.vert.wgsl     | 316 ++++++++++++++
 tests/tests/vertex_formats/mod.rs             | 388 ++++++++++++++++++
 tests/tests/vertex_indices/mod.rs             |  53 +--
 wgpu-core/src/device/global.rs                |   3 -
 wgpu-core/src/device/resource.rs              |   3 -
 wgpu-core/src/pipeline.rs                     |   4 -
 wgpu-hal/examples/halmark/main.rs             |   2 -
 wgpu-hal/examples/ray-traced-triangle/main.rs |   1 -
 wgpu-hal/src/lib.rs                           |   3 -
 wgpu-hal/src/metal/device.rs                  |   2 +-
 wgpu/src/backend/wgpu_core.rs                 |   6 -
 wgpu/src/lib.rs                               |   3 -
 17 files changed, 756 insertions(+), 105 deletions(-)
 create mode 100644 tests/tests/vertex_formats/draw.vert.wgsl
 create mode 100644 tests/tests/vertex_formats/mod.rs

diff --git a/deno_webgpu/pipeline.rs b/deno_webgpu/pipeline.rs
index f925705119..86d530332f 100644
--- a/deno_webgpu/pipeline.rs
+++ b/deno_webgpu/pipeline.rs
@@ -112,7 +112,6 @@ pub fn op_webgpu_create_compute_pipeline(
             entry_point: compute.entry_point.map(Cow::from),
             constants: Cow::Owned(compute.constants.unwrap_or_default()),
             zero_initialize_workgroup_memory: true,
-            vertex_pulling_transform: false,
         },
         cache: None,
     };
@@ -348,7 +347,6 @@ pub fn op_webgpu_create_render_pipeline(
                 constants: Cow::Owned(fragment.constants.unwrap_or_default()),
                 // Required to be true for WebGPU
                 zero_initialize_workgroup_memory: true,
-                vertex_pulling_transform: false,
             },
             targets: Cow::Owned(fragment.targets),
         })
@@ -374,7 +372,6 @@ pub fn op_webgpu_create_render_pipeline(
                 constants: Cow::Owned(args.vertex.constants.unwrap_or_default()),
                 // Required to be true for WebGPU
                 zero_initialize_workgroup_memory: true,
-                vertex_pulling_transform: false,
             },
             buffers: Cow::Owned(vertex_buffers),
         },
diff --git a/naga/CHANGELOG.md b/naga/CHANGELOG.md
index 2a00f01f86..49cde4e212 100644
--- a/naga/CHANGELOG.md
+++ b/naga/CHANGELOG.md
@@ -81,6 +81,7 @@ For changelogs after v0.14, see [the wgpu changelog](../CHANGELOG.md).
 - Make varyings' struct members unique. ([#2521](https://github.com/gfx-rs/naga/pull/2521)) **@evahop**
 - Add experimental vertex pulling transform flag. ([#5254](https://github.com/gfx-rs/wgpu/pull/5254)) **@bradwerth**
 - Fixup some generated MSL for vertex buffer unpack functions. ([#5829](https://github.com/gfx-rs/wgpu/pull/5829)) **@bradwerth**
+- Make vertex pulling transform on by default. ([#5773](https://github.com/gfx-rs/wgpu/pull/5773)) **@bradwerth**
 
 #### GLSL-OUT
 
diff --git a/naga/src/back/msl/mod.rs b/naga/src/back/msl/mod.rs
index 3b33ee7a71..626475debc 100644
--- a/naga/src/back/msl/mod.rs
+++ b/naga/src/back/msl/mod.rs
@@ -354,7 +354,9 @@ pub struct PipelineOptions {
     /// to receive the vertex buffers, lengths, and vertex id as args,
     /// and bounds-check the vertex id and use the index into the
     /// vertex buffers to access attributes, rather than using Metal's
-    /// [[stage-in]] assembled attribute data.
+    /// [[stage-in]] assembled attribute data. This is true by default,
+    /// but remains configurable for use by tests via deserialization
+    /// of this struct. There is no user-facing way to set this value.
     pub vertex_pulling_transform: bool,
 
     /// vertex_buffer_mappings are used during shader translation to
diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs
index 8b86897007..7ec22009bd 100644
--- a/naga/src/back/msl/writer.rs
+++ b/naga/src/back/msl/writer.rs
@@ -3953,8 +3953,8 @@ impl<W: Write> Writer<W> {
                 )?;
                 writeln!(
                     self.out,
-                    "{}return metal::float2((float(b0) - 128.0f) / 255.0f, \
-                                            (float(b1) - 128.0f) / 255.0f);",
+                    "{}return metal::float2(metal::max(-1.0f, as_type<char>(b0) / 127.0f), \
+                                            metal::max(-1.0f, as_type<char>(b1) / 127.0f));",
                     back::INDENT
                 )?;
                 writeln!(self.out, "}}")?;
@@ -3971,10 +3971,10 @@ impl<W: Write> Writer<W> {
                 )?;
                 writeln!(
                     self.out,
-                    "{}return metal::float4((float(b0) - 128.0f) / 255.0f, \
-                                            (float(b1) - 128.0f) / 255.0f, \
-                                            (float(b2) - 128.0f) / 255.0f, \
-                                            (float(b3) - 128.0f) / 255.0f);",
+                    "{}return metal::float4(metal::max(-1.0f, as_type<char>(b0) / 127.0f), \
+                                            metal::max(-1.0f, as_type<char>(b1) / 127.0f), \
+                                            metal::max(-1.0f, as_type<char>(b2) / 127.0f), \
+                                            metal::max(-1.0f, as_type<char>(b3) / 127.0f));",
                     back::INDENT
                 )?;
                 writeln!(self.out, "}}")?;
@@ -4033,8 +4033,8 @@ impl<W: Write> Writer<W> {
                 )?;
                 writeln!(
                     self.out,
-                    "{}return metal::int2(as_type<metal::short>(b1 << 8 | b0), \
-                                          as_type<metal::short>(b3 << 8 | b2));",
+                    "{}return metal::int2(as_type<short>(metal::ushort(b1 << 8 | b0)), \
+                                          as_type<short>(metal::ushort(b3 << 8 | b2)));",
                     back::INDENT
                 )?;
                 writeln!(self.out, "}}")?;
@@ -4055,10 +4055,10 @@ impl<W: Write> Writer<W> {
                 )?;
                 writeln!(
                     self.out,
-                    "{}return metal::int4(as_type<metal::short>(b1 << 8 | b0), \
-                                          as_type<metal::short>(b3 << 8 | b2), \
-                                          as_type<metal::short>(b5 << 8 | b4), \
-                                          as_type<metal::short>(b7 << 8 | b6));",
+                    "{}return metal::int4(as_type<short>(metal::ushort(b1 << 8 | b0)), \
+                                          as_type<short>(metal::ushort(b3 << 8 | b2)), \
+                                          as_type<short>(metal::ushort(b5 << 8 | b4)), \
+                                          as_type<short>(metal::ushort(b7 << 8 | b6)));",
                     back::INDENT
                 )?;
                 writeln!(self.out, "}}")?;
@@ -4117,8 +4117,7 @@ impl<W: Write> Writer<W> {
                 )?;
                 writeln!(
                     self.out,
-                    "{}return metal::float2((float(b1 << 8 | b0) - 32767.0f) / 65535.0f, \
-                                            (float(b3 << 8 | b2) - 32767.0f) / 65535.0f);",
+                    "{}return metal::unpack_snorm2x16_to_float(b1 << 24 | b0 << 16 | b3 << 8 | b2);",
                     back::INDENT
                 )?;
                 writeln!(self.out, "}}")?;
@@ -4139,10 +4138,8 @@ impl<W: Write> Writer<W> {
                 )?;
                 writeln!(
                     self.out,
-                    "{}return metal::float4((float(b1 << 8 | b0) - 32767.0f) / 65535.0f, \
-                                            (float(b3 << 8 | b2) - 32767.0f) / 65535.0f, \
-                                            (float(b5 << 8 | b4) - 32767.0f) / 65535.0f, \
-                                            (float(b7 << 8 | b6) - 32767.0f) / 65535.0f);",
+                    "{}return metal::float4(metal::unpack_snorm2x16_to_float(b1 << 24 | b0 << 16 | b3 << 8 | b2), \
+                                            metal::unpack_snorm2x16_to_float(b5 << 24 | b4 << 16 | b7 << 8 | b6));",
                     back::INDENT
                 )?;
                 writeln!(self.out, "}}")?;
@@ -4159,8 +4156,8 @@ impl<W: Write> Writer<W> {
                 )?;
                 writeln!(
                     self.out,
-                    "{}return metal::float2(as_type<metal::half>(b1 << 8 | b0), \
-                                            as_type<metal::half>(b3 << 8 | b2));",
+                    "{}return metal::float2(as_type<half>(metal::ushort(b1 << 8 | b0)), \
+                                            as_type<half>(metal::ushort(b3 << 8 | b2)));",
                     back::INDENT
                 )?;
                 writeln!(self.out, "}}")?;
@@ -4170,7 +4167,7 @@ impl<W: Write> Writer<W> {
                 let name = self.namer.call("unpackFloat16x4");
                 writeln!(
                     self.out,
-                    "metal::int4 {name}(metal::ushort b0, \
+                    "metal::float4 {name}(metal::ushort b0, \
                                         metal::ushort b1, \
                                         metal::ushort b2, \
                                         metal::ushort b3, \
@@ -4181,10 +4178,10 @@ impl<W: Write> Writer<W> {
                 )?;
                 writeln!(
                     self.out,
-                    "{}return metal::int4(as_type<metal::half>(b1 << 8 | b0), \
-                                          as_type<metal::half>(b3 << 8 | b2), \
-                                          as_type<metal::half>(b5 << 8 | b4), \
-                                          as_type<metal::half>(b7 << 8 | b6));",
+                    "{}return metal::float4(as_type<half>(metal::ushort(b1 << 8 | b0)), \
+                                          as_type<half>(metal::ushort(b3 << 8 | b2)), \
+                                          as_type<half>(metal::ushort(b5 << 8 | b4)), \
+                                          as_type<half>(metal::ushort(b7 << 8 | b6)));",
                     back::INDENT
                 )?;
                 writeln!(self.out, "}}")?;
@@ -4390,10 +4387,10 @@ impl<W: Write> Writer<W> {
                 let name = self.namer.call("unpackSint32");
                 writeln!(
                     self.out,
-                    "metal::int {name}(uint b0, \
-                                       uint b1, \
-                                       uint b2, \
-                                       uint b3) {{"
+                    "int {name}(uint b0, \
+                                uint b1, \
+                                uint b2, \
+                                uint b3) {{"
                 )?;
                 writeln!(
                     self.out,
@@ -4495,7 +4492,18 @@ impl<W: Write> Writer<W> {
                 )?;
                 writeln!(
                     self.out,
-                    "{}return unpack_unorm10a2_to_float(b3 << 24 | b2 << 16 | b1 << 8 | b0);",
+                    // The following is correct for RGBA packing, but our format seems to
+                    // match ABGR, which can be fed into the Metal builtin function
+                    // unpack_unorm10a2_to_float.
+                    /*
+                    "{}uint v = (b3 << 24 | b2 << 16 | b1 << 8 | b0); \
+                       uint r = (v & 0xFFC00000) >> 22; \
+                       uint g = (v & 0x003FF000) >> 12; \
+                       uint b = (v & 0x00000FFC) >> 2; \
+                       uint a = (v & 0x00000003); \
+                       return metal::float4(float(r) / 1023.0f, float(g) / 1023.0f, float(b) / 1023.0f, float(a) / 3.0f);",
+                    */
+                    "{}return metal::unpack_unorm10a2_to_float(b3 << 24 | b2 << 16 | b1 << 8 | b0);",
                     back::INDENT
                 )?;
                 writeln!(self.out, "}}")?;
diff --git a/tests/tests/root.rs b/tests/tests/root.rs
index 6ceb3818df..384cfcf78f 100644
--- a/tests/tests/root.rs
+++ b/tests/tests/root.rs
@@ -42,6 +42,7 @@ mod subgroup_operations;
 mod texture_bounds;
 mod texture_view_creation;
 mod transfer;
+mod vertex_formats;
 mod vertex_indices;
 mod write_texture;
 mod zero_init_texture_after_discard;
diff --git a/tests/tests/vertex_formats/draw.vert.wgsl b/tests/tests/vertex_formats/draw.vert.wgsl
new file mode 100644
index 0000000000..bf6a08aac6
--- /dev/null
+++ b/tests/tests/vertex_formats/draw.vert.wgsl
@@ -0,0 +1,316 @@
+@group(0) @binding(0)
+var<storage, read_write> checksums: array<f32>;
+
+const index_uint = 0u;
+const index_sint = 1u;
+const index_unorm = 2u;
+const index_snorm = 3u;
+const index_float16 = 4u;
+const index_float32 = 5u;
+
+fn init_checksums() {
+  checksums[index_uint] = 0.0;
+  checksums[index_sint] = 0.0;
+  checksums[index_unorm] = 0.0;
+  checksums[index_snorm] = 0.0;
+  checksums[index_float16] = 0.0;
+  checksums[index_float32] = 0.0;
+}
+
+// Break down the 31 vertex formats specified at
+// https://gpuweb.github.io/gpuweb/#vertex-formats into blocks
+// of at most 8, to keep under the limits of max locations. Each
+// AttributeBlockX structure will get a corresponding
+// vertex_block_X function to process its attributes into
+// values written to the checksums buffer.
+
+struct AttributeBlock0 {
+  // 4-byte-aligned unorm formats
+  @location(0) unorm8x4: vec4<f32>,
+  @location(1) unorm16x2: vec2<f32>,
+  @location(2) unorm16x4: vec4<f32>,
+
+  // 4-byte-aligned snorm formats
+  @location(3) snorm8x4: vec4<f32>,
+  @location(4) snorm16x2: vec2<f32>,
+  @location(5) snorm16x4: vec4<f32>,
+
+  // 2-byte-aligned formats
+  @location(6) unorm8x2: vec2<f32>,
+  @location(7) snorm8x2: vec2<f32>,
+}
+
+@vertex
+fn vertex_block_0(v_in: AttributeBlock0) -> @builtin(position) vec4<f32>
+{
+  init_checksums();
+
+  // Accumulate all unorm into one checksum value.
+  var all_unorm: f32 = 0.0;
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x2.x);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x2.y);
+
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.x);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.y);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.z);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.w);
+
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x2.x);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x2.y);
+
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.x);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.y);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.z);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.w);
+
+  checksums[index_unorm] = f32(all_unorm);
+
+  // Accumulate all snorm into one checksum value.
+  var all_snorm: f32 = 0.0;
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x2.x);
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x2.y);
+
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.x);
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.y);
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.z);
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.w);
+
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x2.x);
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x2.y);
+
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.x);
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.y);
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.z);
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.w);
+
+  checksums[index_snorm] = f32(all_snorm);
+
+  return vec4(0.0);
+}
+
+struct AttributeBlock1 {
+  // 4-byte-aligned uint formats
+  @location(0) uint8x4: vec4<u32>,
+  @location(1) uint16x2: vec2<u32>,
+  @location(2) uint16x4: vec4<u32>,
+
+  // 4-byte-aligned sint formats
+  @location(3) sint8x4: vec4<i32>,
+  @location(4) sint16x2: vec2<i32>,
+  @location(5) sint16x4: vec4<i32>,
+
+  // 2-byte-aligned formats
+  @location(6) uint8x2: vec2<u32>,
+  @location(7) sint8x2: vec2<i32>,
+}
+
+@vertex
+fn vertex_block_1(v_in: AttributeBlock1) -> @builtin(position) vec4<f32>
+{
+  init_checksums();
+
+  // Accumulate all uint into one checksum value.
+  var all_uint: u32 = 0;
+  all_uint = accumulate_uint(all_uint, v_in.uint8x2.x);
+  all_uint = accumulate_uint(all_uint, v_in.uint8x2.y);
+
+  all_uint = accumulate_uint(all_uint, v_in.uint8x4.x);
+  all_uint = accumulate_uint(all_uint, v_in.uint8x4.y);
+  all_uint = accumulate_uint(all_uint, v_in.uint8x4.z);
+  all_uint = accumulate_uint(all_uint, v_in.uint8x4.w);
+
+  all_uint = accumulate_uint(all_uint, v_in.uint16x2.x);
+  all_uint = accumulate_uint(all_uint, v_in.uint16x2.y);
+
+  all_uint = accumulate_uint(all_uint, v_in.uint16x4.x);
+  all_uint = accumulate_uint(all_uint, v_in.uint16x4.y);
+  all_uint = accumulate_uint(all_uint, v_in.uint16x4.z);
+  all_uint = accumulate_uint(all_uint, v_in.uint16x4.w);
+
+  checksums[index_uint] = f32(all_uint);
+
+  // Accumulate all sint into one checksum value.
+  var all_sint: i32 = 0;
+  all_sint = accumulate_sint(all_sint, v_in.sint8x2.x);
+  all_sint = accumulate_sint(all_sint, v_in.sint8x2.y);
+
+  all_sint = accumulate_sint(all_sint, v_in.sint8x4.x);
+  all_sint = accumulate_sint(all_sint, v_in.sint8x4.y);
+  all_sint = accumulate_sint(all_sint, v_in.sint8x4.z);
+  all_sint = accumulate_sint(all_sint, v_in.sint8x4.w);
+
+  all_sint = accumulate_sint(all_sint, v_in.sint16x2.x);
+  all_sint = accumulate_sint(all_sint, v_in.sint16x2.y);
+
+  all_sint = accumulate_sint(all_sint, v_in.sint16x4.x);
+  all_sint = accumulate_sint(all_sint, v_in.sint16x4.y);
+  all_sint = accumulate_sint(all_sint, v_in.sint16x4.z);
+  all_sint = accumulate_sint(all_sint, v_in.sint16x4.w);
+
+  checksums[index_sint] = f32(all_sint);
+
+  return vec4(0.0);
+}
+
+struct AttributeBlock2 {
+  @location(0) uint32: u32,
+  @location(1) uint32x2: vec2<u32>,
+  @location(2) uint32x3: vec3<u32>,
+  @location(3) uint32x4: vec4<u32>,
+}
+
+@vertex
+fn vertex_block_2(v_in: AttributeBlock2) -> @builtin(position) vec4<f32>
+{
+  init_checksums();
+
+  // Accumulate all uint into one checksum value.
+  var all_uint: u32 = 0;
+  all_uint = accumulate_uint(all_uint, v_in.uint32);
+
+  all_uint = accumulate_uint(all_uint, v_in.uint32x2.x);
+  all_uint = accumulate_uint(all_uint, v_in.uint32x2.y);
+
+  all_uint = accumulate_uint(all_uint, v_in.uint32x3.x);
+  all_uint = accumulate_uint(all_uint, v_in.uint32x3.y);
+  all_uint = accumulate_uint(all_uint, v_in.uint32x3.z);
+
+  all_uint = accumulate_uint(all_uint, v_in.uint32x4.x);
+  all_uint = accumulate_uint(all_uint, v_in.uint32x4.y);
+  all_uint = accumulate_uint(all_uint, v_in.uint32x4.z);
+  all_uint = accumulate_uint(all_uint, v_in.uint32x4.w);
+
+  checksums[index_uint] = f32(all_uint);
+
+  return vec4(0.0);
+}
+
+struct AttributeBlock3 {
+  @location(0) sint32: i32,
+  @location(1) sint32x2: vec2<i32>,
+  @location(2) sint32x3: vec3<i32>,
+  @location(3) sint32x4: vec4<i32>,
+}
+
+@vertex
+fn vertex_block_3(v_in: AttributeBlock3) -> @builtin(position) vec4<f32>
+{
+  init_checksums();
+
+  // Accumulate all sint into one checksum value.
+  var all_sint: i32 = 0;
+  all_sint = accumulate_sint(all_sint, v_in.sint32);
+
+  all_sint = accumulate_sint(all_sint, v_in.sint32x2.x);
+  all_sint = accumulate_sint(all_sint, v_in.sint32x2.y);
+
+  all_sint = accumulate_sint(all_sint, v_in.sint32x3.x);
+  all_sint = accumulate_sint(all_sint, v_in.sint32x3.y);
+  all_sint = accumulate_sint(all_sint, v_in.sint32x3.z);
+
+  all_sint = accumulate_sint(all_sint, v_in.sint32x4.x);
+  all_sint = accumulate_sint(all_sint, v_in.sint32x4.y);
+  all_sint = accumulate_sint(all_sint, v_in.sint32x4.z);
+  all_sint = accumulate_sint(all_sint, v_in.sint32x4.w);
+
+  checksums[index_sint] = f32(all_sint);
+
+  return vec4(0.0);
+}
+
+struct AttributeBlock4{
+  @location(0) float32: f32,
+  @location(1) float32x2: vec2<f32>,
+  @location(2) float32x3: vec3<f32>,
+  @location(3) float32x4: vec4<f32>,
+  @location(4) float16x2: vec2<f32>,
+  @location(5) float16x4: vec4<f32>,
+}
+
+@vertex
+fn vertex_block_4(v_in: AttributeBlock4) -> @builtin(position) vec4<f32>
+{
+  init_checksums();
+
+  // Accumulate all float32 into one checksum value.
+  var all_float32: f32 = 0.0;
+  all_float32 = accumulate_float32(all_float32, v_in.float32);
+
+  all_float32 = accumulate_float32(all_float32, v_in.float32x2.x);
+  all_float32 = accumulate_float32(all_float32, v_in.float32x2.y);
+
+  all_float32 = accumulate_float32(all_float32, v_in.float32x3.x);
+  all_float32 = accumulate_float32(all_float32, v_in.float32x3.y);
+  all_float32 = accumulate_float32(all_float32, v_in.float32x3.z);
+
+  all_float32 = accumulate_float32(all_float32, v_in.float32x4.x);
+  all_float32 = accumulate_float32(all_float32, v_in.float32x4.y);
+  all_float32 = accumulate_float32(all_float32, v_in.float32x4.z);
+  all_float32 = accumulate_float32(all_float32, v_in.float32x4.w);
+
+  checksums[index_float32] = f32(all_float32);
+
+  // Accumulate all float16 into one checksum value.
+  var all_float16: f32 = 0.0;
+  all_float16 = accumulate_float16(all_float16, v_in.float16x2.x);
+  all_float16 = accumulate_float16(all_float16, v_in.float16x2.y);
+
+  all_float16 = accumulate_float16(all_float16, v_in.float16x4.x);
+  all_float16 = accumulate_float16(all_float16, v_in.float16x4.y);
+  all_float16 = accumulate_float16(all_float16, v_in.float16x4.z);
+  all_float16 = accumulate_float16(all_float16, v_in.float16x4.w);
+
+  checksums[index_float16] = f32(all_float16);
+
+  return vec4(0.0);
+}
+
+struct AttributeBlock5{
+  @location(0) unorm10_10_10_2: vec4<f32>,
+}
+
+@vertex
+fn vertex_block_5(v_in: AttributeBlock5) -> @builtin(position) vec4<f32>
+{
+  init_checksums();
+
+  // Accumulate all unorm into one checksum value.
+  var all_unorm: f32 = 0.0;
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.x);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.y);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.z);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.w);
+
+  checksums[index_unorm] = f32(all_unorm);
+
+  return vec4(0.0);
+}
+
+fn accumulate_uint(accum: u32, val: u32) -> u32 {
+  return accum + val;
+}
+
+fn accumulate_sint(accum: i32, val: i32) -> i32 {
+  return accum + val;
+}
+
+fn accumulate_unorm(accum: f32, val: f32) -> f32 {
+  return accum + val;
+}
+
+fn accumulate_snorm(accum: f32, val: f32) -> f32 {
+  return accum + val;
+}
+
+fn accumulate_float16(accum: f32, val: f32) -> f32 {
+  return accum + val;
+}
+
+fn accumulate_float32(accum: f32, val: f32) -> f32 {
+  return accum + val;
+}
+
+@fragment
+fn fragment_main() -> @location(0) vec4<f32> {
+    return vec4<f32>(0.0);
+}
diff --git a/tests/tests/vertex_formats/mod.rs b/tests/tests/vertex_formats/mod.rs
new file mode 100644
index 0000000000..1d6aca5968
--- /dev/null
+++ b/tests/tests/vertex_formats/mod.rs
@@ -0,0 +1,388 @@
+//! Tests that vertex formats pass through to vertex shaders accurately.
+
+use std::num::NonZeroU64;
+
+use wgpu::util::{BufferInitDescriptor, DeviceExt};
+
+use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters, TestingContext};
+
+#[derive(Debug, Copy, Clone)]
+enum TestCase {
+    UnormsAndSnorms,
+    UintsAndSintsSmall,
+    UintsBig,
+    SintsBig,
+    Floats,
+    Unorm1010102,
+}
+
+struct Test<'a> {
+    case: TestCase,
+    entry_point: &'a str,
+    attributes: &'a [wgt::VertexAttribute],
+    input: &'a [u8],
+    checksums: &'a [f32],
+}
+
+async fn vertex_formats_all(ctx: TestingContext) {
+    let attributes_block_0 = &wgpu::vertex_attr_array![
+        0 => Unorm8x4,
+        1 => Unorm16x2,
+        2 => Unorm16x4,
+        3 => Snorm8x4,
+        4 => Snorm16x2,
+        5 => Snorm16x4,
+        6 => Unorm8x2,
+        7 => Snorm8x2,
+    ];
+
+    let attributes_block_1 = &wgpu::vertex_attr_array![
+        0 => Uint8x4,
+        1 => Uint16x2,
+        2 => Uint16x4,
+        3 => Sint8x4,
+        4 => Sint16x2,
+        5 => Sint16x4,
+        6 => Uint8x2,
+        7 => Sint8x2,
+    ];
+
+    let attributes_block_2 = &wgpu::vertex_attr_array![
+        0 => Uint32,
+        1 => Uint32x2,
+        2 => Uint32x3,
+        3 => Uint32x4,
+    ];
+
+    let attributes_block_3 = &wgpu::vertex_attr_array![
+        0 => Sint32,
+        1 => Sint32x2,
+        2 => Sint32x3,
+        3 => Sint32x4,
+    ];
+
+    let attributes_block_4 = &wgpu::vertex_attr_array![
+        0 => Float32,
+        1 => Float32x2,
+        2 => Float32x3,
+        3 => Float32x4,
+        4 => Float16x2,
+        5 => Float16x4,
+    ];
+
+    let tests = vec![
+        Test {
+            case: TestCase::UnormsAndSnorms,
+            entry_point: "vertex_block_0",
+            attributes: attributes_block_0,
+            input: &[
+                128u8, 128u8, 128u8, 128u8, // Unorm8x4 (0.5, 0.5, 0.5, 0.5)
+                0u8, 128u8, 0u8, 128u8, // Unorm16x2 (0.5, 0.5)
+                0u8, 64u8, 0u8, 64u8, 0u8, 64u8, 0u8,
+                64u8, // Unorm16x4 (0.25, 0.25, 0.25, 0.25)
+                127u8, 127u8, 127u8, 127u8, // Snorm8x4 (1, 1, 1, 1)
+                0u8, 128u8, 0u8, 128u8, // Snorm16x2 (-1, -1)
+                255u8, 127u8, 255u8, 127u8, 255u8, 127u8, 255u8,
+                127u8, // Snorm16x4 (1, 1, 1, 1)
+                255u8, 255u8, // Unorm8x2 (1, 1)
+                128u8, 128u8, // Snorm8x2 (-1, -1)
+            ],
+            checksums: &[0.0, 0.0, 6.0, 4.0, 0.0, 0.0],
+        },
+        Test {
+            case: TestCase::UintsAndSintsSmall,
+            entry_point: "vertex_block_1",
+            attributes: attributes_block_1,
+            input: &[
+                4u8, 8u8, 16u8, 32u8, // Uint8x4 (4, 8, 16, 32)
+                64u8, 0u8, 128u8, 0u8, // Uint16x2 (64, 128)
+                0u8, 1u8, 0u8, 2u8, 0u8, 4u8, 0u8, 8u8, // Uint16x4 (256, 512, 1024, 2048)
+                127u8, 127u8, 2u8, 0u8, // Sint8x4 (127, 127, 2, 0)
+                255u8, 255u8, 1u8, 0u8, // Sint16x2 (-1, 1)
+                128u8, 255u8, 128u8, 255u8, 0u8, 1u8, 240u8,
+                255u8, // Sint16x4 (-128, -128, 256, -16)
+                1u8, 2u8, // Uint8x2 (1, 2)
+                128u8, 128u8, // Sint8x2 (-128, -128)
+            ],
+            checksums: &[4095.0, -16.0, 0.0, 0.0, 0.0, 0.0],
+        },
+        Test {
+            case: TestCase::UintsBig,
+            entry_point: "vertex_block_2",
+            attributes: attributes_block_2,
+            input: &[
+                1u8, 0u8, 0u8, 0u8, // Uint32 (1)
+                2u8, 0u8, 0u8, 0u8, 4u8, 0u8, 0u8, 0u8, // Uint32x2 (2, 4)
+                8u8, 0u8, 0u8, 0u8, 16u8, 0u8, 0u8, 0u8, 32u8, 0u8, 0u8,
+                0u8, // Uint32x3 (8, 16, 32)
+                64u8, 0u8, 0u8, 0u8, 128u8, 0u8, 0u8, 0u8, 0u8, 1u8, 0u8, 0u8, 0u8, 2u8, 0u8,
+                0u8, // Uint32x4 (64, 128, 256, 512)
+            ],
+            checksums: &[1023.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+        },
+        Test {
+            case: TestCase::SintsBig,
+            entry_point: "vertex_block_3",
+            attributes: attributes_block_3,
+            input: &[
+                128u8, 255u8, 255u8, 255u8, // Sint32 (-128)
+                120u8, 0u8, 0u8, 0u8, 8u8, 0u8, 0u8, 0u8, // Sint32x2 (120, 8)
+                252u8, 255u8, 255u8, 255u8, 2u8, 0u8, 0u8, 0u8, 2u8, 0u8, 0u8,
+                0u8, // Sint32x3 (-4, 2, 2)
+                24u8, 252u8, 255u8, 255u8, 88u8, 2u8, 0u8, 0u8, 44u8, 1u8, 0u8, 0u8, 99u8, 0u8,
+                0u8, 0u8, // Sint32x4 (-1000, 600, 300, 99)
+            ],
+            checksums: &[0.0, -1.0, 0.0, 0.0, 0.0, 0.0],
+        },
+        Test {
+            case: TestCase::Floats,
+            entry_point: "vertex_block_4",
+            attributes: attributes_block_4,
+            input: &[
+                0u8, 0u8, 0u8, 63u8, // Float32 (0.5)
+                0u8, 0u8, 0u8, 191u8, 0u8, 0u8, 128u8, 64u8, // Float32x2 (-0.5, 4.0)
+                0u8, 0u8, 0u8, 192u8, 0u8, 0u8, 204u8, 194u8, 0u8, 0u8, 200u8,
+                66u8, // Float32x3 (-2.0, -102.0, 100.0)
+                0u8, 0u8, 92u8, 66u8, 0u8, 0u8, 72u8, 194u8, 0u8, 0u8, 32u8, 65u8, 0u8, 0u8, 128u8,
+                63u8, // Float32x4 (55.0, -50.0, 10.0, 1.0)
+                0u8, 60u8, 72u8, 53u8, // Float16x2 (1.0, 0.33)
+                72u8, 57u8, 0u8, 192u8, 0u8, 188u8, 0u8,
+                184u8, // Float16x4 (0.66, -2.0, -1.0, -0.5)
+            ],
+            checksums: &[0.0, 0.0, 0.0, 0.0, -1.5, 16.0],
+        },
+    ];
+
+    vertex_formats_common(ctx, &tests).await;
+}
+
+async fn vertex_formats_10_10_10_2(ctx: TestingContext) {
+    let attributes_block_5 = &wgpu::vertex_attr_array![
+        0 => Unorm10_10_10_2,
+    ];
+
+    let tests = vec![Test {
+        case: TestCase::Unorm1010102,
+        entry_point: "vertex_block_5",
+        attributes: attributes_block_5,
+        input: &[
+            // We are aiming for rgba of (0.5, 0.5, 0.5, 0.66)
+            // Packing   AA BB BBBB BBBB GGGG GGGG GG RR RRRR RRRR
+            // Binary    10 10 0000 0000 1000 0000 00 10 0000 0000
+            // Hex               A0        08         02        00
+            // Decimal          160         8          2         0
+            // unorm   0.66          0.5          0.5          0.5 = 2.16
+            0u8, 2u8, 8u8, 160u8, // Unorm10_10_10_2
+        ],
+        checksums: &[0.0, 0.0, 2.16, 0.0, 0.0, 0.0],
+    }];
+
+    vertex_formats_common(ctx, &tests).await;
+}
+
+async fn vertex_formats_common(ctx: TestingContext, tests: &[Test<'_>]) {
+    let shader = ctx
+        .device
+        .create_shader_module(wgpu::include_wgsl!("draw.vert.wgsl"));
+
+    let bgl = ctx
+        .device
+        .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+            label: None,
+            entries: &[wgpu::BindGroupLayoutEntry {
+                binding: 0,
+                ty: wgpu::BindingType::Buffer {
+                    ty: wgpu::BufferBindingType::Storage { read_only: false },
+                    has_dynamic_offset: false,
+                    min_binding_size: NonZeroU64::new(4),
+                },
+                visibility: wgpu::ShaderStages::VERTEX,
+                count: None,
+            }],
+        });
+
+    let ppl = ctx
+        .device
+        .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+            label: None,
+            bind_group_layouts: &[&bgl],
+            push_constant_ranges: &[],
+        });
+
+    let dummy = ctx
+        .device
+        .create_texture_with_data(
+            &ctx.queue,
+            &wgpu::TextureDescriptor {
+                label: Some("dummy"),
+                size: wgpu::Extent3d {
+                    width: 1,
+                    height: 1,
+                    depth_or_array_layers: 1,
+                },
+                mip_level_count: 1,
+                sample_count: 1,
+                dimension: wgpu::TextureDimension::D2,
+                format: wgpu::TextureFormat::Rgba8Unorm,
+                usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_DST,
+                view_formats: &[],
+            },
+            wgpu::util::TextureDataOrder::LayerMajor,
+            &[0, 0, 0, 1],
+        )
+        .create_view(&wgpu::TextureViewDescriptor::default());
+
+    let mut failed = false;
+    for test in tests {
+        let buffer_input = ctx.device.create_buffer_init(&BufferInitDescriptor {
+            label: None,
+            contents: bytemuck::cast_slice(test.input),
+            usage: wgpu::BufferUsages::VERTEX,
+        });
+
+        let pipeline_desc = wgpu::RenderPipelineDescriptor {
+            label: None,
+            layout: Some(&ppl),
+            vertex: wgpu::VertexState {
+                buffers: &[wgpu::VertexBufferLayout {
+                    array_stride: 0, // Calculate, please!
+                    step_mode: wgpu::VertexStepMode::Vertex,
+                    attributes: test.attributes,
+                }],
+                module: &shader,
+                entry_point: test.entry_point,
+                compilation_options: Default::default(),
+            },
+            primitive: wgpu::PrimitiveState::default(),
+            depth_stencil: None,
+            multisample: wgpu::MultisampleState::default(),
+            fragment: Some(wgpu::FragmentState {
+                module: &shader,
+                entry_point: "fragment_main",
+                compilation_options: Default::default(),
+                targets: &[Some(wgpu::ColorTargetState {
+                    format: wgpu::TextureFormat::Rgba8Unorm,
+                    blend: None,
+                    write_mask: wgpu::ColorWrites::ALL,
+                })],
+            }),
+            multiview: None,
+            cache: None,
+        };
+
+        let pipeline = ctx.device.create_render_pipeline(&pipeline_desc);
+
+        let expected = test.checksums;
+        let buffer_size = (std::mem::size_of_val(&expected[0]) * expected.len()) as u64;
+        let cpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
+            label: None,
+            size: buffer_size,
+            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
+            mapped_at_creation: false,
+        });
+
+        let gpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
+            label: None,
+            size: buffer_size,
+            usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::STORAGE,
+            mapped_at_creation: false,
+        });
+
+        let bg = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
+            label: None,
+            layout: &bgl,
+            entries: &[wgpu::BindGroupEntry {
+                binding: 0,
+                resource: gpu_buffer.as_entire_binding(),
+            }],
+        });
+
+        let mut encoder1 = ctx
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
+
+        let mut rpass = encoder1.begin_render_pass(&wgpu::RenderPassDescriptor {
+            label: None,
+            color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+                ops: wgpu::Operations::default(),
+                resolve_target: None,
+                view: &dummy,
+            })],
+            depth_stencil_attachment: None,
+            timestamp_writes: None,
+            occlusion_query_set: None,
+        });
+
+        rpass.set_vertex_buffer(0, buffer_input.slice(..));
+        rpass.set_pipeline(&pipeline);
+        rpass.set_bind_group(0, &bg, &[]);
+
+        // Draw three vertices in a single instance, which is enough to generate the
+        // checksums.
+        rpass.draw(0..3, 0..1);
+
+        drop(rpass);
+
+        let mut encoder2 = ctx
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
+
+        encoder2.copy_buffer_to_buffer(&gpu_buffer, 0, &cpu_buffer, 0, buffer_size);
+
+        // See https://github.com/gfx-rs/wgpu/issues/4732 for why this is split between two submissions
+        // with a hard wait in between.
+        ctx.queue.submit([encoder1.finish()]);
+        ctx.async_poll(wgpu::Maintain::wait())
+            .await
+            .panic_on_timeout();
+        ctx.queue.submit([encoder2.finish()]);
+        let slice = cpu_buffer.slice(..);
+        slice.map_async(wgpu::MapMode::Read, |_| ());
+        ctx.async_poll(wgpu::Maintain::wait())
+            .await
+            .panic_on_timeout();
+        let data: Vec<f32> = bytemuck::cast_slice(&slice.get_mapped_range()).to_vec();
+
+        let case_name = format!("Case {:?}", test.case);
+
+        // Calculate the difference between data and expected. Since the data is
+        // a bunch of float checksums, we allow a fairly large epsilon, which helps
+        // with the accumulation of float rounding errors.
+        const EPSILON: f32 = 0.01;
+
+        let mut deltas = data.iter().zip(expected.iter()).map(|(d, e)| (d - e).abs());
+        if deltas.any(|x| x > EPSILON) {
+            eprintln!(
+                "Failed: Got: {:?} Expected: {:?} - {case_name}",
+                data, expected,
+            );
+            failed = true;
+            continue;
+        }
+
+        eprintln!("Passed: {case_name}");
+    }
+
+    assert!(!failed);
+}
+
+#[gpu_test]
+static VERTEX_FORMATS_ALL: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .test_features_limits()
+            .features(wgpu::Features::VERTEX_WRITABLE_STORAGE),
+    )
+    .run_async(vertex_formats_all);
+
+// Some backends can handle Unorm-10-10-2, but GL backends seem to throw this error:
+// Validation Error: GL_INVALID_ENUM in glVertexAttribFormat(type = GL_UNSIGNED_INT_10_10_10_2)
+#[gpu_test]
+static VERTEX_FORMATS_10_10_10_2: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .expect_fail(FailureCase::backend(wgpu::Backends::GL))
+            .test_features_limits()
+            .features(wgpu::Features::VERTEX_WRITABLE_STORAGE),
+    )
+    .run_async(vertex_formats_10_10_10_2);
diff --git a/tests/tests/vertex_indices/mod.rs b/tests/tests/vertex_indices/mod.rs
index 59048ef31c..dcc2ca82f5 100644
--- a/tests/tests/vertex_indices/mod.rs
+++ b/tests/tests/vertex_indices/mod.rs
@@ -166,7 +166,6 @@ struct Test {
     id_source: IdSource,
     draw_call_kind: DrawCallKind,
     encoder_kind: EncoderKind,
-    vertex_pulling_transform: bool,
 }
 
 impl Test {
@@ -280,15 +279,6 @@ async fn vertex_index_common(ctx: TestingContext) {
         cache: None,
     };
     let builtin_pipeline = ctx.device.create_render_pipeline(&pipeline_desc);
-    pipeline_desc
-        .vertex
-        .compilation_options
-        .vertex_pulling_transform = true;
-    let builtin_pipeline_vpt = ctx.device.create_render_pipeline(&pipeline_desc);
-    pipeline_desc
-        .vertex
-        .compilation_options
-        .vertex_pulling_transform = false;
 
     pipeline_desc.vertex.entry_point = "vs_main_buffers";
     pipeline_desc.vertex.buffers = &[
@@ -304,15 +294,6 @@ async fn vertex_index_common(ctx: TestingContext) {
         },
     ];
     let buffer_pipeline = ctx.device.create_render_pipeline(&pipeline_desc);
-    pipeline_desc
-        .vertex
-        .compilation_options
-        .vertex_pulling_transform = true;
-    let buffer_pipeline_vpt = ctx.device.create_render_pipeline(&pipeline_desc);
-    pipeline_desc
-        .vertex
-        .compilation_options
-        .vertex_pulling_transform = false;
 
     let dummy = ctx
         .device
@@ -341,18 +322,12 @@ async fn vertex_index_common(ctx: TestingContext) {
         .cartesian_product(IdSource::iter())
         .cartesian_product(DrawCallKind::iter())
         .cartesian_product(EncoderKind::iter())
-        .cartesian_product([false, true])
-        .map(
-            |((((case, id_source), draw_call_kind), encoder_kind), vertex_pulling_transform)| {
-                Test {
-                    case,
-                    id_source,
-                    draw_call_kind,
-                    encoder_kind,
-                    vertex_pulling_transform,
-                }
-            },
-        )
+        .map(|(((case, id_source), draw_call_kind), encoder_kind)| Test {
+            case,
+            id_source,
+            draw_call_kind,
+            encoder_kind,
+        })
         .collect::<Vec<_>>();
 
     let features = ctx.adapter.features();
@@ -360,20 +335,8 @@ async fn vertex_index_common(ctx: TestingContext) {
     let mut failed = false;
     for test in tests {
         let pipeline = match test.id_source {
-            IdSource::Buffers => {
-                if test.vertex_pulling_transform {
-                    &buffer_pipeline_vpt
-                } else {
-                    &buffer_pipeline
-                }
-            }
-            IdSource::Builtins => {
-                if test.vertex_pulling_transform {
-                    &builtin_pipeline_vpt
-                } else {
-                    &builtin_pipeline
-                }
-            }
+            IdSource::Buffers => &buffer_pipeline,
+            IdSource::Builtins => &builtin_pipeline,
         };
 
         let expected = test.expectation(&ctx);
diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 0942fda46c..ba2b94dd24 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -1484,7 +1484,6 @@ impl Global {
                         .vertex
                         .stage
                         .zero_initialize_workgroup_memory,
-                    vertex_pulling_transform: desc.vertex.stage.vertex_pulling_transform,
                 };
                 ResolvedVertexState {
                     stage,
@@ -1511,7 +1510,6 @@ impl Global {
                         .vertex
                         .stage
                         .zero_initialize_workgroup_memory,
-                    vertex_pulling_transform: state.stage.vertex_pulling_transform,
                 };
                 Some(ResolvedFragmentState {
                     stage,
@@ -1720,7 +1718,6 @@ impl Global {
                 entry_point: desc.stage.entry_point.clone(),
                 constants: desc.stage.constants.clone(),
                 zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory,
-                vertex_pulling_transform: desc.stage.vertex_pulling_transform,
             };
 
             let desc = ResolvedComputePipelineDescriptor {
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 25f95f8a2a..e0f2ddfe57 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -2694,7 +2694,6 @@ impl<A: HalApi> Device<A> {
                 entry_point: final_entry_point_name.as_ref(),
                 constants: desc.stage.constants.as_ref(),
                 zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory,
-                vertex_pulling_transform: false,
             },
             cache: cache.as_ref().and_then(|it| it.raw.as_ref()),
         };
@@ -3114,7 +3113,6 @@ impl<A: HalApi> Device<A> {
                 entry_point: &vertex_entry_point_name,
                 constants: stage_desc.constants.as_ref(),
                 zero_initialize_workgroup_memory: stage_desc.zero_initialize_workgroup_memory,
-                vertex_pulling_transform: stage_desc.vertex_pulling_transform,
             }
         };
 
@@ -3171,7 +3169,6 @@ impl<A: HalApi> Device<A> {
                     zero_initialize_workgroup_memory: fragment_state
                         .stage
                         .zero_initialize_workgroup_memory,
-                    vertex_pulling_transform: false,
                 })
             }
             None => None,
diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs
index 6366279eff..2ab49f83d0 100644
--- a/wgpu-core/src/pipeline.rs
+++ b/wgpu-core/src/pipeline.rs
@@ -147,8 +147,6 @@ pub struct ProgrammableStageDescriptor<'a> {
     /// This is required by the WebGPU spec, but may have overhead which can be avoided
     /// for cross-platform applications
     pub zero_initialize_workgroup_memory: bool,
-    /// Should the pipeline attempt to transform vertex shaders to use vertex pulling.
-    pub vertex_pulling_transform: bool,
 }
 
 /// Describes a programmable pipeline stage.
@@ -176,8 +174,6 @@ pub struct ResolvedProgrammableStageDescriptor<'a, A: HalApi> {
     /// This is required by the WebGPU spec, but may have overhead which can be avoided
     /// for cross-platform applications
     pub zero_initialize_workgroup_memory: bool,
-    /// Should the pipeline attempt to transform vertex shaders to use vertex pulling.
-    pub vertex_pulling_transform: bool,
 }
 
 /// Number of implicit bind groups derived at pipeline creation.
diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs
index daed0c1d35..30ff45ff5b 100644
--- a/wgpu-hal/examples/halmark/main.rs
+++ b/wgpu-hal/examples/halmark/main.rs
@@ -257,7 +257,6 @@ impl<A: hal::Api> Example<A> {
                 entry_point: "vs_main",
                 constants: &constants,
                 zero_initialize_workgroup_memory: true,
-                vertex_pulling_transform: false,
             },
             vertex_buffers: &[],
             fragment_stage: Some(hal::ProgrammableStage {
@@ -265,7 +264,6 @@ impl<A: hal::Api> Example<A> {
                 entry_point: "fs_main",
                 constants: &constants,
                 zero_initialize_workgroup_memory: true,
-                vertex_pulling_transform: false,
             }),
             primitive: wgt::PrimitiveState {
                 topology: wgt::PrimitiveTopology::TriangleStrip,
diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs
index 1cde9fa251..7cd6547f2c 100644
--- a/wgpu-hal/examples/ray-traced-triangle/main.rs
+++ b/wgpu-hal/examples/ray-traced-triangle/main.rs
@@ -379,7 +379,6 @@ impl<A: hal::Api> Example<A> {
                     entry_point: "main",
                     constants: &Default::default(),
                     zero_initialize_workgroup_memory: true,
-                    vertex_pulling_transform: false,
                 },
                 cache: None,
             })
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index bd047b5ff6..550befd146 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -1870,8 +1870,6 @@ pub struct ProgrammableStage<'a, A: Api> {
     /// This is required by the WebGPU spec, but may have overhead which can be avoided
     /// for cross-platform applications
     pub zero_initialize_workgroup_memory: bool,
-    /// Should the pipeline attempt to transform vertex shaders to use vertex pulling.
-    pub vertex_pulling_transform: bool,
 }
 
 // Rust gets confused about the impl requirements for `A`
@@ -1882,7 +1880,6 @@ impl<A: Api> Clone for ProgrammableStage<'_, A> {
             entry_point: self.entry_point,
             constants: self.constants,
             zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory,
-            vertex_pulling_transform: self.vertex_pulling_transform,
         }
     }
 }
diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs
index efafc98e1b..d9525999d8 100644
--- a/wgpu-hal/src/metal/device.rs
+++ b/wgpu-hal/src/metal/device.rs
@@ -158,7 +158,7 @@ impl super::Device {
                 metal::MTLPrimitiveTopologyClass::Point => true,
                 _ => false,
             },
-            vertex_pulling_transform: stage.vertex_pulling_transform,
+            vertex_pulling_transform: true,
             vertex_buffer_mappings: vertex_buffer_mappings.to_vec(),
         };
 
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 88e0a9f503..7491d01557 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -1180,10 +1180,6 @@ impl crate::Context for ContextWgpuCore {
                         .vertex
                         .compilation_options
                         .zero_initialize_workgroup_memory,
-                    vertex_pulling_transform: desc
-                        .vertex
-                        .compilation_options
-                        .vertex_pulling_transform,
                 },
                 buffers: Borrowed(&vertex_buffers),
             },
@@ -1198,7 +1194,6 @@ impl crate::Context for ContextWgpuCore {
                     zero_initialize_workgroup_memory: frag
                         .compilation_options
                         .zero_initialize_workgroup_memory,
-                    vertex_pulling_transform: false,
                 },
                 targets: Borrowed(frag.targets),
             }),
@@ -1244,7 +1239,6 @@ impl crate::Context for ContextWgpuCore {
                 zero_initialize_workgroup_memory: desc
                     .compilation_options
                     .zero_initialize_workgroup_memory,
-                vertex_pulling_transform: false,
             },
             cache: desc.cache.map(|c| c.id.into()),
         };
diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs
index 04ce09aa7d..fb3e611c94 100644
--- a/wgpu/src/lib.rs
+++ b/wgpu/src/lib.rs
@@ -2059,8 +2059,6 @@ pub struct PipelineCompilationOptions<'a> {
     /// This is required by the WebGPU spec, but may have overhead which can be avoided
     /// for cross-platform applications
     pub zero_initialize_workgroup_memory: bool,
-    /// Should the pipeline attempt to transform vertex shaders to use vertex pulling.
-    pub vertex_pulling_transform: bool,
 }
 
 impl<'a> Default for PipelineCompilationOptions<'a> {
@@ -2074,7 +2072,6 @@ impl<'a> Default for PipelineCompilationOptions<'a> {
         Self {
             constants,
             zero_initialize_workgroup_memory: true,
-            vertex_pulling_transform: false,
         }
     }
 }

From b5934e89f7aa4619deffddd7cb42f954f48cc88b Mon Sep 17 00:00:00 2001
From: Kevin Reid <kpreid@switchb.org>
Date: Thu, 18 Jul 2024 15:46:04 -0700
Subject: [PATCH 057/226] Disable wgpu-core documentation as a workaround for
 #4905.

This enables `cargo doc` to succeed in a reasonable amount of time,
as long as the reader isn't looking for documentation for `wgpu-core`
itself.
---
 CHANGELOG.md         |  1 +
 wgpu-core/src/lib.rs | 14 ++++++++++++++
 2 files changed, 15 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 149c58cde9..8f5f499740 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -44,6 +44,7 @@ Bottom level categories:
 #### General
 
 - Fix profiling with `tracy`. By @waywardmonkeys in [#5988](https://github.com/gfx-rs/wgpu/pull/5988)
+- As a workaround for [issue #4905](https://github.com/gfx-rs/wgpu/issues/4905), `wgpu-core` is undocumented unless built with the `--cfg wgpu_core_doc` flag. By @kpreid in [#5987](https://github.com/gfx-rs/wgpu/pull/5987)
 
 ## 22.0.0 (2024-07-17)
 
diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs
index 36105c90e6..7bc6cfcefe 100644
--- a/wgpu-core/src/lib.rs
+++ b/wgpu-core/src/lib.rs
@@ -2,6 +2,20 @@
 //! It is designed for integration into browsers, as well as wrapping
 //! into other language-specific user-friendly libraries.
 //!
+#![cfg_attr(
+    not(any(not(doc), wgpu_core_doc)),
+    doc = r#"\
+## Documentation hidden
+
+As a workaround for [an issue in rustdoc](https://github.com/rust-lang/rust/issues/114891)
+that [affects `wgpu-core` documentation builds \
+severely](https://github.com/gfx-rs/wgpu/issues/4905),
+the documentation for `wgpu-core` is empty unless built with
+`RUSTFLAGS="--cfg wgpu_core_doc"`, which may take a very long time.
+"#
+)]
+#![cfg(any(not(doc), wgpu_core_doc))]
+//!
 //! ## Feature flags
 #![doc = document_features::document_features!()]
 //!

From 56d418f121f0b25938bcb9574e38e79f06083cd9 Mon Sep 17 00:00:00 2001
From: Kevin Reid <kpreid@switchb.org>
Date: Thu, 18 Jul 2024 21:40:03 -0700
Subject: [PATCH 058/226] Partial revert "disable rustdoc in CI (#5839)"

This reverts most of the changes in a2fcd72606f83cbb58c1aca2e7e1ad52a11d2067,
but the "check private item docs" step is still disabled since it
operates only on wgpu-core, which is exactly the problem.
---
 .github/workflows/ci.yml | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index cd254fda6a..426227adf1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -150,10 +150,10 @@ jobs:
           cargo -V
 
       # Use special toolchain for rustdoc, see https://github.com/gfx-rs/wgpu/issues/4905
-      # - name: Install Rustdoc Toolchain
-      #   run: |
-      #     rustup toolchain install ${{ env.DOCS_RUST_VERSION }} --no-self-update --profile=minimal --component rust-docs --target ${{ matrix.target }}
-      #     cargo +${{ env.DOCS_RUST_VERSION }} -V
+      - name: Install Rustdoc Toolchain
+        run: |
+          rustup toolchain install ${{ env.DOCS_RUST_VERSION }} --no-self-update --profile=minimal --component rust-docs --target ${{ matrix.target }}
+          cargo +${{ env.DOCS_RUST_VERSION }} -V
 
       - name: disable debug
         shell: bash
@@ -195,11 +195,11 @@ jobs:
           # build for WebGPU
           cargo clippy --target ${{ matrix.target }} --tests --features glsl,spirv,fragile-send-sync-non-atomic-wasm
           cargo clippy --target ${{ matrix.target }} --tests --features glsl,spirv
-          # cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --no-deps --features glsl,spirv
+          cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --no-deps --features glsl,spirv
 
           # all features
           cargo clippy --target ${{ matrix.target }} --tests --all-features
-          # cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --no-deps --all-features
+          cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --no-deps --all-features
 
       - name: check em
         if: matrix.kind == 'em'
@@ -229,13 +229,15 @@ jobs:
           cargo clippy --target ${{ matrix.target }} --tests --benches --all-features
 
           # build docs
-          # cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --all-features --no-deps
+          cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --all-features --no-deps
+      # wgpu-core docs are not feasible due to <https://github.com/gfx-rs/wgpu/issues/4905>
+      #
       # - name: check private item docs
       #   if: matrix.kind == 'native'
       #   shell: bash
       #   run: |
       #     set -e
-
+      #
       #     # wgpu_core package
       #     cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} \
       #           --package wgpu-core \

From 7a73c75e6aa5dcf3b998648863bbc66e1a2314bd Mon Sep 17 00:00:00 2001
From: sagudev <16504129+sagudev@users.noreply.github.com>
Date: Fri, 19 Jul 2024 15:43:21 +0200
Subject: [PATCH 059/226] Make RequestDeviceError (de)serializable

Signed-off-by: sagudev <16504129+sagudev@users.noreply.github.com>
---
 wgpu-core/src/instance.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index 8c580588ff..ba58f6d2ef 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -23,6 +23,7 @@ type HalInstance<A> = <A as hal::Api>::Instance;
 type HalSurface<A> = <A as hal::Api>::Surface;
 
 #[derive(Clone, Debug, Error)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[error("Limit '{name}' value {requested} is better than allowed {allowed}")]
 pub struct FailedLimit {
     name: &'static str,
@@ -391,6 +392,8 @@ pub enum GetSurfaceSupportError {
 }
 
 #[derive(Clone, Debug, Error)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+#[cfg_attr(feature = "serde", serde(bound(deserialize = "'de: 'static")))]
 /// Error when requesting a device from the adaptor
 #[non_exhaustive]
 pub enum RequestDeviceError {

From d03b7e239d3d782102ecc9174361350b1035003c Mon Sep 17 00:00:00 2001
From: sagudev <16504129+sagudev@users.noreply.github.com>
Date: Fri, 19 Jul 2024 15:44:19 +0200
Subject: [PATCH 060/226] Make RequestAdapterError (de)serializable

Signed-off-by: sagudev <16504129+sagudev@users.noreply.github.com>
---
 wgpu-core/src/instance.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index ba58f6d2ef..a16fb0a29f 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -438,6 +438,7 @@ impl<M: Marker> AdapterInputs<'_, M> {
 pub struct InvalidAdapter;
 
 #[derive(Clone, Debug, Error)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[non_exhaustive]
 pub enum RequestAdapterError {
     #[error("No suitable adapter found")]

From fc87033b30c371ce7aafaab57c3c92dcbc2e34cd Mon Sep 17 00:00:00 2001
From: sagudev <16504129+sagudev@users.noreply.github.com>
Date: Fri, 19 Jul 2024 15:50:48 +0200
Subject: [PATCH 061/226] Make BufferAccessError (de)serializable

Signed-off-by: sagudev <16504129+sagudev@users.noreply.github.com>
---
 wgpu-core/src/device/mod.rs | 4 ++++
 wgpu-core/src/resource.rs   | 7 +++++++
 2 files changed, 11 insertions(+)

diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index e37291ef20..222c50248a 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -364,6 +364,8 @@ fn map_buffer<A: HalApi>(
 }
 
 #[derive(Clone, Debug)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+#[cfg_attr(feature = "serde", serde(bound(deserialize = "'de: 'static")))]
 pub struct DeviceMismatch {
     pub(super) res: ResourceErrorIdent,
     pub(super) res_device: ResourceErrorIdent,
@@ -388,6 +390,8 @@ impl std::fmt::Display for DeviceMismatch {
 impl std::error::Error for DeviceMismatch {}
 
 #[derive(Clone, Debug, Error)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+#[cfg_attr(feature = "serde", serde(bound(deserialize = "'de: 'static")))]
 #[non_exhaustive]
 pub enum DeviceError {
     #[error("{0} is invalid.")]
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 4e94f1731c..5b11525126 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -76,6 +76,7 @@ impl TrackingData {
 }
 
 #[derive(Clone, Debug)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub struct ResourceErrorIdent {
     r#type: &'static str,
     label: String,
@@ -341,6 +342,8 @@ pub struct BufferMapOperation {
 }
 
 #[derive(Clone, Debug, Error)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+#[cfg_attr(feature = "serde", serde(bound(deserialize = "'de: 'static")))]
 #[non_exhaustive]
 pub enum BufferAccessError {
     #[error(transparent)]
@@ -389,6 +392,8 @@ pub enum BufferAccessError {
 }
 
 #[derive(Clone, Debug, Error)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+#[cfg_attr(feature = "serde", serde(bound(deserialize = "'de: 'static")))]
 #[error("Usage flags {actual:?} of {res} do not contain required usage flags {expected:?}")]
 pub struct MissingBufferUsageError {
     pub(crate) res: ResourceErrorIdent,
@@ -405,6 +410,8 @@ pub struct MissingTextureUsageError {
 }
 
 #[derive(Clone, Debug, Error)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+#[cfg_attr(feature = "serde", serde(bound(deserialize = "'de: 'static")))]
 #[error("{0} has been destroyed")]
 pub struct DestroyedResourceError(pub ResourceErrorIdent);
 

From c20946d02f6092d2f6dff6776bd1af7bce1e361f Mon Sep 17 00:00:00 2001
From: Bruce Mitchener <bruce.mitchener@gmail.com>
Date: Sat, 20 Jul 2024 00:49:14 +0700
Subject: [PATCH 062/226] wgpu-types: Use explicit feature for `serde`

This helps to prepare for the coming day when explicit features
will be required.
---
 wgpu-types/Cargo.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/wgpu-types/Cargo.toml b/wgpu-types/Cargo.toml
index 915cdde6f0..8c211e1839 100644
--- a/wgpu-types/Cargo.toml
+++ b/wgpu-types/Cargo.toml
@@ -30,6 +30,7 @@ targets = [
 [features]
 strict_asserts = []
 fragile-send-sync-non-atomic-wasm = []
+serde = ["dep:serde"]
 # Enables some internal instrumentation for debugging purposes.
 counters = []
 

From cf5798291fb57381033d132c051206a145ffdcb1 Mon Sep 17 00:00:00 2001
From: Bruce Mitchener <bruce.mitchener@gmail.com>
Date: Sat, 20 Jul 2024 10:35:19 +0700
Subject: [PATCH 063/226] Remove `allow(unknown_lints)` and
 `clippy::if_then_panic` (#6000)

---
 player/tests/test.rs  | 1 -
 wgpu-hal/src/lib.rs   | 4 ----
 wgpu-types/src/lib.rs | 1 -
 3 files changed, 6 deletions(-)

diff --git a/player/tests/test.rs b/player/tests/test.rs
index a5aba15bd6..864f9429a9 100644
--- a/player/tests/test.rs
+++ b/player/tests/test.rs
@@ -171,7 +171,6 @@ impl Test<'_> {
                     .collect::<Vec<u8>>(),
             };
 
-            #[allow(unknown_lints, clippy::if_then_panic)]
             if &expected_data[..] != contents {
                 panic!(
                     "Test expectation is not met!\nBuffer content was:\n{:?}\nbut expected:\n{:?}",
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 550befd146..706c369eb5 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -207,8 +207,6 @@
 #![allow(
     // this happens on the GL backend, where it is both thread safe and non-thread safe in the same code.
     clippy::arc_with_non_send_sync,
-    // for `if_then_panic` until it reaches stable
-    unknown_lints,
     // We don't use syntax sugar where it's not necessary.
     clippy::match_like_matches_macro,
     // Redundant matching is more explicit.
@@ -221,8 +219,6 @@
     clippy::single_match,
     // Push commands are more regular than macros.
     clippy::vec_init_then_push,
-    // "if panic" is a good uniform construct.
-    clippy::if_then_panic,
     // We unsafe impl `Send` for a reason.
     clippy::non_send_fields_in_send_ty,
     // TODO!
diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
index 59b5bf57a0..dbe3a010b1 100644
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@@ -1482,7 +1482,6 @@ impl Limits {
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub struct DownlevelLimits {}
 
-#[allow(unknown_lints)] // derivable_impls is nightly only currently
 #[allow(clippy::derivable_impls)]
 impl Default for DownlevelLimits {
     fn default() -> Self {

From 164b7bd3e7bffc07c2d3da2eaca06d37105a16fd Mon Sep 17 00:00:00 2001
From: Connor Fitzgerald <connorwadefitzgerald@gmail.com>
Date: Sat, 20 Jul 2024 02:58:56 -0400
Subject: [PATCH 064/226] Split `wgpu` Crate into Modules (#5998)

* Split wgpu/lib.rs into Modules
* Use crate::* Imports
---
 wgpu/src/api/adapter.rs               |  255 ++
 wgpu/src/api/bind_group.rs            |  151 +
 wgpu/src/api/bind_group_layout.rs     |   59 +
 wgpu/src/api/buffer.rs                |  730 +++
 wgpu/src/api/command_buffer.rs        |   31 +
 wgpu/src/api/command_encoder.rs       |  382 ++
 wgpu/src/api/common_pipeline.rs       |   64 +
 wgpu/src/api/compute_pass.rs          |  256 ++
 wgpu/src/api/compute_pipeline.rs      |   76 +
 wgpu/src/api/device.rs                |  727 +++
 wgpu/src/api/id.rs                    |   67 +
 wgpu/src/api/instance.rs              |  400 ++
 wgpu/src/api/mod.rs                   |   80 +
 wgpu/src/api/pipeline_cache.rs        |   98 +
 wgpu/src/api/pipeline_layout.rs       |   61 +
 wgpu/src/api/query_set.rs             |   46 +
 wgpu/src/api/queue.rs                 |  300 ++
 wgpu/src/api/render_bundle.rs         |   50 +
 wgpu/src/api/render_bundle_encoder.rs |  278 ++
 wgpu/src/api/render_pass.rs           |  817 ++++
 wgpu/src/api/render_pipeline.rs       |  141 +
 wgpu/src/api/sampler.rs               |   94 +
 wgpu/src/api/shader_module.rs         |  249 +
 wgpu/src/api/surface.rs               |  425 ++
 wgpu/src/api/surface_texture.rs       |   86 +
 wgpu/src/api/texture.rs               |  160 +
 wgpu/src/api/texture_view.rs          |   98 +
 wgpu/src/lib.rs                       | 6091 +------------------------
 wgpu/src/send_sync.rs                 |   27 +
 wgpu/src/util/mod.rs                  |    2 +-
 30 files changed, 6247 insertions(+), 6054 deletions(-)
 create mode 100644 wgpu/src/api/adapter.rs
 create mode 100644 wgpu/src/api/bind_group.rs
 create mode 100644 wgpu/src/api/bind_group_layout.rs
 create mode 100644 wgpu/src/api/buffer.rs
 create mode 100644 wgpu/src/api/command_buffer.rs
 create mode 100644 wgpu/src/api/command_encoder.rs
 create mode 100644 wgpu/src/api/common_pipeline.rs
 create mode 100644 wgpu/src/api/compute_pass.rs
 create mode 100644 wgpu/src/api/compute_pipeline.rs
 create mode 100644 wgpu/src/api/device.rs
 create mode 100644 wgpu/src/api/id.rs
 create mode 100644 wgpu/src/api/instance.rs
 create mode 100644 wgpu/src/api/mod.rs
 create mode 100644 wgpu/src/api/pipeline_cache.rs
 create mode 100644 wgpu/src/api/pipeline_layout.rs
 create mode 100644 wgpu/src/api/query_set.rs
 create mode 100644 wgpu/src/api/queue.rs
 create mode 100644 wgpu/src/api/render_bundle.rs
 create mode 100644 wgpu/src/api/render_bundle_encoder.rs
 create mode 100644 wgpu/src/api/render_pass.rs
 create mode 100644 wgpu/src/api/render_pipeline.rs
 create mode 100644 wgpu/src/api/sampler.rs
 create mode 100644 wgpu/src/api/shader_module.rs
 create mode 100644 wgpu/src/api/surface.rs
 create mode 100644 wgpu/src/api/surface_texture.rs
 create mode 100644 wgpu/src/api/texture.rs
 create mode 100644 wgpu/src/api/texture_view.rs
 create mode 100644 wgpu/src/send_sync.rs

diff --git a/wgpu/src/api/adapter.rs b/wgpu/src/api/adapter.rs
new file mode 100644
index 0000000000..5f43a461f1
--- /dev/null
+++ b/wgpu/src/api/adapter.rs
@@ -0,0 +1,255 @@
+use std::{future::Future, sync::Arc, thread};
+
+use crate::context::{DeviceRequest, DynContext, ObjectId};
+use crate::*;
+
+/// Handle to a physical graphics and/or compute device.
+///
+/// Adapters can be used to open a connection to the corresponding [`Device`]
+/// on the host system by using [`Adapter::request_device`].
+///
+/// Does not have to be kept alive.
+///
+/// Corresponds to [WebGPU `GPUAdapter`](https://gpuweb.github.io/gpuweb/#gpu-adapter).
+#[derive(Debug)]
+pub struct Adapter {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Adapter: Send, Sync);
+
+impl Drop for Adapter {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.adapter_drop(&self.id, self.data.as_ref())
+        }
+    }
+}
+
+pub use wgt::RequestAdapterOptions as RequestAdapterOptionsBase;
+/// Additional information required when requesting an adapter.
+///
+/// For use with [`Instance::request_adapter`].
+///
+/// Corresponds to [WebGPU `GPURequestAdapterOptions`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpurequestadapteroptions).
+pub type RequestAdapterOptions<'a, 'b> = RequestAdapterOptionsBase<&'a Surface<'b>>;
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RequestAdapterOptions<'_, '_>: Send, Sync);
+
+impl Adapter {
+    /// Returns a globally-unique identifier for this `Adapter`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+
+    /// Requests a connection to a physical device, creating a logical device.
+    ///
+    /// Returns the [`Device`] together with a [`Queue`] that executes command buffers.
+    ///
+    /// [Per the WebGPU specification], an [`Adapter`] may only be used once to create a device.
+    /// If another device is wanted, call [`Instance::request_adapter()`] again to get a fresh
+    /// [`Adapter`].
+    /// However, `wgpu` does not currently enforce this restriction.
+    ///
+    /// # Arguments
+    ///
+    /// - `desc` - Description of the features and limits requested from the given device.
+    /// - `trace_path` - Can be used for API call tracing, if that feature is
+    ///   enabled in `wgpu-core`.
+    ///
+    /// # Panics
+    ///
+    /// - `request_device()` was already called on this `Adapter`.
+    /// - Features specified by `desc` are not supported by this adapter.
+    /// - Unsafe features were requested but not enabled when requesting the adapter.
+    /// - Limits requested exceed the values provided by the adapter.
+    /// - Adapter does not support all features wgpu requires to safely operate.
+    ///
+    /// [Per the WebGPU specification]: https://www.w3.org/TR/webgpu/#dom-gpuadapter-requestdevice
+    pub fn request_device(
+        &self,
+        desc: &DeviceDescriptor<'_>,
+        trace_path: Option<&std::path::Path>,
+    ) -> impl Future<Output = Result<(Device, Queue), RequestDeviceError>> + WasmNotSend {
+        let context = Arc::clone(&self.context);
+        let device = DynContext::adapter_request_device(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            desc,
+            trace_path,
+        );
+        async move {
+            device.await.map(
+                |DeviceRequest {
+                     device_id,
+                     device_data,
+                     queue_id,
+                     queue_data,
+                 }| {
+                    (
+                        Device {
+                            context: Arc::clone(&context),
+                            id: device_id,
+                            data: device_data,
+                        },
+                        Queue {
+                            context,
+                            id: queue_id,
+                            data: queue_data,
+                        },
+                    )
+                },
+            )
+        }
+    }
+
+    /// Create a wgpu [`Device`] and [`Queue`] from a wgpu-hal `OpenDevice`
+    ///
+    /// # Safety
+    ///
+    /// - `hal_device` must be created from this adapter's internal handle.
+    /// - `desc.features` must be a subset of `hal_device` features.
+    #[cfg(wgpu_core)]
+    pub unsafe fn create_device_from_hal<A: wgc::hal_api::HalApi>(
+        &self,
+        hal_device: hal::OpenDevice<A>,
+        desc: &DeviceDescriptor<'_>,
+        trace_path: Option<&std::path::Path>,
+    ) -> Result<(Device, Queue), RequestDeviceError> {
+        let context = Arc::clone(&self.context);
+        unsafe {
+            self.context
+                .as_any()
+                .downcast_ref::<crate::backend::ContextWgpuCore>()
+                // Part of the safety requirements is that the device was generated from the same adapter.
+                // Therefore, unwrap is fine here since only `ContextWgpuCore`-based adapters have the ability to create hal devices.
+                .unwrap()
+                .create_device_from_hal(&self.id.into(), hal_device, desc, trace_path)
+        }
+        .map(|(device, queue)| {
+            (
+                Device {
+                    context: Arc::clone(&context),
+                    id: device.id().into(),
+                    data: Box::new(device),
+                },
+                Queue {
+                    context,
+                    id: queue.id().into(),
+                    data: Box::new(queue),
+                },
+            )
+        })
+    }
+
+    /// Apply a callback to this `Adapter`'s underlying backend adapter.
+    ///
+    /// If this `Adapter` is implemented by the backend API given by `A` (Vulkan,
+    /// Dx12, etc.), then apply `hal_adapter_callback` to `Some(&adapter)`, where
+    /// `adapter` is the underlying backend adapter type, [`A::Adapter`].
+    ///
+    /// If this `Adapter` uses a different backend, apply `hal_adapter_callback`
+    /// to `None`.
+    ///
+    /// The adapter is locked for reading while `hal_adapter_callback` runs. If
+    /// the callback attempts to perform any `wgpu` operations that require
+    /// write access to the adapter, deadlock will occur. The locks are
+    /// automatically released when the callback returns.
+    ///
+    /// # Safety
+    ///
+    /// - The raw handle passed to the callback must not be manually destroyed.
+    ///
+    /// [`A::Adapter`]: hal::Api::Adapter
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Adapter>) -> R, R>(
+        &self,
+        hal_adapter_callback: F,
+    ) -> R {
+        if let Some(ctx) = self
+            .context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+        {
+            unsafe { ctx.adapter_as_hal::<A, F, R>(self.id.into(), hal_adapter_callback) }
+        } else {
+            hal_adapter_callback(None)
+        }
+    }
+
+    /// Returns whether this adapter may present to the passed surface.
+    pub fn is_surface_supported(&self, surface: &Surface<'_>) -> bool {
+        DynContext::adapter_is_surface_supported(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            &surface.id,
+            surface.surface_data.as_ref(),
+        )
+    }
+
+    /// The features which can be used to create devices on this adapter.
+    pub fn features(&self) -> Features {
+        DynContext::adapter_features(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// The best limits which can be used to create devices on this adapter.
+    pub fn limits(&self) -> Limits {
+        DynContext::adapter_limits(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Get info about the adapter itself.
+    pub fn get_info(&self) -> AdapterInfo {
+        DynContext::adapter_get_info(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Get the downlevel capabilities of the adapter.
+    pub fn get_downlevel_capabilities(&self) -> DownlevelCapabilities {
+        DynContext::adapter_downlevel_capabilities(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Returns the features supported for a given texture format by this adapter.
+    ///
+    /// Note that the WebGPU spec further restricts the available usages/features.
+    /// To disable these restrictions on a device, request the [`Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES`] feature.
+    pub fn get_texture_format_features(&self, format: TextureFormat) -> TextureFormatFeatures {
+        DynContext::adapter_get_texture_format_features(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            format,
+        )
+    }
+
+    /// Generates a timestamp using the clock used by the presentation engine.
+    ///
+    /// When comparing completely opaque timestamp systems, we need a way of generating timestamps that signal
+    /// the exact same time. You can do this by calling your own timestamp function immediately after a call to
+    /// this function. This should result in timestamps that are 0.5 to 5 microseconds apart. There are locks
+    /// that must be taken during the call, so don't call your function before.
+    ///
+    /// ```no_run
+    /// # let adapter: wgpu::Adapter = panic!();
+    /// # let some_code = || wgpu::PresentationTimestamp::INVALID_TIMESTAMP;
+    /// use std::time::{Duration, Instant};
+    /// let presentation = adapter.get_presentation_timestamp();
+    /// let instant = Instant::now();
+    ///
+    /// // We can now turn a new presentation timestamp into an Instant.
+    /// let some_pres_timestamp = some_code();
+    /// let duration = Duration::from_nanos((some_pres_timestamp.0 - presentation.0) as u64);
+    /// let new_instant: Instant = instant + duration;
+    /// ```
+    ///
+    /// [Instant]: std::time::Instant
+    pub fn get_presentation_timestamp(&self) -> PresentationTimestamp {
+        DynContext::adapter_get_presentation_timestamp(&*self.context, &self.id, self.data.as_ref())
+    }
+}
diff --git a/wgpu/src/api/bind_group.rs b/wgpu/src/api/bind_group.rs
new file mode 100644
index 0000000000..51c1efac74
--- /dev/null
+++ b/wgpu/src/api/bind_group.rs
@@ -0,0 +1,151 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a binding group.
+///
+/// A `BindGroup` represents the set of resources bound to the bindings described by a
+/// [`BindGroupLayout`]. It can be created with [`Device::create_bind_group`]. A `BindGroup` can
+/// be bound to a particular [`RenderPass`] with [`RenderPass::set_bind_group`], or to a
+/// [`ComputePass`] with [`ComputePass::set_bind_group`].
+///
+/// Corresponds to [WebGPU `GPUBindGroup`](https://gpuweb.github.io/gpuweb/#gpubindgroup).
+#[derive(Debug)]
+pub struct BindGroup {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BindGroup: Send, Sync);
+
+impl BindGroup {
+    /// Returns a globally-unique identifier for this `BindGroup`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+}
+
+impl Drop for BindGroup {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.bind_group_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Resource that can be bound to a pipeline.
+///
+/// Corresponds to [WebGPU `GPUBindingResource`](
+/// https://gpuweb.github.io/gpuweb/#typedefdef-gpubindingresource).
+#[non_exhaustive]
+#[derive(Clone, Debug)]
+pub enum BindingResource<'a> {
+    /// Binding is backed by a buffer.
+    ///
+    /// Corresponds to [`wgt::BufferBindingType::Uniform`] and [`wgt::BufferBindingType::Storage`]
+    /// with [`BindGroupLayoutEntry::count`] set to None.
+    Buffer(BufferBinding<'a>),
+    /// Binding is backed by an array of buffers.
+    ///
+    /// [`Features::BUFFER_BINDING_ARRAY`] must be supported to use this feature.
+    ///
+    /// Corresponds to [`wgt::BufferBindingType::Uniform`] and [`wgt::BufferBindingType::Storage`]
+    /// with [`BindGroupLayoutEntry::count`] set to Some.
+    BufferArray(&'a [BufferBinding<'a>]),
+    /// Binding is a sampler.
+    ///
+    /// Corresponds to [`wgt::BindingType::Sampler`] with [`BindGroupLayoutEntry::count`] set to None.
+    Sampler(&'a Sampler),
+    /// Binding is backed by an array of samplers.
+    ///
+    /// [`Features::TEXTURE_BINDING_ARRAY`] must be supported to use this feature.
+    ///
+    /// Corresponds to [`wgt::BindingType::Sampler`] with [`BindGroupLayoutEntry::count`] set
+    /// to Some.
+    SamplerArray(&'a [&'a Sampler]),
+    /// Binding is backed by a texture.
+    ///
+    /// Corresponds to [`wgt::BindingType::Texture`] and [`wgt::BindingType::StorageTexture`] with
+    /// [`BindGroupLayoutEntry::count`] set to None.
+    TextureView(&'a TextureView),
+    /// Binding is backed by an array of textures.
+    ///
+    /// [`Features::TEXTURE_BINDING_ARRAY`] must be supported to use this feature.
+    ///
+    /// Corresponds to [`wgt::BindingType::Texture`] and [`wgt::BindingType::StorageTexture`] with
+    /// [`BindGroupLayoutEntry::count`] set to Some.
+    TextureViewArray(&'a [&'a TextureView]),
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BindingResource<'_>: Send, Sync);
+
+/// Describes the segment of a buffer to bind.
+///
+/// Corresponds to [WebGPU `GPUBufferBinding`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpubufferbinding).
+#[derive(Clone, Debug)]
+pub struct BufferBinding<'a> {
+    /// The buffer to bind.
+    pub buffer: &'a Buffer,
+
+    /// Base offset of the buffer, in bytes.
+    ///
+    /// If the [`has_dynamic_offset`] field of this buffer's layout entry is
+    /// `true`, the offset here will be added to the dynamic offset passed to
+    /// [`RenderPass::set_bind_group`] or [`ComputePass::set_bind_group`].
+    ///
+    /// If the buffer was created with [`BufferUsages::UNIFORM`], then this
+    /// offset must be a multiple of
+    /// [`Limits::min_uniform_buffer_offset_alignment`].
+    ///
+    /// If the buffer was created with [`BufferUsages::STORAGE`], then this
+    /// offset must be a multiple of
+    /// [`Limits::min_storage_buffer_offset_alignment`].
+    ///
+    /// [`has_dynamic_offset`]: BindingType::Buffer::has_dynamic_offset
+    pub offset: BufferAddress,
+
+    /// Size of the binding in bytes, or `None` for using the rest of the buffer.
+    pub size: Option<BufferSize>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BufferBinding<'_>: Send, Sync);
+
+/// An element of a [`BindGroupDescriptor`], consisting of a bindable resource
+/// and the slot to bind it to.
+///
+/// Corresponds to [WebGPU `GPUBindGroupEntry`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpubindgroupentry).
+#[derive(Clone, Debug)]
+pub struct BindGroupEntry<'a> {
+    /// Slot that this entry's resource is bound to. Corresponds to an entry of the same
+    /// binding index in the [`BindGroupLayoutDescriptor`].
+    pub binding: u32,
+    /// Resource to attach to the binding
+    pub resource: BindingResource<'a>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BindGroupEntry<'_>: Send, Sync);
+
+/// Describes a group of bindings and the resources to be bound.
+///
+/// For use with [`Device::create_bind_group`].
+///
+/// Corresponds to [WebGPU `GPUBindGroupDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpubindgroupdescriptor).
+#[derive(Clone, Debug)]
+pub struct BindGroupDescriptor<'a> {
+    /// Debug label of the bind group. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// The [`BindGroupLayout`] that corresponds to this bind group.
+    pub layout: &'a BindGroupLayout,
+    /// The resources to bind to this bind group.
+    pub entries: &'a [BindGroupEntry<'a>],
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BindGroupDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/bind_group_layout.rs b/wgpu/src/api/bind_group_layout.rs
new file mode 100644
index 0000000000..1268c664f1
--- /dev/null
+++ b/wgpu/src/api/bind_group_layout.rs
@@ -0,0 +1,59 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a binding group layout.
+///
+/// A `BindGroupLayout` is a handle to the GPU-side layout of a binding group. It can be used to
+/// create a [`BindGroupDescriptor`] object, which in turn can be used to create a [`BindGroup`]
+/// object with [`Device::create_bind_group`]. A series of `BindGroupLayout`s can also be used to
+/// create a [`PipelineLayoutDescriptor`], which can be used to create a [`PipelineLayout`].
+///
+/// It can be created with [`Device::create_bind_group_layout`].
+///
+/// Corresponds to [WebGPU `GPUBindGroupLayout`](
+/// https://gpuweb.github.io/gpuweb/#gpubindgrouplayout).
+#[derive(Debug)]
+pub struct BindGroupLayout {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BindGroupLayout: Send, Sync);
+
+impl BindGroupLayout {
+    /// Returns a globally-unique identifier for this `BindGroupLayout`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+}
+
+impl Drop for BindGroupLayout {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .bind_group_layout_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Describes a [`BindGroupLayout`].
+///
+/// For use with [`Device::create_bind_group_layout`].
+///
+/// Corresponds to [WebGPU `GPUBindGroupLayoutDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpubindgrouplayoutdescriptor).
+#[derive(Clone, Debug)]
+pub struct BindGroupLayoutDescriptor<'a> {
+    /// Debug label of the bind group layout. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+
+    /// Array of entries in this BindGroupLayout
+    pub entries: &'a [BindGroupLayoutEntry],
+}
+static_assertions::assert_impl_all!(BindGroupLayoutDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/buffer.rs b/wgpu/src/api/buffer.rs
new file mode 100644
index 0000000000..6f54637994
--- /dev/null
+++ b/wgpu/src/api/buffer.rs
@@ -0,0 +1,730 @@
+use std::{
+    error, fmt,
+    ops::{Bound, Deref, DerefMut, Range, RangeBounds},
+    sync::Arc,
+    thread,
+};
+
+use parking_lot::Mutex;
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+/// Handle to a GPU-accessible buffer.
+///
+/// Created with [`Device::create_buffer`] or
+/// [`DeviceExt::create_buffer_init`](util::DeviceExt::create_buffer_init).
+///
+/// Corresponds to [WebGPU `GPUBuffer`](https://gpuweb.github.io/gpuweb/#buffer-interface).
+///
+/// A `Buffer`'s bytes have "interior mutability": functions like
+/// [`Queue::write_buffer`] or [mapping] a buffer for writing only require a
+/// `&Buffer`, not a `&mut Buffer`, even though they modify its contents. `wgpu`
+/// prevents simultaneous reads and writes of buffer contents using run-time
+/// checks.
+///
+/// [mapping]: Buffer#mapping-buffers
+///
+/// # Mapping buffers
+///
+/// If a `Buffer` is created with the appropriate [`usage`], it can be *mapped*:
+/// you can make its contents accessible to the CPU as an ordinary `&[u8]` or
+/// `&mut [u8]` slice of bytes. Buffers created with the
+/// [`mapped_at_creation`][mac] flag set are also mapped initially.
+///
+/// Depending on the hardware, the buffer could be memory shared between CPU and
+/// GPU, so that the CPU has direct access to the same bytes the GPU will
+/// consult; or it may be ordinary CPU memory, whose contents the system must
+/// copy to/from the GPU as needed. This crate's API is designed to work the
+/// same way in either case: at any given time, a buffer is either mapped and
+/// available to the CPU, or unmapped and ready for use by the GPU, but never
+/// both. This makes it impossible for either side to observe changes by the
+/// other immediately, and any necessary transfers can be carried out when the
+/// buffer transitions from one state to the other.
+///
+/// There are two ways to map a buffer:
+///
+/// - If [`BufferDescriptor::mapped_at_creation`] is `true`, then the entire
+///   buffer is mapped when it is created. This is the easiest way to initialize
+///   a new buffer. You can set `mapped_at_creation` on any kind of buffer,
+///   regardless of its [`usage`] flags.
+///
+/// - If the buffer's [`usage`] includes the [`MAP_READ`] or [`MAP_WRITE`]
+///   flags, then you can call `buffer.slice(range).map_async(mode, callback)`
+///   to map the portion of `buffer` given by `range`. This waits for the GPU to
+///   finish using the buffer, and invokes `callback` as soon as the buffer is
+///   safe for the CPU to access.
+///
+/// Once a buffer is mapped:
+///
+/// - You can call `buffer.slice(range).get_mapped_range()` to obtain a
+///   [`BufferView`], which dereferences to a `&[u8]` that you can use to read
+///   the buffer's contents.
+///
+/// - Or, you can call `buffer.slice(range).get_mapped_range_mut()` to obtain a
+///   [`BufferViewMut`], which dereferences to a `&mut [u8]` that you can use to
+///   read and write the buffer's contents.
+///
+/// The given `range` must fall within the mapped portion of the buffer. If you
+/// attempt to access overlapping ranges, even for shared access only, these
+/// methods panic.
+///
+/// While a buffer is mapped, you may not submit any commands to the GPU that
+/// access it. You may record command buffers that use the buffer, but if you
+/// submit them while the buffer is mapped, submission will panic.
+///
+/// When you are done using the buffer on the CPU, you must call
+/// [`Buffer::unmap`] to make it available for use by the GPU again. All
+/// [`BufferView`] and [`BufferViewMut`] views referring to the buffer must be
+/// dropped before you unmap it; otherwise, [`Buffer::unmap`] will panic.
+///
+/// # Example
+///
+/// If `buffer` was created with [`BufferUsages::MAP_WRITE`], we could fill it
+/// with `f32` values like this:
+///
+/// ```no_run
+/// # mod bytemuck {
+/// #     pub fn cast_slice_mut(bytes: &mut [u8]) -> &mut [f32] { todo!() }
+/// # }
+/// # let device: wgpu::Device = todo!();
+/// # let buffer: wgpu::Buffer = todo!();
+/// let buffer = std::sync::Arc::new(buffer);
+/// let capturable = buffer.clone();
+/// buffer.slice(..).map_async(wgpu::MapMode::Write, move |result| {
+///     if result.is_ok() {
+///         let mut view = capturable.slice(..).get_mapped_range_mut();
+///         let floats: &mut [f32] = bytemuck::cast_slice_mut(&mut view);
+///         floats.fill(42.0);
+///         drop(view);
+///         capturable.unmap();
+///     }
+/// });
+/// ```
+///
+/// This code takes the following steps:
+///
+/// - First, it moves `buffer` into an [`Arc`], and makes a clone for capture by
+///   the callback passed to [`map_async`]. Since a [`map_async`] callback may be
+///   invoked from another thread, interaction between the callback and the
+///   thread calling [`map_async`] generally requires some sort of shared heap
+///   data like this. In real code, the [`Arc`] would probably own some larger
+///   structure that itself owns `buffer`.
+///
+/// - Then, it calls [`Buffer::slice`] to make a [`BufferSlice`] referring to
+///   the buffer's entire contents.
+///
+/// - Next, it calls [`BufferSlice::map_async`] to request that the bytes to
+///   which the slice refers be made accessible to the CPU ("mapped"). This may
+///   entail waiting for previously enqueued operations on `buffer` to finish.
+///   Although [`map_async`] itself always returns immediately, it saves the
+///   callback function to be invoked later.
+///
+/// - When some later call to [`Device::poll`] or [`Instance::poll_all`] (not
+///   shown in this example) determines that the buffer is mapped and ready for
+///   the CPU to use, it invokes the callback function.
+///
+/// - The callback function calls [`Buffer::slice`] and then
+///   [`BufferSlice::get_mapped_range_mut`] to obtain a [`BufferViewMut`], which
+///   dereferences to a `&mut [u8]` slice referring to the buffer's bytes.
+///
+/// - It then uses the [`bytemuck`] crate to turn the `&mut [u8]` into a `&mut
+///   [f32]`, and calls the slice [`fill`] method to fill the buffer with a
+///   useful value.
+///
+/// - Finally, the callback drops the view and calls [`Buffer::unmap`] to unmap
+///   the buffer. In real code, the callback would also need to do some sort of
+///   synchronization to let the rest of the program know that it has completed
+///   its work.
+///
+/// If using [`map_async`] directly is awkward, you may find it more convenient to
+/// use [`Queue::write_buffer`] and [`util::DownloadBuffer::read_buffer`].
+/// However, those each have their own tradeoffs; the asynchronous nature of GPU
+/// execution makes it hard to avoid friction altogether.
+///
+/// [`Arc`]: std::sync::Arc
+/// [`map_async`]: BufferSlice::map_async
+/// [`bytemuck`]: https://crates.io/crates/bytemuck
+/// [`fill`]: slice::fill
+///
+/// ## Mapping buffers on the web
+///
+/// When compiled to WebAssembly and running in a browser content process,
+/// `wgpu` implements its API in terms of the browser's WebGPU implementation.
+/// In this context, `wgpu` is further isolated from the GPU:
+///
+/// - Depending on the browser's WebGPU implementation, mapping and unmapping
+///   buffers probably entails copies between WebAssembly linear memory and the
+///   graphics driver's buffers.
+///
+/// - All modern web browsers isolate web content in its own sandboxed process,
+///   which can only interact with the GPU via interprocess communication (IPC).
+///   Although most browsers' IPC systems use shared memory for large data
+///   transfers, there will still probably need to be copies into and out of the
+///   shared memory buffers.
+///
+/// All of these copies contribute to the cost of buffer mapping in this
+/// configuration.
+///
+/// [`usage`]: BufferDescriptor::usage
+/// [mac]: BufferDescriptor::mapped_at_creation
+/// [`MAP_READ`]: BufferUsages::MAP_READ
+/// [`MAP_WRITE`]: BufferUsages::MAP_WRITE
+#[derive(Debug)]
+pub struct Buffer {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+    pub(crate) map_context: Mutex<MapContext>,
+    pub(crate) size: wgt::BufferAddress,
+    pub(crate) usage: BufferUsages,
+    // Todo: missing map_state https://www.w3.org/TR/webgpu/#dom-gpubuffer-mapstate
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Buffer: Send, Sync);
+
+impl Buffer {
+    /// Returns a globally-unique identifier for this `Buffer`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+
+    /// Return the binding view of the entire buffer.
+    pub fn as_entire_binding(&self) -> BindingResource<'_> {
+        BindingResource::Buffer(self.as_entire_buffer_binding())
+    }
+
+    /// Return a [`BufferBinding`] covering the entire buffer (offset 0, no explicit size).
+    pub fn as_entire_buffer_binding(&self) -> BufferBinding<'_> {
+        BufferBinding {
+            buffer: self,
+            offset: 0,
+            size: None,
+        }
+    }
+
+    /// Returns the inner hal Buffer using a callback. The hal buffer will be `None` if the
+    /// backend type argument does not match with this wgpu Buffer
+    ///
+    /// # Safety
+    ///
+    /// - The raw handle obtained from the hal Buffer must not be manually destroyed
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Buffer>) -> R, R>(
+        &self,
+        hal_buffer_callback: F,
+    ) -> R {
+        let id = self.id;
+
+        if let Some(ctx) = self
+            .context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+        {
+            unsafe { ctx.buffer_as_hal::<A, F, R>(id.into(), hal_buffer_callback) }
+        } else {
+            hal_buffer_callback(None)
+        }
+    }
+
+    /// Return a slice of a [`Buffer`]'s bytes.
+    ///
+    /// Return a [`BufferSlice`] referring to the portion of `self`'s contents
+    /// indicated by `bounds`. Regardless of what sort of data `self` stores,
+    /// `bounds` start and end are given in bytes.
+    ///
+    /// A [`BufferSlice`] can be used to supply vertex and index data, or to map
+    /// buffer contents for access from the CPU. See the [`BufferSlice`]
+    /// documentation for details.
+    ///
+    /// The `bounds` argument can be half or fully unbounded: for example,
+    /// `buffer.slice(..)` refers to the entire buffer, and `buffer.slice(n..)`
+    /// refers to the portion starting at the `n`th byte and extending to the
+    /// end of the buffer.
+    pub fn slice<S: RangeBounds<BufferAddress>>(&self, bounds: S) -> BufferSlice<'_> {
+        let (offset, size) = range_to_offset_size(bounds);
+        BufferSlice {
+            buffer: self,
+            offset,
+            size,
+        }
+    }
+
+    /// Flushes any pending write operations and unmaps the buffer from host memory.
+    pub fn unmap(&self) {
+        self.map_context.lock().reset();
+        DynContext::buffer_unmap(&*self.context, &self.id, self.data.as_ref());
+    }
+
+    /// Destroy the associated native resources as soon as possible.
+    pub fn destroy(&self) {
+        DynContext::buffer_destroy(&*self.context, &self.id, self.data.as_ref());
+    }
+
+    /// Returns the length of the buffer allocation in bytes.
+    ///
+    /// This is always equal to the `size` that was specified when creating the buffer.
+    pub fn size(&self) -> BufferAddress {
+        self.size
+    }
+
+    /// Returns the allowed usages for this `Buffer`.
+    ///
+    /// This is always equal to the `usage` that was specified when creating the buffer.
+    pub fn usage(&self) -> BufferUsages {
+        self.usage
+    }
+}
+
+/// A slice of a [`Buffer`], to be mapped, used for vertex or index data, or the like.
+///
+/// You can create a `BufferSlice` by calling [`Buffer::slice`]:
+///
+/// ```no_run
+/// # let buffer: wgpu::Buffer = todo!();
+/// let slice = buffer.slice(10..20);
+/// ```
+///
+/// This returns a slice referring to the second ten bytes of `buffer`. To get a
+/// slice of the entire `Buffer`:
+///
+/// ```no_run
+/// # let buffer: wgpu::Buffer = todo!();
+/// let whole_buffer_slice = buffer.slice(..);
+/// ```
+///
+/// You can pass buffer slices to methods like [`RenderPass::set_vertex_buffer`]
+/// and [`RenderPass::set_index_buffer`] to indicate which portion of the buffer
+/// a draw call should consult.
+///
+/// To access the slice's contents on the CPU, you must first [map] the buffer,
+/// and then call [`BufferSlice::get_mapped_range`] or
+/// [`BufferSlice::get_mapped_range_mut`] to obtain a view of the slice's
+/// contents. See the documentation on [mapping][map] for more details,
+/// including example code.
+///
+/// Unlike a Rust shared slice `&[T]`, whose existence guarantees that
+/// nobody else is modifying the `T` values to which it refers, a
+/// [`BufferSlice`] doesn't guarantee that the buffer's contents aren't
+/// changing. You can still record and submit commands operating on the
+/// buffer while holding a [`BufferSlice`]. A [`BufferSlice`] simply
+/// represents a certain range of the buffer's bytes.
+///
+/// The `BufferSlice` type is unique to the Rust API of `wgpu`. In the WebGPU
+/// specification, an offset and size are specified as arguments to each call
+/// working with the [`Buffer`], instead.
+///
+/// [map]: Buffer#mapping-buffers
+#[derive(Copy, Clone, Debug)]
+pub struct BufferSlice<'a> {
+    pub(crate) buffer: &'a Buffer,
+    pub(crate) offset: BufferAddress,
+    pub(crate) size: Option<BufferSize>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BufferSlice<'_>: Send, Sync);
+
+impl<'a> BufferSlice<'a> {
+    /// Map the buffer. Buffer is ready to map once the callback is called.
+    ///
+    /// For the callback to complete, either `queue.submit(..)`, `instance.poll_all(..)`, or `device.poll(..)`
+    /// must be called elsewhere in the runtime, possibly integrated into an event loop or run on a separate thread.
+    ///
+    /// The callback will be called on the thread that first calls the above functions after the gpu work
+    /// has completed. There are no restrictions on the code you can run in the callback, however on native the
+    /// call to the function will not complete until the callback returns, so prefer keeping callbacks short
+    /// and used to set flags, send messages, etc.
+    pub fn map_async(
+        &self,
+        mode: MapMode,
+        callback: impl FnOnce(Result<(), BufferAsyncError>) + WasmNotSend + 'static,
+    ) {
+        let mut mc = self.buffer.map_context.lock();
+        assert_eq!(
+            mc.initial_range,
+            0..0,
+            "Buffer {:?} is already mapped",
+            self.buffer.id
+        );
+        let end = match self.size {
+            Some(s) => self.offset + s.get(),
+            None => mc.total_size,
+        };
+        mc.initial_range = self.offset..end;
+
+        DynContext::buffer_map_async(
+            &*self.buffer.context,
+            &self.buffer.id,
+            self.buffer.data.as_ref(),
+            mode,
+            self.offset..end,
+            Box::new(callback),
+        )
+    }
+
+    /// Gain read-only access to the bytes of a [mapped] [`Buffer`].
+    ///
+    /// Return a [`BufferView`] referring to the buffer range represented by
+    /// `self`. See the documentation for [`BufferView`] for details.
+    ///
+    /// # Panics
+    ///
+    /// - This panics if the buffer to which `self` refers is not currently
+    ///   [mapped].
+    ///
+    /// - If you try to create overlapping views of a buffer, mutable or
+    ///   otherwise, `get_mapped_range` will panic.
+    ///
+    /// [mapped]: Buffer#mapping-buffers
+    pub fn get_mapped_range(&self) -> BufferView<'a> {
+        let end = self.buffer.map_context.lock().add(self.offset, self.size);
+        let data = DynContext::buffer_get_mapped_range(
+            &*self.buffer.context,
+            &self.buffer.id,
+            self.buffer.data.as_ref(),
+            self.offset..end,
+        );
+        BufferView { slice: *self, data }
+    }
+
+    /// Synchronously and immediately map a buffer for reading. This fails if the buffer has not already been
+    /// made mappable through [`BufferDescriptor::mapped_at_creation`] or [`BufferSlice::map_async`].
+    ///
+    /// This is useful when targeting WebGPU and you want to pass mapped data directly to js.
+    /// Unlike `get_mapped_range` which unconditionally copies mapped data into the wasm heap,
+    /// this function directly hands you the ArrayBuffer that we mapped the data into in js.
+    ///
+    /// This is only available on WebGPU; on any other backend this will return `None`.
+    #[cfg(webgpu)]
+    pub fn get_mapped_range_as_array_buffer(&self) -> Option<js_sys::ArrayBuffer> {
+        self.buffer
+            .context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWebGpu>()
+            .map(|ctx| {
+                let buffer_data = crate::context::downcast_ref(self.buffer.data.as_ref());
+                let end = self.buffer.map_context.lock().add(self.offset, self.size);
+                ctx.buffer_get_mapped_range_as_array_buffer(buffer_data, self.offset..end)
+            })
+    }
+
+    /// Gain write access to the bytes of a [mapped] [`Buffer`].
+    ///
+    /// Return a [`BufferViewMut`] referring to the buffer range represented by
+    /// `self`. See the documentation for [`BufferViewMut`] for more details.
+    ///
+    /// # Panics
+    ///
+    /// - This panics if the buffer to which `self` refers is not currently
+    ///   [mapped].
+    ///
+    /// - If you try to create overlapping views of a buffer, mutable or
+    ///   otherwise, `get_mapped_range_mut` will panic.
+    ///
+    /// [mapped]: Buffer#mapping-buffers
+    pub fn get_mapped_range_mut(&self) -> BufferViewMut<'a> {
+        let end = self.buffer.map_context.lock().add(self.offset, self.size);
+        let data = DynContext::buffer_get_mapped_range(
+            &*self.buffer.context,
+            &self.buffer.id,
+            self.buffer.data.as_ref(),
+            self.offset..end,
+        );
+        BufferViewMut {
+            slice: *self,
+            data,
+            readable: self.buffer.usage.contains(BufferUsages::MAP_READ),
+        }
+    }
+}
+
+/// The mapped portion of a buffer, if any, and its outstanding views.
+///
+/// This ensures that views fall within the mapped range and don't overlap, and
+/// also takes care of turning `Option<BufferSize>` sizes into actual buffer
+/// offsets.
+#[derive(Debug)]
+pub(crate) struct MapContext {
+    /// The overall size of the buffer.
+    ///
+    /// This is just a convenient copy of [`Buffer::size`].
+    pub(crate) total_size: BufferAddress,
+
+    /// The range of the buffer that is mapped.
+    ///
+    /// This is `0..0` if the buffer is not mapped. This becomes non-empty when
+    /// the buffer is mapped at creation time, and when you call `map_async` on
+    /// some [`BufferSlice`] (so technically, it indicates the portion that is
+    /// *or has been requested to be* mapped.)
+    ///
+    /// All [`BufferView`]s and [`BufferViewMut`]s must fall within this range.
+    pub(crate) initial_range: Range<BufferAddress>,
+
+    /// The ranges covered by all outstanding [`BufferView`]s and
+    /// [`BufferViewMut`]s. These are non-overlapping, and are all contained
+    /// within `initial_range`.
+    sub_ranges: Vec<Range<BufferAddress>>,
+}
+
+impl MapContext {
+    pub(crate) fn new(total_size: BufferAddress) -> Self {
+        Self {
+            total_size,
+            initial_range: 0..0,
+            sub_ranges: Vec::new(),
+        }
+    }
+
+    /// Record that the buffer is no longer mapped.
+    fn reset(&mut self) {
+        self.initial_range = 0..0;
+
+        assert!(
+            self.sub_ranges.is_empty(),
+            "You cannot unmap a buffer that still has accessible mapped views"
+        );
+    }
+
+    /// Record that the `size` bytes of the buffer at `offset` are now viewed.
+    ///
+    /// Return the byte offset within the buffer of the end of the viewed range.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the given range overlaps with any existing range.
+    fn add(&mut self, offset: BufferAddress, size: Option<BufferSize>) -> BufferAddress {
+        let end = match size {
+            Some(s) => offset + s.get(),
+            None => self.initial_range.end,
+        };
+        assert!(self.initial_range.start <= offset && end <= self.initial_range.end);
+        // This check is essential for avoiding undefined behavior: it is the
+        // only thing that ensures that `&mut` references to the buffer's
+        // contents don't alias anything else.
+        for sub in self.sub_ranges.iter() {
+            assert!(
+                end <= sub.start || offset >= sub.end,
+                "Intersecting map range with {sub:?}"
+            );
+        }
+        self.sub_ranges.push(offset..end);
+        end
+    }
+
+    /// Record that the `size` bytes of the buffer at `offset` are no longer viewed.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the given range does not exactly match one previously
+    /// passed to [`add`].
+    ///
+    /// [`add`]: MapContext::add
+    fn remove(&mut self, offset: BufferAddress, size: Option<BufferSize>) {
+        let end = match size {
+            Some(s) => offset + s.get(),
+            None => self.initial_range.end,
+        };
+
+        let index = self
+            .sub_ranges
+            .iter()
+            .position(|r| *r == (offset..end))
+            .expect("unable to remove range from map context");
+        self.sub_ranges.swap_remove(index);
+    }
+}
+
+/// Describes a [`Buffer`].
+///
+/// For use with [`Device::create_buffer`].
+///
+/// Corresponds to [WebGPU `GPUBufferDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpubufferdescriptor).
+pub type BufferDescriptor<'a> = wgt::BufferDescriptor<Label<'a>>;
+static_assertions::assert_impl_all!(BufferDescriptor<'_>: Send, Sync);
+
+/// Error occurred when trying to async map a buffer.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub struct BufferAsyncError;
+static_assertions::assert_impl_all!(BufferAsyncError: Send, Sync);
+
+impl fmt::Display for BufferAsyncError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "Error occurred when trying to async map a buffer")
+    }
+}
+
+impl error::Error for BufferAsyncError {}
+
+/// Type of buffer mapping.
+#[derive(Debug, Clone, Copy, Eq, PartialEq)]
+pub enum MapMode {
+    /// Map only for reading
+    Read,
+    /// Map only for writing
+    Write,
+}
+static_assertions::assert_impl_all!(MapMode: Send, Sync);
+
+/// A read-only view of a mapped buffer's bytes.
+///
+/// To get a `BufferView`, first [map] the buffer, and then
+/// call `buffer.slice(range).get_mapped_range()`.
+///
+/// `BufferView` dereferences to `&[u8]`, so you can use all the usual Rust
+/// slice methods to access the buffer's contents. It also implements
+/// `AsRef<[u8]>`, if that's more convenient.
+///
+/// Before the buffer can be unmapped, all `BufferView`s observing it
+/// must be dropped. Otherwise, the call to [`Buffer::unmap`] will panic.
+///
+/// For example code, see the documentation on [mapping buffers][map].
+///
+/// [map]: Buffer#mapping-buffers
+/// [`map_async`]: BufferSlice::map_async
+#[derive(Debug)]
+pub struct BufferView<'a> {
+    slice: BufferSlice<'a>,
+    data: Box<dyn crate::context::BufferMappedRange>,
+}
+
+impl std::ops::Deref for BufferView<'_> {
+    type Target = [u8];
+
+    #[inline]
+    fn deref(&self) -> &[u8] {
+        self.data.slice()
+    }
+}
+
+impl AsRef<[u8]> for BufferView<'_> {
+    #[inline]
+    fn as_ref(&self) -> &[u8] {
+        self.data.slice()
+    }
+}
+
+/// A write-only view of a mapped buffer's bytes.
+///
+/// To get a `BufferViewMut`, first [map] the buffer, and then
+/// call `buffer.slice(range).get_mapped_range_mut()`.
+///
+/// `BufferViewMut` dereferences to `&mut [u8]`, so you can use all the usual
+/// Rust slice methods to access the buffer's contents. It also implements
+/// `AsMut<[u8]>`, if that's more convenient.
+///
+/// It is possible to read the buffer using this view, but doing so is not
+/// recommended, as it is likely to be slow.
+///
+/// Before the buffer can be unmapped, all `BufferViewMut`s observing it
+/// must be dropped. Otherwise, the call to [`Buffer::unmap`] will panic.
+///
+/// For example code, see the documentation on [mapping buffers][map].
+///
+/// [map]: Buffer#mapping-buffers
+#[derive(Debug)]
+pub struct BufferViewMut<'a> {
+    slice: BufferSlice<'a>,
+    data: Box<dyn crate::context::BufferMappedRange>,
+    readable: bool,
+}
+
+impl AsMut<[u8]> for BufferViewMut<'_> {
+    #[inline]
+    fn as_mut(&mut self) -> &mut [u8] {
+        self.data.slice_mut()
+    }
+}
+
+impl Deref for BufferViewMut<'_> {
+    type Target = [u8];
+
+    fn deref(&self) -> &Self::Target {
+        if !self.readable {
+            log::warn!("Reading from a BufferViewMut is slow and not recommended.");
+        }
+
+        self.data.slice()
+    }
+}
+
+impl DerefMut for BufferViewMut<'_> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.data.slice_mut()
+    }
+}
+
+impl Drop for BufferView<'_> {
+    fn drop(&mut self) {
+        self.slice
+            .buffer
+            .map_context
+            .lock()
+            .remove(self.slice.offset, self.slice.size);
+    }
+}
+
+impl Drop for BufferViewMut<'_> {
+    fn drop(&mut self) {
+        self.slice
+            .buffer
+            .map_context
+            .lock()
+            .remove(self.slice.offset, self.slice.size);
+    }
+}
+
+impl Drop for Buffer {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.buffer_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+fn range_to_offset_size<S: RangeBounds<BufferAddress>>(
+    bounds: S,
+) -> (BufferAddress, Option<BufferSize>) {
+    let offset = match bounds.start_bound() {
+        Bound::Included(&bound) => bound,
+        Bound::Excluded(&bound) => bound + 1,
+        Bound::Unbounded => 0,
+    };
+    let size = match bounds.end_bound() {
+        Bound::Included(&bound) => Some(bound + 1 - offset),
+        Bound::Excluded(&bound) => Some(bound - offset),
+        Bound::Unbounded => None,
+    }
+    .map(|size| BufferSize::new(size).expect("Buffer slices can not be empty"));
+
+    (offset, size)
+}
+#[cfg(test)]
+mod tests {
+    use super::{range_to_offset_size, BufferSize};
+
+    #[test]
+    fn range_to_offset_size_works() {
+        assert_eq!(range_to_offset_size(0..2), (0, BufferSize::new(2)));
+        assert_eq!(range_to_offset_size(2..5), (2, BufferSize::new(3)));
+        assert_eq!(range_to_offset_size(..), (0, None));
+        assert_eq!(range_to_offset_size(21..), (21, None));
+        assert_eq!(range_to_offset_size(0..), (0, None));
+        assert_eq!(range_to_offset_size(..21), (0, BufferSize::new(21)));
+    }
+
+    #[test]
+    #[should_panic]
+    fn range_to_offset_size_panics_for_empty_range() {
+        range_to_offset_size(123..123);
+    }
+
+    #[test]
+    #[should_panic]
+    fn range_to_offset_size_panics_for_unbounded_empty_range() {
+        range_to_offset_size(..0);
+    }
+}
diff --git a/wgpu/src/api/command_buffer.rs b/wgpu/src/api/command_buffer.rs
new file mode 100644
index 0000000000..4d56fe9b2f
--- /dev/null
+++ b/wgpu/src/api/command_buffer.rs
@@ -0,0 +1,31 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a command buffer on the GPU.
+///
+/// A `CommandBuffer` represents a complete sequence of commands that may be submitted to a command
+/// queue with [`Queue::submit`]. A `CommandBuffer` is obtained by recording a series of commands to
+/// a [`CommandEncoder`] and then calling [`CommandEncoder::finish`].
+///
+/// Corresponds to [WebGPU `GPUCommandBuffer`](https://gpuweb.github.io/gpuweb/#command-buffer).
+#[derive(Debug)]
+pub struct CommandBuffer {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: Option<ObjectId>,
+    pub(crate) data: Option<Box<Data>>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(CommandBuffer: Send, Sync);
+
+impl Drop for CommandBuffer {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            if let Some(id) = self.id.take() {
+                self.context
+                    .command_buffer_drop(&id, self.data.take().unwrap().as_ref());
+            }
+        }
+    }
+}
diff --git a/wgpu/src/api/command_encoder.rs b/wgpu/src/api/command_encoder.rs
new file mode 100644
index 0000000000..d8e8594a89
--- /dev/null
+++ b/wgpu/src/api/command_encoder.rs
@@ -0,0 +1,382 @@
+use std::{marker::PhantomData, ops::Range, sync::Arc, thread};
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+/// Encodes a series of GPU operations.
+///
+/// A command encoder can record [`RenderPass`]es, [`ComputePass`]es,
+/// and transfer operations between driver-managed resources like [`Buffer`]s and [`Texture`]s.
+///
+/// When finished recording, call [`CommandEncoder::finish`] to obtain a [`CommandBuffer`] which may
+/// be submitted for execution.
+///
+/// Corresponds to [WebGPU `GPUCommandEncoder`](https://gpuweb.github.io/gpuweb/#command-encoder).
+#[derive(Debug)]
+pub struct CommandEncoder {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: Option<ObjectId>,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(CommandEncoder: Send, Sync);
+
+impl Drop for CommandEncoder {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            if let Some(id) = self.id.take() {
+                self.context.command_encoder_drop(&id, self.data.as_ref());
+            }
+        }
+    }
+}
+
+/// Describes a [`CommandEncoder`].
+///
+/// For use with [`Device::create_command_encoder`].
+///
+/// Corresponds to [WebGPU `GPUCommandEncoderDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpucommandencoderdescriptor).
+pub type CommandEncoderDescriptor<'a> = wgt::CommandEncoderDescriptor<Label<'a>>;
+static_assertions::assert_impl_all!(CommandEncoderDescriptor<'_>: Send, Sync);
+
+pub use wgt::ImageCopyBuffer as ImageCopyBufferBase;
+/// View of a buffer which can be used to copy to/from a texture.
+///
+/// Corresponds to [WebGPU `GPUImageCopyBuffer`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpuimagecopybuffer).
+pub type ImageCopyBuffer<'a> = ImageCopyBufferBase<&'a Buffer>;
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ImageCopyBuffer<'_>: Send, Sync);
+
+pub use wgt::ImageCopyTexture as ImageCopyTextureBase;
+/// View of a texture which can be used to copy to/from a buffer/texture.
+///
+/// Corresponds to [WebGPU `GPUImageCopyTexture`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpuimagecopytexture).
+pub type ImageCopyTexture<'a> = ImageCopyTextureBase<&'a Texture>;
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ImageCopyTexture<'_>: Send, Sync);
+
+pub use wgt::ImageCopyTextureTagged as ImageCopyTextureTaggedBase;
+/// View of a texture which can be used to copy to a texture, including
+/// color space and alpha premultiplication information.
+///
+/// Corresponds to [WebGPU `GPUImageCopyTextureTagged`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpuimagecopytexturetagged).
+pub type ImageCopyTextureTagged<'a> = ImageCopyTextureTaggedBase<&'a Texture>;
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ImageCopyTextureTagged<'_>: Send, Sync);
+
+impl CommandEncoder {
+    /// Finishes recording and returns a [`CommandBuffer`] that can be submitted for execution.
+    pub fn finish(mut self) -> CommandBuffer {
+        let (id, data) = DynContext::command_encoder_finish(
+            &*self.context,
+            self.id.take().unwrap(),
+            self.data.as_mut(),
+        );
+        CommandBuffer {
+            context: Arc::clone(&self.context),
+            id: Some(id),
+            data: Some(data),
+        }
+    }
+
+    /// Begins recording of a render pass.
+    ///
+    /// This function returns a [`RenderPass`] object which records a single render pass.
+    ///
+    /// As long as the returned [`RenderPass`] has not ended,
+    /// any mutating operation on this command encoder causes an error and invalidates it.
+    /// Note that the `'encoder` lifetime relationship protects against this,
+    /// but it is possible to opt out of it by calling [`RenderPass::forget_lifetime`].
+    /// This can be useful for runtime handling of the encoder->pass
+    /// dependency e.g. when pass and encoder are stored in the same data structure.
+    pub fn begin_render_pass<'encoder>(
+        &'encoder mut self,
+        desc: &RenderPassDescriptor<'_>,
+    ) -> RenderPass<'encoder> {
+        let id = self.id.as_ref().unwrap();
+        let (id, data) = DynContext::command_encoder_begin_render_pass(
+            &*self.context,
+            id,
+            self.data.as_ref(),
+            desc,
+        );
+        RenderPass {
+            inner: RenderPassInner {
+                id,
+                data,
+                context: self.context.clone(),
+            },
+            encoder_guard: PhantomData,
+        }
+    }
+
+    /// Begins recording of a compute pass.
+    ///
+    /// This function returns a [`ComputePass`] object which records a single compute pass.
+    ///
+    /// As long as the returned [`ComputePass`] has not ended,
+    /// any mutating operation on this command encoder causes an error and invalidates it.
+    /// Note that the `'encoder` lifetime relationship protects against this,
+    /// but it is possible to opt out of it by calling [`ComputePass::forget_lifetime`].
+    /// This can be useful for runtime handling of the encoder->pass
+    /// dependency e.g. when pass and encoder are stored in the same data structure.
+    pub fn begin_compute_pass<'encoder>(
+        &'encoder mut self,
+        desc: &ComputePassDescriptor<'_>,
+    ) -> ComputePass<'encoder> {
+        let id = self.id.as_ref().unwrap();
+        let (id, data) = DynContext::command_encoder_begin_compute_pass(
+            &*self.context,
+            id,
+            self.data.as_ref(),
+            desc,
+        );
+        ComputePass {
+            inner: ComputePassInner {
+                id,
+                data,
+                context: self.context.clone(),
+            },
+            encoder_guard: PhantomData,
+        }
+    }
+
+    /// Copy data from one buffer to another.
+    ///
+    /// # Panics
+    ///
+    /// - Buffer offsets or copy size not a multiple of [`COPY_BUFFER_ALIGNMENT`].
+    /// - Copy would overrun buffer.
+    /// - Copy within the same buffer.
+    pub fn copy_buffer_to_buffer(
+        &mut self,
+        source: &Buffer,
+        source_offset: BufferAddress,
+        destination: &Buffer,
+        destination_offset: BufferAddress,
+        copy_size: BufferAddress,
+    ) {
+        DynContext::command_encoder_copy_buffer_to_buffer(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            &source.id,
+            source.data.as_ref(),
+            source_offset,
+            &destination.id,
+            destination.data.as_ref(),
+            destination_offset,
+            copy_size,
+        );
+    }
+
+    /// Copy data from a buffer to a texture.
+    pub fn copy_buffer_to_texture(
+        &mut self,
+        source: ImageCopyBuffer<'_>,
+        destination: ImageCopyTexture<'_>,
+        copy_size: Extent3d,
+    ) {
+        DynContext::command_encoder_copy_buffer_to_texture(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            source,
+            destination,
+            copy_size,
+        );
+    }
+
+    /// Copy data from a texture to a buffer.
+    pub fn copy_texture_to_buffer(
+        &mut self,
+        source: ImageCopyTexture<'_>,
+        destination: ImageCopyBuffer<'_>,
+        copy_size: Extent3d,
+    ) {
+        DynContext::command_encoder_copy_texture_to_buffer(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            source,
+            destination,
+            copy_size,
+        );
+    }
+
+    /// Copy data from one texture to another.
+    ///
+    /// # Panics
+    ///
+    /// - Textures are not the same type
+    /// - If a depth texture, or a multisampled texture, the entire texture must be copied
+    /// - Copy would overrun either texture
+    pub fn copy_texture_to_texture(
+        &mut self,
+        source: ImageCopyTexture<'_>,
+        destination: ImageCopyTexture<'_>,
+        copy_size: Extent3d,
+    ) {
+        DynContext::command_encoder_copy_texture_to_texture(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            source,
+            destination,
+            copy_size,
+        );
+    }
+
+    /// Clears texture to zero.
+    ///
+    /// Note that unlike with clear_buffer, `COPY_DST` usage is not required.
+    ///
+    /// # Implementation notes
+    ///
+    /// - implemented either via buffer copies and render/depth target clear, path depends on texture usages
+    /// - behaves like texture zero init, but is performed immediately (clearing is *not* delayed via marking it as uninitialized)
+    ///
+    /// # Panics
+    ///
+    /// - `CLEAR_TEXTURE` extension not enabled
+    /// - Range is out of bounds
+    pub fn clear_texture(&mut self, texture: &Texture, subresource_range: &ImageSubresourceRange) {
+        DynContext::command_encoder_clear_texture(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            texture,
+            subresource_range,
+        );
+    }
+
+    /// Clears buffer to zero.
+    ///
+    /// # Panics
+    ///
+    /// - Buffer does not have `COPY_DST` usage.
+    /// - Range is out of bounds
+    pub fn clear_buffer(
+        &mut self,
+        buffer: &Buffer,
+        offset: BufferAddress,
+        size: Option<BufferAddress>,
+    ) {
+        DynContext::command_encoder_clear_buffer(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            buffer,
+            offset,
+            size,
+        );
+    }
+
+    /// Inserts debug marker.
+    pub fn insert_debug_marker(&mut self, label: &str) {
+        let id = self.id.as_ref().unwrap();
+        DynContext::command_encoder_insert_debug_marker(
+            &*self.context,
+            id,
+            self.data.as_ref(),
+            label,
+        );
+    }
+
+    /// Starts recording commands and groups them into a debug marker group.
+    pub fn push_debug_group(&mut self, label: &str) {
+        let id = self.id.as_ref().unwrap();
+        DynContext::command_encoder_push_debug_group(&*self.context, id, self.data.as_ref(), label);
+    }
+
+    /// Stops command recording and creates debug group.
+    pub fn pop_debug_group(&mut self) {
+        let id = self.id.as_ref().unwrap();
+        DynContext::command_encoder_pop_debug_group(&*self.context, id, self.data.as_ref());
+    }
+
+    /// Resolves a query set, writing the results into the supplied destination buffer.
+    ///
+    /// Occlusion and timestamp queries are 8 bytes each (see [`crate::QUERY_SIZE`]). For pipeline statistics queries,
+    /// see [`PipelineStatisticsTypes`] for more information.
+    pub fn resolve_query_set(
+        &mut self,
+        query_set: &QuerySet,
+        query_range: Range<u32>,
+        destination: &Buffer,
+        destination_offset: BufferAddress,
+    ) {
+        DynContext::command_encoder_resolve_query_set(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            &query_set.id,
+            query_set.data.as_ref(),
+            query_range.start,
+            query_range.end - query_range.start,
+            &destination.id,
+            destination.data.as_ref(),
+            destination_offset,
+        )
+    }
+
+    /// Returns the inner hal CommandEncoder using a callback. The hal command encoder will be `None` if the
+    /// backend type argument does not match with this wgpu CommandEncoder
+    ///
+    /// This method will start the wgpu_core level command recording.
+    ///
+    /// # Safety
+    ///
+    /// - The raw handle obtained from the hal CommandEncoder must not be manually destroyed
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal_mut<
+        A: wgc::hal_api::HalApi,
+        F: FnOnce(Option<&mut A::CommandEncoder>) -> R,
+        R,
+    >(
+        &mut self,
+        hal_command_encoder_callback: F,
+    ) -> Option<R> {
+        use wgc::id::CommandEncoderId;
+
+        self.context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+            .map(|ctx| unsafe {
+                ctx.command_encoder_as_hal_mut::<A, F, R>(
+                    CommandEncoderId::from(self.id.unwrap()),
+                    hal_command_encoder_callback,
+                )
+            })
+    }
+}
+
+/// [`Features::TIMESTAMP_QUERY_INSIDE_ENCODERS`] must be enabled on the device in order to call these functions.
+impl CommandEncoder {
+    /// Issue a timestamp command at this point in the queue.
+    /// The timestamp will be written to the specified query set, at the specified index.
+    ///
+    /// Must be multiplied by [`Queue::get_timestamp_period`] to get
+    /// the value in nanoseconds. Absolute values have no meaning,
+    /// but timestamps can be subtracted to get the time it takes
+    /// for a string of operations to complete.
+    ///
+    /// Attention: Since commands within a command recorder may be reordered,
+    /// there is no strict guarantee that timestamps are taken after all commands
+    /// recorded so far and all before all commands recorded after.
+    /// This may depend both on the backend and the driver.
+    pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
+        DynContext::command_encoder_write_timestamp(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_mut(),
+            &query_set.id,
+            query_set.data.as_ref(),
+            query_index,
+        )
+    }
+}
diff --git a/wgpu/src/api/common_pipeline.rs b/wgpu/src/api/common_pipeline.rs
new file mode 100644
index 0000000000..697507bca2
--- /dev/null
+++ b/wgpu/src/api/common_pipeline.rs
@@ -0,0 +1,64 @@
+use std::collections::HashMap;
+
+use crate::*;
+
+#[derive(Clone, Debug)]
+/// Advanced options for use when a pipeline is compiled
+///
+/// This implements `Default`, and for most users can be set to `Default::default()`
+pub struct PipelineCompilationOptions<'a> {
+    /// Specifies the values of pipeline-overridable constants in the shader module.
+    ///
+    /// If an `@id` attribute was specified on the declaration,
+    /// the key must be the pipeline constant ID as a decimal ASCII number; if not,
+    /// the key must be the constant's identifier name.
+    ///
+    /// The value may represent any of WGSL's concrete scalar types.
+    pub constants: &'a HashMap<String, f64>,
+    /// Whether workgroup scoped memory will be initialized with zero values for this stage.
+    ///
+    /// This is required by the WebGPU spec, but may have overhead which can be avoided
+    /// for cross-platform applications
+    pub zero_initialize_workgroup_memory: bool,
+}
+
+impl<'a> Default for PipelineCompilationOptions<'a> {
+    fn default() -> Self {
+        // HashMap doesn't have a const constructor, due to the use of RandomState
+        // This does introduce some synchronisation costs, but these should be minor,
+        // and might be cheaper than the alternative of getting new random state
+        static DEFAULT_CONSTANTS: std::sync::OnceLock<HashMap<String, f64>> =
+            std::sync::OnceLock::new();
+        let constants = DEFAULT_CONSTANTS.get_or_init(Default::default);
+        Self {
+            constants,
+            zero_initialize_workgroup_memory: true,
+        }
+    }
+}
+
+/// Describes a pipeline cache, which allows reusing compilation work
+/// between program runs.
+///
+/// For use with [`Device::create_pipeline_cache`]
+///
+/// This type is unique to the Rust API of `wgpu`.
+#[derive(Clone, Debug)]
+pub struct PipelineCacheDescriptor<'a> {
+    /// Debug label of the pipeline cache. This might show up in some logs from `wgpu`
+    pub label: Label<'a>,
+    /// The data used to initialise the cache
+    ///
+    /// # Safety
+    ///
+    /// This data must have been provided from a previous call to
+    /// [`PipelineCache::get_data`], if not `None`
+    pub data: Option<&'a [u8]>,
+    /// Whether to create a cache without data when the provided data
+    /// is invalid.
+    ///
+    /// Recommended to set to true
+    pub fallback: bool,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(PipelineCacheDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/compute_pass.rs b/wgpu/src/api/compute_pass.rs
new file mode 100644
index 0000000000..30123b8052
--- /dev/null
+++ b/wgpu/src/api/compute_pass.rs
@@ -0,0 +1,256 @@
+use std::{marker::PhantomData, sync::Arc, thread};
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+/// In-progress recording of a compute pass.
+///
+/// It can be created with [`CommandEncoder::begin_compute_pass`].
+///
+/// Corresponds to [WebGPU `GPUComputePassEncoder`](
+/// https://gpuweb.github.io/gpuweb/#compute-pass-encoder).
+#[derive(Debug)]
+pub struct ComputePass<'encoder> {
+    /// The inner data of the compute pass, separated out so it's easy to replace the lifetime with 'static if desired.
+    pub(crate) inner: ComputePassInner,
+
+    /// This lifetime is used to protect the [`CommandEncoder`] from being used
+    /// while the pass is alive.
+    pub(crate) encoder_guard: PhantomData<&'encoder ()>,
+}
+
+impl<'encoder> ComputePass<'encoder> {
+    /// Drops the lifetime relationship to the parent command encoder, making usage of
+    /// the encoder while this pass is recorded a run-time error instead.
+    ///
+    /// Attention: As long as the compute pass has not been ended, any mutating operation on the parent
+    /// command encoder will cause a run-time error and invalidate it!
+    /// By default, the lifetime constraint prevents this, but it can be useful
+    /// to handle this at run time, such as when storing the pass and encoder in the same
+    /// data structure.
+    ///
+    /// This operation has no effect on pass recording.
+    /// It's a safe operation, since [`CommandEncoder`] is in a locked state as long as the pass is active
+    /// regardless of the lifetime constraint or its absence.
+    pub fn forget_lifetime(self) -> ComputePass<'static> {
+        ComputePass {
+            inner: self.inner,
+            encoder_guard: PhantomData,
+        }
+    }
+
+    /// Sets the active bind group for a given bind group index. The bind group layout
+    /// in the active pipeline when the `dispatch()` function is called must match the layout of this bind group.
+    ///
+    /// If the bind group has dynamic offsets, provide them in the binding order.
+    /// These offsets have to be aligned to [`Limits::min_uniform_buffer_offset_alignment`]
+    /// or [`Limits::min_storage_buffer_offset_alignment`] appropriately.
+    pub fn set_bind_group(
+        &mut self,
+        index: u32,
+        bind_group: &BindGroup,
+        offsets: &[DynamicOffset],
+    ) {
+        DynContext::compute_pass_set_bind_group(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            index,
+            &bind_group.id,
+            bind_group.data.as_ref(),
+            offsets,
+        );
+    }
+
+    /// Sets the active compute pipeline.
+    pub fn set_pipeline(&mut self, pipeline: &ComputePipeline) {
+        DynContext::compute_pass_set_pipeline(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &pipeline.id,
+            pipeline.data.as_ref(),
+        );
+    }
+
+    /// Inserts debug marker.
+    pub fn insert_debug_marker(&mut self, label: &str) {
+        DynContext::compute_pass_insert_debug_marker(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            label,
+        );
+    }
+
+    /// Starts recording commands and groups them into a debug marker group.
+    pub fn push_debug_group(&mut self, label: &str) {
+        DynContext::compute_pass_push_debug_group(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            label,
+        );
+    }
+
+    /// Stops recording commands into the current debug marker group and closes it.
+    pub fn pop_debug_group(&mut self) {
+        DynContext::compute_pass_pop_debug_group(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+        );
+    }
+
+    /// Dispatches compute work operations.
+    ///
+    /// `x`, `y` and `z` denote the number of work groups to dispatch in each dimension.
+    pub fn dispatch_workgroups(&mut self, x: u32, y: u32, z: u32) {
+        DynContext::compute_pass_dispatch_workgroups(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            x,
+            y,
+            z,
+        );
+    }
+
+    /// Dispatches compute work operations, based on the contents of the `indirect_buffer`.
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DispatchIndirectArgs`](crate::util::DispatchIndirectArgs).
+    pub fn dispatch_workgroups_indirect(
+        &mut self,
+        indirect_buffer: &Buffer,
+        indirect_offset: BufferAddress,
+    ) {
+        DynContext::compute_pass_dispatch_workgroups_indirect(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+        );
+    }
+}
+
+/// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions.
+impl<'encoder> ComputePass<'encoder> {
+    /// Set push constant data for subsequent dispatch calls.
+    ///
+    /// Write the bytes in `data` at offset `offset` within push constant
+    /// storage.  Both `offset` and the length of `data` must be
+    /// multiples of [`PUSH_CONSTANT_ALIGNMENT`], which is always 4.
+    ///
+    /// For example, if `offset` is `4` and `data` is eight bytes long, this
+    /// call will write `data` to bytes `4..12` of push constant storage.
+    pub fn set_push_constants(&mut self, offset: u32, data: &[u8]) {
+        DynContext::compute_pass_set_push_constants(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            offset,
+            data,
+        );
+    }
+}
+
+/// [`Features::TIMESTAMP_QUERY_INSIDE_PASSES`] must be enabled on the device in order to call these functions.
+impl<'encoder> ComputePass<'encoder> {
+    /// Issue a timestamp command at this point in the queue. The timestamp will be written to the specified query set, at the specified index.
+    ///
+    /// Must be multiplied by [`Queue::get_timestamp_period`] to get
+    /// the value in nanoseconds. Absolute values have no meaning,
+    /// but timestamps can be subtracted to get the time it takes
+    /// for a string of operations to complete.
+    pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
+        DynContext::compute_pass_write_timestamp(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &query_set.id,
+            query_set.data.as_ref(),
+            query_index,
+        )
+    }
+}
+
+/// [`Features::PIPELINE_STATISTICS_QUERY`] must be enabled on the device in order to call these functions.
+impl<'encoder> ComputePass<'encoder> {
+    /// Start a pipeline statistics query on this compute pass. It can be ended with
+    /// `end_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
+    pub fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, query_index: u32) {
+        DynContext::compute_pass_begin_pipeline_statistics_query(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &query_set.id,
+            query_set.data.as_ref(),
+            query_index,
+        );
+    }
+
+    /// End the pipeline statistics query on this compute pass. It can be started with
+    /// `begin_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
+    pub fn end_pipeline_statistics_query(&mut self) {
+        DynContext::compute_pass_end_pipeline_statistics_query(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+        );
+    }
+}
+
+#[derive(Debug)]
+pub(crate) struct ComputePassInner {
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+    pub(crate) context: Arc<C>,
+}
+
+impl Drop for ComputePassInner {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .compute_pass_end(&mut self.id, self.data.as_mut());
+        }
+    }
+}
+
+/// Describes the timestamp writes of a compute pass.
+///
+/// For use with [`ComputePassDescriptor`].
+/// At least one of `beginning_of_pass_write_index` and `end_of_pass_write_index` must be `Some`.
+///
+/// Corresponds to [WebGPU `GPUComputePassTimestampWrites`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepasstimestampwrites).
+#[derive(Clone, Debug)]
+pub struct ComputePassTimestampWrites<'a> {
+    /// The query set to write to.
+    pub query_set: &'a QuerySet,
+    /// The index of the query set at which a start timestamp of this pass is written, if any.
+    pub beginning_of_pass_write_index: Option<u32>,
+    /// The index of the query set at which an end timestamp of this pass is written, if any.
+    pub end_of_pass_write_index: Option<u32>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ComputePassTimestampWrites<'_>: Send, Sync);
+
+/// Describes the parameters of a compute pass.
+///
+/// For use with [`CommandEncoder::begin_compute_pass`].
+///
+/// Corresponds to [WebGPU `GPUComputePassDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepassdescriptor).
+#[derive(Clone, Default, Debug)]
+pub struct ComputePassDescriptor<'a> {
+    /// Debug label of the compute pass. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// Defines which timestamp values will be written for this pass, and where to write them to.
+    ///
+    /// Requires [`Features::TIMESTAMP_QUERY`] to be enabled.
+    pub timestamp_writes: Option<ComputePassTimestampWrites<'a>>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ComputePassDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/compute_pipeline.rs b/wgpu/src/api/compute_pipeline.rs
new file mode 100644
index 0000000000..d226dd5500
--- /dev/null
+++ b/wgpu/src/api/compute_pipeline.rs
@@ -0,0 +1,76 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a compute pipeline.
+///
+/// A `ComputePipeline` object represents a compute pipeline and its single shader stage.
+/// It can be created with [`Device::create_compute_pipeline`].
+///
+/// Corresponds to [WebGPU `GPUComputePipeline`](https://gpuweb.github.io/gpuweb/#compute-pipeline).
+#[derive(Debug)]
+pub struct ComputePipeline {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ComputePipeline: Send, Sync);
+
+impl ComputePipeline {
+    /// Returns a globally-unique identifier for this `ComputePipeline`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+
+    /// Get an object representing the bind group layout at a given index.
+    pub fn get_bind_group_layout(&self, index: u32) -> BindGroupLayout {
+        let context = Arc::clone(&self.context);
+        let (id, data) = self.context.compute_pipeline_get_bind_group_layout(
+            &self.id,
+            self.data.as_ref(),
+            index,
+        );
+        BindGroupLayout { context, id, data }
+    }
+}
+
+impl Drop for ComputePipeline {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .compute_pipeline_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Describes a compute pipeline.
+///
+/// For use with [`Device::create_compute_pipeline`].
+///
+/// Corresponds to [WebGPU `GPUComputePipelineDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepipelinedescriptor).
+#[derive(Clone, Debug)]
+pub struct ComputePipelineDescriptor<'a> {
+    /// Debug label of the pipeline. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// The layout of bind groups for this pipeline.
+    pub layout: Option<&'a PipelineLayout>,
+    /// The compiled shader module for this stage.
+    pub module: &'a ShaderModule,
+    /// The name of the entry point in the compiled shader. There must be a function with this name
+    /// and no return value in the shader.
+    pub entry_point: &'a str,
+    /// Advanced options for when this pipeline is compiled
+    ///
+    /// This implements `Default`, and for most users can be set to `Default::default()`
+    pub compilation_options: PipelineCompilationOptions<'a>,
+    /// The pipeline cache to use when creating this pipeline.
+    pub cache: Option<&'a PipelineCache>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ComputePipelineDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/device.rs b/wgpu/src/api/device.rs
new file mode 100644
index 0000000000..fff1cf1bb2
--- /dev/null
+++ b/wgpu/src/api/device.rs
@@ -0,0 +1,727 @@
+use std::{error, fmt, future::Future, sync::Arc, thread};
+
+use parking_lot::Mutex;
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+/// Open connection to a graphics and/or compute device.
+///
+/// Responsible for the creation of most rendering and compute resources.
+/// These are then used in commands, which are submitted to a [`Queue`].
+///
+/// A device may be requested from an adapter with [`Adapter::request_device`].
+///
+/// Corresponds to [WebGPU `GPUDevice`](https://gpuweb.github.io/gpuweb/#gpu-device).
+#[derive(Debug)]
+pub struct Device {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Device: Send, Sync);
+
+/// Describes a [`Device`].
+///
+/// For use with [`Adapter::request_device`].
+///
+/// Corresponds to [WebGPU `GPUDeviceDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpudevicedescriptor).
+pub type DeviceDescriptor<'a> = wgt::DeviceDescriptor<Label<'a>>;
+static_assertions::assert_impl_all!(DeviceDescriptor<'_>: Send, Sync);
+
+impl Device {
+    /// Returns a globally-unique identifier for this `Device`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+
+    /// Check for resource cleanups and mapping callbacks. Will block if [`Maintain::Wait`] is passed.
+    ///
+    /// Return `true` if the queue is empty, or `false` if there are more queue
+    /// submissions still in flight. (Note that, unless access to the [`Queue`] is
+    /// coordinated somehow, this information could be out of date by the time
+    /// the caller receives it. `Queue`s can be shared between threads, so
+    /// other threads could submit new work at any time.)
+    ///
+    /// When running on WebGPU, this is a no-op. `Device`s are automatically polled.
+    pub fn poll(&self, maintain: Maintain) -> MaintainResult {
+        DynContext::device_poll(&*self.context, &self.id, self.data.as_ref(), maintain)
+    }
+
+    /// The features which can be used on this device.
+    ///
+    /// No additional features can be used, even if the underlying adapter can support them.
+    pub fn features(&self) -> Features {
+        DynContext::device_features(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// The limits which can be used on this device.
+    ///
+    /// No better limits can be used, even if the underlying adapter can support them.
+    pub fn limits(&self) -> Limits {
+        DynContext::device_limits(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Creates a shader module from either SPIR-V or WGSL source code.
+    ///
+    /// <div class="warning">
+    // NOTE: Keep this in sync with `naga::front::wgsl::parse_str`!
+    // NOTE: Keep this in sync with `wgpu_core::Global::device_create_shader_module`!
+    ///
+    /// This function may consume a lot of stack space. Compiler-enforced limits for parsing
+    /// recursion exist; if shader compilation runs into them, it will return an error gracefully.
+    /// However, on some build profiles and platforms, the default stack size for a thread may be
+    /// exceeded before this limit is reached during parsing. Callers should ensure that there is
+    /// enough stack space for this, particularly if calls to this method are exposed to user
+    /// input.
+    ///
+    /// </div>
+    pub fn create_shader_module(&self, desc: ShaderModuleDescriptor<'_>) -> ShaderModule {
+        let (id, data) = DynContext::device_create_shader_module(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            desc,
+            wgt::ShaderBoundChecks::new(),
+        );
+        ShaderModule {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Creates a shader module from either SPIR-V or WGSL source code without runtime checks.
+    ///
+    /// # Safety
+    /// In contrast with [`create_shader_module`](Self::create_shader_module) this function
+    /// creates a shader module without runtime checks which allows shaders to perform
+    /// operations which can lead to undefined behavior like indexing out of bounds, thus it's
+    /// the caller's responsibility to pass a shader which doesn't perform any of these
+    /// operations.
+    ///
+    /// This has no effect on web.
+    pub unsafe fn create_shader_module_unchecked(
+        &self,
+        desc: ShaderModuleDescriptor<'_>,
+    ) -> ShaderModule {
+        let (id, data) = DynContext::device_create_shader_module(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            desc,
+            unsafe { wgt::ShaderBoundChecks::unchecked() },
+        );
+        ShaderModule {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Creates a shader module from SPIR-V binary directly.
+    ///
+    /// # Safety
+    ///
+    /// This function passes binary data to the backend as-is and can potentially result in a
+    /// driver crash or bogus behaviour. No attempt is made to ensure that data is valid SPIR-V.
+    ///
+    /// See also [`include_spirv_raw!`] and [`util::make_spirv_raw`].
+    pub unsafe fn create_shader_module_spirv(
+        &self,
+        desc: &ShaderModuleDescriptorSpirV<'_>,
+    ) -> ShaderModule {
+        let (id, data) = unsafe {
+            DynContext::device_create_shader_module_spirv(
+                &*self.context,
+                &self.id,
+                self.data.as_ref(),
+                desc,
+            )
+        };
+        ShaderModule {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Creates an empty [`CommandEncoder`].
+    pub fn create_command_encoder(&self, desc: &CommandEncoderDescriptor<'_>) -> CommandEncoder {
+        let (id, data) = DynContext::device_create_command_encoder(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            desc,
+        );
+        CommandEncoder {
+            context: Arc::clone(&self.context),
+            id: Some(id),
+            data,
+        }
+    }
+
+    /// Creates an empty [`RenderBundleEncoder`].
+    pub fn create_render_bundle_encoder(
+        &self,
+        desc: &RenderBundleEncoderDescriptor<'_>,
+    ) -> RenderBundleEncoder<'_> {
+        let (id, data) = DynContext::device_create_render_bundle_encoder(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            desc,
+        );
+        RenderBundleEncoder {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+            parent: self,
+            _p: Default::default(),
+        }
+    }
+
+    /// Creates a new [`BindGroup`].
+    pub fn create_bind_group(&self, desc: &BindGroupDescriptor<'_>) -> BindGroup {
+        let (id, data) = DynContext::device_create_bind_group(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            desc,
+        );
+        BindGroup {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Creates a [`BindGroupLayout`].
+    pub fn create_bind_group_layout(
+        &self,
+        desc: &BindGroupLayoutDescriptor<'_>,
+    ) -> BindGroupLayout {
+        let (id, data) = DynContext::device_create_bind_group_layout(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            desc,
+        );
+        BindGroupLayout {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Creates a [`PipelineLayout`].
+    pub fn create_pipeline_layout(&self, desc: &PipelineLayoutDescriptor<'_>) -> PipelineLayout {
+        let (id, data) = DynContext::device_create_pipeline_layout(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            desc,
+        );
+        PipelineLayout {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Creates a [`RenderPipeline`].
+    pub fn create_render_pipeline(&self, desc: &RenderPipelineDescriptor<'_>) -> RenderPipeline {
+        let (id, data) = DynContext::device_create_render_pipeline(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            desc,
+        );
+        RenderPipeline {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Creates a [`ComputePipeline`].
+    pub fn create_compute_pipeline(&self, desc: &ComputePipelineDescriptor<'_>) -> ComputePipeline {
+        let (id, data) = DynContext::device_create_compute_pipeline(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            desc,
+        );
+        ComputePipeline {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Creates a [`Buffer`].
+    pub fn create_buffer(&self, desc: &BufferDescriptor<'_>) -> Buffer {
+        let mut map_context = MapContext::new(desc.size);
+        if desc.mapped_at_creation {
+            map_context.initial_range = 0..desc.size;
+        }
+
+        let (id, data) =
+            DynContext::device_create_buffer(&*self.context, &self.id, self.data.as_ref(), desc);
+
+        Buffer {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+            map_context: Mutex::new(map_context),
+            size: desc.size,
+            usage: desc.usage,
+        }
+    }
+
+    /// Creates a new [`Texture`].
+    ///
+    /// `desc` specifies the general format of the texture.
+    pub fn create_texture(&self, desc: &TextureDescriptor<'_>) -> Texture {
+        let (id, data) =
+            DynContext::device_create_texture(&*self.context, &self.id, self.data.as_ref(), desc);
+        Texture {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+            owned: true,
+            descriptor: TextureDescriptor {
+                label: None,
+                view_formats: &[],
+                ..desc.clone()
+            },
+        }
+    }
+
+    /// Creates a [`Texture`] from a wgpu-hal Texture.
+    ///
+    /// # Safety
+    ///
+    /// - `hal_texture` must be created from this device's internal handle
+    /// - `hal_texture` must be created respecting `desc`
+    /// - `hal_texture` must be initialized
+    #[cfg(wgpu_core)]
+    pub unsafe fn create_texture_from_hal<A: wgc::hal_api::HalApi>(
+        &self,
+        hal_texture: A::Texture,
+        desc: &TextureDescriptor<'_>,
+    ) -> Texture {
+        let texture = unsafe {
+            self.context
+                .as_any()
+                .downcast_ref::<crate::backend::ContextWgpuCore>()
+                // Part of the safety requirements is that the texture was generated from the same hal device.
+                // Therefore, unwrap is fine here since only ContextWgpuCore has the ability to create hal textures.
+                .unwrap()
+                .create_texture_from_hal::<A>(
+                    hal_texture,
+                    self.data.as_ref().downcast_ref().unwrap(),
+                    desc,
+                )
+        };
+        Texture {
+            context: Arc::clone(&self.context),
+            id: ObjectId::from(texture.id()),
+            data: Box::new(texture),
+            owned: true,
+            descriptor: TextureDescriptor {
+                label: None,
+                view_formats: &[],
+                ..desc.clone()
+            },
+        }
+    }
+
+    /// Creates a [`Buffer`] from a wgpu-hal Buffer.
+    ///
+    /// # Safety
+    ///
+    /// - `hal_buffer` must be created from this device's internal handle
+    /// - `hal_buffer` must be created respecting `desc`
+    /// - `hal_buffer` must be initialized
+    #[cfg(wgpu_core)]
+    pub unsafe fn create_buffer_from_hal<A: wgc::hal_api::HalApi>(
+        &self,
+        hal_buffer: A::Buffer,
+        desc: &BufferDescriptor<'_>,
+    ) -> Buffer {
+        let mut map_context = MapContext::new(desc.size);
+        if desc.mapped_at_creation {
+            map_context.initial_range = 0..desc.size;
+        }
+
+        let (id, buffer) = unsafe {
+            self.context
+                .as_any()
+                .downcast_ref::<crate::backend::ContextWgpuCore>()
+                // Part of the safety requirements is that the buffer was generated from the same hal device.
+                // Therefore, unwrap is fine here since only ContextWgpuCore has the ability to create hal buffers.
+                .unwrap()
+                .create_buffer_from_hal::<A>(
+                    hal_buffer,
+                    self.data.as_ref().downcast_ref().unwrap(),
+                    desc,
+                )
+        };
+
+        Buffer {
+            context: Arc::clone(&self.context),
+            id: ObjectId::from(id),
+            data: Box::new(buffer),
+            map_context: Mutex::new(map_context),
+            size: desc.size,
+            usage: desc.usage,
+        }
+    }
+
+    /// Creates a new [`Sampler`].
+    ///
+    /// `desc` specifies the behavior of the sampler.
+    pub fn create_sampler(&self, desc: &SamplerDescriptor<'_>) -> Sampler {
+        let (id, data) =
+            DynContext::device_create_sampler(&*self.context, &self.id, self.data.as_ref(), desc);
+        Sampler {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Creates a new [`QuerySet`].
+    pub fn create_query_set(&self, desc: &QuerySetDescriptor<'_>) -> QuerySet {
+        let (id, data) =
+            DynContext::device_create_query_set(&*self.context, &self.id, self.data.as_ref(), desc);
+        QuerySet {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Set a callback for errors that are not handled in error scopes.
+    pub fn on_uncaptured_error(&self, handler: Box<dyn UncapturedErrorHandler>) {
+        self.context
+            .device_on_uncaptured_error(&self.id, self.data.as_ref(), handler);
+    }
+
+    /// Push an error scope.
+    pub fn push_error_scope(&self, filter: ErrorFilter) {
+        self.context
+            .device_push_error_scope(&self.id, self.data.as_ref(), filter);
+    }
+
+    /// Pop an error scope.
+    pub fn pop_error_scope(&self) -> impl Future<Output = Option<Error>> + WasmNotSend {
+        self.context
+            .device_pop_error_scope(&self.id, self.data.as_ref())
+    }
+
+    /// Starts frame capture.
+    pub fn start_capture(&self) {
+        DynContext::device_start_capture(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Stops frame capture.
+    pub fn stop_capture(&self) {
+        DynContext::device_stop_capture(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Query internal counters from the native backend for debugging purposes.
+    ///
+    /// Some backends may not set all counters, or may not set any counter at all.
+    /// The `counters` cargo feature must be enabled for any counter to be set.
+    ///
+    /// If a counter is not set, it contains its default value (zero).
+    pub fn get_internal_counters(&self) -> wgt::InternalCounters {
+        DynContext::device_get_internal_counters(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Generate a GPU memory allocation report if the underlying backend supports it.
+    ///
+    /// Backends that do not support producing these reports return `None`. A backend may
+    /// support it and still return `None` if it is not performing sub-allocation,
+    /// for example as a workaround for driver issues.
+    pub fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
+        DynContext::generate_allocator_report(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Apply a callback to this `Device`'s underlying backend device.
+    ///
+    /// If this `Device` is implemented by the backend API given by `A` (Vulkan,
+    /// Dx12, etc.), then apply `hal_device_callback` to `Some(&device)`, where
+    /// `device` is the underlying backend device type, [`A::Device`].
+    ///
+    /// If this `Device` uses a different backend, apply `hal_device_callback`
+    /// to `None`.
+    ///
+    /// The device is locked for reading while `hal_device_callback` runs. If
+    /// the callback attempts to perform any `wgpu` operations that require
+    /// write access to the device (destroying a buffer, say), deadlock will
+    /// occur. The locks are automatically released when the callback returns.
+    ///
+    /// # Safety
+    ///
+    /// - The raw handle passed to the callback must not be manually destroyed.
+    ///
+    /// [`A::Device`]: hal::Api::Device
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Device>) -> R, R>(
+        &self,
+        hal_device_callback: F,
+    ) -> Option<R> {
+        self.context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+            .map(|ctx| unsafe {
+                ctx.device_as_hal::<A, F, R>(
+                    self.data.as_ref().downcast_ref().unwrap(),
+                    hal_device_callback,
+                )
+            })
+    }
+
+    /// Destroy this device.
+    pub fn destroy(&self) {
+        DynContext::device_destroy(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Set a DeviceLostCallback on this device.
+    pub fn set_device_lost_callback(
+        &self,
+        callback: impl Fn(DeviceLostReason, String) + Send + 'static,
+    ) {
+        DynContext::device_set_device_lost_callback(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            Box::new(callback),
+        )
+    }
+
+    /// Test-only function to make this device invalid.
+    #[doc(hidden)]
+    pub fn make_invalid(&self) {
+        DynContext::device_make_invalid(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Create a [`PipelineCache`] with initial data
+    ///
+    /// This can be passed to [`Device::create_compute_pipeline`]
+    /// and [`Device::create_render_pipeline`] to either accelerate these
+    /// or add the cache results from those.
+    ///
+    /// # Safety
+    ///
+    /// If the `data` field of `desc` is set, it must have previously been returned from a call
+    /// to [`PipelineCache::get_data`][^saving]. This `data` will only be used if it came
+    /// from an adapter with the same [`util::pipeline_cache_key`].
+    /// This *is* compatible across wgpu versions, as any data format change will
+    /// be accounted for.
+    ///
+    /// It is *not* supported to bring caches from previous direct uses of backend APIs
+    /// into this method.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error value if:
+    ///  * the [`PIPELINE_CACHE`](wgt::Features::PIPELINE_CACHE) feature is not enabled
+    ///  * this device is invalid; or
+    ///  * the device is out of memory
+    ///
+    /// This method also returns an error value if:
+    ///  * The `fallback` field on `desc` is false; and
+    ///  * the `data` provided would not be used[^data_not_used]
+    ///
+    /// If an error value is used in subsequent calls, default caching will be used.
+    ///
+    /// [^saving]: We do recognise that saving this data to disk means this condition
+    /// is impossible to fully prove. Consider the risks for your own application in this case.
+    ///
+    /// [^data_not_used]: This data may be not used if: the data was produced by a prior
+    /// version of wgpu; or was created for an incompatible adapter, or there was a GPU driver
+    /// update. In some cases, the data might not be used and a real value is returned,
+    /// this is left to the discretion of GPU drivers.
+    pub unsafe fn create_pipeline_cache(
+        &self,
+        desc: &PipelineCacheDescriptor<'_>,
+    ) -> PipelineCache {
+        let (id, data) = unsafe {
+            DynContext::device_create_pipeline_cache(
+                &*self.context,
+                &self.id,
+                self.data.as_ref(),
+                desc,
+            )
+        };
+        PipelineCache {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+}
+
+impl Drop for Device {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.device_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Requesting a device from an [`Adapter`] failed.
+#[derive(Clone, Debug)]
+pub struct RequestDeviceError {
+    pub(crate) inner: RequestDeviceErrorKind,
+}
+#[derive(Clone, Debug)]
+pub(crate) enum RequestDeviceErrorKind {
+    /// Error from [`wgpu_core`].
+    // must match dependency cfg
+    #[cfg(wgpu_core)]
+    Core(wgc::instance::RequestDeviceError),
+
+    /// Error from web API that was called by `wgpu` to request a device.
+    ///
+    /// (This is currently never used by the webgl backend, but it could be.)
+    #[cfg(webgpu)]
+    WebGpu(wasm_bindgen::JsValue),
+}
+
+#[cfg(send_sync)]
+unsafe impl Send for RequestDeviceErrorKind {}
+#[cfg(send_sync)]
+unsafe impl Sync for RequestDeviceErrorKind {}
+
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RequestDeviceError: Send, Sync);
+
+impl fmt::Display for RequestDeviceError {
+    fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match &self.inner {
+            #[cfg(wgpu_core)]
+            RequestDeviceErrorKind::Core(error) => error.fmt(_f),
+            #[cfg(webgpu)]
+            RequestDeviceErrorKind::WebGpu(error_js_value) => {
+                // wasm-bindgen provides a reasonable error stringification via `Debug` impl
+                write!(_f, "{error_js_value:?}")
+            }
+            #[cfg(not(any(webgpu, wgpu_core)))]
+            _ => unimplemented!("unknown `RequestDeviceErrorKind`"),
+        }
+    }
+}
+
+impl error::Error for RequestDeviceError {
+    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
+        match &self.inner {
+            #[cfg(wgpu_core)]
+            RequestDeviceErrorKind::Core(error) => error.source(),
+            #[cfg(webgpu)]
+            RequestDeviceErrorKind::WebGpu(_) => None,
+            #[cfg(not(any(webgpu, wgpu_core)))]
+            _ => unimplemented!("unknown `RequestDeviceErrorKind`"),
+        }
+    }
+}
+
+#[cfg(wgpu_core)]
+impl From<wgc::instance::RequestDeviceError> for RequestDeviceError {
+    fn from(error: wgc::instance::RequestDeviceError) -> Self {
+        Self {
+            inner: RequestDeviceErrorKind::Core(error),
+        }
+    }
+}
+
+/// Type for the callback of uncaptured error handler
+pub trait UncapturedErrorHandler: Fn(Error) + Send + 'static {}
+impl<T> UncapturedErrorHandler for T where T: Fn(Error) + Send + 'static {}
+
+/// Filter for error scopes.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd)]
+pub enum ErrorFilter {
+    /// Catch only out-of-memory errors.
+    OutOfMemory,
+    /// Catch only validation errors.
+    Validation,
+    /// Catch only internal errors.
+    Internal,
+}
+static_assertions::assert_impl_all!(ErrorFilter: Send, Sync);
+
+/// Error type
+#[derive(Debug)]
+pub enum Error {
+    /// Out of memory error
+    OutOfMemory {
+        /// Lower level source of the error.
+        #[cfg(send_sync)]
+        #[cfg_attr(docsrs, doc(cfg(all())))]
+        source: Box<dyn error::Error + Send + Sync + 'static>,
+        /// Lower level source of the error.
+        #[cfg(not(send_sync))]
+        #[cfg_attr(docsrs, doc(cfg(all())))]
+        source: Box<dyn error::Error + 'static>,
+    },
+    /// Validation error, signifying a bug in code or data
+    Validation {
+        /// Lower level source of the error.
+        #[cfg(send_sync)]
+        #[cfg_attr(docsrs, doc(cfg(all())))]
+        source: Box<dyn error::Error + Send + Sync + 'static>,
+        /// Lower level source of the error.
+        #[cfg(not(send_sync))]
+        #[cfg_attr(docsrs, doc(cfg(all())))]
+        source: Box<dyn error::Error + 'static>,
+        /// Description of the validation error.
+        description: String,
+    },
+    /// Internal error. Used for signalling any failures not explicitly expected by WebGPU.
+    ///
+    /// These could be due to internal implementation or system limits being reached.
+    Internal {
+        /// Lower level source of the error.
+        #[cfg(send_sync)]
+        #[cfg_attr(docsrs, doc(cfg(all())))]
+        source: Box<dyn error::Error + Send + Sync + 'static>,
+        /// Lower level source of the error.
+        #[cfg(not(send_sync))]
+        #[cfg_attr(docsrs, doc(cfg(all())))]
+        source: Box<dyn error::Error + 'static>,
+        /// Description of the internal GPU error.
+        description: String,
+    },
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Error: Send, Sync);
+
+impl error::Error for Error {
+    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
+        match self {
+            Error::OutOfMemory { source } => Some(source.as_ref()),
+            Error::Validation { source, .. } => Some(source.as_ref()),
+            Error::Internal { source, .. } => Some(source.as_ref()),
+        }
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Error::OutOfMemory { .. } => f.write_str("Out of Memory"),
+            Error::Validation { description, .. } => f.write_str(description),
+            Error::Internal { description, .. } => f.write_str(description),
+        }
+    }
+}
diff --git a/wgpu/src/api/id.rs b/wgpu/src/api/id.rs
new file mode 100644
index 0000000000..d9041883b2
--- /dev/null
+++ b/wgpu/src/api/id.rs
@@ -0,0 +1,67 @@
+use std::{cmp::Ordering, fmt, marker::PhantomData, num::NonZeroU64};
+
+use crate::context::ObjectId;
+
+/// Opaque globally-unique identifier
+#[repr(transparent)]
+pub struct Id<T>(NonZeroU64, PhantomData<*mut T>);
+
+impl<T> Id<T> {
+    /// Create a new `Id` from an `ObjectId`.
+    pub(crate) fn new(id: ObjectId) -> Self {
+        Id(id.global_id(), PhantomData)
+    }
+
+    /// For testing use only. We provide no guarantees about the actual value of the ids.
+    #[doc(hidden)]
+    pub fn inner(&self) -> u64 {
+        self.0.get()
+    }
+}
+
+// SAFETY: `Id` is a bare `NonZeroU64`; the type parameter is a marker purely to avoid confusing Ids
+// returned for different types, so `Id` can safely implement Send and Sync.
+unsafe impl<T> Send for Id<T> {}
+
+// SAFETY: See the implementation for `Send`.
+unsafe impl<T> Sync for Id<T> {}
+
+impl<T> Clone for Id<T> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<T> Copy for Id<T> {}
+
+impl<T> fmt::Debug for Id<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_tuple("Id").field(&self.0).finish()
+    }
+}
+
+impl<T> PartialEq for Id<T> {
+    fn eq(&self, other: &Id<T>) -> bool {
+        self.0 == other.0
+    }
+}
+
+impl<T> Eq for Id<T> {}
+
+impl<T> PartialOrd for Id<T> {
+    fn partial_cmp(&self, other: &Id<T>) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl<T> Ord for Id<T> {
+    fn cmp(&self, other: &Id<T>) -> Ordering {
+        self.0.cmp(&other.0)
+    }
+}
+
+impl<T> std::hash::Hash for Id<T> {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.0.hash(state)
+    }
+}
diff --git a/wgpu/src/api/instance.rs b/wgpu/src/api/instance.rs
new file mode 100644
index 0000000000..26d8b863b1
--- /dev/null
+++ b/wgpu/src/api/instance.rs
@@ -0,0 +1,400 @@
+use parking_lot::Mutex;
+
+use crate::*;
+
+use std::{future::Future, sync::Arc};
+
+/// Context for all other wgpu objects. Instance of wgpu.
+///
+/// This is the first thing you create when using wgpu.
+/// Its primary use is to create [`Adapter`]s and [`Surface`]s.
+///
+/// Does not have to be kept alive.
+///
+/// Corresponds to [WebGPU `GPU`](https://gpuweb.github.io/gpuweb/#gpu-interface).
+#[derive(Debug)]
+pub struct Instance {
+    context: Arc<C>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Instance: Send, Sync);
+
+impl Default for Instance {
+    /// Creates a new instance of wgpu with default options.
+    ///
+    /// Backends are set to `Backends::all()`, and FXC is chosen as the `dx12_shader_compiler`.
+    ///
+    /// # Panics
+    ///
+    /// If no backend feature for the active target platform is enabled,
+    /// this method will panic, see [`Instance::enabled_backend_features()`].
+    fn default() -> Self {
+        Self::new(InstanceDescriptor::default())
+    }
+}
+
+impl Instance {
+    /// Returns which backends can be picked for the current build configuration.
+    ///
+    /// The returned set depends on a combination of target platform and enabled features.
+    /// This does *not* do any runtime checks and is exclusively based on compile time information.
+    ///
+    /// `InstanceDescriptor::backends` does not need to be a subset of this,
+    /// but any backend that is not in this set, will not be picked.
+    ///
+    /// TODO: Right now it's not yet possible to opt out of all features on some platforms.
+    /// See <https://github.com/gfx-rs/wgpu/issues/3514>
+    /// * Windows/Linux/Android: always enables Vulkan and GLES with no way to opt out
+    pub const fn enabled_backend_features() -> Backends {
+        let mut backends = Backends::empty();
+
+        if cfg!(native) {
+            if cfg!(metal) {
+                backends = backends.union(Backends::METAL);
+            }
+            if cfg!(dx12) {
+                backends = backends.union(Backends::DX12);
+            }
+
+            // Windows and non-Apple Unix (Linux, Android, ...) always enable Vulkan and OpenGL.
+            // Apple targets are also `unix`, so exclude them here; they are handled below.
+            if cfg!(target_os = "windows") || (cfg!(unix) && !cfg!(target_vendor = "apple")) {
+                backends = backends.union(Backends::VULKAN).union(Backends::GL);
+            }
+
+            // Vulkan on Mac/iOS is only available through vulkan-portability.
+            if (cfg!(target_os = "ios") || cfg!(target_os = "macos"))
+                && cfg!(feature = "vulkan-portability")
+            {
+                backends = backends.union(Backends::VULKAN);
+            }
+
+            // GL on Mac is only available through angle.
+            if cfg!(target_os = "macos") && cfg!(feature = "angle") {
+                backends = backends.union(Backends::GL);
+            }
+        } else {
+            if cfg!(webgpu) {
+                backends = backends.union(Backends::BROWSER_WEBGPU);
+            }
+            if cfg!(webgl) {
+                backends = backends.union(Backends::GL);
+            }
+        }
+
+        backends
+    }
+
+    /// Create a new instance of wgpu.
+    ///
+    /// # Arguments
+    ///
+    /// - `instance_desc` - Has fields for which [backends][Backends] wgpu will choose
+    ///   during instantiation, and which [DX12 shader compiler][Dx12Compiler] wgpu will use.
+    ///
+    ///   [`Backends::BROWSER_WEBGPU`] takes a special role:
+    ///   If it is set and WebGPU support is detected, this instance will *only* be able to create
+    ///   WebGPU adapters. If you instead want to force use of WebGL, either
+    ///   disable the `webgpu` compile-time feature or do not add the [`Backends::BROWSER_WEBGPU`]
+    ///   flag to the `instance_desc`'s `backends` field.
+    ///   If it is set and WebGPU support is *not* detected, the instance will use wgpu-core
+    ///   to create adapters. Meaning that if the `webgl` feature is enabled, it is able to create
+    ///   a WebGL adapter.
+    ///
+    /// # Panics
+    ///
+    /// If no backend feature for the active target platform is enabled,
+    /// this method will panic, see [`Instance::enabled_backend_features()`].
+    #[allow(unreachable_code)]
+    pub fn new(_instance_desc: InstanceDescriptor) -> Self {
+        if Self::enabled_backend_features().is_empty() {
+            panic!(
+                "No wgpu backend feature that is implemented for the target platform was enabled. \
+                 See `wgpu::Instance::enabled_backend_features()` for more information."
+            );
+        }
+
+        #[cfg(webgpu)]
+        {
+            let is_only_available_backend = !cfg!(wgpu_core);
+            let requested_webgpu = _instance_desc.backends.contains(Backends::BROWSER_WEBGPU);
+            let support_webgpu =
+                crate::backend::get_browser_gpu_property().map_or(false, |gpu| !gpu.is_undefined());
+
+            if is_only_available_backend || (requested_webgpu && support_webgpu) {
+                return Self {
+                    context: Arc::from(crate::backend::ContextWebGpu::init(_instance_desc)),
+                };
+            }
+        }
+
+        #[cfg(wgpu_core)]
+        {
+            return Self {
+                context: Arc::from(crate::backend::ContextWgpuCore::init(_instance_desc)),
+            };
+        }
+
+        unreachable!(
+            "Earlier check of `enabled_backend_features` should have prevented getting here!"
+        );
+    }
+
+    /// Create a new instance of wgpu from a wgpu-hal instance.
+    ///
+    /// # Arguments
+    ///
+    /// - `hal_instance` - wgpu-hal instance.
+    ///
+    /// # Safety
+    ///
+    /// Refer to the creation of wgpu-hal Instance for every backend.
+    #[cfg(wgpu_core)]
+    pub unsafe fn from_hal<A: wgc::hal_api::HalApi>(hal_instance: A::Instance) -> Self {
+        Self {
+            context: Arc::new(unsafe {
+                crate::backend::ContextWgpuCore::from_hal_instance::<A>(hal_instance)
+            }),
+        }
+    }
+
+    /// Return a reference to a specific backend instance, if available.
+    ///
+    /// If this `Instance` has a wgpu-hal [`Instance`] for backend
+    /// `A`, return a reference to it. Otherwise, return `None`.
+    ///
+    /// # Safety
+    ///
+    /// - The raw instance handle returned must not be manually destroyed.
+    ///
+    /// [`Instance`]: hal::Api::Instance
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal<A: wgc::hal_api::HalApi>(&self) -> Option<&A::Instance> {
+        self.context
+            .as_any()
+            // If we don't have a wgpu-core instance, we don't have a hal instance either.
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+            .and_then(|ctx| unsafe { ctx.instance_as_hal::<A>() })
+    }
+
+    /// Create a new instance of wgpu from a wgpu-core instance.
+    ///
+    /// # Arguments
+    ///
+    /// - `core_instance` - wgpu-core instance.
+    ///
+    /// # Safety
+    ///
+    /// Refer to the creation of wgpu-core Instance.
+    #[cfg(wgpu_core)]
+    pub unsafe fn from_core(core_instance: wgc::instance::Instance) -> Self {
+        Self {
+            context: Arc::new(unsafe {
+                crate::backend::ContextWgpuCore::from_core_instance(core_instance)
+            }),
+        }
+    }
+
+    /// Retrieves all available [`Adapter`]s that match the given [`Backends`].
+    ///
+    /// # Arguments
+    ///
+    /// - `backends` - Backends from which to enumerate adapters.
+    #[cfg(native)]
+    pub fn enumerate_adapters(&self, backends: Backends) -> Vec<Adapter> {
+        use crate::context::ObjectId;
+
+        let context = Arc::clone(&self.context);
+        self.context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+            .map(|ctx| {
+                ctx.enumerate_adapters(backends)
+                    .into_iter()
+                    .map(move |id| crate::Adapter {
+                        context: Arc::clone(&context),
+                        id: ObjectId::from(id),
+                        data: Box::new(()),
+                    })
+                    .collect()
+            })
+            .unwrap()
+    }
+
+    /// Retrieves an [`Adapter`] which matches the given [`RequestAdapterOptions`].
+    ///
+    /// Some options are "soft", so treated as non-mandatory. Others are "hard".
+    ///
+    /// If no adapters are found that satisfy all the "hard" options, `None` is returned.
+    ///
+    /// A `compatible_surface` is required when targeting WebGL2.
+    pub fn request_adapter(
+        &self,
+        options: &RequestAdapterOptions<'_, '_>,
+    ) -> impl Future<Output = Option<Adapter>> + WasmNotSend {
+        let context = Arc::clone(&self.context);
+        let adapter = self.context.instance_request_adapter(options);
+        async move {
+            adapter
+                .await
+                .map(|(id, data)| Adapter { context, id, data })
+        }
+    }
+
+    /// Converts a wgpu-hal `ExposedAdapter` to a wgpu [`Adapter`].
+    ///
+    /// # Safety
+    ///
+    /// `hal_adapter` must be created from this instance's internal handle.
+    #[cfg(wgpu_core)]
+    pub unsafe fn create_adapter_from_hal<A: wgc::hal_api::HalApi>(
+        &self,
+        hal_adapter: hal::ExposedAdapter<A>,
+    ) -> Adapter {
+        let context = Arc::clone(&self.context);
+        let id = unsafe {
+            context
+                .as_any()
+                .downcast_ref::<crate::backend::ContextWgpuCore>()
+                .unwrap()
+                .create_adapter_from_hal(hal_adapter)
+                .into()
+        };
+        Adapter {
+            context,
+            id,
+            data: Box::new(()),
+        }
+    }
+
+    /// Creates a new surface targeting a given window/canvas/surface/etc..
+    ///
+    /// Internally, this creates surfaces for all backends that are enabled for this instance.
+    ///
+    /// See [`SurfaceTarget`] for what targets are supported.
+    /// See [`Instance::create_surface_unsafe`] for surface creation with unsafe target variants.
+    ///
+    /// Most commonly used are window handles (or providers of window handles)
+    /// which can be passed directly as they're automatically converted to [`SurfaceTarget`].
+    pub fn create_surface<'window>(
+        &self,
+        target: impl Into<SurfaceTarget<'window>>,
+    ) -> Result<Surface<'window>, CreateSurfaceError> {
+        // Handle origin (i.e. window) to optionally take ownership of to make the surface outlast the window.
+        let handle_source;
+
+        let target = target.into();
+        let mut surface = match target {
+            SurfaceTarget::Window(window) => unsafe {
+                let surface = self.create_surface_unsafe(
+                    SurfaceTargetUnsafe::from_window(&window).map_err(|e| CreateSurfaceError {
+                        inner: CreateSurfaceErrorKind::RawHandle(e),
+                    })?,
+                );
+                handle_source = Some(window);
+
+                surface
+            }?,
+
+            #[cfg(any(webgpu, webgl))]
+            SurfaceTarget::Canvas(canvas) => {
+                handle_source = None;
+
+                let value: &wasm_bindgen::JsValue = &canvas;
+                let obj = std::ptr::NonNull::from(value).cast();
+                let raw_window_handle = raw_window_handle::WebCanvasWindowHandle::new(obj).into();
+                let raw_display_handle = raw_window_handle::WebDisplayHandle::new().into();
+
+                // Note that we need to call this while we still have `value` around.
+                // This is safe without storing canvas to `handle_source` since the surface will create a copy internally.
+                unsafe {
+                    self.create_surface_unsafe(SurfaceTargetUnsafe::RawHandle {
+                        raw_display_handle,
+                        raw_window_handle,
+                    })
+                }?
+            }
+
+            #[cfg(any(webgpu, webgl))]
+            SurfaceTarget::OffscreenCanvas(canvas) => {
+                handle_source = None;
+
+                let value: &wasm_bindgen::JsValue = &canvas;
+                let obj = std::ptr::NonNull::from(value).cast();
+                let raw_window_handle =
+                    raw_window_handle::WebOffscreenCanvasWindowHandle::new(obj).into();
+                let raw_display_handle = raw_window_handle::WebDisplayHandle::new().into();
+
+                // Note that we need to call this while we still have `value` around.
+                // This is safe without storing canvas to `handle_source` since the surface will create a copy internally.
+                unsafe {
+                    self.create_surface_unsafe(SurfaceTargetUnsafe::RawHandle {
+                        raw_display_handle,
+                        raw_window_handle,
+                    })
+                }?
+            }
+        };
+
+        surface._handle_source = handle_source;
+
+        Ok(surface)
+    }
+
+    /// Creates a new surface targeting a given window/canvas/surface/etc. using an unsafe target.
+    ///
+    /// Internally, this creates surfaces for all backends that are enabled for this instance.
+    ///
+    /// See [`SurfaceTargetUnsafe`] for what targets are supported.
+    /// See [`Instance::create_surface`] for surface creation with safe target variants.
+    ///
+    /// # Safety
+    ///
+    /// - See respective [`SurfaceTargetUnsafe`] variants for safety requirements.
+    pub unsafe fn create_surface_unsafe<'window>(
+        &self,
+        target: SurfaceTargetUnsafe,
+    ) -> Result<Surface<'window>, CreateSurfaceError> {
+        let (id, data) = unsafe { self.context.instance_create_surface(target) }?;
+
+        Ok(Surface {
+            context: Arc::clone(&self.context),
+            _handle_source: None,
+            id,
+            surface_data: data,
+            config: Mutex::new(None),
+        })
+    }
+
+    /// Polls all devices.
+    ///
+    /// If `force_wait` is true and this is not running on the web, then this
+    /// function will block until all in-flight buffers have been mapped and
+    /// all submitted commands have finished execution.
+    ///
+    /// Return `true` if all devices' queues are empty, or `false` if there are
+    /// queue submissions still in flight. (Note that, unless access to all
+    /// [`Queue`s] associated with this [`Instance`] is coordinated somehow,
+    /// this information could be out of date by the time the caller receives
+    /// it. `Queue`s can be shared between threads, and other threads could
+    /// submit new work at any time.)
+    ///
+    /// On the web, this is a no-op. `Device`s are automatically polled.
+    ///
+    /// [`Queue`s]: Queue
+    pub fn poll_all(&self, force_wait: bool) -> bool {
+        self.context.instance_poll_all_devices(force_wait)
+    }
+
+    /// Generates memory report.
+    ///
+    /// Returns `None` if the feature is not supported by the backend
+    /// which happens only when WebGPU is pre-selected by the instance creation.
+    #[cfg(wgpu_core)]
+    pub fn generate_report(&self) -> Option<wgc::global::GlobalReport> {
+        self.context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+            .map(|ctx| ctx.generate_report())
+    }
+}
diff --git a/wgpu/src/api/mod.rs b/wgpu/src/api/mod.rs
new file mode 100644
index 0000000000..819f6847cf
--- /dev/null
+++ b/wgpu/src/api/mod.rs
@@ -0,0 +1,80 @@
+//! Types and functions which define our public api and their
+//! helper functionality.
+//!
+//! # Conventions
+//!
+//! Each major type gets its own module. The module is laid out as follows:
+//!
+//! - The type itself
+//! - `impl` block for the type
+//! - `Drop` implementation for the type (if needed)
+//! - Descriptor types and their subtypes.
+//! - Any non-public helper types or functions.
+//!
+//! # Imports
+//!
+//! Because our public api is "flat" (i.e. all types are directly under the `wgpu` module),
+//! we use a single `crate::*` import at the top of each module to bring in all the types in
+//! the public api. This is done to:
+//! - Avoid having to write out a long list of imports for each module.
+//! - Allow docs to be written naturally, without having to worry about dedicated doc imports.
+//! - Treat wgpu-types types and wgpu-core types as a single set.
+//!
+
+mod adapter;
+mod bind_group;
+mod bind_group_layout;
+mod buffer;
+mod command_buffer;
+mod command_encoder;
+// Not a root type, but common descriptor types for pipelines.
+mod common_pipeline;
+mod compute_pass;
+mod compute_pipeline;
+mod device;
+mod id;
+mod instance;
+mod pipeline_cache;
+mod pipeline_layout;
+mod query_set;
+mod queue;
+mod render_bundle;
+mod render_bundle_encoder;
+mod render_pass;
+mod render_pipeline;
+mod sampler;
+mod shader_module;
+mod surface;
+mod surface_texture;
+mod texture;
+mod texture_view;
+
+pub use adapter::*;
+pub use bind_group::*;
+pub use bind_group_layout::*;
+pub use buffer::*;
+pub use command_buffer::*;
+pub use command_encoder::*;
+pub use common_pipeline::*;
+pub use compute_pass::*;
+pub use compute_pipeline::*;
+pub use device::*;
+pub use id::*;
+pub use instance::*;
+pub use pipeline_cache::*;
+pub use pipeline_layout::*;
+pub use query_set::*;
+pub use queue::*;
+pub use render_bundle::*;
+pub use render_bundle_encoder::*;
+pub use render_pass::*;
+pub use render_pipeline::*;
+pub use sampler::*;
+pub use shader_module::*;
+pub use surface::*;
+pub use surface_texture::*;
+pub use texture::*;
+pub use texture_view::*;
+
+/// Object debugging label.
+pub type Label<'a> = Option<&'a str>;
diff --git a/wgpu/src/api/pipeline_cache.rs b/wgpu/src/api/pipeline_cache.rs
new file mode 100644
index 0000000000..42ab15b8ba
--- /dev/null
+++ b/wgpu/src/api/pipeline_cache.rs
@@ -0,0 +1,98 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a pipeline cache, which is used to accelerate
+/// creating [`RenderPipeline`]s and [`ComputePipeline`]s
+/// in subsequent executions
+///
+/// This reuse is only applicable for the same or similar devices.
+/// See [`util::pipeline_cache_key`] for some details.
+///
+/// # Background
+///
+/// In most GPU drivers, shader code must be converted into a machine code
+/// which can be executed on the GPU.
+/// Generating this machine code can require a lot of computation.
+/// Pipeline caches allow this computation to be reused between executions
+/// of the program.
+/// This can be very useful for reducing program startup time.
+///
+/// Note that most desktop GPU drivers will manage their own caches,
+/// meaning that little advantage can be gained from this on those platforms.
+/// However, on some platforms, especially Android, drivers leave this to the
+/// application to implement.
+///
+/// Unfortunately, drivers do not expose whether they manage their own caches.
+/// Some reasonable policies for applications to use are:
+/// - Manage their own pipeline cache on all platforms
+/// - Only manage pipeline caches on Android
+///
+/// # Usage
+///
+/// It is valid to use this resource when creating multiple pipelines, in
+/// which case it will likely cache each of those pipelines.
+/// It is also valid to create a new cache for each pipeline.
+///
+/// This resource is most useful when the data produced from it (using
+/// [`PipelineCache::get_data`]) is persisted.
+/// Care should be taken that pipeline caches are only used for the same device,
+/// as pipeline caches from other devices are unlikely to provide any advantage.
+/// [`util::pipeline_cache_key`] can be used as a file/directory name to help ensure that.
+///
+/// It is recommended to store pipeline caches atomically. If persisting to disk,
+/// this can usually be achieved by creating a temporary file, then moving/[renaming]
+/// the temporary file over the existing cache.
+///
+/// # Storage Usage
+///
+/// There is not currently an API available to reduce the size of a cache.
+/// This is due to limitations in the underlying graphics APIs used.
+/// This is especially impactful if your application is being updated, so
+/// previous caches are no longer being used.
+///
+/// One option to work around this is to regenerate the cache.
+/// That is, creating the pipelines which your program runs using
+/// with the stored cached data, then recreating the *same* pipelines
+/// using a new cache, which your application then stores.
+///
+/// # Implementations
+///
+/// This resource currently only works on the following backends:
+///  - Vulkan
+///
+/// This type is unique to the Rust API of `wgpu`.
+///
+/// [renaming]: std::fs::rename
+#[derive(Debug)]
+pub struct PipelineCache {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(PipelineCache: Send, Sync);
+
+impl PipelineCache {
+    /// Get the data associated with this pipeline cache.
+    /// The data format is an implementation detail of `wgpu`.
+    /// The only defined operation on this data is setting it as the `data` field
+    /// on [`PipelineCacheDescriptor`], then passing that to [`Device::create_pipeline_cache`].
+    ///
+    /// This function is unique to the Rust API of `wgpu`.
+    pub fn get_data(&self) -> Option<Vec<u8>> {
+        self.context
+            .pipeline_cache_get_data(&self.id, self.data.as_ref())
+    }
+}
+
+impl Drop for PipelineCache {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .pipeline_cache_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
diff --git a/wgpu/src/api/pipeline_layout.rs b/wgpu/src/api/pipeline_layout.rs
new file mode 100644
index 0000000000..f47ea1a174
--- /dev/null
+++ b/wgpu/src/api/pipeline_layout.rs
@@ -0,0 +1,61 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a pipeline layout.
+///
+/// A `PipelineLayout` object describes the available binding groups of a pipeline.
+/// It can be created with [`Device::create_pipeline_layout`].
+///
+/// Corresponds to [WebGPU `GPUPipelineLayout`](https://gpuweb.github.io/gpuweb/#gpupipelinelayout).
+#[derive(Debug)]
+pub struct PipelineLayout {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(PipelineLayout: Send, Sync);
+
+impl PipelineLayout {
+    /// Returns a globally-unique identifier for this `PipelineLayout`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+}
+
+impl Drop for PipelineLayout {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .pipeline_layout_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Describes a [`PipelineLayout`].
+///
+/// For use with [`Device::create_pipeline_layout`].
+///
+/// Corresponds to [WebGPU `GPUPipelineLayoutDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpupipelinelayoutdescriptor).
+#[derive(Clone, Debug, Default)]
+pub struct PipelineLayoutDescriptor<'a> {
+    /// Debug label of the pipeline layout. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// Bind groups that this pipeline uses. The first entry will provide all the bindings for
+    /// "set = 0", second entry will provide all the bindings for "set = 1" etc.
+    pub bind_group_layouts: &'a [&'a BindGroupLayout],
+    /// Set of push constant ranges this pipeline uses. Each shader stage that uses push constants
+    /// must define the range in push constant memory that corresponds to its single `layout(push_constant)`
+    /// uniform block.
+    ///
+    /// If this array is non-empty, [`Features::PUSH_CONSTANTS`] must be enabled.
+    pub push_constant_ranges: &'a [PushConstantRange],
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(PipelineLayoutDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/query_set.rs b/wgpu/src/api/query_set.rs
new file mode 100644
index 0000000000..41c262bd98
--- /dev/null
+++ b/wgpu/src/api/query_set.rs
@@ -0,0 +1,46 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a query set.
+///
+/// It can be created with [`Device::create_query_set`].
+///
+/// Corresponds to [WebGPU `GPUQuerySet`](https://gpuweb.github.io/gpuweb/#queryset).
+#[derive(Debug)]
+pub struct QuerySet {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(QuerySet: Send, Sync);
+
+impl QuerySet {
+    /// Returns a globally-unique identifier for this `QuerySet`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+}
+
+impl Drop for QuerySet {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.query_set_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Describes a [`QuerySet`].
+///
+/// For use with [`Device::create_query_set`].
+///
+/// Corresponds to [WebGPU `GPUQuerySetDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpuquerysetdescriptor).
+pub type QuerySetDescriptor<'a> = wgt::QuerySetDescriptor<Label<'a>>;
+static_assertions::assert_impl_all!(QuerySetDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs
new file mode 100644
index 0000000000..c675f9f926
--- /dev/null
+++ b/wgpu/src/api/queue.rs
@@ -0,0 +1,300 @@
+use std::{
+    ops::{Deref, DerefMut},
+    sync::Arc,
+    thread,
+};
+
+use crate::context::{DynContext, ObjectId, QueueWriteBuffer};
+use crate::*;
+
+/// Handle to a command queue on a device.
+///
+/// A `Queue` executes recorded [`CommandBuffer`] objects and provides convenience methods
+/// for writing to [buffers](Queue::write_buffer) and [textures](Queue::write_texture).
+/// It can be created along with a [`Device`] by calling [`Adapter::request_device`].
+///
+/// Corresponds to [WebGPU `GPUQueue`](https://gpuweb.github.io/gpuweb/#gpu-queue).
+#[derive(Debug)]
+pub struct Queue {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Queue: Send, Sync);
+
+impl Drop for Queue {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.queue_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Identifier for a particular call to [`Queue::submit`]. Can be used
+/// as part of an argument to [`Device::poll`] to block for a particular
+/// submission to finish.
+///
+/// This type is unique to the Rust API of `wgpu`.
+/// There is no analogue in the WebGPU specification.
+#[derive(Debug, Clone)]
+pub struct SubmissionIndex(pub(crate) Arc<crate::Data>);
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(SubmissionIndex: Send, Sync);
+
+pub use wgt::Maintain as MaintainBase;
+/// Passed to [`Device::poll`] to control how and if it should block.
+pub type Maintain = wgt::Maintain<SubmissionIndex>;
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Maintain: Send, Sync);
+
+/// A write-only view into a staging buffer.
+///
+/// Reading from this buffer won't yield the contents of the buffer from the
+/// GPU and is likely to be slow. Because of this, although [`AsMut`] is
+/// implemented for this type, [`AsRef`] is not.
+pub struct QueueWriteBufferView<'a> {
+    queue: &'a Queue,
+    buffer: &'a Buffer,
+    offset: BufferAddress,
+    inner: Box<dyn QueueWriteBuffer>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(QueueWriteBufferView<'_>: Send, Sync);
+
+impl Deref for QueueWriteBufferView<'_> {
+    type Target = [u8];
+
+    fn deref(&self) -> &Self::Target {
+        log::warn!("Reading from a QueueWriteBufferView won't yield the contents of the buffer and may be slow.");
+        self.inner.slice()
+    }
+}
+
+impl DerefMut for QueueWriteBufferView<'_> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.inner.slice_mut()
+    }
+}
+
+impl<'a> AsMut<[u8]> for QueueWriteBufferView<'a> {
+    fn as_mut(&mut self) -> &mut [u8] {
+        self.inner.slice_mut()
+    }
+}
+
+impl<'a> Drop for QueueWriteBufferView<'a> {
+    fn drop(&mut self) {
+        DynContext::queue_write_staging_buffer(
+            &*self.queue.context,
+            &self.queue.id,
+            self.queue.data.as_ref(),
+            &self.buffer.id,
+            self.buffer.data.as_ref(),
+            self.offset,
+            &*self.inner,
+        );
+    }
+}
+
+impl Queue {
+    /// Schedule a data write into `buffer` starting at `offset`.
+    ///
+    /// This method fails if `data` overruns the size of `buffer` starting at `offset`.
+    ///
+    /// This does *not* submit the transfer to the GPU immediately. Calls to
+    /// `write_buffer` begin execution only on the next call to
+    /// [`Queue::submit`]. To get a set of scheduled transfers started
+    /// immediately, it's fine to call `submit` with no command buffers at all:
+    ///
+    /// ```no_run
+    /// # let queue: wgpu::Queue = todo!();
+    /// queue.submit([]);
+    /// ```
+    ///
+    /// However, `data` will be immediately copied into staging memory, so the
+    /// caller may discard it any time after this call completes.
+    ///
+    /// If possible, consider using [`Queue::write_buffer_with`] instead. That
+    /// method avoids an intermediate copy and is often able to transfer data
+    /// more efficiently than this one.
+    pub fn write_buffer(&self, buffer: &Buffer, offset: BufferAddress, data: &[u8]) {
+        DynContext::queue_write_buffer(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            &buffer.id,
+            buffer.data.as_ref(),
+            offset,
+            data,
+        )
+    }
+
+    /// Write to a buffer via a directly mapped staging buffer.
+    ///
+    /// Return a [`QueueWriteBufferView`] which, when dropped, schedules a copy
+    /// of its contents into `buffer` at `offset`. The returned view
+    /// dereferences to a `size`-byte long `&mut [u8]`, in which you should
+    /// store the data you would like written to `buffer`.
+    ///
+    /// This method may perform transfers faster than [`Queue::write_buffer`],
+    /// because the returned [`QueueWriteBufferView`] is actually the staging
+    /// buffer for the write, mapped into the caller's address space. Writing
+    /// your data directly into this staging buffer avoids the temporary
+    /// CPU-side buffer needed by `write_buffer`.
+    ///
+    /// Reading from the returned view is slow, and will not yield the current
+    /// contents of `buffer`.
+    ///
+    /// Note that dropping the [`QueueWriteBufferView`] does *not* submit the
+    /// transfer to the GPU immediately. The transfer begins only on the next
+    /// call to [`Queue::submit`] after the view is dropped. To get a set of
+    /// scheduled transfers started immediately, it's fine to call `submit` with
+    /// no command buffers at all:
+    ///
+    /// ```no_run
+    /// # let queue: wgpu::Queue = todo!();
+    /// queue.submit([]);
+    /// ```
+    ///
+    /// This method fails if `size` is greater than the size of `buffer` starting at `offset`.
+    #[must_use]
+    pub fn write_buffer_with<'a>(
+        &'a self,
+        buffer: &'a Buffer,
+        offset: BufferAddress,
+        size: BufferSize,
+    ) -> Option<QueueWriteBufferView<'a>> {
+        profiling::scope!("Queue::write_buffer_with");
+        DynContext::queue_validate_write_buffer(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            &buffer.id,
+            buffer.data.as_ref(),
+            offset,
+            size,
+        )?;
+        let staging_buffer = DynContext::queue_create_staging_buffer(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            size,
+        )?;
+        Some(QueueWriteBufferView {
+            queue: self,
+            buffer,
+            offset,
+            inner: staging_buffer,
+        })
+    }
+
+    /// Schedule a write of some data into a texture.
+    ///
+    /// * `data` contains the texels to be written, which must be in
+    ///   [the same format as the texture](TextureFormat).
+    /// * `data_layout` describes the memory layout of `data`, which does not necessarily
+    ///   have to have tightly packed rows.
+    /// * `texture` specifies the texture to write into, and the location within the
+    ///   texture (coordinate offset, mip level) that will be overwritten.
+    /// * `size` is the size, in texels, of the region to be written.
+    ///
+    /// This method fails if `size` overruns the size of `texture`, or if `data` is too short.
+    ///
+    /// This does *not* submit the transfer to the GPU immediately. Calls to
+    /// `write_texture` begin execution only on the next call to
+    /// [`Queue::submit`]. To get a set of scheduled transfers started
+    /// immediately, it's fine to call `submit` with no command buffers at all:
+    ///
+    /// ```no_run
+    /// # let queue: wgpu::Queue = todo!();
+    /// queue.submit([]);
+    /// ```
+    ///
+    /// However, `data` will be immediately copied into staging memory, so the
+    /// caller may discard it any time after this call completes.
+    pub fn write_texture(
+        &self,
+        texture: ImageCopyTexture<'_>,
+        data: &[u8],
+        data_layout: ImageDataLayout,
+        size: Extent3d,
+    ) {
+        DynContext::queue_write_texture(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            texture,
+            data,
+            data_layout,
+            size,
+        )
+    }
+
+    /// Schedule a copy of data from `source` into `dest`.
+    #[cfg(any(webgpu, webgl))]
+    pub fn copy_external_image_to_texture(
+        &self,
+        source: &wgt::ImageCopyExternalImage,
+        dest: crate::ImageCopyTextureTagged<'_>,
+        size: Extent3d,
+    ) {
+        DynContext::queue_copy_external_image_to_texture(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            source,
+            dest,
+            size,
+        )
+    }
+
+    /// Submits a series of finished command buffers for execution.
+    pub fn submit<I: IntoIterator<Item = CommandBuffer>>(
+        &self,
+        command_buffers: I,
+    ) -> SubmissionIndex {
+        let mut command_buffers = command_buffers
+            .into_iter()
+            .map(|mut comb| (comb.id.take().unwrap(), comb.data.take().unwrap()));
+
+        let data = DynContext::queue_submit(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            &mut command_buffers,
+        );
+
+        SubmissionIndex(data)
+    }
+
+    /// Gets the number of nanoseconds each tick of a timestamp query represents.
+    ///
+    /// Returns zero if timestamp queries are unsupported.
+    ///
+    /// Timestamp values are represented in nanosecond values on WebGPU, see `<https://gpuweb.github.io/gpuweb/#timestamp>`
+    /// Therefore, this is always 1.0 on the web, but on wgpu-core a manual conversion is required.
+    pub fn get_timestamp_period(&self) -> f32 {
+        DynContext::queue_get_timestamp_period(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Registers a callback when the previous call to submit finishes running on the gpu. This callback
+    /// being called implies that all mapped buffer callbacks which were registered before this call will
+    /// have been called.
+    ///
+    /// For the callback to complete, either `queue.submit(..)`, `instance.poll_all(..)`, or `device.poll(..)`
+    /// must be called elsewhere in the runtime, possibly integrated into an event loop or run on a separate thread.
+    ///
+    /// The callback will be called on the thread that first calls the above functions after the gpu work
+    /// has completed. There are no restrictions on the code you can run in the callback, however on native the
+    /// call to the function will not complete until the callback returns, so prefer keeping callbacks short
+    /// and used to set flags, send messages, etc.
+    pub fn on_submitted_work_done(&self, callback: impl FnOnce() + Send + 'static) {
+        DynContext::queue_on_submitted_work_done(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            Box::new(callback),
+        )
+    }
+}
diff --git a/wgpu/src/api/render_bundle.rs b/wgpu/src/api/render_bundle.rs
new file mode 100644
index 0000000000..e80da93e2d
--- /dev/null
+++ b/wgpu/src/api/render_bundle.rs
@@ -0,0 +1,50 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Pre-prepared reusable bundle of GPU operations.
+///
+/// It only supports a handful of render commands, but it makes them reusable. Executing a
+/// [`RenderBundle`] is often more efficient than issuing the underlying commands manually.
+///
+/// It can be created by use of a [`RenderBundleEncoder`], and executed onto a [`CommandEncoder`]
+/// using [`RenderPass::execute_bundles`].
+///
+/// Corresponds to [WebGPU `GPURenderBundle`](https://gpuweb.github.io/gpuweb/#render-bundle).
+#[derive(Debug)]
+pub struct RenderBundle {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderBundle: Send, Sync);
+
+impl RenderBundle {
+    /// Returns a globally-unique identifier for this `RenderBundle`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+}
+
+impl Drop for RenderBundle {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .render_bundle_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Describes a [`RenderBundle`].
+///
+/// For use with [`RenderBundleEncoder::finish`].
+///
+/// Corresponds to [WebGPU `GPURenderBundleDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderbundledescriptor).
+pub type RenderBundleDescriptor<'a> = wgt::RenderBundleDescriptor<Label<'a>>;
+static_assertions::assert_impl_all!(RenderBundleDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/render_bundle_encoder.rs b/wgpu/src/api/render_bundle_encoder.rs
new file mode 100644
index 0000000000..ae5829bee1
--- /dev/null
+++ b/wgpu/src/api/render_bundle_encoder.rs
@@ -0,0 +1,278 @@
+use std::{marker::PhantomData, num::NonZeroU32, ops::Range, sync::Arc};
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+/// Encodes a series of GPU operations into a reusable "render bundle".
+///
+/// It only supports a handful of render commands, but it makes them reusable.
+/// It can be created with [`Device::create_render_bundle_encoder`].
+/// It can be executed onto a [`CommandEncoder`] using [`RenderPass::execute_bundles`].
+///
+/// Executing a [`RenderBundle`] is often more efficient than issuing the underlying commands
+/// manually.
+///
+/// Corresponds to [WebGPU `GPURenderBundleEncoder`](
+/// https://gpuweb.github.io/gpuweb/#gpurenderbundleencoder).
+#[derive(Debug)]
+pub struct RenderBundleEncoder<'a> {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+    pub(crate) parent: &'a Device,
+    /// This type should be !Send !Sync, because it represents an allocation on this thread's
+    /// command buffer.
+    pub(crate) _p: PhantomData<*const u8>,
+}
+static_assertions::assert_not_impl_any!(RenderBundleEncoder<'_>: Send, Sync);
+
+/// Describes a [`RenderBundleEncoder`].
+///
+/// For use with [`Device::create_render_bundle_encoder`].
+///
+/// Corresponds to [WebGPU `GPURenderBundleEncoderDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderbundleencoderdescriptor).
+#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
+pub struct RenderBundleEncoderDescriptor<'a> {
+    /// Debug label of the render bundle encoder. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// The formats of the color attachments that this render bundle is capable of rendering to. This
+    /// must match the formats of the color attachments in the render pass this render bundle is executed in.
+    pub color_formats: &'a [Option<TextureFormat>],
+    /// Information about the depth attachment that this render bundle is capable of rendering to. This
+    /// must match the format of the depth attachments in the render pass this render bundle is executed in.
+    pub depth_stencil: Option<RenderBundleDepthStencil>,
+    /// Sample count this render bundle is capable of rendering to. This must match the pipelines and
+    /// the render passes it is used in.
+    pub sample_count: u32,
+    /// If this render bundle will be rendering to multiple array layers in the attachments at the same time.
+    pub multiview: Option<NonZeroU32>,
+}
+static_assertions::assert_impl_all!(RenderBundleEncoderDescriptor<'_>: Send, Sync);
+
+impl<'a> RenderBundleEncoder<'a> {
+    /// Finishes recording and returns a [`RenderBundle`] that can be executed in other render passes.
+    pub fn finish(self, desc: &RenderBundleDescriptor<'_>) -> RenderBundle {
+        let (id, data) =
+            DynContext::render_bundle_encoder_finish(&*self.context, self.id, self.data, desc);
+        RenderBundle {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Sets the active bind group for a given bind group index. The bind group layout
+    /// in the active pipeline when any `draw()` function is called must match the layout of this bind group.
+    ///
+    /// If the bind group has dynamic offsets, provide them in the binding order.
+    pub fn set_bind_group(
+        &mut self,
+        index: u32,
+        bind_group: &'a BindGroup,
+        offsets: &[DynamicOffset],
+    ) {
+        DynContext::render_bundle_encoder_set_bind_group(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            index,
+            &bind_group.id,
+            bind_group.data.as_ref(),
+            offsets,
+        )
+    }
+
+    /// Sets the active render pipeline.
+    ///
+    /// Subsequent draw calls will exhibit the behavior defined by `pipeline`.
+    pub fn set_pipeline(&mut self, pipeline: &'a RenderPipeline) {
+        DynContext::render_bundle_encoder_set_pipeline(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            &pipeline.id,
+            pipeline.data.as_ref(),
+        )
+    }
+
+    /// Sets the active index buffer.
+    ///
+    /// Subsequent calls to [`draw_indexed`](RenderBundleEncoder::draw_indexed) on this [`RenderBundleEncoder`] will
+    /// use `buffer_slice` as the source index buffer.
+    pub fn set_index_buffer(&mut self, buffer_slice: BufferSlice<'a>, index_format: IndexFormat) {
+        DynContext::render_bundle_encoder_set_index_buffer(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            &buffer_slice.buffer.id,
+            buffer_slice.buffer.data.as_ref(),
+            index_format,
+            buffer_slice.offset,
+            buffer_slice.size,
+        )
+    }
+
+    /// Assign a vertex buffer to a slot.
+    ///
+    /// Subsequent calls to [`draw`] and [`draw_indexed`] on this
+    /// [`RenderBundleEncoder`] will use `buffer_slice` as one of the source vertex buffers.
+    ///
+    /// The `slot` refers to the index of the matching descriptor in
+    /// [`VertexState::buffers`].
+    ///
+    /// [`draw`]: RenderBundleEncoder::draw
+    /// [`draw_indexed`]: RenderBundleEncoder::draw_indexed
+    pub fn set_vertex_buffer(&mut self, slot: u32, buffer_slice: BufferSlice<'a>) {
+        DynContext::render_bundle_encoder_set_vertex_buffer(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            slot,
+            &buffer_slice.buffer.id,
+            buffer_slice.buffer.data.as_ref(),
+            buffer_slice.offset,
+            buffer_slice.size,
+        )
+    }
+
+    /// Draws primitives from the active vertex buffer(s).
+    ///
+    /// The active vertex buffers can be set with [`RenderBundleEncoder::set_vertex_buffer`].
+    /// Does not use an Index Buffer. If you need this see [`RenderBundleEncoder::draw_indexed`]
+    ///
+    /// Panics if vertices Range is outside of the range of the vertices range of any set vertex buffer.
+    ///
+    /// vertices: The range of vertices to draw.
+    /// instances: Range of Instances to draw. Use 0..1 if instance buffers are not used.
+    /// Example of how it is used internally:
+    /// ```rust ignore
+    /// for instance_id in instance_range {
+    ///     for vertex_id in vertex_range {
+    ///         let vertex = vertex[vertex_id];
+    ///         vertex_shader(vertex, vertex_id, instance_id);
+    ///     }
+    /// }
+    /// ```
+    pub fn draw(&mut self, vertices: Range<u32>, instances: Range<u32>) {
+        DynContext::render_bundle_encoder_draw(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            vertices,
+            instances,
+        )
+    }
+
+    /// Draws indexed primitives using the active index buffer and the active vertex buffer(s).
+    ///
+    /// The active index buffer can be set with [`RenderBundleEncoder::set_index_buffer`].
+    /// The active vertex buffer(s) can be set with [`RenderBundleEncoder::set_vertex_buffer`].
+    ///
+    /// Panics if indices Range is outside of the range of the indices range of any set index buffer.
+    ///
+    /// indices: The range of indices to draw.
+    /// base_vertex: value added to each index value before indexing into the vertex buffers.
+    /// instances: Range of Instances to draw. Use 0..1 if instance buffers are not used.
+    /// Example of how it is used internally:
+    /// ```rust ignore
+    /// for instance_id in instance_range {
+    ///     for index_index in index_range {
+    ///         let vertex_id = index_buffer[index_index];
+    ///         let adjusted_vertex_id = vertex_id + base_vertex;
+    ///         let vertex = vertex[adjusted_vertex_id];
+    ///         vertex_shader(vertex, adjusted_vertex_id, instance_id);
+    ///     }
+    /// }
+    /// ```
+    pub fn draw_indexed(&mut self, indices: Range<u32>, base_vertex: i32, instances: Range<u32>) {
+        DynContext::render_bundle_encoder_draw_indexed(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            indices,
+            base_vertex,
+            instances,
+        );
+    }
+
+    /// Draws primitives from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
+    ///
+    /// The active vertex buffers can be set with [`RenderBundleEncoder::set_vertex_buffer`].
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
+    pub fn draw_indirect(&mut self, indirect_buffer: &'a Buffer, indirect_offset: BufferAddress) {
+        DynContext::render_bundle_encoder_draw_indirect(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+        );
+    }
+
+    /// Draws indexed primitives using the active index buffer and the active vertex buffers,
+    /// based on the contents of the `indirect_buffer`.
+    ///
+    /// The active index buffer can be set with [`RenderBundleEncoder::set_index_buffer`], while the active
+    /// vertex buffers can be set with [`RenderBundleEncoder::set_vertex_buffer`].
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
+    pub fn draw_indexed_indirect(
+        &mut self,
+        indirect_buffer: &'a Buffer,
+        indirect_offset: BufferAddress,
+    ) {
+        DynContext::render_bundle_encoder_draw_indexed_indirect(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+        );
+    }
+}
+
+/// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions.
+impl<'a> RenderBundleEncoder<'a> {
+    /// Set push constant data.
+    ///
+    /// Offset is measured in bytes, but must be a multiple of [`PUSH_CONSTANT_ALIGNMENT`].
+    ///
+    /// Data size must be a multiple of 4 and must have an alignment of 4.
+    /// For example, with an offset of 4 and an array of `[u8; 8]`, that will write to the range
+    /// of 4..12.
+    ///
+    /// For each byte in the range of push constant data written, the union of the stages of all push constant
+    /// ranges that covers that byte must be exactly `stages`. There's no good way of explaining this simply,
+    /// so here are some examples:
+    ///
+    /// ```text
+    /// For the given ranges:
+    /// - 0..4 Vertex
+    /// - 4..8 Fragment
+    /// ```
+    ///
+    /// You would need to upload this in two set_push_constants calls. First for the `Vertex` range, second for the `Fragment` range.
+    ///
+    /// ```text
+    /// For the given ranges:
+    /// - 0..8  Vertex
+    /// - 4..12 Fragment
+    /// ```
+    ///
+    /// You would need to upload this in three set_push_constants calls. First for the `Vertex` only range 0..4, second
+    /// for the `Vertex | Fragment` range 4..8, third for the `Fragment` range 8..12.
+    pub fn set_push_constants(&mut self, stages: ShaderStages, offset: u32, data: &[u8]) {
+        DynContext::render_bundle_encoder_set_push_constants(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            stages,
+            offset,
+            data,
+        );
+    }
+}
diff --git a/wgpu/src/api/render_pass.rs b/wgpu/src/api/render_pass.rs
new file mode 100644
index 0000000000..bdb8ebe372
--- /dev/null
+++ b/wgpu/src/api/render_pass.rs
@@ -0,0 +1,817 @@
+use std::{marker::PhantomData, ops::Range, sync::Arc, thread};
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+#[derive(Debug)]
+pub(crate) struct RenderPassInner {
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+    pub(crate) context: Arc<C>,
+}
+
+impl Drop for RenderPassInner {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .render_pass_end(&mut self.id, self.data.as_mut());
+        }
+    }
+}
+
+/// In-progress recording of a render pass: a list of render commands in a [`CommandEncoder`].
+///
+/// It can be created with [`CommandEncoder::begin_render_pass()`], whose [`RenderPassDescriptor`]
+/// specifies the attachments (textures) that will be rendered to.
+///
+/// Most of the methods on `RenderPass` serve one of two purposes, identifiable by their names:
+///
+/// * `draw_*()`: Drawing (that is, encoding a render command, which, when executed by the GPU, will
+///   rasterize something and execute shaders).
+/// * `set_*()`: Setting part of the [render state](https://gpuweb.github.io/gpuweb/#renderstate)
+///   for future drawing commands.
+///
+/// A render pass may contain any number of drawing commands, and before/between each command the
+/// render state may be updated however you wish; each drawing command will be executed using the
+/// render state that has been set when the `draw_*()` function is called.
+///
+/// Corresponds to [WebGPU `GPURenderPassEncoder`](
+/// https://gpuweb.github.io/gpuweb/#render-pass-encoder).
+#[derive(Debug)]
+pub struct RenderPass<'encoder> {
+    /// The inner data of the render pass, separated out so it's easy to replace the lifetime with 'static if desired.
+    pub(crate) inner: RenderPassInner,
+
+    /// This lifetime is used to protect the [`CommandEncoder`] from being used
+    /// while the pass is alive.
+    pub(crate) encoder_guard: PhantomData<&'encoder ()>,
+}
+
+impl<'encoder> RenderPass<'encoder> {
+    /// Drops the lifetime relationship to the parent command encoder, making usage of
+    /// the encoder while this pass is recorded a run-time error instead.
+    ///
+    /// Attention: As long as the render pass has not been ended, any mutating operation on the parent
+    /// command encoder will cause a run-time error and invalidate it!
+    /// By default, the lifetime constraint prevents this, but it can be useful
+    /// to handle this at run time, such as when storing the pass and encoder in the same
+    /// data structure.
+    ///
+    /// This operation has no effect on pass recording.
+    /// It's a safe operation, since [`CommandEncoder`] is in a locked state as long as the pass is active
+    /// regardless of the lifetime constraint or its absence.
+    pub fn forget_lifetime(self) -> RenderPass<'static> {
+        RenderPass {
+            inner: self.inner,
+            encoder_guard: PhantomData,
+        }
+    }
+
+    /// Sets the active bind group for a given bind group index. The bind group layout
+    /// in the active pipeline when any `draw_*()` method is called must match the layout of
+    /// this bind group.
+    ///
+    /// If the bind group has dynamic offsets, provide them in binding order.
+    /// These offsets have to be aligned to [`Limits::min_uniform_buffer_offset_alignment`]
+    /// or [`Limits::min_storage_buffer_offset_alignment`] appropriately.
+    ///
+    /// Subsequent draw calls’ shader executions will be able to access data in these bind groups.
+    pub fn set_bind_group(
+        &mut self,
+        index: u32,
+        bind_group: &BindGroup,
+        offsets: &[DynamicOffset],
+    ) {
+        DynContext::render_pass_set_bind_group(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            index,
+            &bind_group.id,
+            bind_group.data.as_ref(),
+            offsets,
+        )
+    }
+
+    /// Sets the active render pipeline.
+    ///
+    /// Subsequent draw calls will exhibit the behavior defined by `pipeline`.
+    pub fn set_pipeline(&mut self, pipeline: &RenderPipeline) {
+        DynContext::render_pass_set_pipeline(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &pipeline.id,
+            pipeline.data.as_ref(),
+        )
+    }
+
+    /// Sets the blend color as used by some of the blending modes.
+    ///
+    /// Subsequent blending tests will test against this value.
+    /// If this method has not been called, the blend constant defaults to [`Color::TRANSPARENT`]
+    /// (all components zero).
+    pub fn set_blend_constant(&mut self, color: Color) {
+        DynContext::render_pass_set_blend_constant(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            color,
+        )
+    }
+
+    /// Sets the active index buffer.
+    ///
+    /// Subsequent calls to [`draw_indexed`](RenderPass::draw_indexed) on this [`RenderPass`] will
+    /// use `buffer` as the source index buffer.
+    pub fn set_index_buffer(&mut self, buffer_slice: BufferSlice<'_>, index_format: IndexFormat) {
+        DynContext::render_pass_set_index_buffer(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &buffer_slice.buffer.id,
+            buffer_slice.buffer.data.as_ref(),
+            index_format,
+            buffer_slice.offset,
+            buffer_slice.size,
+        )
+    }
+
+    /// Assign a vertex buffer to a slot.
+    ///
+    /// Subsequent calls to [`draw`] and [`draw_indexed`] on this
+    /// [`RenderPass`] will use `buffer` as one of the source vertex buffers.
+    ///
+    /// The `slot` refers to the index of the matching descriptor in
+    /// [`VertexState::buffers`].
+    ///
+    /// [`draw`]: RenderPass::draw
+    /// [`draw_indexed`]: RenderPass::draw_indexed
+    pub fn set_vertex_buffer(&mut self, slot: u32, buffer_slice: BufferSlice<'_>) {
+        DynContext::render_pass_set_vertex_buffer(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            slot,
+            &buffer_slice.buffer.id,
+            buffer_slice.buffer.data.as_ref(),
+            buffer_slice.offset,
+            buffer_slice.size,
+        )
+    }
+
+    /// Sets the scissor rectangle used during the rasterization stage,
+    /// after transformation into [viewport coordinates](https://www.w3.org/TR/webgpu/#viewport-coordinates).
+    ///
+    /// Subsequent draw calls will discard any fragments which fall outside the scissor rectangle.
+    /// If this method has not been called, the scissor rectangle defaults to the entire bounds of
+    /// the render targets.
+    ///
+    /// The function of the scissor rectangle resembles [`set_viewport()`](Self::set_viewport),
+    /// but it does not affect the coordinate system, only which fragments are discarded.
+    pub fn set_scissor_rect(&mut self, x: u32, y: u32, width: u32, height: u32) {
+        DynContext::render_pass_set_scissor_rect(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            x,
+            y,
+            width,
+            height,
+        );
+    }
+
+    /// Sets the viewport used during the rasterization stage to linearly map
+    /// from [normalized device coordinates](https://www.w3.org/TR/webgpu/#ndc) to [viewport coordinates](https://www.w3.org/TR/webgpu/#viewport-coordinates).
+    ///
+    /// Subsequent draw calls will only draw within this region.
+    /// If this method has not been called, the viewport defaults to the entire bounds of the render
+    /// targets.
+    pub fn set_viewport(&mut self, x: f32, y: f32, w: f32, h: f32, min_depth: f32, max_depth: f32) {
+        DynContext::render_pass_set_viewport(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            x,
+            y,
+            w,
+            h,
+            min_depth,
+            max_depth,
+        );
+    }
+
+    /// Sets the stencil reference.
+    ///
+    /// Subsequent stencil tests will test against this value.
+    /// If this method has not been called, the stencil reference value defaults to `0`.
+    pub fn set_stencil_reference(&mut self, reference: u32) {
+        DynContext::render_pass_set_stencil_reference(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            reference,
+        );
+    }
+
+    /// Inserts debug marker.
+    pub fn insert_debug_marker(&mut self, label: &str) {
+        DynContext::render_pass_insert_debug_marker(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            label,
+        );
+    }
+
+    /// Starts recording commands and groups them into a debug marker group.
+    pub fn push_debug_group(&mut self, label: &str) {
+        DynContext::render_pass_push_debug_group(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            label,
+        );
+    }
+
+    /// Stops command recording and creates debug group.
+    pub fn pop_debug_group(&mut self) {
+        DynContext::render_pass_pop_debug_group(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+        );
+    }
+
+    /// Draws primitives from the active vertex buffer(s).
+    ///
+    /// The active vertex buffer(s) can be set with [`RenderPass::set_vertex_buffer`].
+    /// Does not use an Index Buffer. If you need this see [`RenderPass::draw_indexed`]
+    ///
+    /// Panics if the `vertices` range is outside of the range of vertices of any set vertex buffer.
+    ///
+    /// vertices: The range of vertices to draw.
+    /// instances: Range of Instances to draw. Use 0..1 if instance buffers are not used.
+    /// E.g. of how it's used internally:
+    /// ```rust ignore
+    /// for instance_id in instance_range {
+    ///     for vertex_id in vertex_range {
+    ///         let vertex = vertex[vertex_id];
+    ///         vertex_shader(vertex, vertex_id, instance_id);
+    ///     }
+    /// }
+    /// ```
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
+    pub fn draw(&mut self, vertices: Range<u32>, instances: Range<u32>) {
+        DynContext::render_pass_draw(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            vertices,
+            instances,
+        )
+    }
+
+    /// Draws indexed primitives using the active index buffer and the active vertex buffers.
+    ///
+    /// The active index buffer can be set with [`RenderPass::set_index_buffer`]
+    /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
+    ///
+    /// Panics if the `indices` range is outside of the range of indices of any set index buffer.
+    ///
+    /// indices: The range of indices to draw.
+    /// base_vertex: value added to each index value before indexing into the vertex buffers.
+    /// instances: Range of Instances to draw. Use 0..1 if instance buffers are not used.
+    /// E.g. of how it's used internally:
+    /// ```rust ignore
+    /// for instance_id in instance_range {
+    ///     for index_index in index_range {
+    ///         let vertex_id = index_buffer[index_index];
+    ///         let adjusted_vertex_id = vertex_id + base_vertex;
+    ///         let vertex = vertex[adjusted_vertex_id];
+    ///         vertex_shader(vertex, adjusted_vertex_id, instance_id);
+    ///     }
+    /// }
+    /// ```
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
+    pub fn draw_indexed(&mut self, indices: Range<u32>, base_vertex: i32, instances: Range<u32>) {
+        DynContext::render_pass_draw_indexed(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            indices,
+            base_vertex,
+            instances,
+        );
+    }
+
+    /// Draws primitives from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
+    ///
+    /// This is like calling [`RenderPass::draw`] but the contents of the call are specified in the `indirect_buffer`.
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
+    ///
+    /// Indirect drawing has some caveats depending on the features available. We are not currently able to validate
+    /// these and issue an error.
+    /// - If [`Features::INDIRECT_FIRST_INSTANCE`] is not present on the adapter,
+    ///   [`DrawIndirectArgs::first_instance`](crate::util::DrawIndirectArgs::first_instance) will be ignored.
+    /// - If [`DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW`] is not present on the adapter,
+    ///   any use of `@builtin(vertex_index)` or `@builtin(instance_index)` in the vertex shader will have different values.
+    ///
+    /// See details on the individual flags for more information.
+    pub fn draw_indirect(&mut self, indirect_buffer: &Buffer, indirect_offset: BufferAddress) {
+        DynContext::render_pass_draw_indirect(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+        );
+    }
+
+    /// Draws indexed primitives using the active index buffer and the active vertex buffers,
+    /// based on the contents of the `indirect_buffer`.
+    ///
+    /// This is like calling [`RenderPass::draw_indexed`] but the contents of the call are specified in the `indirect_buffer`.
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
+    ///
+    /// Indirect drawing has some caveats depending on the features available. We are not currently able to validate
+    /// these and issue an error.
+    /// - If [`Features::INDIRECT_FIRST_INSTANCE`] is not present on the adapter,
+    ///   [`DrawIndexedIndirectArgs::first_instance`](crate::util::DrawIndexedIndirectArgs::first_instance) will be ignored.
+    /// - If [`DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW`] is not present on the adapter,
+    ///   any use of `@builtin(vertex_index)` or `@builtin(instance_index)` in the vertex shader will have different values.
+    ///
+    /// See details on the individual flags for more information.
+    pub fn draw_indexed_indirect(
+        &mut self,
+        indirect_buffer: &Buffer,
+        indirect_offset: BufferAddress,
+    ) {
+        DynContext::render_pass_draw_indexed_indirect(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+        );
+    }
+
+    /// Execute a [render bundle][RenderBundle], which is a set of pre-recorded commands
+    /// that can be run together.
+    ///
+    /// Commands in the bundle do not inherit this render pass's current render state, and after the
+    /// bundle has executed, the state is **cleared** (reset to defaults, not the previous state).
+    pub fn execute_bundles<'a, I: IntoIterator<Item = &'a RenderBundle>>(
+        &mut self,
+        render_bundles: I,
+    ) {
+        let mut render_bundles = render_bundles
+            .into_iter()
+            .map(|rb| (&rb.id, rb.data.as_ref()));
+
+        DynContext::render_pass_execute_bundles(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &mut render_bundles,
+        )
+    }
+}
+
+/// [`Features::MULTI_DRAW_INDIRECT`] must be enabled on the device in order to call these functions.
+impl<'encoder> RenderPass<'encoder> {
+    /// Dispatches multiple draw calls from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
+    /// `count` draw calls are issued.
+    ///
+    /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
+    /// These draw structures are expected to be tightly packed.
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
+    pub fn multi_draw_indirect(
+        &mut self,
+        indirect_buffer: &Buffer,
+        indirect_offset: BufferAddress,
+        count: u32,
+    ) {
+        DynContext::render_pass_multi_draw_indirect(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+            count,
+        );
+    }
+
+    /// Dispatches multiple draw calls from the active index buffer and the active vertex buffers,
+    /// based on the contents of the `indirect_buffer`. `count` draw calls are issued.
+    ///
+    /// The active index buffer can be set with [`RenderPass::set_index_buffer`], while the active
+    /// vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
+    /// These draw structures are expected to be tightly packed.
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
+    pub fn multi_draw_indexed_indirect(
+        &mut self,
+        indirect_buffer: &Buffer,
+        indirect_offset: BufferAddress,
+        count: u32,
+    ) {
+        DynContext::render_pass_multi_draw_indexed_indirect(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+            count,
+        );
+    }
+}
+
+/// [`Features::MULTI_DRAW_INDIRECT_COUNT`] must be enabled on the device in order to call these functions.
+impl<'encoder> RenderPass<'encoder> {
+    /// Dispatches multiple draw calls from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
+    /// The count buffer is read to determine how many draws to issue.
+    ///
+    /// The indirect buffer must be long enough to account for `max_count` draws, however only `count`
+    /// draws will be read. If `count` is greater than `max_count`, `max_count` will be used.
+    ///
+    /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
+    /// These draw structures are expected to be tightly packed.
+    ///
+    /// The structure expected in `count_buffer` is the following:
+    ///
+    /// ```rust
+    /// #[repr(C)]
+    /// struct DrawIndirectCount {
+    ///     count: u32, // Number of draw calls to issue.
+    /// }
+    /// ```
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
+    pub fn multi_draw_indirect_count(
+        &mut self,
+        indirect_buffer: &Buffer,
+        indirect_offset: BufferAddress,
+        count_buffer: &Buffer,
+        count_offset: BufferAddress,
+        max_count: u32,
+    ) {
+        DynContext::render_pass_multi_draw_indirect_count(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+            &count_buffer.id,
+            count_buffer.data.as_ref(),
+            count_offset,
+            max_count,
+        );
+    }
+
+    /// Dispatches multiple draw calls from the active index buffer and the active vertex buffers,
+    /// based on the contents of the `indirect_buffer`. The count buffer is read to determine how many draws to issue.
+    ///
+    /// The indirect buffer must be long enough to account for `max_count` draws, however only `count`
+    /// draws will be read. If `count` is greater than `max_count`, `max_count` will be used.
+    ///
+    /// The active index buffer can be set with [`RenderPass::set_index_buffer`], while the active
+    /// vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
+    ///
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
+    ///
+    /// These draw structures are expected to be tightly packed.
+    ///
+    /// The structure expected in `count_buffer` is the following:
+    ///
+    /// ```rust
+    /// #[repr(C)]
+    /// struct DrawIndexedIndirectCount {
+    ///     count: u32, // Number of draw calls to issue.
+    /// }
+    /// ```
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
+    pub fn multi_draw_indexed_indirect_count(
+        &mut self,
+        indirect_buffer: &Buffer,
+        indirect_offset: BufferAddress,
+        count_buffer: &Buffer,
+        count_offset: BufferAddress,
+        max_count: u32,
+    ) {
+        DynContext::render_pass_multi_draw_indexed_indirect_count(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+            &count_buffer.id,
+            count_buffer.data.as_ref(),
+            count_offset,
+            max_count,
+        );
+    }
+}
+
+/// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions.
+impl<'encoder> RenderPass<'encoder> {
+    /// Set push constant data for subsequent draw calls.
+    ///
+    /// Write the bytes in `data` at offset `offset` within push constant
+    /// storage, all of which are accessible by all the pipeline stages in
+    /// `stages`, and no others.  Both `offset` and the length of `data` must be
+    /// multiples of [`PUSH_CONSTANT_ALIGNMENT`], which is always 4.
+    ///
+    /// For example, if `offset` is `4` and `data` is eight bytes long, this
+    /// call will write `data` to bytes `4..12` of push constant storage.
+    ///
+    /// # Stage matching
+    ///
+    /// Every byte in the affected range of push constant storage must be
+    /// accessible to exactly the same set of pipeline stages, which must match
+    /// `stages`. If there are two bytes of storage that are accessible by
+    /// different sets of pipeline stages - say, one is accessible by fragment
+    /// shaders, and the other is accessible by both fragment shaders and vertex
+    /// shaders - then no single `set_push_constants` call may affect both of
+    /// them; to write both, you must make multiple calls, each with the
+    /// appropriate `stages` value.
+    ///
+    /// Which pipeline stages may access a given byte is determined by the
+    /// pipeline's [`PushConstant`] global variable and (if it is a struct) its
+    /// members' offsets.
+    ///
+    /// For example, suppose you have twelve bytes of push constant storage,
+    /// where bytes `0..8` are accessed by the vertex shader, and bytes `4..12`
+    /// are accessed by the fragment shader. This means there are three byte
+    /// ranges each accessed by a different set of stages:
+    ///
+    /// - Bytes `0..4` are accessed only by the vertex shader.
+    ///
+    /// - Bytes `4..8` are accessed by both the fragment shader and the vertex shader.
+    ///
+    /// - Bytes `8..12` are accessed only by the fragment shader.
+    ///
+    /// To write all twelve bytes requires three `set_push_constants` calls, one
+    /// for each range, each passing the matching `stages` mask.
+    ///
+    /// [`PushConstant`]: https://docs.rs/naga/latest/naga/enum.AddressSpace.html#variant.PushConstant
+    pub fn set_push_constants(&mut self, stages: ShaderStages, offset: u32, data: &[u8]) {
+        DynContext::render_pass_set_push_constants(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            stages,
+            offset,
+            data,
+        );
+    }
+}
+
+/// [`Features::TIMESTAMP_QUERY_INSIDE_PASSES`] must be enabled on the device in order to call these functions.
+impl<'encoder> RenderPass<'encoder> {
+    /// Issue a timestamp command at this point in the queue. The
+    /// timestamp will be written to the specified query set, at the specified index.
+    ///
+    /// Must be multiplied by [`Queue::get_timestamp_period`] to get
+    /// the value in nanoseconds. Absolute values have no meaning,
+    /// but timestamps can be subtracted to get the time it takes
+    /// for a string of operations to complete.
+    pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
+        DynContext::render_pass_write_timestamp(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &query_set.id,
+            query_set.data.as_ref(),
+            query_index,
+        )
+    }
+}
+
+impl<'encoder> RenderPass<'encoder> {
+    /// Start an occlusion query on this render pass. It can be ended with
+    /// `end_occlusion_query`. Occlusion queries may not be nested.
+    pub fn begin_occlusion_query(&mut self, query_index: u32) {
+        DynContext::render_pass_begin_occlusion_query(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            query_index,
+        );
+    }
+
+    /// End the occlusion query on this render pass. It can be started with
+    /// `begin_occlusion_query`. Occlusion queries may not be nested.
+    pub fn end_occlusion_query(&mut self) {
+        DynContext::render_pass_end_occlusion_query(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+        );
+    }
+}
+
+/// [`Features::PIPELINE_STATISTICS_QUERY`] must be enabled on the device in order to call these functions.
+impl<'encoder> RenderPass<'encoder> {
+    /// Start a pipeline statistics query on this render pass. It can be ended with
+    /// `end_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
+    pub fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, query_index: u32) {
+        DynContext::render_pass_begin_pipeline_statistics_query(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &query_set.id,
+            query_set.data.as_ref(),
+            query_index,
+        );
+    }
+
+    /// End the pipeline statistics query on this render pass. It can be started with
+    /// `begin_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
+    pub fn end_pipeline_statistics_query(&mut self) {
+        DynContext::render_pass_end_pipeline_statistics_query(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+        );
+    }
+}
+
+/// Operation to perform to the output attachment at the start of a render pass.
+///
+/// Corresponds to [WebGPU `GPULoadOp`](https://gpuweb.github.io/gpuweb/#enumdef-gpuloadop),
+/// plus the corresponding clearValue.
+#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub enum LoadOp<V> {
+    /// Loads the specified value for this attachment into the render pass.
+    ///
+    /// On some GPU hardware (primarily mobile), "clear" is significantly cheaper
+    /// because it avoids loading data from main memory into tile-local memory.
+    ///
+    /// On other GPU hardware, there isn’t a significant difference.
+    ///
+    /// As a result, it is recommended to use "clear" rather than "load" in cases
+    /// where the initial value doesn’t matter
+    /// (e.g. the render target will be cleared using a skybox).
+    Clear(V),
+    /// Loads the existing value for this attachment into the render pass.
+    Load,
+}
+
+impl<V: Default> Default for LoadOp<V> {
+    fn default() -> Self {
+        Self::Clear(Default::default())
+    }
+}
+
+/// Operation to perform to the output attachment at the end of a render pass.
+///
+/// Corresponds to [WebGPU `GPUStoreOp`](https://gpuweb.github.io/gpuweb/#enumdef-gpustoreop).
+#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Default)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub enum StoreOp {
+    /// Stores the resulting value of the render pass for this attachment.
+    #[default]
+    Store,
+    /// Discards the resulting value of the render pass for this attachment.
+    ///
+    /// The attachment will be treated as uninitialized afterwards.
+    /// (If only one of the Depth or Stencil texture aspects is set to `Discard`,
+    /// the other texture aspect will be preserved.)
+    ///
+    /// This can be significantly faster on tile-based render hardware.
+    ///
+    /// Prefer this if the attachment is not read by subsequent passes.
+    Discard,
+}
+
+/// Pair of load and store operations for an attachment aspect.
+///
+/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
+/// separate `loadOp` and `storeOp` fields are used instead.
+#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub struct Operations<V> {
+    /// How data should be read through this attachment.
+    pub load: LoadOp<V>,
+    /// Whether data will be written to through this attachment.
+    ///
+    /// Note that resolve textures (if specified) are always written to,
+    /// regardless of this setting.
+    pub store: StoreOp,
+}
+
+impl<V: Default> Default for Operations<V> {
+    #[inline]
+    fn default() -> Self {
+        Self {
+            load: LoadOp::<V>::default(),
+            store: StoreOp::default(),
+        }
+    }
+}
+
+/// Describes the timestamp writes of a render pass.
+///
+/// For use with [`RenderPassDescriptor`].
+/// At least one of `beginning_of_pass_write_index` and `end_of_pass_write_index` must be `Some`.
+///
+/// Corresponds to [WebGPU `GPURenderPassTimestampWrites`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpasstimestampwrites).
+#[derive(Clone, Debug)]
+pub struct RenderPassTimestampWrites<'a> {
+    /// The query set to write to.
+    pub query_set: &'a QuerySet,
+    /// The index of the query set at which a start timestamp of this pass is written, if any.
+    pub beginning_of_pass_write_index: Option<u32>,
+    /// The index of the query set at which an end timestamp of this pass is written, if any.
+    pub end_of_pass_write_index: Option<u32>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderPassTimestampWrites<'_>: Send, Sync);
+
+/// Describes a color attachment to a [`RenderPass`].
+///
+/// For use with [`RenderPassDescriptor`].
+///
+/// Corresponds to [WebGPU `GPURenderPassColorAttachment`](
+/// https://gpuweb.github.io/gpuweb/#color-attachments).
+#[derive(Clone, Debug)]
+pub struct RenderPassColorAttachment<'tex> {
+    /// The view to use as an attachment.
+    pub view: &'tex TextureView,
+    /// The view that will receive the resolved output if multisampling is used.
+    ///
+    /// If set, it is always written to, regardless of how [`Self::ops`] is configured.
+    pub resolve_target: Option<&'tex TextureView>,
+    /// What operations will be performed on this color attachment.
+    pub ops: Operations<Color>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderPassColorAttachment<'_>: Send, Sync);
+
+/// Describes a depth/stencil attachment to a [`RenderPass`].
+///
+/// For use with [`RenderPassDescriptor`].
+///
+/// Corresponds to [WebGPU `GPURenderPassDepthStencilAttachment`](
+/// https://gpuweb.github.io/gpuweb/#depth-stencil-attachments).
+#[derive(Clone, Debug)]
+pub struct RenderPassDepthStencilAttachment<'tex> {
+    /// The view to use as an attachment.
+    pub view: &'tex TextureView,
+    /// What operations will be performed on the depth part of the attachment.
+    pub depth_ops: Option<Operations<f32>>,
+    /// What operations will be performed on the stencil part of the attachment.
+    pub stencil_ops: Option<Operations<u32>>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderPassDepthStencilAttachment<'_>: Send, Sync);
+
+/// Describes the attachments of a render pass.
+///
+/// For use with [`CommandEncoder::begin_render_pass`].
+///
+/// Corresponds to [WebGPU `GPURenderPassDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpassdescriptor).
+#[derive(Clone, Debug, Default)]
+pub struct RenderPassDescriptor<'a> {
+    /// Debug label of the render pass. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// The color attachments of the render pass.
+    pub color_attachments: &'a [Option<RenderPassColorAttachment<'a>>],
+    /// The depth and stencil attachment of the render pass, if any.
+    pub depth_stencil_attachment: Option<RenderPassDepthStencilAttachment<'a>>,
+    /// Defines which timestamp values will be written for this pass, and where to write them to.
+    ///
+    /// Requires [`Features::TIMESTAMP_QUERY`] to be enabled.
+    pub timestamp_writes: Option<RenderPassTimestampWrites<'a>>,
+    /// Defines where the occlusion query results will be stored for this pass.
+    pub occlusion_query_set: Option<&'a QuerySet>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderPassDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/render_pipeline.rs b/wgpu/src/api/render_pipeline.rs
new file mode 100644
index 0000000000..2b81aa95a7
--- /dev/null
+++ b/wgpu/src/api/render_pipeline.rs
@@ -0,0 +1,141 @@
+use std::{num::NonZeroU32, sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a rendering (graphics) pipeline.
+///
+/// A `RenderPipeline` object represents a graphics pipeline and its stages, bindings, vertex
+/// buffers and targets. It can be created with [`Device::create_render_pipeline`].
+///
+/// Corresponds to [WebGPU `GPURenderPipeline`](https://gpuweb.github.io/gpuweb/#render-pipeline).
+#[derive(Debug)]
+pub struct RenderPipeline {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderPipeline: Send, Sync);
+
+impl Drop for RenderPipeline {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .render_pipeline_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+impl RenderPipeline {
+    /// Returns a globally-unique identifier for this `RenderPipeline`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+
+    /// Get an object representing the bind group layout at a given index.
+    pub fn get_bind_group_layout(&self, index: u32) -> BindGroupLayout {
+        let context = Arc::clone(&self.context);
+        let (id, data) =
+            self.context
+                .render_pipeline_get_bind_group_layout(&self.id, self.data.as_ref(), index);
+        BindGroupLayout { context, id, data }
+    }
+}
+
+/// Describes how the vertex buffer is interpreted.
+///
+/// For use in [`VertexState`].
+///
+/// Corresponds to [WebGPU `GPUVertexBufferLayout`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpuvertexbufferlayout).
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+pub struct VertexBufferLayout<'a> {
+    /// The stride, in bytes, between elements of this buffer.
+    pub array_stride: BufferAddress,
+    /// How often this vertex buffer is "stepped" forward.
+    pub step_mode: VertexStepMode,
+    /// The list of attributes which comprise a single vertex.
+    pub attributes: &'a [VertexAttribute],
+}
+static_assertions::assert_impl_all!(VertexBufferLayout<'_>: Send, Sync);
+
+/// Describes the vertex processing in a render pipeline.
+///
+/// For use in [`RenderPipelineDescriptor`].
+///
+/// Corresponds to [WebGPU `GPUVertexState`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpuvertexstate).
+#[derive(Clone, Debug)]
+pub struct VertexState<'a> {
+    /// The compiled shader module for this stage.
+    pub module: &'a ShaderModule,
+    /// The name of the entry point in the compiled shader. There must be a function with this name
+    /// in the shader.
+    pub entry_point: &'a str,
+    /// Advanced options for when this pipeline is compiled
+    ///
+    /// This implements `Default`, and for most users can be set to `Default::default()`
+    pub compilation_options: PipelineCompilationOptions<'a>,
+    /// The format of any vertex buffers used with this pipeline.
+    pub buffers: &'a [VertexBufferLayout<'a>],
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(VertexState<'_>: Send, Sync);
+
+/// Describes the fragment processing in a render pipeline.
+///
+/// For use in [`RenderPipelineDescriptor`].
+///
+/// Corresponds to [WebGPU `GPUFragmentState`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpufragmentstate).
+#[derive(Clone, Debug)]
+pub struct FragmentState<'a> {
+    /// The compiled shader module for this stage.
+    pub module: &'a ShaderModule,
+    /// The name of the entry point in the compiled shader. There must be a function with this name
+    /// in the shader.
+    pub entry_point: &'a str,
+    /// Advanced options for when this pipeline is compiled
+    ///
+    /// This implements `Default`, and for most users can be set to `Default::default()`
+    pub compilation_options: PipelineCompilationOptions<'a>,
+    /// The color state of the render targets.
+    pub targets: &'a [Option<ColorTargetState>],
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(FragmentState<'_>: Send, Sync);
+
+/// Describes a render (graphics) pipeline.
+///
+/// For use with [`Device::create_render_pipeline`].
+///
+/// Corresponds to [WebGPU `GPURenderPipelineDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpipelinedescriptor).
+#[derive(Clone, Debug)]
+pub struct RenderPipelineDescriptor<'a> {
+    /// Debug label of the pipeline. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// The layout of bind groups for this pipeline.
+    pub layout: Option<&'a PipelineLayout>,
+    /// The compiled vertex stage, its entry point, and the input buffers layout.
+    pub vertex: VertexState<'a>,
+    /// The properties of the pipeline at the primitive assembly and rasterization level.
+    pub primitive: PrimitiveState,
+    /// The effect of draw calls on the depth and stencil aspects of the output target, if any.
+    pub depth_stencil: Option<DepthStencilState>,
+    /// The multi-sampling properties of the pipeline.
+    pub multisample: MultisampleState,
+    /// The compiled fragment stage, its entry point, and the color targets.
+    pub fragment: Option<FragmentState<'a>>,
+    /// If the pipeline will be used with a multiview render pass, this indicates how many array
+    /// layers the attachments will have.
+    pub multiview: Option<NonZeroU32>,
+    /// The pipeline cache to use when creating this pipeline.
+    pub cache: Option<&'a PipelineCache>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderPipelineDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/sampler.rs b/wgpu/src/api/sampler.rs
new file mode 100644
index 0000000000..63267ded5d
--- /dev/null
+++ b/wgpu/src/api/sampler.rs
@@ -0,0 +1,94 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a sampler.
+///
+/// A `Sampler` object defines how a pipeline will sample from a [`TextureView`]. Samplers define
+/// image filters (including anisotropy) and address (wrapping) modes, among other things. See
+/// the documentation for [`SamplerDescriptor`] for more information.
+///
+/// It can be created with [`Device::create_sampler`].
+///
+/// Corresponds to [WebGPU `GPUSampler`](https://gpuweb.github.io/gpuweb/#sampler-interface).
+#[derive(Debug)]
+pub struct Sampler {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Sampler: Send, Sync);
+
+impl Sampler {
+    /// Returns a globally-unique identifier for this `Sampler`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+}
+
+impl Drop for Sampler {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.sampler_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Describes a [`Sampler`].
+///
+/// For use with [`Device::create_sampler`].
+///
+/// Corresponds to [WebGPU `GPUSamplerDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpusamplerdescriptor).
+#[derive(Clone, Debug, PartialEq)]
+pub struct SamplerDescriptor<'a> {
+    /// Debug label of the sampler. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// How to deal with out of bounds accesses in the u (i.e. x) direction
+    pub address_mode_u: AddressMode,
+    /// How to deal with out of bounds accesses in the v (i.e. y) direction
+    pub address_mode_v: AddressMode,
+    /// How to deal with out of bounds accesses in the w (i.e. z) direction
+    pub address_mode_w: AddressMode,
+    /// How to filter the texture when it needs to be magnified (made larger)
+    pub mag_filter: FilterMode,
+    /// How to filter the texture when it needs to be minified (made smaller)
+    pub min_filter: FilterMode,
+    /// How to filter between mip map levels
+    pub mipmap_filter: FilterMode,
+    /// Minimum level of detail (i.e. mip level) to use
+    pub lod_min_clamp: f32,
+    /// Maximum level of detail (i.e. mip level) to use
+    pub lod_max_clamp: f32,
+    /// If this is enabled, this is a comparison sampler using the given comparison function.
+    pub compare: Option<CompareFunction>,
+    /// Must be at least 1. If this is not 1, all filter modes must be linear.
+    pub anisotropy_clamp: u16,
+    /// Border color to use when address_mode is [`AddressMode::ClampToBorder`]
+    pub border_color: Option<SamplerBorderColor>,
+}
+static_assertions::assert_impl_all!(SamplerDescriptor<'_>: Send, Sync);
+
+impl Default for SamplerDescriptor<'_> {
+    fn default() -> Self {
+        Self {
+            label: None,
+            address_mode_u: Default::default(),
+            address_mode_v: Default::default(),
+            address_mode_w: Default::default(),
+            mag_filter: Default::default(),
+            min_filter: Default::default(),
+            mipmap_filter: Default::default(),
+            lod_min_clamp: 0.0,
+            lod_max_clamp: 32.0,
+            compare: None,
+            anisotropy_clamp: 1,
+            border_color: None,
+        }
+    }
+}
diff --git a/wgpu/src/api/shader_module.rs b/wgpu/src/api/shader_module.rs
new file mode 100644
index 0000000000..d81562e932
--- /dev/null
+++ b/wgpu/src/api/shader_module.rs
@@ -0,0 +1,249 @@
+use std::{borrow::Cow, future::Future, marker::PhantomData, sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a compiled shader module.
+///
+/// A `ShaderModule` represents a compiled shader module on the GPU. It can be created by passing
+/// source code to [`Device::create_shader_module`] or valid SPIR-V binary to
+/// [`Device::create_shader_module_spirv`]. Shader modules are used to define programmable stages
+/// of a pipeline.
+///
+/// Corresponds to [WebGPU `GPUShaderModule`](https://gpuweb.github.io/gpuweb/#shader-module).
+#[derive(Debug)]
+pub struct ShaderModule {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ShaderModule: Send, Sync);
+
+impl Drop for ShaderModule {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .shader_module_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+impl ShaderModule {
+    /// Returns a globally-unique identifier for this `ShaderModule`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+
+    /// Get the compilation info for the shader module.
+    pub fn get_compilation_info(&self) -> impl Future<Output = CompilationInfo> + WasmNotSend {
+        self.context
+            .shader_get_compilation_info(&self.id, self.data.as_ref())
+    }
+}
+
+/// Compilation information for a shader module.
+///
+/// Corresponds to [WebGPU `GPUCompilationInfo`](https://gpuweb.github.io/gpuweb/#gpucompilationinfo).
+/// The source locations use bytes, and index a UTF-8 encoded string.
+#[derive(Debug, Clone)]
+pub struct CompilationInfo {
+    /// The messages from the shader compilation process.
+    pub messages: Vec<CompilationMessage>,
+}
+
+/// A single message from the shader compilation process.
+///
+/// Roughly corresponds to [`GPUCompilationMessage`](https://www.w3.org/TR/webgpu/#gpucompilationmessage),
+/// except that the location uses UTF-8 for all positions.
+#[derive(Debug, Clone)]
+pub struct CompilationMessage {
+    /// The text of the message.
+    pub message: String,
+    /// The type of the message.
+    pub message_type: CompilationMessageType,
+    /// Where in the source code the message points at.
+    pub location: Option<SourceLocation>,
+}
+
+/// The type of a compilation message.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CompilationMessageType {
+    /// An error message.
+    Error,
+    /// A warning message.
+    Warning,
+    /// An informational message.
+    Info,
+}
+
+/// A human-readable representation for a span, tailored for text source.
+///
+/// Roughly corresponds to the positional members of [`GPUCompilationMessage`][gcm] from
+/// the WebGPU specification, except
+/// - `offset` and `length` are in bytes (UTF-8 code units), instead of UTF-16 code units.
+/// - `line_position` is in bytes (UTF-8 code units), and is usually not directly intended for humans.
+///
+/// [gcm]: https://www.w3.org/TR/webgpu/#gpucompilationmessage
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct SourceLocation {
+    /// 1-based line number.
+    pub line_number: u32,
+    /// 1-based column in code units (in bytes) of the start of the span.
+    /// Remember to convert accordingly when displaying to the user.
+    pub line_position: u32,
+    /// 0-based offset in code units (in bytes) of the start of the span.
+    pub offset: u32,
+    /// Length in code units (in bytes) of the span.
+    pub length: u32,
+}
+
+#[cfg(all(feature = "wgsl", wgpu_core))]
+impl From<crate::naga::error::ShaderError<crate::naga::front::wgsl::ParseError>>
+    for CompilationInfo
+{
+    fn from(value: crate::naga::error::ShaderError<crate::naga::front::wgsl::ParseError>) -> Self {
+        CompilationInfo {
+            messages: vec![CompilationMessage {
+                message: value.to_string(),
+                message_type: CompilationMessageType::Error,
+                location: value.inner.location(&value.source).map(Into::into),
+            }],
+        }
+    }
+}
+#[cfg(feature = "glsl")]
+impl From<naga::error::ShaderError<naga::front::glsl::ParseErrors>> for CompilationInfo {
+    fn from(value: naga::error::ShaderError<naga::front::glsl::ParseErrors>) -> Self {
+        let messages = value
+            .inner
+            .errors
+            .into_iter()
+            .map(|err| CompilationMessage {
+                message: err.to_string(),
+                message_type: CompilationMessageType::Error,
+                location: err.location(&value.source).map(Into::into),
+            })
+            .collect();
+        CompilationInfo { messages }
+    }
+}
+
+#[cfg(feature = "spirv")]
+impl From<naga::error::ShaderError<naga::front::spv::Error>> for CompilationInfo {
+    fn from(value: naga::error::ShaderError<naga::front::spv::Error>) -> Self {
+        CompilationInfo {
+            messages: vec![CompilationMessage {
+                message: value.to_string(),
+                message_type: CompilationMessageType::Error,
+                location: None,
+            }],
+        }
+    }
+}
+
+#[cfg(any(wgpu_core, naga))]
+impl
+    From<
+        crate::naga::error::ShaderError<crate::naga::WithSpan<crate::naga::valid::ValidationError>>,
+    > for CompilationInfo
+{
+    fn from(
+        value: crate::naga::error::ShaderError<
+            crate::naga::WithSpan<crate::naga::valid::ValidationError>,
+        >,
+    ) -> Self {
+        CompilationInfo {
+            messages: vec![CompilationMessage {
+                message: value.to_string(),
+                message_type: CompilationMessageType::Error,
+                location: value.inner.location(&value.source).map(Into::into),
+            }],
+        }
+    }
+}
+
+#[cfg(any(wgpu_core, naga))]
+impl From<crate::naga::SourceLocation> for SourceLocation {
+    fn from(value: crate::naga::SourceLocation) -> Self {
+        SourceLocation {
+            length: value.length,
+            offset: value.offset,
+            line_number: value.line_number,
+            line_position: value.line_position,
+        }
+    }
+}
+
+/// Source of a shader module.
+///
+/// The source will be parsed and validated.
+///
+/// Any necessary shader translation (e.g. from WGSL to SPIR-V or vice versa)
+/// will be done internally by wgpu.
+///
+/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
+/// only WGSL source code strings are accepted.
+#[cfg_attr(feature = "naga-ir", allow(clippy::large_enum_variant))]
+#[derive(Clone, Debug)]
+#[non_exhaustive]
+pub enum ShaderSource<'a> {
+    /// SPIR-V module represented as a slice of words.
+    ///
+    /// See also: [`util::make_spirv`], [`include_spirv`]
+    #[cfg(feature = "spirv")]
+    SpirV(Cow<'a, [u32]>),
+    /// GLSL module as a string slice.
+    ///
+    /// Note: GLSL is not yet fully supported and must be a specific ShaderStage.
+    #[cfg(feature = "glsl")]
+    Glsl {
+        /// The source code of the shader.
+        shader: Cow<'a, str>,
+        /// The shader stage that the shader targets. For example, `naga::ShaderStage::Vertex`
+        stage: naga::ShaderStage,
+        /// Defines to unlock configured shader features.
+        defines: naga::FastHashMap<String, String>,
+    },
+    /// WGSL module as a string slice.
+    #[cfg(feature = "wgsl")]
+    Wgsl(Cow<'a, str>),
+    /// Naga module.
+    #[cfg(feature = "naga-ir")]
+    Naga(Cow<'static, naga::Module>),
+    /// Dummy variant because `Naga` doesn't have a lifetime and without enough active features it
+    /// could be the last one active.
+    #[doc(hidden)]
+    Dummy(PhantomData<&'a ()>),
+}
+static_assertions::assert_impl_all!(ShaderSource<'_>: Send, Sync);
+
+/// Descriptor for use with [`Device::create_shader_module`].
+///
+/// Corresponds to [WebGPU `GPUShaderModuleDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpushadermoduledescriptor).
+#[derive(Clone, Debug)]
+pub struct ShaderModuleDescriptor<'a> {
+    /// Debug label of the shader module. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// Source code for the shader.
+    pub source: ShaderSource<'a>,
+}
+static_assertions::assert_impl_all!(ShaderModuleDescriptor<'_>: Send, Sync);
+
+/// Descriptor for a shader module given by SPIR-V binary, for use with
+/// [`Device::create_shader_module_spirv`].
+///
+/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
+/// only WGSL source code strings are accepted.
+#[derive(Debug)]
+pub struct ShaderModuleDescriptorSpirV<'a> {
+    /// Debug label of the shader module. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// Binary SPIR-V data, in 4-byte words.
+    pub source: Cow<'a, [u32]>,
+}
+static_assertions::assert_impl_all!(ShaderModuleDescriptorSpirV<'_>: Send, Sync);
diff --git a/wgpu/src/api/surface.rs b/wgpu/src/api/surface.rs
new file mode 100644
index 0000000000..9c7e056aaf
--- /dev/null
+++ b/wgpu/src/api/surface.rs
@@ -0,0 +1,425 @@
+use std::{error, fmt, sync::Arc, thread};
+
+use parking_lot::Mutex;
+use raw_window_handle::{HasDisplayHandle, HasWindowHandle};
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+/// Describes a [`Surface`].
+///
+/// For use with [`Surface::configure`].
+///
+/// Corresponds to [WebGPU `GPUCanvasConfiguration`](
+/// https://gpuweb.github.io/gpuweb/#canvas-configuration).
+pub type SurfaceConfiguration = wgt::SurfaceConfiguration<Vec<TextureFormat>>;
+static_assertions::assert_impl_all!(SurfaceConfiguration: Send, Sync);
+
+/// Handle to a presentable surface.
+///
+/// A `Surface` represents a platform-specific surface (e.g. a window) onto which rendered images may
+/// be presented. A `Surface` may be created with the function [`Instance::create_surface`].
+///
+/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
+/// [`GPUCanvasContext`](https://gpuweb.github.io/gpuweb/#canvas-context)
+/// serves a similar role.
+pub struct Surface<'window> {
+    pub(crate) context: Arc<C>,
+
+    /// Optionally, keep the source of the handle used for the surface alive.
+    ///
+    /// This is useful for platforms where the surface is created from a window and the surface
+    /// would become invalid when the window is dropped.
+    pub(crate) _handle_source: Option<Box<dyn WindowHandle + 'window>>,
+
+    /// Wgpu-core surface id.
+    pub(crate) id: ObjectId,
+
+    /// Additional surface data returned by [`DynContext::instance_create_surface`].
+    pub(crate) surface_data: Box<Data>,
+
+    // Stores the latest `SurfaceConfiguration` that was set using `Surface::configure`.
+    // It is required to set the attributes of the `SurfaceTexture` in the
+    // `Surface::get_current_texture` method.
+    // Because the `Surface::configure` method operates on an immutable reference this type has to
+    // be wrapped in a mutex and since the configuration is only supplied after the surface has
+    // been created it is additionally wrapped in an option.
+    pub(crate) config: Mutex<Option<SurfaceConfiguration>>,
+}
+
+impl Surface<'_> {
+    /// Returns a globally-unique identifier for this `Surface`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Surface<'_>> {
+        Id::new(self.id)
+    }
+
+    /// Returns the capabilities of the surface when used with the given adapter.
+    ///
+    /// Returns specified values (see [`SurfaceCapabilities`]) if surface is incompatible with the adapter.
+    pub fn get_capabilities(&self, adapter: &Adapter) -> SurfaceCapabilities {
+        DynContext::surface_get_capabilities(
+            &*self.context,
+            &self.id,
+            self.surface_data.as_ref(),
+            &adapter.id,
+            adapter.data.as_ref(),
+        )
+    }
+
+    /// Return a default `SurfaceConfiguration` from width and height to use for the [`Surface`] with this adapter.
+    ///
+    /// Returns `None` if the surface isn't supported by this adapter.
+    pub fn get_default_config(
+        &self,
+        adapter: &Adapter,
+        width: u32,
+        height: u32,
+    ) -> Option<SurfaceConfiguration> {
+        let caps = self.get_capabilities(adapter);
+        Some(SurfaceConfiguration {
+            usage: wgt::TextureUsages::RENDER_ATTACHMENT,
+            format: *caps.formats.first()?,
+            width,
+            height,
+            desired_maximum_frame_latency: 2,
+            present_mode: *caps.present_modes.first()?,
+            alpha_mode: wgt::CompositeAlphaMode::Auto,
+            view_formats: vec![],
+        })
+    }
+
+    /// Initializes [`Surface`] for presentation.
+    ///
+    /// # Panics
+    ///
+    /// - An old [`SurfaceTexture`] is still alive referencing an old surface.
+    /// - Texture format requested is unsupported on the surface.
+    /// - `config.width` or `config.height` is zero.
+    pub fn configure(&self, device: &Device, config: &SurfaceConfiguration) {
+        DynContext::surface_configure(
+            &*self.context,
+            &self.id,
+            self.surface_data.as_ref(),
+            &device.id,
+            device.data.as_ref(),
+            config,
+        );
+
+        let mut conf = self.config.lock();
+        *conf = Some(config.clone());
+    }
+
+    /// Returns the next texture to be presented by the swapchain for drawing.
+    ///
+    /// In order to present the [`SurfaceTexture`] returned by this method,
+    /// first a [`Queue::submit`] needs to be done with some work rendering to this texture.
+    /// Then [`SurfaceTexture::present`] needs to be called.
+    ///
+    /// If a SurfaceTexture referencing this surface is alive when the swapchain is recreated,
+    /// recreating the swapchain will panic.
+    pub fn get_current_texture(&self) -> Result<SurfaceTexture, SurfaceError> {
+        let (texture_id, texture_data, status, detail) = DynContext::surface_get_current_texture(
+            &*self.context,
+            &self.id,
+            self.surface_data.as_ref(),
+        );
+
+        let suboptimal = match status {
+            SurfaceStatus::Good => false,
+            SurfaceStatus::Suboptimal => true,
+            SurfaceStatus::Timeout => return Err(SurfaceError::Timeout),
+            SurfaceStatus::Outdated => return Err(SurfaceError::Outdated),
+            SurfaceStatus::Lost => return Err(SurfaceError::Lost),
+        };
+
+        let guard = self.config.lock();
+        let config = guard
+            .as_ref()
+            .expect("This surface has not been configured yet.");
+
+        let descriptor = TextureDescriptor {
+            label: None,
+            size: Extent3d {
+                width: config.width,
+                height: config.height,
+                depth_or_array_layers: 1,
+            },
+            format: config.format,
+            usage: config.usage,
+            mip_level_count: 1,
+            sample_count: 1,
+            dimension: TextureDimension::D2,
+            view_formats: &[],
+        };
+
+        texture_id
+            .zip(texture_data)
+            .map(|(id, data)| SurfaceTexture {
+                texture: Texture {
+                    context: Arc::clone(&self.context),
+                    id,
+                    data,
+                    owned: false,
+                    descriptor,
+                },
+                suboptimal,
+                presented: false,
+                detail,
+            })
+            .ok_or(SurfaceError::Lost)
+    }
+
+    /// Returns the inner hal Surface using a callback. The hal surface will be `None` if the
+    /// backend type argument does not match with this wgpu Surface
+    ///
+    /// # Safety
+    ///
+    /// - The raw handle obtained from the hal Surface must not be manually destroyed
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Surface>) -> R, R>(
+        &mut self,
+        hal_surface_callback: F,
+    ) -> Option<R> {
+        self.context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+            .map(|ctx| unsafe {
+                ctx.surface_as_hal::<A, F, R>(
+                    self.surface_data.downcast_ref().unwrap(),
+                    hal_surface_callback,
+                )
+            })
+    }
+}
+
+// This custom implementation is required because [`Surface::_handle_source`] doesn't
+// require [`Debug`](fmt::Debug), which we should not require from the user.
+impl<'window> fmt::Debug for Surface<'window> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Surface")
+            .field("context", &self.context)
+            .field(
+                "_handle_source",
+                &if self._handle_source.is_some() {
+                    "Some"
+                } else {
+                    "None"
+                },
+            )
+            .field("id", &self.id)
+            .field("data", &self.surface_data)
+            .field("config", &self.config)
+            .finish()
+    }
+}
+
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Surface<'_>: Send, Sync);
+
+impl Drop for Surface<'_> {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .surface_drop(&self.id, self.surface_data.as_ref())
+        }
+    }
+}
+
+/// Super trait for window handles as used in [`SurfaceTarget`].
+pub trait WindowHandle: HasWindowHandle + HasDisplayHandle + WasmNotSendSync {}
+
+impl<T> WindowHandle for T where T: HasWindowHandle + HasDisplayHandle + WasmNotSendSync {}
+
+/// The window/canvas/surface/swap-chain/etc. a surface is attached to, for use with safe surface creation.
+///
+/// This is either a window or an actual web canvas depending on the platform and
+/// enabled features.
+/// Refer to the individual variants for more information.
+///
+/// See also [`SurfaceTargetUnsafe`] for unsafe variants.
+#[non_exhaustive]
+pub enum SurfaceTarget<'window> {
+    /// Window handle producer.
+    ///
+    /// If the specified display and window handle are not supported by any of the backends, then the surface
+    /// will not be supported by any adapters.
+    ///
+    /// # Errors
+    ///
+    /// - On WebGL2: surface creation returns an error if the browser does not support WebGL2,
+    ///   or declines to provide GPU access (such as due to a resource shortage).
+    ///
+    /// # Panics
+    ///
+    /// - On macOS/Metal: will panic if not called on the main thread.
+    /// - On web: will panic if the `raw_window_handle` does not properly refer to a
+    ///   canvas element.
+    Window(Box<dyn WindowHandle + 'window>),
+
+    /// Surface from a `web_sys::HtmlCanvasElement`.
+    ///
+    /// The `canvas` argument must be a valid `<canvas>` element to
+    /// create a surface upon.
+    ///
+    /// # Errors
+    ///
+    /// - On WebGL2: surface creation will return an error if the browser does not support WebGL2,
+    ///   or declines to provide GPU access (such as due to a resource shortage).
+    #[cfg(any(webgpu, webgl))]
+    Canvas(web_sys::HtmlCanvasElement),
+
+    /// Surface from a `web_sys::OffscreenCanvas`.
+    ///
+    /// The `canvas` argument must be a valid `OffscreenCanvas` object
+    /// to create a surface upon.
+    ///
+    /// # Errors
+    ///
+    /// - On WebGL2: surface creation will return an error if the browser does not support WebGL2,
+    ///   or declines to provide GPU access (such as due to a resource shortage).
+    #[cfg(any(webgpu, webgl))]
+    OffscreenCanvas(web_sys::OffscreenCanvas),
+}
+
+impl<'a, T> From<T> for SurfaceTarget<'a>
+where
+    T: WindowHandle + 'a,
+{
+    fn from(window: T) -> Self {
+        Self::Window(Box::new(window))
+    }
+}
+
+/// The window/canvas/surface/swap-chain/etc. a surface is attached to, for use with unsafe surface creation.
+///
+/// This is either a window or an actual web canvas depending on the platform and
+/// enabled features.
+/// Refer to the individual variants for more information.
+///
+/// See also [`SurfaceTarget`] for safe variants.
+#[non_exhaustive]
+pub enum SurfaceTargetUnsafe {
+    /// Raw window & display handle.
+    ///
+    /// If the specified display and window handle are not supported by any of the backends, then the surface
+    /// will not be supported by any adapters.
+    ///
+    /// # Safety
+    ///
+    /// - `raw_window_handle` & `raw_display_handle` must be valid objects to create a surface upon.
+    /// - `raw_window_handle` & `raw_display_handle` must remain valid until after the returned
+    ///   [`Surface`] is dropped.
+    RawHandle {
+        /// Raw display handle, underlying display must outlive the surface created from this.
+        raw_display_handle: raw_window_handle::RawDisplayHandle,
+
+        /// Raw window handle, underlying window must outlive the surface created from this.
+        raw_window_handle: raw_window_handle::RawWindowHandle,
+    },
+
+    /// Surface from `CoreAnimationLayer`.
+    ///
+    /// # Safety
+    ///
+    /// - layer must be a valid object to create a surface upon.
+    #[cfg(metal)]
+    CoreAnimationLayer(*mut std::ffi::c_void),
+
+    /// Surface from `IDCompositionVisual`.
+    ///
+    /// # Safety
+    ///
+    /// - visual must be a valid IDCompositionVisual to create a surface upon.
+    #[cfg(dx12)]
+    CompositionVisual(*mut std::ffi::c_void),
+
+    /// Surface from DX12 `SurfaceHandle`.
+    ///
+    /// # Safety
+    ///
+    /// - surface_handle must be a valid SurfaceHandle to create a surface upon.
+    #[cfg(dx12)]
+    SurfaceHandle(*mut std::ffi::c_void),
+
+    /// Surface from DX12 `SwapChainPanel`.
+    ///
+    /// # Safety
+    ///
+    /// - the pointer must refer to a valid SwapChainPanel to create a surface upon.
+    #[cfg(dx12)]
+    SwapChainPanel(*mut std::ffi::c_void),
+}
+
+impl SurfaceTargetUnsafe {
+    /// Creates a [`SurfaceTargetUnsafe::RawHandle`] from a window.
+    ///
+    /// # Safety
+    ///
+    /// - `window` must outlive the resulting surface target
+    ///   (and subsequently the surface created for this target).
+    pub unsafe fn from_window<T>(window: &T) -> Result<Self, raw_window_handle::HandleError>
+    where
+        T: HasDisplayHandle + HasWindowHandle,
+    {
+        Ok(Self::RawHandle {
+            raw_display_handle: window.display_handle()?.as_raw(),
+            raw_window_handle: window.window_handle()?.as_raw(),
+        })
+    }
+}
+
+/// [`Instance::create_surface()`] or a related function failed.
+#[derive(Clone, Debug)]
+#[non_exhaustive]
+pub struct CreateSurfaceError {
+    pub(crate) inner: CreateSurfaceErrorKind,
+}
+#[derive(Clone, Debug)]
+pub(crate) enum CreateSurfaceErrorKind {
+    /// Error from [`wgpu_hal`].
+    #[cfg(wgpu_core)]
+    Hal(wgc::instance::CreateSurfaceError),
+
+    /// Error from WebGPU surface creation.
+    #[allow(dead_code)] // may be unused depending on target and features
+    Web(String),
+
+    /// Error when trying to get a [`DisplayHandle`] or a [`WindowHandle`] from
+    /// `raw_window_handle`.
+    RawHandle(raw_window_handle::HandleError),
+}
+static_assertions::assert_impl_all!(CreateSurfaceError: Send, Sync);
+
+impl fmt::Display for CreateSurfaceError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match &self.inner {
+            #[cfg(wgpu_core)]
+            CreateSurfaceErrorKind::Hal(e) => e.fmt(f),
+            CreateSurfaceErrorKind::Web(e) => e.fmt(f),
+            CreateSurfaceErrorKind::RawHandle(e) => e.fmt(f),
+        }
+    }
+}
+
+impl error::Error for CreateSurfaceError {
+    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
+        match &self.inner {
+            #[cfg(wgpu_core)]
+            CreateSurfaceErrorKind::Hal(e) => e.source(),
+            CreateSurfaceErrorKind::Web(_) => None,
+            CreateSurfaceErrorKind::RawHandle(e) => e.source(),
+        }
+    }
+}
+
+#[cfg(wgpu_core)]
+impl From<wgc::instance::CreateSurfaceError> for CreateSurfaceError {
+    fn from(e: wgc::instance::CreateSurfaceError) -> Self {
+        Self {
+            inner: CreateSurfaceErrorKind::Hal(e),
+        }
+    }
+}
diff --git a/wgpu/src/api/surface_texture.rs b/wgpu/src/api/surface_texture.rs
new file mode 100644
index 0000000000..9431683528
--- /dev/null
+++ b/wgpu/src/api/surface_texture.rs
@@ -0,0 +1,86 @@
+use std::{error, fmt, thread};
+
+use crate::context::DynContext;
+use crate::*;
+
+/// Surface texture that can be rendered to.
+/// Result of a successful call to [`Surface::get_current_texture`].
+///
+/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
+/// the [`GPUCanvasContext`](https://gpuweb.github.io/gpuweb/#canvas-context) provides
+/// a texture without any additional information.
+#[derive(Debug)]
+pub struct SurfaceTexture {
+    /// Accessible view of the frame.
+    pub texture: Texture,
+    /// `true` if the acquired buffer can still be used for rendering,
+    /// but should be recreated for maximum performance.
+    pub suboptimal: bool,
+    pub(crate) presented: bool,
+    pub(crate) detail: Box<dyn AnyWasmNotSendSync>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(SurfaceTexture: Send, Sync);
+
+impl SurfaceTexture {
+    /// Schedule this texture to be presented on the owning surface.
+    ///
+    /// Needs to be called after any work on the texture is scheduled via [`Queue::submit`].
+    ///
+    /// # Platform dependent behavior
+    ///
+    /// On Wayland, `present` will attach a `wl_buffer` to the underlying `wl_surface` and commit the new surface
+    /// state. If it is desired to do things such as request a frame callback, scale the surface using the viewporter
+    /// or synchronize other double buffered state, then these operations should be done before the call to `present`.
+    pub fn present(mut self) {
+        self.presented = true;
+        DynContext::surface_present(
+            &*self.texture.context,
+            &self.texture.id,
+            // This call to as_ref is essential because we want the DynContext implementation to see the inner
+            // value of the Box (T::SurfaceOutputDetail), not the Box itself.
+            self.detail.as_ref(),
+        );
+    }
+}
+
+impl Drop for SurfaceTexture {
+    fn drop(&mut self) {
+        if !self.presented && !thread::panicking() {
+            DynContext::surface_texture_discard(
+                &*self.texture.context,
+                &self.texture.id,
+                // This call to as_ref is essential because we want the DynContext implementation to see the inner
+                // value of the Box (T::SurfaceOutputDetail), not the Box itself.
+                self.detail.as_ref(),
+            );
+        }
+    }
+}
+
+/// Result of an unsuccessful call to [`Surface::get_current_texture`].
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub enum SurfaceError {
+    /// A timeout was encountered while trying to acquire the next frame.
+    Timeout,
+    /// The underlying surface has changed, and therefore the swap chain must be updated.
+    Outdated,
+    /// The swap chain has been lost and needs to be recreated.
+    Lost,
+    /// There is no more memory left to allocate a new frame.
+    OutOfMemory,
+}
+static_assertions::assert_impl_all!(SurfaceError: Send, Sync);
+
+impl fmt::Display for SurfaceError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", match self {
+            Self::Timeout => "A timeout was encountered while trying to acquire the next frame",
+            Self::Outdated => "The underlying surface has changed, and therefore the swap chain must be updated",
+            Self::Lost =>  "The swap chain has been lost and needs to be recreated",
+            Self::OutOfMemory => "There is no more memory left to allocate a new frame",
+        })
+    }
+}
+
+impl error::Error for SurfaceError {}
diff --git a/wgpu/src/api/texture.rs b/wgpu/src/api/texture.rs
new file mode 100644
index 0000000000..98295b9396
--- /dev/null
+++ b/wgpu/src/api/texture.rs
@@ -0,0 +1,160 @@
+use std::{sync::Arc, thread};
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+/// Handle to a texture on the GPU.
+///
+/// It can be created with [`Device::create_texture`].
+///
+/// Corresponds to [WebGPU `GPUTexture`](https://gpuweb.github.io/gpuweb/#texture-interface).
+#[derive(Debug)]
+pub struct Texture {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+    pub(crate) owned: bool,
+    pub(crate) descriptor: TextureDescriptor<'static>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Texture: Send, Sync);
+
+impl Texture {
+    /// Returns a globally-unique identifier for this `Texture`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+
+    /// Returns the inner hal Texture using a callback. The callback receives `None` if this
+    /// texture is not backed by wgpu-core or the backend type argument does not match it.
+    ///
+    /// # Safety
+    ///
+    /// - The raw handle obtained from the hal Texture must not be manually destroyed
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Texture>) -> R, R>(
+        &self,
+        hal_texture_callback: F,
+    ) -> R {
+        if let Some(ctx) = self
+            .context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+        {
+            // Downcast only after confirming a wgpu-core context; other contexts get `None`.
+            let texture = self.data.as_ref().downcast_ref().unwrap();
+            unsafe { ctx.texture_as_hal::<A, F, R>(texture, hal_texture_callback) }
+        } else {
+            hal_texture_callback(None)
+        }
+    }
+
+    /// Creates a view of this texture.
+    pub fn create_view(&self, desc: &TextureViewDescriptor<'_>) -> TextureView {
+        let (id, data) =
+            DynContext::texture_create_view(&*self.context, &self.id, self.data.as_ref(), desc);
+        TextureView {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Destroy the associated native resources as soon as possible.
+    pub fn destroy(&self) {
+        DynContext::texture_destroy(&*self.context, &self.id, self.data.as_ref());
+    }
+
+    /// Make an `ImageCopyTexture` representing the whole texture.
+    pub fn as_image_copy(&self) -> ImageCopyTexture<'_> {
+        ImageCopyTexture {
+            texture: self,
+            mip_level: 0,
+            origin: Origin3d::ZERO,
+            aspect: TextureAspect::All,
+        }
+    }
+
+    /// Returns the size of this `Texture`.
+    ///
+    /// This is always equal to the `size` that was specified when creating the texture.
+    pub fn size(&self) -> Extent3d {
+        self.descriptor.size
+    }
+
+    /// Returns the width of this `Texture`.
+    ///
+    /// This is always equal to the `size.width` that was specified when creating the texture.
+    pub fn width(&self) -> u32 {
+        self.descriptor.size.width
+    }
+
+    /// Returns the height of this `Texture`.
+    ///
+    /// This is always equal to the `size.height` that was specified when creating the texture.
+    pub fn height(&self) -> u32 {
+        self.descriptor.size.height
+    }
+
+    /// Returns the depth or layer count of this `Texture`.
+    ///
+    /// This is always equal to the `size.depth_or_array_layers` that was specified when creating the texture.
+    pub fn depth_or_array_layers(&self) -> u32 {
+        self.descriptor.size.depth_or_array_layers
+    }
+
+    /// Returns the mip_level_count of this `Texture`.
+    ///
+    /// This is always equal to the `mip_level_count` that was specified when creating the texture.
+    pub fn mip_level_count(&self) -> u32 {
+        self.descriptor.mip_level_count
+    }
+
+    /// Returns the sample_count of this `Texture`.
+    ///
+    /// This is always equal to the `sample_count` that was specified when creating the texture.
+    pub fn sample_count(&self) -> u32 {
+        self.descriptor.sample_count
+    }
+
+    /// Returns the dimension of this `Texture`.
+    ///
+    /// This is always equal to the `dimension` that was specified when creating the texture.
+    pub fn dimension(&self) -> TextureDimension {
+        self.descriptor.dimension
+    }
+
+    /// Returns the format of this `Texture`.
+    ///
+    /// This is always equal to the `format` that was specified when creating the texture.
+    pub fn format(&self) -> TextureFormat {
+        self.descriptor.format
+    }
+
+    /// Returns the allowed usages of this `Texture`.
+    ///
+    /// This is always equal to the `usage` that was specified when creating the texture.
+    pub fn usage(&self) -> TextureUsages {
+        self.descriptor.usage
+    }
+}
+
+impl Drop for Texture {
+    fn drop(&mut self) {
+        if self.owned && !thread::panicking() {
+            self.context.texture_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Describes a [`Texture`].
+///
+/// For use with [`Device::create_texture`].
+///
+/// Corresponds to [WebGPU `GPUTextureDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gputexturedescriptor).
+pub type TextureDescriptor<'a> = wgt::TextureDescriptor<Label<'a>, &'a [TextureFormat]>;
+static_assertions::assert_impl_all!(TextureDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/texture_view.rs b/wgpu/src/api/texture_view.rs
new file mode 100644
index 0000000000..b6e60a3c60
--- /dev/null
+++ b/wgpu/src/api/texture_view.rs
@@ -0,0 +1,98 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a texture view.
+///
+/// A `TextureView` object describes a texture and associated metadata needed by a
+/// [`RenderPipeline`] or [`BindGroup`].
+///
+/// Corresponds to [WebGPU `GPUTextureView`](https://gpuweb.github.io/gpuweb/#gputextureview).
+#[derive(Debug)]
+pub struct TextureView {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(TextureView: Send, Sync);
+
+impl TextureView {
+    /// Returns a globally-unique identifier for this `TextureView`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+
+    /// Returns the inner hal TextureView using a callback. The hal texture view will be `None`
+    /// if the backend type argument does not match with this wgpu TextureView.
+    ///
+    /// # Safety
+    ///
+    /// - The raw handle obtained from the hal TextureView must not be manually destroyed
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::TextureView>) -> R, R>(
+        &self,
+        hal_texture_view_callback: F,
+    ) -> R {
+        use wgc::id::TextureViewId;
+
+        let texture_view_id = TextureViewId::from(self.id);
+
+        if let Some(ctx) = self
+            .context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+        {
+            unsafe {
+                ctx.texture_view_as_hal::<A, F, R>(texture_view_id, hal_texture_view_callback)
+            }
+        } else {
+            hal_texture_view_callback(None)
+        }
+    }
+}
+
+impl Drop for TextureView {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.texture_view_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Describes a [`TextureView`].
+///
+/// For use with [`Texture::create_view`].
+///
+/// Corresponds to [WebGPU `GPUTextureViewDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gputextureviewdescriptor).
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct TextureViewDescriptor<'a> {
+    /// Debug label of the texture view. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// Format of the texture view. Must be either the same as the texture format or one of the
+    /// `view_formats` listed in the texture's descriptor.
+    pub format: Option<TextureFormat>,
+    /// The dimension of the texture view. For 1D textures, this must be `D1`. For 2D textures it must be one of
+    /// `D2`, `D2Array`, `Cube`, and `CubeArray`. For 3D textures it must be `D3`.
+    pub dimension: Option<TextureViewDimension>,
+    /// Aspect of the texture. Color textures must be [`TextureAspect::All`].
+    pub aspect: TextureAspect,
+    /// Base mip level.
+    pub base_mip_level: u32,
+    /// Mip level count.
+    /// If `Some(count)`, `base_mip_level + count` must be less than or equal to the underlying texture mip count.
+    /// If `None`, considered to include the rest of the mipmap levels, but at least 1 in total.
+    pub mip_level_count: Option<u32>,
+    /// Base array layer.
+    pub base_array_layer: u32,
+    /// Layer count.
+    /// If `Some(count)`, `base_array_layer + count` must be less than or equal to the underlying array count.
+    /// If `None`, considered to include the rest of the array layers, but at least 1 in total.
+    pub array_layer_count: Option<u32>,
+}
+static_assertions::assert_impl_all!(TextureViewDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs
index fb3e611c94..e8c33ab583 100644
--- a/wgpu/src/lib.rs
+++ b/wgpu/src/lib.rs
@@ -18,33 +18,42 @@
 #![doc(html_logo_url = "https://raw.githubusercontent.com/gfx-rs/wgpu/trunk/logo.png")]
 #![warn(missing_docs, rust_2018_idioms, unsafe_op_in_unsafe_fn)]
 
+//
+//
+// Modules
+//
+//
+
+mod api;
 mod backend;
 mod context;
-pub mod util;
-#[macro_use]
 mod macros;
+mod send_sync;
+pub mod util;
 
-use std::{
-    any::Any,
-    borrow::Cow,
-    cmp::Ordering,
-    collections::HashMap,
-    error, fmt,
-    future::Future,
-    marker::PhantomData,
-    num::{NonZeroU32, NonZeroU64},
-    ops::{Bound, Deref, DerefMut, Range, RangeBounds},
-    sync::Arc,
-    thread,
-};
+//
+//
+// Private re-exports
+//
+//
 
-#[allow(unused_imports)] // Unused if all backends are disabled.
+#[allow(unused_imports)] // WebGPU needs this
 use context::Context;
+use send_sync::*;
+
+type C = dyn context::DynContext;
+#[cfg(send_sync)]
+type Data = dyn std::any::Any + Send + Sync;
+#[cfg(not(send_sync))]
+type Data = dyn std::any::Any;
 
-use context::{DeviceRequest, DynContext, ObjectId};
-use parking_lot::Mutex;
+//
+//
+// Public re-exports
+//
+//
 
-use raw_window_handle::{HasDisplayHandle, HasWindowHandle};
+pub use api::*;
 pub use wgt::{
     AdapterInfo, AddressMode, AstcBlock, AstcChannel, Backend, Backends, BindGroupLayoutEntry,
     BindingType, BlendComponent, BlendFactor, BlendOperation, BlendState, BufferAddress,
@@ -65,6 +74,16 @@ pub use wgt::{
     MAP_ALIGNMENT, PUSH_CONSTANT_ALIGNMENT, QUERY_RESOLVE_BUFFER_ALIGNMENT, QUERY_SET_MAX_QUERIES,
     QUERY_SIZE, VERTEX_STRIDE_ALIGNMENT,
 };
+// wasm-only types, we try to keep as many types non-platform
+// specific, but these need to depend on web-sys.
+#[cfg(any(webgpu, webgl))]
+pub use wgt::{ExternalImageSource, ImageCopyExternalImage};
+
+//
+//
+// Re-exports of dependencies
+//
+//
 
 /// Re-export of our `wgpu-core` dependency.
 ///
@@ -98,6037 +117,3 @@ pub use raw_window_handle as rwh;
 ///
 #[cfg(any(webgl, webgpu))]
 pub use web_sys;
-
-// wasm-only types, we try to keep as many types non-platform
-// specific, but these need to depend on web-sys.
-#[cfg(any(webgpu, webgl))]
-pub use wgt::{ExternalImageSource, ImageCopyExternalImage};
-
-/// Filter for error scopes.
-#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd)]
-pub enum ErrorFilter {
-    /// Catch only out-of-memory errors.
-    OutOfMemory,
-    /// Catch only validation errors.
-    Validation,
-    /// Catch only internal errors.
-    Internal,
-}
-static_assertions::assert_impl_all!(ErrorFilter: Send, Sync);
-
-type C = dyn DynContext;
-#[cfg(send_sync)]
-type Data = dyn Any + Send + Sync;
-#[cfg(not(send_sync))]
-type Data = dyn Any;
-
-/// Context for all other wgpu objects. Instance of wgpu.
-///
-/// This is the first thing you create when using wgpu.
-/// Its primary use is to create [`Adapter`]s and [`Surface`]s.
-///
-/// Does not have to be kept alive.
-///
-/// Corresponds to [WebGPU `GPU`](https://gpuweb.github.io/gpuweb/#gpu-interface).
-#[derive(Debug)]
-pub struct Instance {
-    context: Arc<C>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Instance: Send, Sync);
-
-/// Handle to a physical graphics and/or compute device.
-///
-/// Adapters can be used to open a connection to the corresponding [`Device`]
-/// on the host system by using [`Adapter::request_device`].
-///
-/// Does not have to be kept alive.
-///
-/// Corresponds to [WebGPU `GPUAdapter`](https://gpuweb.github.io/gpuweb/#gpu-adapter).
-#[derive(Debug)]
-pub struct Adapter {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Adapter: Send, Sync);
-
-impl Drop for Adapter {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.adapter_drop(&self.id, self.data.as_ref())
-        }
-    }
-}
-
-/// Open connection to a graphics and/or compute device.
-///
-/// Responsible for the creation of most rendering and compute resources.
-/// These are then used in commands, which are submitted to a [`Queue`].
-///
-/// A device may be requested from an adapter with [`Adapter::request_device`].
-///
-/// Corresponds to [WebGPU `GPUDevice`](https://gpuweb.github.io/gpuweb/#gpu-device).
-#[derive(Debug)]
-pub struct Device {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Device: Send, Sync);
-
-/// Identifier for a particular call to [`Queue::submit`]. Can be used
-/// as part of an argument to [`Device::poll`] to block for a particular
-/// submission to finish.
-///
-/// This type is unique to the Rust API of `wgpu`.
-/// There is no analogue in the WebGPU specification.
-#[derive(Debug, Clone)]
-pub struct SubmissionIndex(Arc<crate::Data>);
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(SubmissionIndex: Send, Sync);
-
-/// The mapped portion of a buffer, if any, and its outstanding views.
-///
-/// This ensures that views fall within the mapped range and don't overlap, and
-/// also takes care of turning `Option<BufferSize>` sizes into actual buffer
-/// offsets.
-#[derive(Debug)]
-struct MapContext {
-    /// The overall size of the buffer.
-    ///
-    /// This is just a convenient copy of [`Buffer::size`].
-    total_size: BufferAddress,
-
-    /// The range of the buffer that is mapped.
-    ///
-    /// This is `0..0` if the buffer is not mapped. This becomes non-empty when
-    /// the buffer is mapped at creation time, and when you call `map_async` on
-    /// some [`BufferSlice`] (so technically, it indicates the portion that is
-    /// *or has been requested to be* mapped.)
-    ///
-    /// All [`BufferView`]s and [`BufferViewMut`]s must fall within this range.
-    initial_range: Range<BufferAddress>,
-
-    /// The ranges covered by all outstanding [`BufferView`]s and
-    /// [`BufferViewMut`]s. These are non-overlapping, and are all contained
-    /// within `initial_range`.
-    sub_ranges: Vec<Range<BufferAddress>>,
-}
-
-impl MapContext {
-    fn new(total_size: BufferAddress) -> Self {
-        Self {
-            total_size,
-            initial_range: 0..0,
-            sub_ranges: Vec::new(),
-        }
-    }
-
-    /// Record that the buffer is no longer mapped.
-    fn reset(&mut self) {
-        self.initial_range = 0..0;
-
-        assert!(
-            self.sub_ranges.is_empty(),
-            "You cannot unmap a buffer that still has accessible mapped views"
-        );
-    }
-
-    /// Record that the `size` bytes of the buffer at `offset` are now viewed.
-    ///
-    /// Return the byte offset within the buffer of the end of the viewed range.
-    ///
-    /// # Panics
-    ///
-    /// This panics if the given range overlaps with any existing range.
-    fn add(&mut self, offset: BufferAddress, size: Option<BufferSize>) -> BufferAddress {
-        let end = match size {
-            Some(s) => offset + s.get(),
-            None => self.initial_range.end,
-        };
-        assert!(self.initial_range.start <= offset && end <= self.initial_range.end);
-        // This check is essential for avoiding undefined behavior: it is the
-        // only thing that ensures that `&mut` references to the buffer's
-        // contents don't alias anything else.
-        for sub in self.sub_ranges.iter() {
-            assert!(
-                end <= sub.start || offset >= sub.end,
-                "Intersecting map range with {sub:?}"
-            );
-        }
-        self.sub_ranges.push(offset..end);
-        end
-    }
-
-    /// Record that the `size` bytes of the buffer at `offset` are no longer viewed.
-    ///
-    /// # Panics
-    ///
-    /// This panics if the given range does not exactly match one previously
-    /// passed to [`add`].
-    ///
-    /// [`add]`: MapContext::add
-    fn remove(&mut self, offset: BufferAddress, size: Option<BufferSize>) {
-        let end = match size {
-            Some(s) => offset + s.get(),
-            None => self.initial_range.end,
-        };
-
-        let index = self
-            .sub_ranges
-            .iter()
-            .position(|r| *r == (offset..end))
-            .expect("unable to remove range from map context");
-        self.sub_ranges.swap_remove(index);
-    }
-}
-
-/// Handle to a GPU-accessible buffer.
-///
-/// Created with [`Device::create_buffer`] or
-/// [`DeviceExt::create_buffer_init`](util::DeviceExt::create_buffer_init).
-///
-/// Corresponds to [WebGPU `GPUBuffer`](https://gpuweb.github.io/gpuweb/#buffer-interface).
-///
-/// A `Buffer`'s bytes have "interior mutability": functions like
-/// [`Queue::write_buffer`] or [mapping] a buffer for writing only require a
-/// `&Buffer`, not a `&mut Buffer`, even though they modify its contents. `wgpu`
-/// prevents simultaneous reads and writes of buffer contents using run-time
-/// checks.
-///
-/// [mapping]: Buffer#mapping-buffers
-///
-/// # Mapping buffers
-///
-/// If a `Buffer` is created with the appropriate [`usage`], it can be *mapped*:
-/// you can make its contents accessible to the CPU as an ordinary `&[u8]` or
-/// `&mut [u8]` slice of bytes. Buffers created with the
-/// [`mapped_at_creation`][mac] flag set are also mapped initially.
-///
-/// Depending on the hardware, the buffer could be memory shared between CPU and
-/// GPU, so that the CPU has direct access to the same bytes the GPU will
-/// consult; or it may be ordinary CPU memory, whose contents the system must
-/// copy to/from the GPU as needed. This crate's API is designed to work the
-/// same way in either case: at any given time, a buffer is either mapped and
-/// available to the CPU, or unmapped and ready for use by the GPU, but never
-/// both. This makes it impossible for either side to observe changes by the
-/// other immediately, and any necessary transfers can be carried out when the
-/// buffer transitions from one state to the other.
-///
-/// There are two ways to map a buffer:
-///
-/// - If [`BufferDescriptor::mapped_at_creation`] is `true`, then the entire
-///   buffer is mapped when it is created. This is the easiest way to initialize
-///   a new buffer. You can set `mapped_at_creation` on any kind of buffer,
-///   regardless of its [`usage`] flags.
-///
-/// - If the buffer's [`usage`] includes the [`MAP_READ`] or [`MAP_WRITE`]
-///   flags, then you can call `buffer.slice(range).map_async(mode, callback)`
-///   to map the portion of `buffer` given by `range`. This waits for the GPU to
-///   finish using the buffer, and invokes `callback` as soon as the buffer is
-///   safe for the CPU to access.
-///
-/// Once a buffer is mapped:
-///
-/// - You can call `buffer.slice(range).get_mapped_range()` to obtain a
-///   [`BufferView`], which dereferences to a `&[u8]` that you can use to read
-///   the buffer's contents.
-///
-/// - Or, you can call `buffer.slice(range).get_mapped_range_mut()` to obtain a
-///   [`BufferViewMut`], which dereferences to a `&mut [u8]` that you can use to
-///   read and write the buffer's contents.
-///
-/// The given `range` must fall within the mapped portion of the buffer. If you
-/// attempt to access overlapping ranges, even for shared access only, these
-/// methods panic.
-///
-/// While a buffer is mapped, you may not submit any commands to the GPU that
-/// access it. You may record command buffers that use the buffer, but if you
-/// submit them while the buffer is mapped, submission will panic.
-///
-/// When you are done using the buffer on the CPU, you must call
-/// [`Buffer::unmap`] to make it available for use by the GPU again. All
-/// [`BufferView`] and [`BufferViewMut`] views referring to the buffer must be
-/// dropped before you unmap it; otherwise, [`Buffer::unmap`] will panic.
-///
-/// # Example
-///
-/// If `buffer` was created with [`BufferUsages::MAP_WRITE`], we could fill it
-/// with `f32` values like this:
-///
-/// ```no_run
-/// # mod bytemuck {
-/// #     pub fn cast_slice_mut(bytes: &mut [u8]) -> &mut [f32] { todo!() }
-/// # }
-/// # let device: wgpu::Device = todo!();
-/// # let buffer: wgpu::Buffer = todo!();
-/// let buffer = std::sync::Arc::new(buffer);
-/// let capturable = buffer.clone();
-/// buffer.slice(..).map_async(wgpu::MapMode::Write, move |result| {
-///     if result.is_ok() {
-///         let mut view = capturable.slice(..).get_mapped_range_mut();
-///         let floats: &mut [f32] = bytemuck::cast_slice_mut(&mut view);
-///         floats.fill(42.0);
-///         drop(view);
-///         capturable.unmap();
-///     }
-/// });
-/// ```
-///
-/// This code takes the following steps:
-///
-/// - First, it moves `buffer` into an [`Arc`], and makes a clone for capture by
-///   the callback passed to [`map_async`]. Since a [`map_async`] callback may be
-///   invoked from another thread, interaction between the callback and the
-///   thread calling [`map_async`] generally requires some sort of shared heap
-///   data like this. In real code, the [`Arc`] would probably own some larger
-///   structure that itself owns `buffer`.
-///
-/// - Then, it calls [`Buffer::slice`] to make a [`BufferSlice`] referring to
-///   the buffer's entire contents.
-///
-/// - Next, it calls [`BufferSlice::map_async`] to request that the bytes to
-///   which the slice refers be made accessible to the CPU ("mapped"). This may
-///   entail waiting for previously enqueued operations on `buffer` to finish.
-///   Although [`map_async`] itself always returns immediately, it saves the
-///   callback function to be invoked later.
-///
-/// - When some later call to [`Device::poll`] or [`Instance::poll_all`] (not
-///   shown in this example) determines that the buffer is mapped and ready for
-///   the CPU to use, it invokes the callback function.
-///
-/// - The callback function calls [`Buffer::slice`] and then
-///   [`BufferSlice::get_mapped_range_mut`] to obtain a [`BufferViewMut`], which
-///   dereferences to a `&mut [u8]` slice referring to the buffer's bytes.
-///
-/// - It then uses the [`bytemuck`] crate to turn the `&mut [u8]` into a `&mut
-///   [f32]`, and calls the slice [`fill`] method to fill the buffer with a
-///   useful value.
-///
-/// - Finally, the callback drops the view and calls [`Buffer::unmap`] to unmap
-///   the buffer. In real code, the callback would also need to do some sort of
-///   synchronization to let the rest of the program know that it has completed
-///   its work.
-///
-/// If using [`map_async`] directly is awkward, you may find it more convenient to
-/// use [`Queue::write_buffer`] and [`util::DownloadBuffer::read_buffer`].
-/// However, those each have their own tradeoffs; the asynchronous nature of GPU
-/// execution makes it hard to avoid friction altogether.
-///
-/// [`Arc`]: std::sync::Arc
-/// [`map_async`]: BufferSlice::map_async
-/// [`bytemuck`]: https://crates.io/crates/bytemuck
-/// [`fill`]: slice::fill
-///
-/// ## Mapping buffers on the web
-///
-/// When compiled to WebAssembly and running in a browser content process,
-/// `wgpu` implements its API in terms of the browser's WebGPU implementation.
-/// In this context, `wgpu` is further isolated from the GPU:
-///
-/// - Depending on the browser's WebGPU implementation, mapping and unmapping
-///   buffers probably entails copies between WebAssembly linear memory and the
-///   graphics driver's buffers.
-///
-/// - All modern web browsers isolate web content in its own sandboxed process,
-///   which can only interact with the GPU via interprocess communication (IPC).
-///   Although most browsers' IPC systems use shared memory for large data
-///   transfers, there will still probably need to be copies into and out of the
-///   shared memory buffers.
-///
-/// All of these copies contribute to the cost of buffer mapping in this
-/// configuration.
-///
-/// [`usage`]: BufferDescriptor::usage
-/// [mac]: BufferDescriptor::mapped_at_creation
-/// [`MAP_READ`]: BufferUsages::MAP_READ
-/// [`MAP_WRITE`]: BufferUsages::MAP_WRITE
-#[derive(Debug)]
-pub struct Buffer {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-    map_context: Mutex<MapContext>,
-    size: wgt::BufferAddress,
-    usage: BufferUsages,
-    // Todo: missing map_state https://www.w3.org/TR/webgpu/#dom-gpubuffer-mapstate
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Buffer: Send, Sync);
-
-/// A slice of a [`Buffer`], to be mapped, used for vertex or index data, or the like.
-///
-/// You can create a `BufferSlice` by calling [`Buffer::slice`]:
-///
-/// ```no_run
-/// # let buffer: wgpu::Buffer = todo!();
-/// let slice = buffer.slice(10..20);
-/// ```
-///
-/// This returns a slice referring to the second ten bytes of `buffer`. To get a
-/// slice of the entire `Buffer`:
-///
-/// ```no_run
-/// # let buffer: wgpu::Buffer = todo!();
-/// let whole_buffer_slice = buffer.slice(..);
-/// ```
-///
-/// You can pass buffer slices to methods like [`RenderPass::set_vertex_buffer`]
-/// and [`RenderPass::set_index_buffer`] to indicate which portion of the buffer
-/// a draw call should consult.
-///
-/// To access the slice's contents on the CPU, you must first [map] the buffer,
-/// and then call [`BufferSlice::get_mapped_range`] or
-/// [`BufferSlice::get_mapped_range_mut`] to obtain a view of the slice's
-/// contents. See the documentation on [mapping][map] for more details,
-/// including example code.
-///
-/// Unlike a Rust shared slice `&[T]`, whose existence guarantees that
-/// nobody else is modifying the `T` values to which it refers, a
-/// [`BufferSlice`] doesn't guarantee that the buffer's contents aren't
-/// changing. You can still record and submit commands operating on the
-/// buffer while holding a [`BufferSlice`]. A [`BufferSlice`] simply
-/// represents a certain range of the buffer's bytes.
-///
-/// The `BufferSlice` type is unique to the Rust API of `wgpu`. In the WebGPU
-/// specification, an offset and size are specified as arguments to each call
-/// working with the [`Buffer`], instead.
-///
-/// [map]: Buffer#mapping-buffers
-#[derive(Copy, Clone, Debug)]
-pub struct BufferSlice<'a> {
-    buffer: &'a Buffer,
-    offset: BufferAddress,
-    size: Option<BufferSize>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BufferSlice<'_>: Send, Sync);
-
-/// Handle to a texture on the GPU.
-///
-/// It can be created with [`Device::create_texture`].
-///
-/// Corresponds to [WebGPU `GPUTexture`](https://gpuweb.github.io/gpuweb/#texture-interface).
-#[derive(Debug)]
-pub struct Texture {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-    owned: bool,
-    descriptor: TextureDescriptor<'static>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Texture: Send, Sync);
-
-/// Handle to a texture view.
-///
-/// A `TextureView` object describes a texture and associated metadata needed by a
-/// [`RenderPipeline`] or [`BindGroup`].
-///
-/// Corresponds to [WebGPU `GPUTextureView`](https://gpuweb.github.io/gpuweb/#gputextureview).
-#[derive(Debug)]
-pub struct TextureView {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(TextureView: Send, Sync);
-
-/// Handle to a sampler.
-///
-/// A `Sampler` object defines how a pipeline will sample from a [`TextureView`]. Samplers define
-/// image filters (including anisotropy) and address (wrapping) modes, among other things. See
-/// the documentation for [`SamplerDescriptor`] for more information.
-///
-/// It can be created with [`Device::create_sampler`].
-///
-/// Corresponds to [WebGPU `GPUSampler`](https://gpuweb.github.io/gpuweb/#sampler-interface).
-#[derive(Debug)]
-pub struct Sampler {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Sampler: Send, Sync);
-
-impl Drop for Sampler {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.sampler_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Describes a [`Surface`].
-///
-/// For use with [`Surface::configure`].
-///
-/// Corresponds to [WebGPU `GPUCanvasConfiguration`](
-/// https://gpuweb.github.io/gpuweb/#canvas-configuration).
-pub type SurfaceConfiguration = wgt::SurfaceConfiguration<Vec<TextureFormat>>;
-static_assertions::assert_impl_all!(SurfaceConfiguration: Send, Sync);
-
-/// Handle to a presentable surface.
-///
-/// A `Surface` represents a platform-specific surface (e.g. a window) onto which rendered images may
-/// be presented. A `Surface` may be created with the function [`Instance::create_surface`].
-///
-/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
-/// [`GPUCanvasContext`](https://gpuweb.github.io/gpuweb/#canvas-context)
-/// serves a similar role.
-pub struct Surface<'window> {
-    context: Arc<C>,
-
-    /// Optionally, keep the source of the handle used for the surface alive.
-    ///
-    /// This is useful for platforms where the surface is created from a window and the surface
-    /// would become invalid when the window is dropped.
-    _handle_source: Option<Box<dyn WindowHandle + 'window>>,
-
-    /// Wgpu-core surface id.
-    id: ObjectId,
-
-    /// Additional surface data returned by [`DynContext::instance_create_surface`].
-    surface_data: Box<Data>,
-
-    // Stores the latest `SurfaceConfiguration` that was set using `Surface::configure`.
-    // It is required to set the attributes of the `SurfaceTexture` in the
-    // `Surface::get_current_texture` method.
-    // Because the `Surface::configure` method operates on an immutable reference this type has to
-    // be wrapped in a mutex and since the configuration is only supplied after the surface has
-    // been created is is additionally wrapped in an option.
-    config: Mutex<Option<SurfaceConfiguration>>,
-}
-
-// This custom implementation is required because [`Surface::_surface`] doesn't
-// require [`Debug`](fmt::Debug), which we should not require from the user.
-impl<'window> fmt::Debug for Surface<'window> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("Surface")
-            .field("context", &self.context)
-            .field(
-                "_handle_source",
-                &if self._handle_source.is_some() {
-                    "Some"
-                } else {
-                    "None"
-                },
-            )
-            .field("id", &self.id)
-            .field("data", &self.surface_data)
-            .field("config", &self.config)
-            .finish()
-    }
-}
-
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Surface<'_>: Send, Sync);
-
-impl Drop for Surface<'_> {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .surface_drop(&self.id, self.surface_data.as_ref())
-        }
-    }
-}
-
-/// Super trait for window handles as used in [`SurfaceTarget`].
-pub trait WindowHandle: HasWindowHandle + HasDisplayHandle + WasmNotSendSync {}
-
-impl<T> WindowHandle for T where T: HasWindowHandle + HasDisplayHandle + WasmNotSendSync {}
-
-/// The window/canvas/surface/swap-chain/etc. a surface is attached to, for use with safe surface creation.
-///
-/// This is either a window or an actual web canvas depending on the platform and
-/// enabled features.
-/// Refer to the individual variants for more information.
-///
-/// See also [`SurfaceTargetUnsafe`] for unsafe variants.
-#[non_exhaustive]
-pub enum SurfaceTarget<'window> {
-    /// Window handle producer.
-    ///
-    /// If the specified display and window handle are not supported by any of the backends, then the surface
-    /// will not be supported by any adapters.
-    ///
-    /// # Errors
-    ///
-    /// - On WebGL2: surface creation returns an error if the browser does not support WebGL2,
-    ///   or declines to provide GPU access (such as due to a resource shortage).
-    ///
-    /// # Panics
-    ///
-    /// - On macOS/Metal: will panic if not called on the main thread.
-    /// - On web: will panic if the `raw_window_handle` does not properly refer to a
-    ///   canvas element.
-    Window(Box<dyn WindowHandle + 'window>),
-
-    /// Surface from a `web_sys::HtmlCanvasElement`.
-    ///
-    /// The `canvas` argument must be a valid `<canvas>` element to
-    /// create a surface upon.
-    ///
-    /// # Errors
-    ///
-    /// - On WebGL2: surface creation will return an error if the browser does not support WebGL2,
-    ///   or declines to provide GPU access (such as due to a resource shortage).
-    #[cfg(any(webgpu, webgl))]
-    Canvas(web_sys::HtmlCanvasElement),
-
-    /// Surface from a `web_sys::OffscreenCanvas`.
-    ///
-    /// The `canvas` argument must be a valid `OffscreenCanvas` object
-    /// to create a surface upon.
-    ///
-    /// # Errors
-    ///
-    /// - On WebGL2: surface creation will return an error if the browser does not support WebGL2,
-    ///   or declines to provide GPU access (such as due to a resource shortage).
-    #[cfg(any(webgpu, webgl))]
-    OffscreenCanvas(web_sys::OffscreenCanvas),
-}
-
-impl<'a, T> From<T> for SurfaceTarget<'a>
-where
-    T: WindowHandle + 'a,
-{
-    fn from(window: T) -> Self {
-        Self::Window(Box::new(window))
-    }
-}
-
-/// The window/canvas/surface/swap-chain/etc. a surface is attached to, for use with unsafe surface creation.
-///
-/// This is either a window or an actual web canvas depending on the platform and
-/// enabled features.
-/// Refer to the individual variants for more information.
-///
-/// See also [`SurfaceTarget`] for safe variants.
-#[non_exhaustive]
-pub enum SurfaceTargetUnsafe {
-    /// Raw window & display handle.
-    ///
-    /// If the specified display and window handle are not supported by any of the backends, then the surface
-    /// will not be supported by any adapters.
-    ///
-    /// # Safety
-    ///
-    /// - `raw_window_handle` & `raw_display_handle` must be valid objects to create a surface upon.
-    /// - `raw_window_handle` & `raw_display_handle` must remain valid until after the returned
-    ///    [`Surface`] is  dropped.
-    RawHandle {
-        /// Raw display handle, underlying display must outlive the surface created from this.
-        raw_display_handle: raw_window_handle::RawDisplayHandle,
-
-        /// Raw display handle, underlying window must outlive the surface created from this.
-        raw_window_handle: raw_window_handle::RawWindowHandle,
-    },
-
-    /// Surface from `CoreAnimationLayer`.
-    ///
-    /// # Safety
-    ///
-    /// - layer must be a valid object to create a surface upon.
-    #[cfg(metal)]
-    CoreAnimationLayer(*mut std::ffi::c_void),
-
-    /// Surface from `IDCompositionVisual`.
-    ///
-    /// # Safety
-    ///
-    /// - visual must be a valid IDCompositionVisual to create a surface upon.
-    #[cfg(dx12)]
-    CompositionVisual(*mut std::ffi::c_void),
-
-    /// Surface from DX12 `SurfaceHandle`.
-    ///
-    /// # Safety
-    ///
-    /// - surface_handle must be a valid SurfaceHandle to create a surface upon.
-    #[cfg(dx12)]
-    SurfaceHandle(*mut std::ffi::c_void),
-
-    /// Surface from DX12 `SwapChainPanel`.
-    ///
-    /// # Safety
-    ///
-    /// - visual must be a valid SwapChainPanel to create a surface upon.
-    #[cfg(dx12)]
-    SwapChainPanel(*mut std::ffi::c_void),
-}
-
-impl SurfaceTargetUnsafe {
-    /// Creates a [`SurfaceTargetUnsafe::RawHandle`] from a window.
-    ///
-    /// # Safety
-    ///
-    /// - `window` must outlive the resulting surface target
-    ///   (and subsequently the surface created for this target).
-    pub unsafe fn from_window<T>(window: &T) -> Result<Self, raw_window_handle::HandleError>
-    where
-        T: HasDisplayHandle + HasWindowHandle,
-    {
-        Ok(Self::RawHandle {
-            raw_display_handle: window.display_handle()?.as_raw(),
-            raw_window_handle: window.window_handle()?.as_raw(),
-        })
-    }
-}
-
-/// Handle to a binding group layout.
-///
-/// A `BindGroupLayout` is a handle to the GPU-side layout of a binding group. It can be used to
-/// create a [`BindGroupDescriptor`] object, which in turn can be used to create a [`BindGroup`]
-/// object with [`Device::create_bind_group`]. A series of `BindGroupLayout`s can also be used to
-/// create a [`PipelineLayoutDescriptor`], which can be used to create a [`PipelineLayout`].
-///
-/// It can be created with [`Device::create_bind_group_layout`].
-///
-/// Corresponds to [WebGPU `GPUBindGroupLayout`](
-/// https://gpuweb.github.io/gpuweb/#gpubindgrouplayout).
-#[derive(Debug)]
-pub struct BindGroupLayout {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BindGroupLayout: Send, Sync);
-
-impl Drop for BindGroupLayout {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .bind_group_layout_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Handle to a binding group.
-///
-/// A `BindGroup` represents the set of resources bound to the bindings described by a
-/// [`BindGroupLayout`]. It can be created with [`Device::create_bind_group`]. A `BindGroup` can
-/// be bound to a particular [`RenderPass`] with [`RenderPass::set_bind_group`], or to a
-/// [`ComputePass`] with [`ComputePass::set_bind_group`].
-///
-/// Corresponds to [WebGPU `GPUBindGroup`](https://gpuweb.github.io/gpuweb/#gpubindgroup).
-#[derive(Debug)]
-pub struct BindGroup {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BindGroup: Send, Sync);
-
-impl Drop for BindGroup {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.bind_group_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Handle to a compiled shader module.
-///
-/// A `ShaderModule` represents a compiled shader module on the GPU. It can be created by passing
-/// source code to [`Device::create_shader_module`] or valid SPIR-V binary to
-/// [`Device::create_shader_module_spirv`]. Shader modules are used to define programmable stages
-/// of a pipeline.
-///
-/// Corresponds to [WebGPU `GPUShaderModule`](https://gpuweb.github.io/gpuweb/#shader-module).
-#[derive(Debug)]
-pub struct ShaderModule {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ShaderModule: Send, Sync);
-
-impl Drop for ShaderModule {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .shader_module_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-impl ShaderModule {
-    /// Get the compilation info for the shader module.
-    pub fn get_compilation_info(&self) -> impl Future<Output = CompilationInfo> + WasmNotSend {
-        self.context
-            .shader_get_compilation_info(&self.id, self.data.as_ref())
-    }
-}
-
-/// Compilation information for a shader module.
-///
-/// Corresponds to [WebGPU `GPUCompilationInfo`](https://gpuweb.github.io/gpuweb/#gpucompilationinfo).
-/// The source locations use bytes, and index a UTF-8 encoded string.
-#[derive(Debug, Clone)]
-pub struct CompilationInfo {
-    /// The messages from the shader compilation process.
-    pub messages: Vec<CompilationMessage>,
-}
-
-/// A single message from the shader compilation process.
-///
-/// Roughly corresponds to [`GPUCompilationMessage`](https://www.w3.org/TR/webgpu/#gpucompilationmessage),
-/// except that the location uses UTF-8 for all positions.
-#[derive(Debug, Clone)]
-pub struct CompilationMessage {
-    /// The text of the message.
-    pub message: String,
-    /// The type of the message.
-    pub message_type: CompilationMessageType,
-    /// Where in the source code the message points at.
-    pub location: Option<SourceLocation>,
-}
-
-/// The type of a compilation message.
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum CompilationMessageType {
-    /// An error message.
-    Error,
-    /// A warning message.
-    Warning,
-    /// An informational message.
-    Info,
-}
-
-/// A human-readable representation for a span, tailored for text source.
-///
-/// Roughly corresponds to the positional members of [`GPUCompilationMessage`][gcm] from
-/// the WebGPU specification, except
-/// - `offset` and `length` are in bytes (UTF-8 code units), instead of UTF-16 code units.
-/// - `line_position` is in bytes (UTF-8 code units), and is usually not directly intended for humans.
-///
-/// [gcm]: https://www.w3.org/TR/webgpu/#gpucompilationmessage
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub struct SourceLocation {
-    /// 1-based line number.
-    pub line_number: u32,
-    /// 1-based column in code units (in bytes) of the start of the span.
-    /// Remember to convert accordingly when displaying to the user.
-    pub line_position: u32,
-    /// 0-based Offset in code units (in bytes) of the start of the span.
-    pub offset: u32,
-    /// Length in code units (in bytes) of the span.
-    pub length: u32,
-}
-
-#[cfg(all(feature = "wgsl", wgpu_core))]
-impl From<naga::error::ShaderError<naga::front::wgsl::ParseError>> for CompilationInfo {
-    fn from(value: naga::error::ShaderError<naga::front::wgsl::ParseError>) -> Self {
-        CompilationInfo {
-            messages: vec![CompilationMessage {
-                message: value.to_string(),
-                message_type: CompilationMessageType::Error,
-                location: value.inner.location(&value.source).map(Into::into),
-            }],
-        }
-    }
-}
-#[cfg(feature = "glsl")]
-impl From<naga::error::ShaderError<naga::front::glsl::ParseErrors>> for CompilationInfo {
-    fn from(value: naga::error::ShaderError<naga::front::glsl::ParseErrors>) -> Self {
-        let messages = value
-            .inner
-            .errors
-            .into_iter()
-            .map(|err| CompilationMessage {
-                message: err.to_string(),
-                message_type: CompilationMessageType::Error,
-                location: err.location(&value.source).map(Into::into),
-            })
-            .collect();
-        CompilationInfo { messages }
-    }
-}
-
-#[cfg(feature = "spirv")]
-impl From<naga::error::ShaderError<naga::front::spv::Error>> for CompilationInfo {
-    fn from(value: naga::error::ShaderError<naga::front::spv::Error>) -> Self {
-        CompilationInfo {
-            messages: vec![CompilationMessage {
-                message: value.to_string(),
-                message_type: CompilationMessageType::Error,
-                location: None,
-            }],
-        }
-    }
-}
-
-#[cfg(any(wgpu_core, naga))]
-impl From<naga::error::ShaderError<naga::WithSpan<naga::valid::ValidationError>>>
-    for CompilationInfo
-{
-    fn from(value: naga::error::ShaderError<naga::WithSpan<naga::valid::ValidationError>>) -> Self {
-        CompilationInfo {
-            messages: vec![CompilationMessage {
-                message: value.to_string(),
-                message_type: CompilationMessageType::Error,
-                location: value.inner.location(&value.source).map(Into::into),
-            }],
-        }
-    }
-}
-
-#[cfg(any(wgpu_core, naga))]
-impl From<naga::SourceLocation> for SourceLocation {
-    fn from(value: naga::SourceLocation) -> Self {
-        SourceLocation {
-            length: value.length,
-            offset: value.offset,
-            line_number: value.line_number,
-            line_position: value.line_position,
-        }
-    }
-}
-
-/// Source of a shader module.
-///
-/// The source will be parsed and validated.
-///
-/// Any necessary shader translation (e.g. from WGSL to SPIR-V or vice versa)
-/// will be done internally by wgpu.
-///
-/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
-/// only WGSL source code strings are accepted.
-#[cfg_attr(feature = "naga-ir", allow(clippy::large_enum_variant))]
-#[derive(Clone, Debug)]
-#[non_exhaustive]
-pub enum ShaderSource<'a> {
-    /// SPIR-V module represented as a slice of words.
-    ///
-    /// See also: [`util::make_spirv`], [`include_spirv`]
-    #[cfg(feature = "spirv")]
-    SpirV(Cow<'a, [u32]>),
-    /// GLSL module as a string slice.
-    ///
-    /// Note: GLSL is not yet fully supported and must be a specific ShaderStage.
-    #[cfg(feature = "glsl")]
-    Glsl {
-        /// The source code of the shader.
-        shader: Cow<'a, str>,
-        /// The shader stage that the shader targets. For example, `naga::ShaderStage::Vertex`
-        stage: naga::ShaderStage,
-        /// Defines to unlock configured shader features.
-        defines: naga::FastHashMap<String, String>,
-    },
-    /// WGSL module as a string slice.
-    #[cfg(feature = "wgsl")]
-    Wgsl(Cow<'a, str>),
-    /// Naga module.
-    #[cfg(feature = "naga-ir")]
-    Naga(Cow<'static, naga::Module>),
-    /// Dummy variant because `Naga` doesn't have a lifetime and without enough active features it
-    /// could be the last one active.
-    #[doc(hidden)]
-    Dummy(PhantomData<&'a ()>),
-}
-static_assertions::assert_impl_all!(ShaderSource<'_>: Send, Sync);
-
-/// Descriptor for use with [`Device::create_shader_module`].
-///
-/// Corresponds to [WebGPU `GPUShaderModuleDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpushadermoduledescriptor).
-#[derive(Clone, Debug)]
-pub struct ShaderModuleDescriptor<'a> {
-    /// Debug label of the shader module. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// Source code for the shader.
-    pub source: ShaderSource<'a>,
-}
-static_assertions::assert_impl_all!(ShaderModuleDescriptor<'_>: Send, Sync);
-
-/// Descriptor for a shader module given by SPIR-V binary, for use with
-/// [`Device::create_shader_module_spirv`].
-///
-/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
-/// only WGSL source code strings are accepted.
-#[derive(Debug)]
-pub struct ShaderModuleDescriptorSpirV<'a> {
-    /// Debug label of the shader module. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// Binary SPIR-V data, in 4-byte words.
-    pub source: Cow<'a, [u32]>,
-}
-static_assertions::assert_impl_all!(ShaderModuleDescriptorSpirV<'_>: Send, Sync);
-
-/// Handle to a pipeline layout.
-///
-/// A `PipelineLayout` object describes the available binding groups of a pipeline.
-/// It can be created with [`Device::create_pipeline_layout`].
-///
-/// Corresponds to [WebGPU `GPUPipelineLayout`](https://gpuweb.github.io/gpuweb/#gpupipelinelayout).
-#[derive(Debug)]
-pub struct PipelineLayout {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(PipelineLayout: Send, Sync);
-
-impl Drop for PipelineLayout {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .pipeline_layout_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Handle to a rendering (graphics) pipeline.
-///
-/// A `RenderPipeline` object represents a graphics pipeline and its stages, bindings, vertex
-/// buffers and targets. It can be created with [`Device::create_render_pipeline`].
-///
-/// Corresponds to [WebGPU `GPURenderPipeline`](https://gpuweb.github.io/gpuweb/#render-pipeline).
-#[derive(Debug)]
-pub struct RenderPipeline {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderPipeline: Send, Sync);
-
-impl Drop for RenderPipeline {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .render_pipeline_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-impl RenderPipeline {
-    /// Get an object representing the bind group layout at a given index.
-    pub fn get_bind_group_layout(&self, index: u32) -> BindGroupLayout {
-        let context = Arc::clone(&self.context);
-        let (id, data) =
-            self.context
-                .render_pipeline_get_bind_group_layout(&self.id, self.data.as_ref(), index);
-        BindGroupLayout { context, id, data }
-    }
-}
-
-/// Handle to a compute pipeline.
-///
-/// A `ComputePipeline` object represents a compute pipeline and its single shader stage.
-/// It can be created with [`Device::create_compute_pipeline`].
-///
-/// Corresponds to [WebGPU `GPUComputePipeline`](https://gpuweb.github.io/gpuweb/#compute-pipeline).
-#[derive(Debug)]
-pub struct ComputePipeline {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ComputePipeline: Send, Sync);
-
-impl Drop for ComputePipeline {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .compute_pipeline_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-impl ComputePipeline {
-    /// Get an object representing the bind group layout at a given index.
-    pub fn get_bind_group_layout(&self, index: u32) -> BindGroupLayout {
-        let context = Arc::clone(&self.context);
-        let (id, data) = self.context.compute_pipeline_get_bind_group_layout(
-            &self.id,
-            self.data.as_ref(),
-            index,
-        );
-        BindGroupLayout { context, id, data }
-    }
-}
-
-/// Handle to a pipeline cache, which is used to accelerate
-/// creating [`RenderPipeline`]s and [`ComputePipeline`]s
-/// in subsequent executions
-///
-/// This reuse is only applicable for the same or similar devices.
-/// See [`util::pipeline_cache_key`] for some details.
-///
-/// # Background
-///
-/// In most GPU drivers, shader code must be converted into a machine code
-/// which can be executed on the GPU.
-/// Generating this machine code can require a lot of computation.
-/// Pipeline caches allow this computation to be reused between executions
-/// of the program.
-/// This can be very useful for reducing program startup time.
-///
-/// Note that most desktop GPU drivers will manage their own caches,
-/// meaning that little advantage can be gained from this on those platforms.
-/// However, on some platforms, especially Android, drivers leave this to the
-/// application to implement.
-///
-/// Unfortunately, drivers do not expose whether they manage their own caches.
-/// Some reasonable policies for applications to use are:
-/// - Manage their own pipeline cache on all platforms
-/// - Only manage pipeline caches on Android
-///
-/// # Usage
-///
-/// It is valid to use this resource when creating multiple pipelines, in
-/// which case it will likely cache each of those pipelines.
-/// It is also valid to create a new cache for each pipeline.
-///
-/// This resource is most useful when the data produced from it (using
-/// [`PipelineCache::get_data`]) is persisted.
-/// Care should be taken that pipeline caches are only used for the same device,
-/// as pipeline caches from compatible devices are unlikely to provide any advantage.
-/// `util::pipeline_cache_key` can be used as a file/directory name to help ensure that.
-///
-/// It is recommended to store pipeline caches atomically. If persisting to disk,
-/// this can usually be achieved by creating a temporary file, then moving/[renaming]
-/// the temporary file over the existing cache
-///
-/// # Storage Usage
-///
-/// There is not currently an API available to reduce the size of a cache.
-/// This is due to limitations in the underlying graphics APIs used.
-/// This is especially impactful if your application is being updated, so
-/// previous caches are no longer being used.
-///
-/// One option to work around this is to regenerate the cache.
-/// That is, creating the pipelines which your program runs using
-/// with the stored cached data, then recreating the *same* pipelines
-/// using a new cache, which your application then store.
-///
-/// # Implementations
-///
-/// This resource currently only works on the following backends:
-///  - Vulkan
-///
-/// This type is unique to the Rust API of `wgpu`.
-///
-/// [renaming]: std::fs::rename
-#[derive(Debug)]
-pub struct PipelineCache {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(PipelineCache: Send, Sync);
-
-impl PipelineCache {
-    /// Get the data associated with this pipeline cache.
-    /// The data format is an implementation detail of `wgpu`.
-    /// The only defined operation on this data setting it as the `data` field
-    /// on [`PipelineCacheDescriptor`], then to [`Device::create_pipeline_cache`].
-    ///
-    /// This function is unique to the Rust API of `wgpu`.
-    pub fn get_data(&self) -> Option<Vec<u8>> {
-        self.context
-            .pipeline_cache_get_data(&self.id, self.data.as_ref())
-    }
-}
-
-impl Drop for PipelineCache {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .pipeline_cache_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Handle to a command buffer on the GPU.
-///
-/// A `CommandBuffer` represents a complete sequence of commands that may be submitted to a command
-/// queue with [`Queue::submit`]. A `CommandBuffer` is obtained by recording a series of commands to
-/// a [`CommandEncoder`] and then calling [`CommandEncoder::finish`].
-///
-/// Corresponds to [WebGPU `GPUCommandBuffer`](https://gpuweb.github.io/gpuweb/#command-buffer).
-#[derive(Debug)]
-pub struct CommandBuffer {
-    context: Arc<C>,
-    id: Option<ObjectId>,
-    data: Option<Box<Data>>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(CommandBuffer: Send, Sync);
-
-impl Drop for CommandBuffer {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            if let Some(id) = self.id.take() {
-                self.context
-                    .command_buffer_drop(&id, self.data.take().unwrap().as_ref());
-            }
-        }
-    }
-}
-
-/// Encodes a series of GPU operations.
-///
-/// A command encoder can record [`RenderPass`]es, [`ComputePass`]es,
-/// and transfer operations between driver-managed resources like [`Buffer`]s and [`Texture`]s.
-///
-/// When finished recording, call [`CommandEncoder::finish`] to obtain a [`CommandBuffer`] which may
-/// be submitted for execution.
-///
-/// Corresponds to [WebGPU `GPUCommandEncoder`](https://gpuweb.github.io/gpuweb/#command-encoder).
-#[derive(Debug)]
-pub struct CommandEncoder {
-    context: Arc<C>,
-    id: Option<ObjectId>,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(CommandEncoder: Send, Sync);
-
-impl Drop for CommandEncoder {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            if let Some(id) = self.id.take() {
-                self.context.command_encoder_drop(&id, self.data.as_ref());
-            }
-        }
-    }
-}
-
-/// In-progress recording of a render pass: a list of render commands in a [`CommandEncoder`].
-///
-/// It can be created with [`CommandEncoder::begin_render_pass()`], whose [`RenderPassDescriptor`]
-/// specifies the attachments (textures) that will be rendered to.
-///
-/// Most of the methods on `RenderPass` serve one of two purposes, identifiable by their names:
-///
-/// * `draw_*()`: Drawing (that is, encoding a render command, which, when executed by the GPU, will
-///   rasterize something and execute shaders).
-/// * `set_*()`: Setting part of the [render state](https://gpuweb.github.io/gpuweb/#renderstate)
-///   for future drawing commands.
-///
-/// A render pass may contain any number of drawing commands, and before/between each command the
-/// render state may be updated however you wish; each drawing command will be executed using the
-/// render state that has been set when the `draw_*()` function is called.
-///
-/// Corresponds to [WebGPU `GPURenderPassEncoder`](
-/// https://gpuweb.github.io/gpuweb/#render-pass-encoder).
-#[derive(Debug)]
-pub struct RenderPass<'encoder> {
-    /// The inner data of the render pass, separated out so it's easy to replace the lifetime with 'static if desired.
-    inner: RenderPassInner,
-
-    /// This lifetime is used to protect the [`CommandEncoder`] from being used
-    /// while the pass is alive.
-    encoder_guard: PhantomData<&'encoder ()>,
-}
-
-#[derive(Debug)]
-struct RenderPassInner {
-    id: ObjectId,
-    data: Box<Data>,
-    context: Arc<C>,
-}
-
-/// In-progress recording of a compute pass.
-///
-/// It can be created with [`CommandEncoder::begin_compute_pass`].
-///
-/// Corresponds to [WebGPU `GPUComputePassEncoder`](
-/// https://gpuweb.github.io/gpuweb/#compute-pass-encoder).
-#[derive(Debug)]
-pub struct ComputePass<'encoder> {
-    /// The inner data of the compute pass, separated out so it's easy to replace the lifetime with 'static if desired.
-    inner: ComputePassInner,
-
-    /// This lifetime is used to protect the [`CommandEncoder`] from being used
-    /// while the pass is alive.
-    encoder_guard: PhantomData<&'encoder ()>,
-}
-
-#[derive(Debug)]
-struct ComputePassInner {
-    id: ObjectId,
-    data: Box<Data>,
-    context: Arc<C>,
-}
-
-/// Encodes a series of GPU operations into a reusable "render bundle".
-///
-/// It only supports a handful of render commands, but it makes them reusable.
-/// It can be created with [`Device::create_render_bundle_encoder`].
-/// It can be executed onto a [`CommandEncoder`] using [`RenderPass::execute_bundles`].
-///
-/// Executing a [`RenderBundle`] is often more efficient than issuing the underlying commands
-/// manually.
-///
-/// Corresponds to [WebGPU `GPURenderBundleEncoder`](
-/// https://gpuweb.github.io/gpuweb/#gpurenderbundleencoder).
-#[derive(Debug)]
-pub struct RenderBundleEncoder<'a> {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-    parent: &'a Device,
-    /// This type should be !Send !Sync, because it represents an allocation on this thread's
-    /// command buffer.
-    _p: PhantomData<*const u8>,
-}
-static_assertions::assert_not_impl_any!(RenderBundleEncoder<'_>: Send, Sync);
-
-/// Pre-prepared reusable bundle of GPU operations.
-///
-/// It only supports a handful of render commands, but it makes them reusable. Executing a
-/// [`RenderBundle`] is often more efficient than issuing the underlying commands manually.
-///
-/// It can be created by use of a [`RenderBundleEncoder`], and executed onto a [`CommandEncoder`]
-/// using [`RenderPass::execute_bundles`].
-///
-/// Corresponds to [WebGPU `GPURenderBundle`](https://gpuweb.github.io/gpuweb/#render-bundle).
-#[derive(Debug)]
-pub struct RenderBundle {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderBundle: Send, Sync);
-
-impl Drop for RenderBundle {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .render_bundle_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Handle to a query set.
-///
-/// It can be created with [`Device::create_query_set`].
-///
-/// Corresponds to [WebGPU `GPUQuerySet`](https://gpuweb.github.io/gpuweb/#queryset).
-#[derive(Debug)]
-pub struct QuerySet {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(QuerySet: Send, Sync);
-
-impl Drop for QuerySet {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.query_set_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Handle to a command queue on a device.
-///
-/// A `Queue` executes recorded [`CommandBuffer`] objects and provides convenience methods
-/// for writing to [buffers](Queue::write_buffer) and [textures](Queue::write_texture).
-/// It can be created along with a [`Device`] by calling [`Adapter::request_device`].
-///
-/// Corresponds to [WebGPU `GPUQueue`](https://gpuweb.github.io/gpuweb/#gpu-queue).
-#[derive(Debug)]
-pub struct Queue {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Queue: Send, Sync);
-
-impl Drop for Queue {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.queue_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Resource that can be bound to a pipeline.
-///
-/// Corresponds to [WebGPU `GPUBindingResource`](
-/// https://gpuweb.github.io/gpuweb/#typedefdef-gpubindingresource).
-#[non_exhaustive]
-#[derive(Clone, Debug)]
-pub enum BindingResource<'a> {
-    /// Binding is backed by a buffer.
-    ///
-    /// Corresponds to [`wgt::BufferBindingType::Uniform`] and [`wgt::BufferBindingType::Storage`]
-    /// with [`BindGroupLayoutEntry::count`] set to None.
-    Buffer(BufferBinding<'a>),
-    /// Binding is backed by an array of buffers.
-    ///
-    /// [`Features::BUFFER_BINDING_ARRAY`] must be supported to use this feature.
-    ///
-    /// Corresponds to [`wgt::BufferBindingType::Uniform`] and [`wgt::BufferBindingType::Storage`]
-    /// with [`BindGroupLayoutEntry::count`] set to Some.
-    BufferArray(&'a [BufferBinding<'a>]),
-    /// Binding is a sampler.
-    ///
-    /// Corresponds to [`wgt::BindingType::Sampler`] with [`BindGroupLayoutEntry::count`] set to None.
-    Sampler(&'a Sampler),
-    /// Binding is backed by an array of samplers.
-    ///
-    /// [`Features::TEXTURE_BINDING_ARRAY`] must be supported to use this feature.
-    ///
-    /// Corresponds to [`wgt::BindingType::Sampler`] with [`BindGroupLayoutEntry::count`] set
-    /// to Some.
-    SamplerArray(&'a [&'a Sampler]),
-    /// Binding is backed by a texture.
-    ///
-    /// Corresponds to [`wgt::BindingType::Texture`] and [`wgt::BindingType::StorageTexture`] with
-    /// [`BindGroupLayoutEntry::count`] set to None.
-    TextureView(&'a TextureView),
-    /// Binding is backed by an array of textures.
-    ///
-    /// [`Features::TEXTURE_BINDING_ARRAY`] must be supported to use this feature.
-    ///
-    /// Corresponds to [`wgt::BindingType::Texture`] and [`wgt::BindingType::StorageTexture`] with
-    /// [`BindGroupLayoutEntry::count`] set to Some.
-    TextureViewArray(&'a [&'a TextureView]),
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BindingResource<'_>: Send, Sync);
-
-/// Describes the segment of a buffer to bind.
-///
-/// Corresponds to [WebGPU `GPUBufferBinding`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpubufferbinding).
-#[derive(Clone, Debug)]
-pub struct BufferBinding<'a> {
-    /// The buffer to bind.
-    pub buffer: &'a Buffer,
-
-    /// Base offset of the buffer, in bytes.
-    ///
-    /// If the [`has_dynamic_offset`] field of this buffer's layout entry is
-    /// `true`, the offset here will be added to the dynamic offset passed to
-    /// [`RenderPass::set_bind_group`] or [`ComputePass::set_bind_group`].
-    ///
-    /// If the buffer was created with [`BufferUsages::UNIFORM`], then this
-    /// offset must be a multiple of
-    /// [`Limits::min_uniform_buffer_offset_alignment`].
-    ///
-    /// If the buffer was created with [`BufferUsages::STORAGE`], then this
-    /// offset must be a multiple of
-    /// [`Limits::min_storage_buffer_offset_alignment`].
-    ///
-    /// [`has_dynamic_offset`]: BindingType::Buffer::has_dynamic_offset
-    pub offset: BufferAddress,
-
-    /// Size of the binding in bytes, or `None` for using the rest of the buffer.
-    pub size: Option<BufferSize>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BufferBinding<'_>: Send, Sync);
-
-/// Operation to perform to the output attachment at the start of a render pass.
-///
-/// Corresponds to [WebGPU `GPULoadOp`](https://gpuweb.github.io/gpuweb/#enumdef-gpuloadop),
-/// plus the corresponding clearValue.
-#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
-#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-pub enum LoadOp<V> {
-    /// Loads the specified value for this attachment into the render pass.
-    ///
-    /// On some GPU hardware (primarily mobile), "clear" is significantly cheaper
-    /// because it avoids loading data from main memory into tile-local memory.
-    ///
-    /// On other GPU hardware, there isn’t a significant difference.
-    ///
-    /// As a result, it is recommended to use "clear" rather than "load" in cases
-    /// where the initial value doesn’t matter
-    /// (e.g. the render target will be cleared using a skybox).
-    Clear(V),
-    /// Loads the existing value for this attachment into the render pass.
-    Load,
-}
-
-impl<V: Default> Default for LoadOp<V> {
-    fn default() -> Self {
-        Self::Clear(Default::default())
-    }
-}
-
-/// Operation to perform to the output attachment at the end of a render pass.
-///
-/// Corresponds to [WebGPU `GPUStoreOp`](https://gpuweb.github.io/gpuweb/#enumdef-gpustoreop).
-#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Default)]
-#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-pub enum StoreOp {
-    /// Stores the resulting value of the render pass for this attachment.
-    #[default]
-    Store,
-    /// Discards the resulting value of the render pass for this attachment.
-    ///
-    /// The attachment will be treated as uninitialized afterwards.
-    /// (If only either Depth or Stencil texture-aspects is set to `Discard`,
-    /// the respective other texture-aspect will be preserved.)
-    ///
-    /// This can be significantly faster on tile-based render hardware.
-    ///
-    /// Prefer this if the attachment is not read by subsequent passes.
-    Discard,
-}
-
-/// Pair of load and store operations for an attachment aspect.
-///
-/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
-/// separate `loadOp` and `storeOp` fields are used instead.
-#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
-#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-pub struct Operations<V> {
-    /// How data should be read through this attachment.
-    pub load: LoadOp<V>,
-    /// Whether data will be written to through this attachment.
-    ///
-    /// Note that resolve textures (if specified) are always written to,
-    /// regardless of this setting.
-    pub store: StoreOp,
-}
-
-impl<V: Default> Default for Operations<V> {
-    #[inline]
-    fn default() -> Self {
-        Self {
-            load: LoadOp::<V>::default(),
-            store: StoreOp::default(),
-        }
-    }
-}
-
-/// Describes the timestamp writes of a render pass.
-///
-/// For use with [`RenderPassDescriptor`].
-/// At least one of `beginning_of_pass_write_index` and `end_of_pass_write_index` must be `Some`.
-///
-/// Corresponds to [WebGPU `GPURenderPassTimestampWrite`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpasstimestampwrites).
-#[derive(Clone, Debug)]
-pub struct RenderPassTimestampWrites<'a> {
-    /// The query set to write to.
-    pub query_set: &'a QuerySet,
-    /// The index of the query set at which a start timestamp of this pass is written, if any.
-    pub beginning_of_pass_write_index: Option<u32>,
-    /// The index of the query set at which an end timestamp of this pass is written, if any.
-    pub end_of_pass_write_index: Option<u32>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderPassTimestampWrites<'_>: Send, Sync);
-
-/// Describes a color attachment to a [`RenderPass`].
-///
-/// For use with [`RenderPassDescriptor`].
-///
-/// Corresponds to [WebGPU `GPURenderPassColorAttachment`](
-/// https://gpuweb.github.io/gpuweb/#color-attachments).
-#[derive(Clone, Debug)]
-pub struct RenderPassColorAttachment<'tex> {
-    /// The view to use as an attachment.
-    pub view: &'tex TextureView,
-    /// The view that will receive the resolved output if multisampling is used.
-    ///
-    /// If set, it is always written to, regardless of how [`Self::ops`] is configured.
-    pub resolve_target: Option<&'tex TextureView>,
-    /// What operations will be performed on this color attachment.
-    pub ops: Operations<Color>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderPassColorAttachment<'_>: Send, Sync);
-
-/// Describes a depth/stencil attachment to a [`RenderPass`].
-///
-/// For use with [`RenderPassDescriptor`].
-///
-/// Corresponds to [WebGPU `GPURenderPassDepthStencilAttachment`](
-/// https://gpuweb.github.io/gpuweb/#depth-stencil-attachments).
-#[derive(Clone, Debug)]
-pub struct RenderPassDepthStencilAttachment<'tex> {
-    /// The view to use as an attachment.
-    pub view: &'tex TextureView,
-    /// What operations will be performed on the depth part of the attachment.
-    pub depth_ops: Option<Operations<f32>>,
-    /// What operations will be performed on the stencil part of the attachment.
-    pub stencil_ops: Option<Operations<u32>>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderPassDepthStencilAttachment<'_>: Send, Sync);
-
-// The underlying types are also exported so that documentation shows up for them
-
-/// Object debugging label.
-pub type Label<'a> = Option<&'a str>;
-pub use wgt::RequestAdapterOptions as RequestAdapterOptionsBase;
-/// Additional information required when requesting an adapter.
-///
-/// For use with [`Instance::request_adapter`].
-///
-/// Corresponds to [WebGPU `GPURequestAdapterOptions`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpurequestadapteroptions).
-pub type RequestAdapterOptions<'a, 'b> = RequestAdapterOptionsBase<&'a Surface<'b>>;
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RequestAdapterOptions<'_, '_>: Send, Sync);
-/// Describes a [`Device`].
-///
-/// For use with [`Adapter::request_device`].
-///
-/// Corresponds to [WebGPU `GPUDeviceDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpudevicedescriptor).
-pub type DeviceDescriptor<'a> = wgt::DeviceDescriptor<Label<'a>>;
-static_assertions::assert_impl_all!(DeviceDescriptor<'_>: Send, Sync);
-/// Describes a [`Buffer`].
-///
-/// For use with [`Device::create_buffer`].
-///
-/// Corresponds to [WebGPU `GPUBufferDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpubufferdescriptor).
-pub type BufferDescriptor<'a> = wgt::BufferDescriptor<Label<'a>>;
-static_assertions::assert_impl_all!(BufferDescriptor<'_>: Send, Sync);
-/// Describes a [`CommandEncoder`].
-///
-/// For use with [`Device::create_command_encoder`].
-///
-/// Corresponds to [WebGPU `GPUCommandEncoderDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpucommandencoderdescriptor).
-pub type CommandEncoderDescriptor<'a> = wgt::CommandEncoderDescriptor<Label<'a>>;
-static_assertions::assert_impl_all!(CommandEncoderDescriptor<'_>: Send, Sync);
-/// Describes a [`RenderBundle`].
-///
-/// For use with [`RenderBundleEncoder::finish`].
-///
-/// Corresponds to [WebGPU `GPURenderBundleDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderbundledescriptor).
-pub type RenderBundleDescriptor<'a> = wgt::RenderBundleDescriptor<Label<'a>>;
-static_assertions::assert_impl_all!(RenderBundleDescriptor<'_>: Send, Sync);
-/// Describes a [`Texture`].
-///
-/// For use with [`Device::create_texture`].
-///
-/// Corresponds to [WebGPU `GPUTextureDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gputexturedescriptor).
-pub type TextureDescriptor<'a> = wgt::TextureDescriptor<Label<'a>, &'a [TextureFormat]>;
-static_assertions::assert_impl_all!(TextureDescriptor<'_>: Send, Sync);
-/// Describes a [`QuerySet`].
-///
-/// For use with [`Device::create_query_set`].
-///
-/// Corresponds to [WebGPU `GPUQuerySetDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpuquerysetdescriptor).
-pub type QuerySetDescriptor<'a> = wgt::QuerySetDescriptor<Label<'a>>;
-static_assertions::assert_impl_all!(QuerySetDescriptor<'_>: Send, Sync);
-pub use wgt::Maintain as MaintainBase;
-/// Passed to [`Device::poll`] to control how and if it should block.
-pub type Maintain = wgt::Maintain<SubmissionIndex>;
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Maintain: Send, Sync);
-
-/// Describes a [`TextureView`].
-///
-/// For use with [`Texture::create_view`].
-///
-/// Corresponds to [WebGPU `GPUTextureViewDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gputextureviewdescriptor).
-#[derive(Clone, Debug, Default, Eq, PartialEq)]
-pub struct TextureViewDescriptor<'a> {
-    /// Debug label of the texture view. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// Format of the texture view. Either must be the same as the texture format or in the list
-    /// of `view_formats` in the texture's descriptor.
-    pub format: Option<TextureFormat>,
-    /// The dimension of the texture view. For 1D textures, this must be `D1`. For 2D textures it must be one of
-    /// `D2`, `D2Array`, `Cube`, and `CubeArray`. For 3D textures it must be `D3`
-    pub dimension: Option<TextureViewDimension>,
-    /// Aspect of the texture. Color textures must be [`TextureAspect::All`].
-    pub aspect: TextureAspect,
-    /// Base mip level.
-    pub base_mip_level: u32,
-    /// Mip level count.
-    /// If `Some(count)`, `base_mip_level + count` must be less or equal to underlying texture mip count.
-    /// If `None`, considered to include the rest of the mipmap levels, but at least 1 in total.
-    pub mip_level_count: Option<u32>,
-    /// Base array layer.
-    pub base_array_layer: u32,
-    /// Layer count.
-    /// If `Some(count)`, `base_array_layer + count` must be less or equal to the underlying array count.
-    /// If `None`, considered to include the rest of the array layers, but at least 1 in total.
-    pub array_layer_count: Option<u32>,
-}
-static_assertions::assert_impl_all!(TextureViewDescriptor<'_>: Send, Sync);
-
-/// Describes a [`PipelineLayout`].
-///
-/// For use with [`Device::create_pipeline_layout`].
-///
-/// Corresponds to [WebGPU `GPUPipelineLayoutDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpupipelinelayoutdescriptor).
-#[derive(Clone, Debug, Default)]
-pub struct PipelineLayoutDescriptor<'a> {
-    /// Debug label of the pipeline layout. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// Bind groups that this pipeline uses. The first entry will provide all the bindings for
-    /// "set = 0", second entry will provide all the bindings for "set = 1" etc.
-    pub bind_group_layouts: &'a [&'a BindGroupLayout],
-    /// Set of push constant ranges this pipeline uses. Each shader stage that uses push constants
-    /// must define the range in push constant memory that corresponds to its single `layout(push_constant)`
-    /// uniform block.
-    ///
-    /// If this array is non-empty, the [`Features::PUSH_CONSTANTS`] must be enabled.
-    pub push_constant_ranges: &'a [PushConstantRange],
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(PipelineLayoutDescriptor<'_>: Send, Sync);
-
-/// Describes a [`Sampler`].
-///
-/// For use with [`Device::create_sampler`].
-///
-/// Corresponds to [WebGPU `GPUSamplerDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpusamplerdescriptor).
-#[derive(Clone, Debug, PartialEq)]
-pub struct SamplerDescriptor<'a> {
-    /// Debug label of the sampler. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// How to deal with out of bounds accesses in the u (i.e. x) direction
-    pub address_mode_u: AddressMode,
-    /// How to deal with out of bounds accesses in the v (i.e. y) direction
-    pub address_mode_v: AddressMode,
-    /// How to deal with out of bounds accesses in the w (i.e. z) direction
-    pub address_mode_w: AddressMode,
-    /// How to filter the texture when it needs to be magnified (made larger)
-    pub mag_filter: FilterMode,
-    /// How to filter the texture when it needs to be minified (made smaller)
-    pub min_filter: FilterMode,
-    /// How to filter between mip map levels
-    pub mipmap_filter: FilterMode,
-    /// Minimum level of detail (i.e. mip level) to use
-    pub lod_min_clamp: f32,
-    /// Maximum level of detail (i.e. mip level) to use
-    pub lod_max_clamp: f32,
-    /// If this is enabled, this is a comparison sampler using the given comparison function.
-    pub compare: Option<CompareFunction>,
-    /// Must be at least 1. If this is not 1, all filter modes must be linear.
-    pub anisotropy_clamp: u16,
-    /// Border color to use when address_mode is [`AddressMode::ClampToBorder`]
-    pub border_color: Option<SamplerBorderColor>,
-}
-static_assertions::assert_impl_all!(SamplerDescriptor<'_>: Send, Sync);
-
-impl Default for SamplerDescriptor<'_> {
-    fn default() -> Self {
-        Self {
-            label: None,
-            address_mode_u: Default::default(),
-            address_mode_v: Default::default(),
-            address_mode_w: Default::default(),
-            mag_filter: Default::default(),
-            min_filter: Default::default(),
-            mipmap_filter: Default::default(),
-            lod_min_clamp: 0.0,
-            lod_max_clamp: 32.0,
-            compare: None,
-            anisotropy_clamp: 1,
-            border_color: None,
-        }
-    }
-}
-
-/// An element of a [`BindGroupDescriptor`], consisting of a bindable resource
-/// and the slot to bind it to.
-///
-/// Corresponds to [WebGPU `GPUBindGroupEntry`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpubindgroupentry).
-#[derive(Clone, Debug)]
-pub struct BindGroupEntry<'a> {
-    /// Slot for which binding provides resource. Corresponds to an entry of the same
-    /// binding index in the [`BindGroupLayoutDescriptor`].
-    pub binding: u32,
-    /// Resource to attach to the binding
-    pub resource: BindingResource<'a>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BindGroupEntry<'_>: Send, Sync);
-
-/// Describes a group of bindings and the resources to be bound.
-///
-/// For use with [`Device::create_bind_group`].
-///
-/// Corresponds to [WebGPU `GPUBindGroupDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpubindgroupdescriptor).
-#[derive(Clone, Debug)]
-pub struct BindGroupDescriptor<'a> {
-    /// Debug label of the bind group. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// The [`BindGroupLayout`] that corresponds to this bind group.
-    pub layout: &'a BindGroupLayout,
-    /// The resources to bind to this bind group.
-    pub entries: &'a [BindGroupEntry<'a>],
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BindGroupDescriptor<'_>: Send, Sync);
-
-/// Describes the attachments of a render pass.
-///
-/// For use with [`CommandEncoder::begin_render_pass`].
-///
-/// Corresponds to [WebGPU `GPURenderPassDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpassdescriptor).
-#[derive(Clone, Debug, Default)]
-pub struct RenderPassDescriptor<'a> {
-    /// Debug label of the render pass. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// The color attachments of the render pass.
-    pub color_attachments: &'a [Option<RenderPassColorAttachment<'a>>],
-    /// The depth and stencil attachment of the render pass, if any.
-    pub depth_stencil_attachment: Option<RenderPassDepthStencilAttachment<'a>>,
-    /// Defines which timestamp values will be written for this pass, and where to write them to.
-    ///
-    /// Requires [`Features::TIMESTAMP_QUERY`] to be enabled.
-    pub timestamp_writes: Option<RenderPassTimestampWrites<'a>>,
-    /// Defines where the occlusion query results will be stored for this pass.
-    pub occlusion_query_set: Option<&'a QuerySet>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderPassDescriptor<'_>: Send, Sync);
-
-/// Describes how the vertex buffer is interpreted.
-///
-/// For use in [`VertexState`].
-///
-/// Corresponds to [WebGPU `GPUVertexBufferLayout`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpuvertexbufferlayout).
-#[derive(Clone, Debug, Hash, Eq, PartialEq)]
-pub struct VertexBufferLayout<'a> {
-    /// The stride, in bytes, between elements of this buffer.
-    pub array_stride: BufferAddress,
-    /// How often this vertex buffer is "stepped" forward.
-    pub step_mode: VertexStepMode,
-    /// The list of attributes which comprise a single vertex.
-    pub attributes: &'a [VertexAttribute],
-}
-static_assertions::assert_impl_all!(VertexBufferLayout<'_>: Send, Sync);
-
-/// Describes the vertex processing in a render pipeline.
-///
-/// For use in [`RenderPipelineDescriptor`].
-///
-/// Corresponds to [WebGPU `GPUVertexState`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpuvertexstate).
-#[derive(Clone, Debug)]
-pub struct VertexState<'a> {
-    /// The compiled shader module for this stage.
-    pub module: &'a ShaderModule,
-    /// The name of the entry point in the compiled shader. There must be a function with this name
-    /// in the shader.
-    pub entry_point: &'a str,
-    /// Advanced options for when this pipeline is compiled
-    ///
-    /// This implements `Default`, and for most users can be set to `Default::default()`
-    pub compilation_options: PipelineCompilationOptions<'a>,
-    /// The format of any vertex buffers used with this pipeline.
-    pub buffers: &'a [VertexBufferLayout<'a>],
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(VertexState<'_>: Send, Sync);
-
-/// Describes the fragment processing in a render pipeline.
-///
-/// For use in [`RenderPipelineDescriptor`].
-///
-/// Corresponds to [WebGPU `GPUFragmentState`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpufragmentstate).
-#[derive(Clone, Debug)]
-pub struct FragmentState<'a> {
-    /// The compiled shader module for this stage.
-    pub module: &'a ShaderModule,
-    /// The name of the entry point in the compiled shader. There must be a function with this name
-    /// in the shader.
-    pub entry_point: &'a str,
-    /// Advanced options for when this pipeline is compiled
-    ///
-    /// This implements `Default`, and for most users can be set to `Default::default()`
-    pub compilation_options: PipelineCompilationOptions<'a>,
-    /// The color state of the render targets.
-    pub targets: &'a [Option<ColorTargetState>],
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(FragmentState<'_>: Send, Sync);
-
-/// Describes a render (graphics) pipeline.
-///
-/// For use with [`Device::create_render_pipeline`].
-///
-/// Corresponds to [WebGPU `GPURenderPipelineDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpipelinedescriptor).
-#[derive(Clone, Debug)]
-pub struct RenderPipelineDescriptor<'a> {
-    /// Debug label of the pipeline. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// The layout of bind groups for this pipeline.
-    pub layout: Option<&'a PipelineLayout>,
-    /// The compiled vertex stage, its entry point, and the input buffers layout.
-    pub vertex: VertexState<'a>,
-    /// The properties of the pipeline at the primitive assembly and rasterization level.
-    pub primitive: PrimitiveState,
-    /// The effect of draw calls on the depth and stencil aspects of the output target, if any.
-    pub depth_stencil: Option<DepthStencilState>,
-    /// The multi-sampling properties of the pipeline.
-    pub multisample: MultisampleState,
-    /// The compiled fragment stage, its entry point, and the color targets.
-    pub fragment: Option<FragmentState<'a>>,
-    /// If the pipeline will be used with a multiview render pass, this indicates how many array
-    /// layers the attachments will have.
-    pub multiview: Option<NonZeroU32>,
-    /// The pipeline cache to use when creating this pipeline.
-    pub cache: Option<&'a PipelineCache>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderPipelineDescriptor<'_>: Send, Sync);
-
-/// Describes the timestamp writes of a compute pass.
-///
-/// For use with [`ComputePassDescriptor`].
-/// At least one of `beginning_of_pass_write_index` and `end_of_pass_write_index` must be `Some`.
-///
-/// Corresponds to [WebGPU `GPUComputePassTimestampWrites`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepasstimestampwrites).
-#[derive(Clone, Debug)]
-pub struct ComputePassTimestampWrites<'a> {
-    /// The query set to write to.
-    pub query_set: &'a QuerySet,
-    /// The index of the query set at which a start timestamp of this pass is written, if any.
-    pub beginning_of_pass_write_index: Option<u32>,
-    /// The index of the query set at which an end timestamp of this pass is written, if any.
-    pub end_of_pass_write_index: Option<u32>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ComputePassTimestampWrites<'_>: Send, Sync);
-
-/// Describes the attachments of a compute pass.
-///
-/// For use with [`CommandEncoder::begin_compute_pass`].
-///
-/// Corresponds to [WebGPU `GPUComputePassDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepassdescriptor).
-#[derive(Clone, Default, Debug)]
-pub struct ComputePassDescriptor<'a> {
-    /// Debug label of the compute pass. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// Defines which timestamp values will be written for this pass, and where to write them to.
-    ///
-    /// Requires [`Features::TIMESTAMP_QUERY`] to be enabled.
-    pub timestamp_writes: Option<ComputePassTimestampWrites<'a>>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ComputePassDescriptor<'_>: Send, Sync);
-
-#[derive(Clone, Debug)]
-/// Advanced options for use when a pipeline is compiled
-///
-/// This implements `Default`, and for most users can be set to `Default::default()`
-pub struct PipelineCompilationOptions<'a> {
-    /// Specifies the values of pipeline-overridable constants in the shader module.
-    ///
-    /// If an `@id` attribute was specified on the declaration,
-    /// the key must be the pipeline constant ID as a decimal ASCII number; if not,
-    /// the key must be the constant's identifier name.
-    ///
-    /// The value may represent any of WGSL's concrete scalar types.
-    pub constants: &'a HashMap<String, f64>,
-    /// Whether workgroup scoped memory will be initialized with zero values for this stage.
-    ///
-    /// This is required by the WebGPU spec, but may have overhead which can be avoided
-    /// for cross-platform applications
-    pub zero_initialize_workgroup_memory: bool,
-}
-
-impl<'a> Default for PipelineCompilationOptions<'a> {
-    fn default() -> Self {
-        // HashMap doesn't have a const constructor, due to the use of RandomState
-        // This does introduce some synchronisation costs, but these should be minor,
-        // and might be cheaper than the alternative of getting new random state
-        static DEFAULT_CONSTANTS: std::sync::OnceLock<HashMap<String, f64>> =
-            std::sync::OnceLock::new();
-        let constants = DEFAULT_CONSTANTS.get_or_init(Default::default);
-        Self {
-            constants,
-            zero_initialize_workgroup_memory: true,
-        }
-    }
-}
-
-/// Describes a compute pipeline.
-///
-/// For use with [`Device::create_compute_pipeline`].
-///
-/// Corresponds to [WebGPU `GPUComputePipelineDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepipelinedescriptor).
-#[derive(Clone, Debug)]
-pub struct ComputePipelineDescriptor<'a> {
-    /// Debug label of the pipeline. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// The layout of bind groups for this pipeline.
-    pub layout: Option<&'a PipelineLayout>,
-    /// The compiled shader module for this stage.
-    pub module: &'a ShaderModule,
-    /// The name of the entry point in the compiled shader. There must be a function with this name
-    /// and no return value in the shader.
-    pub entry_point: &'a str,
-    /// Advanced options for when this pipeline is compiled
-    ///
-    /// This implements `Default`, and for most users can be set to `Default::default()`
-    pub compilation_options: PipelineCompilationOptions<'a>,
-    /// The pipeline cache to use when creating this pipeline.
-    pub cache: Option<&'a PipelineCache>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ComputePipelineDescriptor<'_>: Send, Sync);
-
-/// Describes a pipeline cache, which allows reusing compilation work
-/// between program runs.
-///
-/// For use with [`Device::create_pipeline_cache`]
-///
-/// This type is unique to the Rust API of `wgpu`.
-#[derive(Clone, Debug)]
-pub struct PipelineCacheDescriptor<'a> {
-    /// Debug label of the pipeline cache. This might show up in some logs from `wgpu`
-    pub label: Label<'a>,
-    /// The data used to initialise the cache initialise
-    ///
-    /// # Safety
-    ///
-    /// This data must have been provided from a previous call to
-    /// [`PipelineCache::get_data`], if not `None`
-    pub data: Option<&'a [u8]>,
-    /// Whether to create a cache without data when the provided data
-    /// is invalid.
-    ///
-    /// Recommended to set to true
-    pub fallback: bool,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(PipelineCacheDescriptor<'_>: Send, Sync);
-
-pub use wgt::ImageCopyBuffer as ImageCopyBufferBase;
-/// View of a buffer which can be used to copy to/from a texture.
-///
-/// Corresponds to [WebGPU `GPUImageCopyBuffer`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpuimagecopybuffer).
-pub type ImageCopyBuffer<'a> = ImageCopyBufferBase<&'a Buffer>;
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ImageCopyBuffer<'_>: Send, Sync);
-
-pub use wgt::ImageCopyTexture as ImageCopyTextureBase;
-/// View of a texture which can be used to copy to/from a buffer/texture.
-///
-/// Corresponds to [WebGPU `GPUImageCopyTexture`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpuimagecopytexture).
-pub type ImageCopyTexture<'a> = ImageCopyTextureBase<&'a Texture>;
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ImageCopyTexture<'_>: Send, Sync);
-
-pub use wgt::ImageCopyTextureTagged as ImageCopyTextureTaggedBase;
-/// View of a texture which can be used to copy to a texture, including
-/// color space and alpha premultiplication information.
-///
-/// Corresponds to [WebGPU `GPUImageCopyTextureTagged`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpuimagecopytexturetagged).
-pub type ImageCopyTextureTagged<'a> = ImageCopyTextureTaggedBase<&'a Texture>;
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ImageCopyTexture<'_>: Send, Sync);
-
-/// Describes a [`BindGroupLayout`].
-///
-/// For use with [`Device::create_bind_group_layout`].
-///
-/// Corresponds to [WebGPU `GPUBindGroupLayoutDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpubindgrouplayoutdescriptor).
-#[derive(Clone, Debug)]
-pub struct BindGroupLayoutDescriptor<'a> {
-    /// Debug label of the bind group layout. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-
-    /// Array of entries in this BindGroupLayout
-    pub entries: &'a [BindGroupLayoutEntry],
-}
-static_assertions::assert_impl_all!(BindGroupLayoutDescriptor<'_>: Send, Sync);
-
-/// Describes a [`RenderBundleEncoder`].
-///
-/// For use with [`Device::create_render_bundle_encoder`].
-///
-/// Corresponds to [WebGPU `GPURenderBundleEncoderDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderbundleencoderdescriptor).
-#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
-pub struct RenderBundleEncoderDescriptor<'a> {
-    /// Debug label of the render bundle encoder. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// The formats of the color attachments that this render bundle is capable to rendering to. This
-    /// must match the formats of the color attachments in the render pass this render bundle is executed in.
-    pub color_formats: &'a [Option<TextureFormat>],
-    /// Information about the depth attachment that this render bundle is capable to rendering to. This
-    /// must match the format of the depth attachments in the render pass this render bundle is executed in.
-    pub depth_stencil: Option<RenderBundleDepthStencil>,
-    /// Sample count this render bundle is capable of rendering to. This must match the pipelines and
-    /// the render passes it is used in.
-    pub sample_count: u32,
-    /// If this render bundle will rendering to multiple array layers in the attachments at the same time.
-    pub multiview: Option<NonZeroU32>,
-}
-static_assertions::assert_impl_all!(RenderBundleEncoderDescriptor<'_>: Send, Sync);
-
-/// Surface texture that can be rendered to.
-/// Result of a successful call to [`Surface::get_current_texture`].
-///
-/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
-/// the [`GPUCanvasContext`](https://gpuweb.github.io/gpuweb/#canvas-context) provides
-/// a texture without any additional information.
-#[derive(Debug)]
-pub struct SurfaceTexture {
-    /// Accessible view of the frame.
-    pub texture: Texture,
-    /// `true` if the acquired buffer can still be used for rendering,
-    /// but should be recreated for maximum performance.
-    pub suboptimal: bool,
-    presented: bool,
-    detail: Box<dyn AnyWasmNotSendSync>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(SurfaceTexture: Send, Sync);
-
-/// Result of an unsuccessful call to [`Surface::get_current_texture`].
-#[derive(Clone, PartialEq, Eq, Debug)]
-pub enum SurfaceError {
-    /// A timeout was encountered while trying to acquire the next frame.
-    Timeout,
-    /// The underlying surface has changed, and therefore the swap chain must be updated.
-    Outdated,
-    /// The swap chain has been lost and needs to be recreated.
-    Lost,
-    /// There is no more memory left to allocate a new frame.
-    OutOfMemory,
-}
-static_assertions::assert_impl_all!(SurfaceError: Send, Sync);
-
-impl fmt::Display for SurfaceError {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "{}", match self {
-            Self::Timeout => "A timeout was encountered while trying to acquire the next frame",
-            Self::Outdated => "The underlying surface has changed, and therefore the swap chain must be updated",
-            Self::Lost =>  "The swap chain has been lost and needs to be recreated",
-            Self::OutOfMemory => "There is no more memory left to allocate a new frame",
-        })
-    }
-}
-
-impl error::Error for SurfaceError {}
-
-impl Default for Instance {
-    /// Creates a new instance of wgpu with default options.
-    ///
-    /// Backends are set to `Backends::all()`, and FXC is chosen as the `dx12_shader_compiler`.
-    ///
-    /// # Panics
-    ///
-    /// If no backend feature for the active target platform is enabled,
-    /// this method will panic, see [`Instance::enabled_backend_features()`].
-    fn default() -> Self {
-        Self::new(InstanceDescriptor::default())
-    }
-}
-
-impl Instance {
-    /// Returns which backends can be picked for the current build configuration.
-    ///
-    /// The returned set depends on a combination of target platform and enabled features.
-    /// This does *not* do any runtime checks and is exclusively based on compile time information.
-    ///
-    /// `InstanceDescriptor::backends` does not need to be a subset of this,
-    /// but any backend that is not in this set, will not be picked.
-    ///
-    /// TODO: Right now it's otherwise not possible yet to opt-out of all features on some platforms.
-    /// See <https://github.com/gfx-rs/wgpu/issues/3514>
-    /// * Windows/Linux/Android: always enables Vulkan and GLES with no way to opt out
-    pub const fn enabled_backend_features() -> Backends {
-        let mut backends = Backends::empty();
-
-        if cfg!(native) {
-            if cfg!(metal) {
-                backends = backends.union(Backends::METAL);
-            }
-            if cfg!(dx12) {
-                backends = backends.union(Backends::DX12);
-            }
-
-            // Windows, Android, Linux currently always enable Vulkan and OpenGL.
-            // See <https://github.com/gfx-rs/wgpu/issues/3514>
-            if cfg!(target_os = "windows") || cfg!(unix) {
-                backends = backends.union(Backends::VULKAN).union(Backends::GL);
-            }
-
-            // Vulkan on Mac/iOS is only available through vulkan-portability.
-            if (cfg!(target_os = "ios") || cfg!(target_os = "macos"))
-                && cfg!(feature = "vulkan-portability")
-            {
-                backends = backends.union(Backends::VULKAN);
-            }
-
-            // GL on Mac is only available through angle.
-            if cfg!(target_os = "macos") && cfg!(feature = "angle") {
-                backends = backends.union(Backends::GL);
-            }
-        } else {
-            if cfg!(webgpu) {
-                backends = backends.union(Backends::BROWSER_WEBGPU);
-            }
-            if cfg!(webgl) {
-                backends = backends.union(Backends::GL);
-            }
-        }
-
-        backends
-    }
-
-    /// Create an new instance of wgpu.
-    ///
-    /// # Arguments
-    ///
-    /// - `instance_desc` - Has fields for which [backends][Backends] wgpu will choose
-    ///   during instantiation, and which [DX12 shader compiler][Dx12Compiler] wgpu will use.
-    ///
-    ///   [`Backends::BROWSER_WEBGPU`] takes a special role:
-    ///   If it is set and WebGPU support is detected, this instance will *only* be able to create
-    ///   WebGPU adapters. If you instead want to force use of WebGL, either
-    ///   disable the `webgpu` compile-time feature or do add the [`Backends::BROWSER_WEBGPU`]
-    ///   flag to the the `instance_desc`'s `backends` field.
-    ///   If it is set and WebGPU support is *not* detected, the instance will use wgpu-core
-    ///   to create adapters. Meaning that if the `webgl` feature is enabled, it is able to create
-    ///   a WebGL adapter.
-    ///
-    /// # Panics
-    ///
-    /// If no backend feature for the active target platform is enabled,
-    /// this method will panic, see [`Instance::enabled_backend_features()`].
-    #[allow(unreachable_code)]
-    pub fn new(_instance_desc: InstanceDescriptor) -> Self {
-        if Self::enabled_backend_features().is_empty() {
-            panic!(
-                "No wgpu backend feature that is implemented for the target platform was enabled. \
-                 See `wgpu::Instance::enabled_backend_features()` for more information."
-            );
-        }
-
-        #[cfg(webgpu)]
-        {
-            let is_only_available_backend = !cfg!(wgpu_core);
-            let requested_webgpu = _instance_desc.backends.contains(Backends::BROWSER_WEBGPU);
-            let support_webgpu =
-                crate::backend::get_browser_gpu_property().map_or(false, |gpu| !gpu.is_undefined());
-
-            if is_only_available_backend || (requested_webgpu && support_webgpu) {
-                return Self {
-                    context: Arc::from(crate::backend::ContextWebGpu::init(_instance_desc)),
-                };
-            }
-        }
-
-        #[cfg(wgpu_core)]
-        {
-            return Self {
-                context: Arc::from(crate::backend::ContextWgpuCore::init(_instance_desc)),
-            };
-        }
-
-        unreachable!(
-            "Earlier check of `enabled_backend_features` should have prevented getting here!"
-        );
-    }
-
-    /// Create an new instance of wgpu from a wgpu-hal instance.
-    ///
-    /// # Arguments
-    ///
-    /// - `hal_instance` - wgpu-hal instance.
-    ///
-    /// # Safety
-    ///
-    /// Refer to the creation of wgpu-hal Instance for every backend.
-    #[cfg(wgpu_core)]
-    pub unsafe fn from_hal<A: wgc::hal_api::HalApi>(hal_instance: A::Instance) -> Self {
-        Self {
-            context: Arc::new(unsafe {
-                crate::backend::ContextWgpuCore::from_hal_instance::<A>(hal_instance)
-            }),
-        }
-    }
-
-    /// Return a reference to a specific backend instance, if available.
-    ///
-    /// If this `Instance` has a wgpu-hal [`Instance`] for backend
-    /// `A`, return a reference to it. Otherwise, return `None`.
-    ///
-    /// # Safety
-    ///
-    /// - The raw instance handle returned must not be manually destroyed.
-    ///
-    /// [`Instance`]: hal::Api::Instance
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi>(&self) -> Option<&A::Instance> {
-        self.context
-            .as_any()
-            // If we don't have a wgpu-core instance, we don't have a hal instance either.
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-            .and_then(|ctx| unsafe { ctx.instance_as_hal::<A>() })
-    }
-
-    /// Create an new instance of wgpu from a wgpu-core instance.
-    ///
-    /// # Arguments
-    ///
-    /// - `core_instance` - wgpu-core instance.
-    ///
-    /// # Safety
-    ///
-    /// Refer to the creation of wgpu-core Instance.
-    #[cfg(wgpu_core)]
-    pub unsafe fn from_core(core_instance: wgc::instance::Instance) -> Self {
-        Self {
-            context: Arc::new(unsafe {
-                crate::backend::ContextWgpuCore::from_core_instance(core_instance)
-            }),
-        }
-    }
-
-    /// Retrieves all available [`Adapter`]s that match the given [`Backends`].
-    ///
-    /// # Arguments
-    ///
-    /// - `backends` - Backends from which to enumerate adapters.
-    #[cfg(native)]
-    pub fn enumerate_adapters(&self, backends: Backends) -> Vec<Adapter> {
-        let context = Arc::clone(&self.context);
-        self.context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-            .map(|ctx| {
-                ctx.enumerate_adapters(backends)
-                    .into_iter()
-                    .map(move |id| crate::Adapter {
-                        context: Arc::clone(&context),
-                        id: ObjectId::from(id),
-                        data: Box::new(()),
-                    })
-                    .collect()
-            })
-            .unwrap()
-    }
-
-    /// Retrieves an [`Adapter`] which matches the given [`RequestAdapterOptions`].
-    ///
-    /// Some options are "soft", so treated as non-mandatory. Others are "hard".
-    ///
-    /// If no adapters are found that suffice all the "hard" options, `None` is returned.
-    ///
-    /// A `compatible_surface` is required when targeting WebGL2.
-    pub fn request_adapter(
-        &self,
-        options: &RequestAdapterOptions<'_, '_>,
-    ) -> impl Future<Output = Option<Adapter>> + WasmNotSend {
-        let context = Arc::clone(&self.context);
-        let adapter = self.context.instance_request_adapter(options);
-        async move {
-            adapter
-                .await
-                .map(|(id, data)| Adapter { context, id, data })
-        }
-    }
-
-    /// Converts a wgpu-hal `ExposedAdapter` to a wgpu [`Adapter`].
-    ///
-    /// # Safety
-    ///
-    /// `hal_adapter` must be created from this instance internal handle.
-    #[cfg(wgpu_core)]
-    pub unsafe fn create_adapter_from_hal<A: wgc::hal_api::HalApi>(
-        &self,
-        hal_adapter: hal::ExposedAdapter<A>,
-    ) -> Adapter {
-        let context = Arc::clone(&self.context);
-        let id = unsafe {
-            context
-                .as_any()
-                .downcast_ref::<crate::backend::ContextWgpuCore>()
-                .unwrap()
-                .create_adapter_from_hal(hal_adapter)
-                .into()
-        };
-        Adapter {
-            context,
-            id,
-            data: Box::new(()),
-        }
-    }
-
-    /// Creates a new surface targeting a given window/canvas/surface/etc..
-    ///
-    /// Internally, this creates surfaces for all backends that are enabled for this instance.
-    ///
-    /// See [`SurfaceTarget`] for what targets are supported.
-    /// See [`Instance::create_surface_unsafe`] for surface creation with unsafe target variants.
-    ///
-    /// Most commonly used are window handles (or provider of windows handles)
-    /// which can be passed directly as they're automatically converted to [`SurfaceTarget`].
-    pub fn create_surface<'window>(
-        &self,
-        target: impl Into<SurfaceTarget<'window>>,
-    ) -> Result<Surface<'window>, CreateSurfaceError> {
-        // Handle origin (i.e. window) to optionally take ownership of to make the surface outlast the window.
-        let handle_source;
-
-        let target = target.into();
-        let mut surface = match target {
-            SurfaceTarget::Window(window) => unsafe {
-                let surface = self.create_surface_unsafe(
-                    SurfaceTargetUnsafe::from_window(&window).map_err(|e| CreateSurfaceError {
-                        inner: CreateSurfaceErrorKind::RawHandle(e),
-                    })?,
-                );
-                handle_source = Some(window);
-
-                surface
-            }?,
-
-            #[cfg(any(webgpu, webgl))]
-            SurfaceTarget::Canvas(canvas) => {
-                handle_source = None;
-
-                let value: &wasm_bindgen::JsValue = &canvas;
-                let obj = std::ptr::NonNull::from(value).cast();
-                let raw_window_handle = raw_window_handle::WebCanvasWindowHandle::new(obj).into();
-                let raw_display_handle = raw_window_handle::WebDisplayHandle::new().into();
-
-                // Note that we need to call this while we still have `value` around.
-                // This is safe without storing canvas to `handle_origin` since the surface will create a copy internally.
-                unsafe {
-                    self.create_surface_unsafe(SurfaceTargetUnsafe::RawHandle {
-                        raw_display_handle,
-                        raw_window_handle,
-                    })
-                }?
-            }
-
-            #[cfg(any(webgpu, webgl))]
-            SurfaceTarget::OffscreenCanvas(canvas) => {
-                handle_source = None;
-
-                let value: &wasm_bindgen::JsValue = &canvas;
-                let obj = std::ptr::NonNull::from(value).cast();
-                let raw_window_handle =
-                    raw_window_handle::WebOffscreenCanvasWindowHandle::new(obj).into();
-                let raw_display_handle = raw_window_handle::WebDisplayHandle::new().into();
-
-                // Note that we need to call this while we still have `value` around.
-                // This is safe without storing canvas to `handle_origin` since the surface will create a copy internally.
-                unsafe {
-                    self.create_surface_unsafe(SurfaceTargetUnsafe::RawHandle {
-                        raw_display_handle,
-                        raw_window_handle,
-                    })
-                }?
-            }
-        };
-
-        surface._handle_source = handle_source;
-
-        Ok(surface)
-    }
-
-    /// Creates a new surface targeting a given window/canvas/surface/etc. using an unsafe target.
-    ///
-    /// Internally, this creates surfaces for all backends that are enabled for this instance.
-    ///
-    /// See [`SurfaceTargetUnsafe`] for what targets are supported.
-    /// See [`Instance::create_surface`] for surface creation with safe target variants.
-    ///
-    /// # Safety
-    ///
-    /// - See respective [`SurfaceTargetUnsafe`] variants for safety requirements.
-    pub unsafe fn create_surface_unsafe<'window>(
-        &self,
-        target: SurfaceTargetUnsafe,
-    ) -> Result<Surface<'window>, CreateSurfaceError> {
-        let (id, data) = unsafe { self.context.instance_create_surface(target) }?;
-
-        Ok(Surface {
-            context: Arc::clone(&self.context),
-            _handle_source: None,
-            id,
-            surface_data: data,
-            config: Mutex::new(None),
-        })
-    }
-
-    /// Polls all devices.
-    ///
-    /// If `force_wait` is true and this is not running on the web, then this
-    /// function will block until all in-flight buffers have been mapped and
-    /// all submitted commands have finished execution.
-    ///
-    /// Return `true` if all devices' queues are empty, or `false` if there are
-    /// queue submissions still in flight. (Note that, unless access to all
-    /// [`Queue`s] associated with this [`Instance`] is coordinated somehow,
-    /// this information could be out of date by the time the caller receives
-    /// it. `Queue`s can be shared between threads, and other threads could
-    /// submit new work at any time.)
-    ///
-    /// On the web, this is a no-op. `Device`s are automatically polled.
-    ///
-    /// [`Queue`s]: Queue
-    pub fn poll_all(&self, force_wait: bool) -> bool {
-        self.context.instance_poll_all_devices(force_wait)
-    }
-
-    /// Generates memory report.
-    ///
-    /// Returns `None` if the feature is not supported by the backend
-    /// which happens only when WebGPU is pre-selected by the instance creation.
-    #[cfg(wgpu_core)]
-    pub fn generate_report(&self) -> Option<wgc::global::GlobalReport> {
-        self.context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-            .map(|ctx| ctx.generate_report())
-    }
-}
-
-impl Adapter {
-    /// Requests a connection to a physical device, creating a logical device.
-    ///
-    /// Returns the [`Device`] together with a [`Queue`] that executes command buffers.
-    ///
-    /// [Per the WebGPU specification], an [`Adapter`] may only be used once to create a device.
-    /// If another device is wanted, call [`Instance::request_adapter()`] again to get a fresh
-    /// [`Adapter`].
-    /// However, `wgpu` does not currently enforce this restriction.
-    ///
-    /// # Arguments
-    ///
-    /// - `desc` - Description of the features and limits requested from the given device.
-    /// - `trace_path` - Can be used for API call tracing, if that feature is
-    ///   enabled in `wgpu-core`.
-    ///
-    /// # Panics
-    ///
-    /// - `request_device()` was already called on this `Adapter`.
-    /// - Features specified by `desc` are not supported by this adapter.
-    /// - Unsafe features were requested but not enabled when requesting the adapter.
-    /// - Limits requested exceed the values provided by the adapter.
-    /// - Adapter does not support all features wgpu requires to safely operate.
-    ///
-    /// [Per the WebGPU specification]: https://www.w3.org/TR/webgpu/#dom-gpuadapter-requestdevice
-    pub fn request_device(
-        &self,
-        desc: &DeviceDescriptor<'_>,
-        trace_path: Option<&std::path::Path>,
-    ) -> impl Future<Output = Result<(Device, Queue), RequestDeviceError>> + WasmNotSend {
-        let context = Arc::clone(&self.context);
-        let device = DynContext::adapter_request_device(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-            trace_path,
-        );
-        async move {
-            device.await.map(
-                |DeviceRequest {
-                     device_id,
-                     device_data,
-                     queue_id,
-                     queue_data,
-                 }| {
-                    (
-                        Device {
-                            context: Arc::clone(&context),
-                            id: device_id,
-                            data: device_data,
-                        },
-                        Queue {
-                            context,
-                            id: queue_id,
-                            data: queue_data,
-                        },
-                    )
-                },
-            )
-        }
-    }
-
-    /// Create a wgpu [`Device`] and [`Queue`] from a wgpu-hal `OpenDevice`
-    ///
-    /// # Safety
-    ///
-    /// - `hal_device` must be created from this adapter internal handle.
-    /// - `desc.features` must be a subset of `hal_device` features.
-    #[cfg(wgpu_core)]
-    pub unsafe fn create_device_from_hal<A: wgc::hal_api::HalApi>(
-        &self,
-        hal_device: hal::OpenDevice<A>,
-        desc: &DeviceDescriptor<'_>,
-        trace_path: Option<&std::path::Path>,
-    ) -> Result<(Device, Queue), RequestDeviceError> {
-        let context = Arc::clone(&self.context);
-        unsafe {
-            self.context
-                .as_any()
-                .downcast_ref::<crate::backend::ContextWgpuCore>()
-                // Part of the safety requirements is that the device was generated from the same adapter.
-                // Therefore, unwrap is fine here since only WgpuCoreContext based adapters have the ability to create hal devices.
-                .unwrap()
-                .create_device_from_hal(&self.id.into(), hal_device, desc, trace_path)
-        }
-        .map(|(device, queue)| {
-            (
-                Device {
-                    context: Arc::clone(&context),
-                    id: device.id().into(),
-                    data: Box::new(device),
-                },
-                Queue {
-                    context,
-                    id: queue.id().into(),
-                    data: Box::new(queue),
-                },
-            )
-        })
-    }
-
-    /// Apply a callback to this `Adapter`'s underlying backend adapter.
-    ///
-    /// If this `Adapter` is implemented by the backend API given by `A` (Vulkan,
-    /// Dx12, etc.), then apply `hal_adapter_callback` to `Some(&adapter)`, where
-    /// `adapter` is the underlying backend adapter type, [`A::Adapter`].
-    ///
-    /// If this `Adapter` uses a different backend, apply `hal_adapter_callback`
-    /// to `None`.
-    ///
-    /// The adapter is locked for reading while `hal_adapter_callback` runs. If
-    /// the callback attempts to perform any `wgpu` operations that require
-    /// write access to the adapter, deadlock will occur. The locks are
-    /// automatically released when the callback returns.
-    ///
-    /// # Safety
-    ///
-    /// - The raw handle passed to the callback must not be manually destroyed.
-    ///
-    /// [`A::Adapter`]: hal::Api::Adapter
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Adapter>) -> R, R>(
-        &self,
-        hal_adapter_callback: F,
-    ) -> R {
-        if let Some(ctx) = self
-            .context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-        {
-            unsafe { ctx.adapter_as_hal::<A, F, R>(self.id.into(), hal_adapter_callback) }
-        } else {
-            hal_adapter_callback(None)
-        }
-    }
-
-    /// Returns whether this adapter may present to the passed surface.
-    pub fn is_surface_supported(&self, surface: &Surface<'_>) -> bool {
-        DynContext::adapter_is_surface_supported(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            &surface.id,
-            surface.surface_data.as_ref(),
-        )
-    }
-
-    /// The features which can be used to create devices on this adapter.
-    pub fn features(&self) -> Features {
-        DynContext::adapter_features(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// The best limits which can be used to create devices on this adapter.
-    pub fn limits(&self) -> Limits {
-        DynContext::adapter_limits(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Get info about the adapter itself.
-    pub fn get_info(&self) -> AdapterInfo {
-        DynContext::adapter_get_info(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Get info about the adapter itself.
-    pub fn get_downlevel_capabilities(&self) -> DownlevelCapabilities {
-        DynContext::adapter_downlevel_capabilities(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Returns the features supported for a given texture format by this adapter.
-    ///
-    /// Note that the WebGPU spec further restricts the available usages/features.
-    /// To disable these restrictions on a device, request the [`Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES`] feature.
-    pub fn get_texture_format_features(&self, format: TextureFormat) -> TextureFormatFeatures {
-        DynContext::adapter_get_texture_format_features(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            format,
-        )
-    }
-
-    /// Generates a timestamp using the clock used by the presentation engine.
-    ///
-    /// When comparing completely opaque timestamp systems, we need a way of generating timestamps that signal
-    /// the exact same time. You can do this by calling your own timestamp function immediately after a call to
-    /// this function. This should result in timestamps that are 0.5 to 5 microseconds apart. There are locks
-    /// that must be taken during the call, so don't call your function before.
-    ///
-    /// ```no_run
-    /// # let adapter: wgpu::Adapter = panic!();
-    /// # let some_code = || wgpu::PresentationTimestamp::INVALID_TIMESTAMP;
-    /// use std::time::{Duration, Instant};
-    /// let presentation = adapter.get_presentation_timestamp();
-    /// let instant = Instant::now();
-    ///
-    /// // We can now turn a new presentation timestamp into an Instant.
-    /// let some_pres_timestamp = some_code();
-    /// let duration = Duration::from_nanos((some_pres_timestamp.0 - presentation.0) as u64);
-    /// let new_instant: Instant = instant + duration;
-    /// ```
-    //
-    /// [Instant]: std::time::Instant
-    pub fn get_presentation_timestamp(&self) -> PresentationTimestamp {
-        DynContext::adapter_get_presentation_timestamp(&*self.context, &self.id, self.data.as_ref())
-    }
-}
-
-impl Device {
-    /// Check for resource cleanups and mapping callbacks. Will block if [`Maintain::Wait`] is passed.
-    ///
-    /// Return `true` if the queue is empty, or `false` if there are more queue
-    /// submissions still in flight. (Note that, unless access to the [`Queue`] is
-    /// coordinated somehow, this information could be out of date by the time
-    /// the caller receives it. `Queue`s can be shared between threads, so
-    /// other threads could submit new work at any time.)
-    ///
-    /// When running on WebGPU, this is a no-op. `Device`s are automatically polled.
-    pub fn poll(&self, maintain: Maintain) -> MaintainResult {
-        DynContext::device_poll(&*self.context, &self.id, self.data.as_ref(), maintain)
-    }
-
-    /// The features which can be used on this device.
-    ///
-    /// No additional features can be used, even if the underlying adapter can support them.
-    pub fn features(&self) -> Features {
-        DynContext::device_features(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// The limits which can be used on this device.
-    ///
-    /// No better limits can be used, even if the underlying adapter can support them.
-    pub fn limits(&self) -> Limits {
-        DynContext::device_limits(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Creates a shader module from either SPIR-V or WGSL source code.
-    ///
-    /// <div class="warning">
-    // NOTE: Keep this in sync with `naga::front::wgsl::parse_str`!
-    // NOTE: Keep this in sync with `wgpu_core::Global::device_create_shader_module`!
-    ///
-    /// This function may consume a lot of stack space. Compiler-enforced limits for parsing
-    /// recursion exist; if shader compilation runs into them, it will return an error gracefully.
-    /// However, on some build profiles and platforms, the default stack size for a thread may be
-    /// exceeded before this limit is reached during parsing. Callers should ensure that there is
-    /// enough stack space for this, particularly if calls to this method are exposed to user
-    /// input.
-    ///
-    /// </div>
-    pub fn create_shader_module(&self, desc: ShaderModuleDescriptor<'_>) -> ShaderModule {
-        let (id, data) = DynContext::device_create_shader_module(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-            wgt::ShaderBoundChecks::new(),
-        );
-        ShaderModule {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a shader module from either SPIR-V or WGSL source code without runtime checks.
-    ///
-    /// # Safety
-    /// In contrast with [`create_shader_module`](Self::create_shader_module) this function
-    /// creates a shader module without runtime checks which allows shaders to perform
-    /// operations which can lead to undefined behavior like indexing out of bounds, thus it's
-    /// the caller responsibility to pass a shader which doesn't perform any of this
-    /// operations.
-    ///
-    /// This has no effect on web.
-    pub unsafe fn create_shader_module_unchecked(
-        &self,
-        desc: ShaderModuleDescriptor<'_>,
-    ) -> ShaderModule {
-        let (id, data) = DynContext::device_create_shader_module(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-            unsafe { wgt::ShaderBoundChecks::unchecked() },
-        );
-        ShaderModule {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a shader module from SPIR-V binary directly.
-    ///
-    /// # Safety
-    ///
-    /// This function passes binary data to the backend as-is and can potentially result in a
-    /// driver crash or bogus behaviour. No attempt is made to ensure that data is valid SPIR-V.
-    ///
-    /// See also [`include_spirv_raw!`] and [`util::make_spirv_raw`].
-    pub unsafe fn create_shader_module_spirv(
-        &self,
-        desc: &ShaderModuleDescriptorSpirV<'_>,
-    ) -> ShaderModule {
-        let (id, data) = unsafe {
-            DynContext::device_create_shader_module_spirv(
-                &*self.context,
-                &self.id,
-                self.data.as_ref(),
-                desc,
-            )
-        };
-        ShaderModule {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates an empty [`CommandEncoder`].
-    pub fn create_command_encoder(&self, desc: &CommandEncoderDescriptor<'_>) -> CommandEncoder {
-        let (id, data) = DynContext::device_create_command_encoder(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        CommandEncoder {
-            context: Arc::clone(&self.context),
-            id: Some(id),
-            data,
-        }
-    }
-
-    /// Creates an empty [`RenderBundleEncoder`].
-    pub fn create_render_bundle_encoder(
-        &self,
-        desc: &RenderBundleEncoderDescriptor<'_>,
-    ) -> RenderBundleEncoder<'_> {
-        let (id, data) = DynContext::device_create_render_bundle_encoder(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        RenderBundleEncoder {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-            parent: self,
-            _p: Default::default(),
-        }
-    }
-
-    /// Creates a new [`BindGroup`].
-    pub fn create_bind_group(&self, desc: &BindGroupDescriptor<'_>) -> BindGroup {
-        let (id, data) = DynContext::device_create_bind_group(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        BindGroup {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a [`BindGroupLayout`].
-    pub fn create_bind_group_layout(
-        &self,
-        desc: &BindGroupLayoutDescriptor<'_>,
-    ) -> BindGroupLayout {
-        let (id, data) = DynContext::device_create_bind_group_layout(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        BindGroupLayout {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a [`PipelineLayout`].
-    pub fn create_pipeline_layout(&self, desc: &PipelineLayoutDescriptor<'_>) -> PipelineLayout {
-        let (id, data) = DynContext::device_create_pipeline_layout(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        PipelineLayout {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a [`RenderPipeline`].
-    pub fn create_render_pipeline(&self, desc: &RenderPipelineDescriptor<'_>) -> RenderPipeline {
-        let (id, data) = DynContext::device_create_render_pipeline(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        RenderPipeline {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a [`ComputePipeline`].
-    pub fn create_compute_pipeline(&self, desc: &ComputePipelineDescriptor<'_>) -> ComputePipeline {
-        let (id, data) = DynContext::device_create_compute_pipeline(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        ComputePipeline {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a [`Buffer`].
-    pub fn create_buffer(&self, desc: &BufferDescriptor<'_>) -> Buffer {
-        let mut map_context = MapContext::new(desc.size);
-        if desc.mapped_at_creation {
-            map_context.initial_range = 0..desc.size;
-        }
-
-        let (id, data) =
-            DynContext::device_create_buffer(&*self.context, &self.id, self.data.as_ref(), desc);
-
-        Buffer {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-            map_context: Mutex::new(map_context),
-            size: desc.size,
-            usage: desc.usage,
-        }
-    }
-
-    /// Creates a new [`Texture`].
-    ///
-    /// `desc` specifies the general format of the texture.
-    pub fn create_texture(&self, desc: &TextureDescriptor<'_>) -> Texture {
-        let (id, data) =
-            DynContext::device_create_texture(&*self.context, &self.id, self.data.as_ref(), desc);
-        Texture {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-            owned: true,
-            descriptor: TextureDescriptor {
-                label: None,
-                view_formats: &[],
-                ..desc.clone()
-            },
-        }
-    }
-
-    /// Creates a [`Texture`] from a wgpu-hal Texture.
-    ///
-    /// # Safety
-    ///
-    /// - `hal_texture` must be created from this device internal handle
-    /// - `hal_texture` must be created respecting `desc`
-    /// - `hal_texture` must be initialized
-    #[cfg(wgpu_core)]
-    pub unsafe fn create_texture_from_hal<A: wgc::hal_api::HalApi>(
-        &self,
-        hal_texture: A::Texture,
-        desc: &TextureDescriptor<'_>,
-    ) -> Texture {
-        let texture = unsafe {
-            self.context
-                .as_any()
-                .downcast_ref::<crate::backend::ContextWgpuCore>()
-                // Part of the safety requirements is that the texture was generated from the same hal device.
-                // Therefore, unwrap is fine here since only WgpuCoreContext has the ability to create hal textures.
-                .unwrap()
-                .create_texture_from_hal::<A>(
-                    hal_texture,
-                    self.data.as_ref().downcast_ref().unwrap(),
-                    desc,
-                )
-        };
-        Texture {
-            context: Arc::clone(&self.context),
-            id: ObjectId::from(texture.id()),
-            data: Box::new(texture),
-            owned: true,
-            descriptor: TextureDescriptor {
-                label: None,
-                view_formats: &[],
-                ..desc.clone()
-            },
-        }
-    }
-
-    /// Creates a [`Buffer`] from a wgpu-hal Buffer.
-    ///
-    /// # Safety
-    ///
-    /// - `hal_buffer` must be created from this device internal handle
-    /// - `hal_buffer` must be created respecting `desc`
-    /// - `hal_buffer` must be initialized
-    #[cfg(wgpu_core)]
-    pub unsafe fn create_buffer_from_hal<A: wgc::hal_api::HalApi>(
-        &self,
-        hal_buffer: A::Buffer,
-        desc: &BufferDescriptor<'_>,
-    ) -> Buffer {
-        let mut map_context = MapContext::new(desc.size);
-        if desc.mapped_at_creation {
-            map_context.initial_range = 0..desc.size;
-        }
-
-        let (id, buffer) = unsafe {
-            self.context
-                .as_any()
-                .downcast_ref::<crate::backend::ContextWgpuCore>()
-                // Part of the safety requirements is that the buffer was generated from the same hal device.
-                // Therefore, unwrap is fine here since only WgpuCoreContext has the ability to create hal buffers.
-                .unwrap()
-                .create_buffer_from_hal::<A>(
-                    hal_buffer,
-                    self.data.as_ref().downcast_ref().unwrap(),
-                    desc,
-                )
-        };
-
-        Buffer {
-            context: Arc::clone(&self.context),
-            id: ObjectId::from(id),
-            data: Box::new(buffer),
-            map_context: Mutex::new(map_context),
-            size: desc.size,
-            usage: desc.usage,
-        }
-    }
-
-    /// Creates a new [`Sampler`].
-    ///
-    /// `desc` specifies the behavior of the sampler.
-    pub fn create_sampler(&self, desc: &SamplerDescriptor<'_>) -> Sampler {
-        let (id, data) =
-            DynContext::device_create_sampler(&*self.context, &self.id, self.data.as_ref(), desc);
-        Sampler {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a new [`QuerySet`].
-    pub fn create_query_set(&self, desc: &QuerySetDescriptor<'_>) -> QuerySet {
-        let (id, data) =
-            DynContext::device_create_query_set(&*self.context, &self.id, self.data.as_ref(), desc);
-        QuerySet {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Set a callback for errors that are not handled in error scopes.
-    pub fn on_uncaptured_error(&self, handler: Box<dyn UncapturedErrorHandler>) {
-        self.context
-            .device_on_uncaptured_error(&self.id, self.data.as_ref(), handler);
-    }
-
-    /// Push an error scope.
-    pub fn push_error_scope(&self, filter: ErrorFilter) {
-        self.context
-            .device_push_error_scope(&self.id, self.data.as_ref(), filter);
-    }
-
-    /// Pop an error scope.
-    pub fn pop_error_scope(&self) -> impl Future<Output = Option<Error>> + WasmNotSend {
-        self.context
-            .device_pop_error_scope(&self.id, self.data.as_ref())
-    }
-
-    /// Starts frame capture.
-    pub fn start_capture(&self) {
-        DynContext::device_start_capture(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Stops frame capture.
-    pub fn stop_capture(&self) {
-        DynContext::device_stop_capture(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Query internal counters from the native backend for debugging purposes.
-    ///
-    /// Some backends may not set all counters, or may not set any counter at all.
-    /// The `counters` cargo feature must be enabled for any counter to be set.
-    ///
-    /// If a counter is not set, its contains its default value (zero).
-    pub fn get_internal_counters(&self) -> wgt::InternalCounters {
-        DynContext::device_get_internal_counters(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Generate an GPU memory allocation report if the underlying backend supports it.
-    ///
-    /// Backends that do not support producing these reports return `None`. A backend may
-    /// Support it and still return `None` if it is not using performing sub-allocation,
-    /// for example as a workaround for driver issues.
-    pub fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
-        DynContext::generate_allocator_report(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Apply a callback to this `Device`'s underlying backend device.
-    ///
-    /// If this `Device` is implemented by the backend API given by `A` (Vulkan,
-    /// Dx12, etc.), then apply `hal_device_callback` to `Some(&device)`, where
-    /// `device` is the underlying backend device type, [`A::Device`].
-    ///
-    /// If this `Device` uses a different backend, apply `hal_device_callback`
-    /// to `None`.
-    ///
-    /// The device is locked for reading while `hal_device_callback` runs. If
-    /// the callback attempts to perform any `wgpu` operations that require
-    /// write access to the device (destroying a buffer, say), deadlock will
-    /// occur. The locks are automatically released when the callback returns.
-    ///
-    /// # Safety
-    ///
-    /// - The raw handle passed to the callback must not be manually destroyed.
-    ///
-    /// [`A::Device`]: hal::Api::Device
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Device>) -> R, R>(
-        &self,
-        hal_device_callback: F,
-    ) -> Option<R> {
-        self.context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-            .map(|ctx| unsafe {
-                ctx.device_as_hal::<A, F, R>(
-                    self.data.as_ref().downcast_ref().unwrap(),
-                    hal_device_callback,
-                )
-            })
-    }
-
-    /// Destroy this device.
-    pub fn destroy(&self) {
-        DynContext::device_destroy(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Set a DeviceLostCallback on this device.
-    pub fn set_device_lost_callback(
-        &self,
-        callback: impl Fn(DeviceLostReason, String) + Send + 'static,
-    ) {
-        DynContext::device_set_device_lost_callback(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            Box::new(callback),
-        )
-    }
-
-    /// Test-only function to make this device invalid.
-    #[doc(hidden)]
-    pub fn make_invalid(&self) {
-        DynContext::device_make_invalid(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Create a [`PipelineCache`] with initial data
-    ///
-    /// This can be passed to [`Device::create_compute_pipeline`]
-    /// and [`Device::create_render_pipeline`] to either accelerate these
-    /// or add the cache results from those.
-    ///
-    /// # Safety
-    ///
-    /// If the `data` field of `desc` is set, it must have previously been returned from a call
-    /// to [`PipelineCache::get_data`][^saving]. This `data` will only be used if it came
-    /// from an adapter with the same [`util::pipeline_cache_key`].
-    /// This *is* compatible across wgpu versions, as any data format change will
-    /// be accounted for.
-    ///
-    /// It is *not* supported to bring caches from previous direct uses of backend APIs
-    /// into this method.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error value if:
-    ///  * the [`PIPELINE_CACHE`](wgt::Features::PIPELINE_CACHE) feature is not enabled
-    ///  * this device is invalid; or
-    ///  * the device is out of memory
-    ///
-    /// This method also returns an error value if:
-    ///  * The `fallback` field on `desc` is false; and
-    ///  * the `data` provided would not be used[^data_not_used]
-    ///
-    /// If an error value is used in subsequent calls, default caching will be used.
-    ///
-    /// [^saving]: We do recognise that saving this data to disk means this condition
-    /// is impossible to fully prove. Consider the risks for your own application in this case.
-    ///
-    /// [^data_not_used]: This data may be not used if: the data was produced by a prior
-    /// version of wgpu; or was created for an incompatible adapter, or there was a GPU driver
-    /// update. In some cases, the data might not be used and a real value is returned,
-    /// this is left to the discretion of GPU drivers.
-    pub unsafe fn create_pipeline_cache(
-        &self,
-        desc: &PipelineCacheDescriptor<'_>,
-    ) -> PipelineCache {
-        let (id, data) = unsafe {
-            DynContext::device_create_pipeline_cache(
-                &*self.context,
-                &self.id,
-                self.data.as_ref(),
-                desc,
-            )
-        };
-        PipelineCache {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-}
-
-impl Drop for Device {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.device_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Requesting a device from an [`Adapter`] failed.
-#[derive(Clone, Debug)]
-pub struct RequestDeviceError {
-    inner: RequestDeviceErrorKind,
-}
-#[derive(Clone, Debug)]
-enum RequestDeviceErrorKind {
-    /// Error from [`wgpu_core`].
-    // must match dependency cfg
-    #[cfg(wgpu_core)]
-    Core(wgc::instance::RequestDeviceError),
-
-    /// Error from web API that was called by `wgpu` to request a device.
-    ///
-    /// (This is currently never used by the webgl backend, but it could be.)
-    #[cfg(webgpu)]
-    WebGpu(wasm_bindgen::JsValue),
-}
-
-#[cfg(send_sync)]
-unsafe impl Send for RequestDeviceErrorKind {}
-#[cfg(send_sync)]
-unsafe impl Sync for RequestDeviceErrorKind {}
-
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RequestDeviceError: Send, Sync);
-
-impl fmt::Display for RequestDeviceError {
-    fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match &self.inner {
-            #[cfg(wgpu_core)]
-            RequestDeviceErrorKind::Core(error) => error.fmt(_f),
-            #[cfg(webgpu)]
-            RequestDeviceErrorKind::WebGpu(error_js_value) => {
-                // wasm-bindgen provides a reasonable error stringification via `Debug` impl
-                write!(_f, "{error_js_value:?}")
-            }
-            #[cfg(not(any(webgpu, wgpu_core)))]
-            _ => unimplemented!("unknown `RequestDeviceErrorKind`"),
-        }
-    }
-}
-
-impl error::Error for RequestDeviceError {
-    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
-        match &self.inner {
-            #[cfg(wgpu_core)]
-            RequestDeviceErrorKind::Core(error) => error.source(),
-            #[cfg(webgpu)]
-            RequestDeviceErrorKind::WebGpu(_) => None,
-            #[cfg(not(any(webgpu, wgpu_core)))]
-            _ => unimplemented!("unknown `RequestDeviceErrorKind`"),
-        }
-    }
-}
-
-#[cfg(wgpu_core)]
-impl From<wgc::instance::RequestDeviceError> for RequestDeviceError {
-    fn from(error: wgc::instance::RequestDeviceError) -> Self {
-        Self {
-            inner: RequestDeviceErrorKind::Core(error),
-        }
-    }
-}
-
-/// [`Instance::create_surface()`] or a related function failed.
-#[derive(Clone, Debug)]
-#[non_exhaustive]
-pub struct CreateSurfaceError {
-    inner: CreateSurfaceErrorKind,
-}
-#[derive(Clone, Debug)]
-enum CreateSurfaceErrorKind {
-    /// Error from [`wgpu_hal`].
-    #[cfg(wgpu_core)]
-    Hal(wgc::instance::CreateSurfaceError),
-
-    /// Error from WebGPU surface creation.
-    #[allow(dead_code)] // may be unused depending on target and features
-    Web(String),
-
-    /// Error when trying to get a [`DisplayHandle`] or a [`WindowHandle`] from
-    /// `raw_window_handle`.
-    RawHandle(raw_window_handle::HandleError),
-}
-static_assertions::assert_impl_all!(CreateSurfaceError: Send, Sync);
-
-impl fmt::Display for CreateSurfaceError {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match &self.inner {
-            #[cfg(wgpu_core)]
-            CreateSurfaceErrorKind::Hal(e) => e.fmt(f),
-            CreateSurfaceErrorKind::Web(e) => e.fmt(f),
-            CreateSurfaceErrorKind::RawHandle(e) => e.fmt(f),
-        }
-    }
-}
-
-impl error::Error for CreateSurfaceError {
-    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
-        match &self.inner {
-            #[cfg(wgpu_core)]
-            CreateSurfaceErrorKind::Hal(e) => e.source(),
-            CreateSurfaceErrorKind::Web(_) => None,
-            CreateSurfaceErrorKind::RawHandle(e) => e.source(),
-        }
-    }
-}
-
-#[cfg(wgpu_core)]
-impl From<wgc::instance::CreateSurfaceError> for CreateSurfaceError {
-    fn from(e: wgc::instance::CreateSurfaceError) -> Self {
-        Self {
-            inner: CreateSurfaceErrorKind::Hal(e),
-        }
-    }
-}
-
-/// Error occurred when trying to async map a buffer.
-#[derive(Clone, PartialEq, Eq, Debug)]
-pub struct BufferAsyncError;
-static_assertions::assert_impl_all!(BufferAsyncError: Send, Sync);
-
-impl fmt::Display for BufferAsyncError {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "Error occurred when trying to async map a buffer")
-    }
-}
-
-impl error::Error for BufferAsyncError {}
-
-/// Type of buffer mapping.
-#[derive(Debug, Clone, Copy, Eq, PartialEq)]
-pub enum MapMode {
-    /// Map only for reading
-    Read,
-    /// Map only for writing
-    Write,
-}
-static_assertions::assert_impl_all!(MapMode: Send, Sync);
-
-fn range_to_offset_size<S: RangeBounds<BufferAddress>>(
-    bounds: S,
-) -> (BufferAddress, Option<BufferSize>) {
-    let offset = match bounds.start_bound() {
-        Bound::Included(&bound) => bound,
-        Bound::Excluded(&bound) => bound + 1,
-        Bound::Unbounded => 0,
-    };
-    let size = match bounds.end_bound() {
-        Bound::Included(&bound) => Some(bound + 1 - offset),
-        Bound::Excluded(&bound) => Some(bound - offset),
-        Bound::Unbounded => None,
-    }
-    .map(|size| BufferSize::new(size).expect("Buffer slices can not be empty"));
-
-    (offset, size)
-}
-
-/// A read-only view of a mapped buffer's bytes.
-///
-/// To get a `BufferView`, first [map] the buffer, and then
-/// call `buffer.slice(range).get_mapped_range()`.
-///
-/// `BufferView` dereferences to `&[u8]`, so you can use all the usual Rust
-/// slice methods to access the buffer's contents. It also implements
-/// `AsRef<[u8]>`, if that's more convenient.
-///
-/// Before the buffer can be unmapped, all `BufferView`s observing it
-/// must be dropped. Otherwise, the call to [`Buffer::unmap`] will panic.
-///
-/// For example code, see the documentation on [mapping buffers][map].
-///
-/// [map]: Buffer#mapping-buffers
-/// [`map_async`]: BufferSlice::map_async
-#[derive(Debug)]
-pub struct BufferView<'a> {
-    slice: BufferSlice<'a>,
-    data: Box<dyn crate::context::BufferMappedRange>,
-}
-
-/// A write-only view of a mapped buffer's bytes.
-///
-/// To get a `BufferViewMut`, first [map] the buffer, and then
-/// call `buffer.slice(range).get_mapped_range_mut()`.
-///
-/// `BufferViewMut` dereferences to `&mut [u8]`, so you can use all the usual
-/// Rust slice methods to access the buffer's contents. It also implements
-/// `AsMut<[u8]>`, if that's more convenient.
-///
-/// It is possible to read the buffer using this view, but doing so is not
-/// recommended, as it is likely to be slow.
-///
-/// Before the buffer can be unmapped, all `BufferViewMut`s observing it
-/// must be dropped. Otherwise, the call to [`Buffer::unmap`] will panic.
-///
-/// For example code, see the documentation on [mapping buffers][map].
-///
-/// [map]: Buffer#mapping-buffers
-#[derive(Debug)]
-pub struct BufferViewMut<'a> {
-    slice: BufferSlice<'a>,
-    data: Box<dyn crate::context::BufferMappedRange>,
-    readable: bool,
-}
-
-impl std::ops::Deref for BufferView<'_> {
-    type Target = [u8];
-
-    #[inline]
-    fn deref(&self) -> &[u8] {
-        self.data.slice()
-    }
-}
-
-impl AsRef<[u8]> for BufferView<'_> {
-    #[inline]
-    fn as_ref(&self) -> &[u8] {
-        self.data.slice()
-    }
-}
-
-impl AsMut<[u8]> for BufferViewMut<'_> {
-    #[inline]
-    fn as_mut(&mut self) -> &mut [u8] {
-        self.data.slice_mut()
-    }
-}
-
-impl Deref for BufferViewMut<'_> {
-    type Target = [u8];
-
-    fn deref(&self) -> &Self::Target {
-        if !self.readable {
-            log::warn!("Reading from a BufferViewMut is slow and not recommended.");
-        }
-
-        self.data.slice()
-    }
-}
-
-impl DerefMut for BufferViewMut<'_> {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        self.data.slice_mut()
-    }
-}
-
-impl Drop for BufferView<'_> {
-    fn drop(&mut self) {
-        self.slice
-            .buffer
-            .map_context
-            .lock()
-            .remove(self.slice.offset, self.slice.size);
-    }
-}
-
-impl Drop for BufferViewMut<'_> {
-    fn drop(&mut self) {
-        self.slice
-            .buffer
-            .map_context
-            .lock()
-            .remove(self.slice.offset, self.slice.size);
-    }
-}
-
-impl Buffer {
-    /// Return the binding view of the entire buffer.
-    pub fn as_entire_binding(&self) -> BindingResource<'_> {
-        BindingResource::Buffer(self.as_entire_buffer_binding())
-    }
-
-    /// Return the binding view of the entire buffer.
-    pub fn as_entire_buffer_binding(&self) -> BufferBinding<'_> {
-        BufferBinding {
-            buffer: self,
-            offset: 0,
-            size: None,
-        }
-    }
-
-    /// Returns the inner hal Buffer using a callback. The hal buffer will be `None` if the
-    /// backend type argument does not match with this wgpu Buffer
-    ///
-    /// # Safety
-    ///
-    /// - The raw handle obtained from the hal Buffer must not be manually destroyed
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Buffer>) -> R, R>(
-        &self,
-        hal_buffer_callback: F,
-    ) -> R {
-        let id = self.id;
-
-        if let Some(ctx) = self
-            .context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-        {
-            unsafe { ctx.buffer_as_hal::<A, F, R>(id.into(), hal_buffer_callback) }
-        } else {
-            hal_buffer_callback(None)
-        }
-    }
-
-    /// Return a slice of a [`Buffer`]'s bytes.
-    ///
-    /// Return a [`BufferSlice`] referring to the portion of `self`'s contents
-    /// indicated by `bounds`. Regardless of what sort of data `self` stores,
-    /// `bounds` start and end are given in bytes.
-    ///
-    /// A [`BufferSlice`] can be used to supply vertex and index data, or to map
-    /// buffer contents for access from the CPU. See the [`BufferSlice`]
-    /// documentation for details.
-    ///
-    /// The `range` argument can be half or fully unbounded: for example,
-    /// `buffer.slice(..)` refers to the entire buffer, and `buffer.slice(n..)`
-    /// refers to the portion starting at the `n`th byte and extending to the
-    /// end of the buffer.
-    pub fn slice<S: RangeBounds<BufferAddress>>(&self, bounds: S) -> BufferSlice<'_> {
-        let (offset, size) = range_to_offset_size(bounds);
-        BufferSlice {
-            buffer: self,
-            offset,
-            size,
-        }
-    }
-
-    /// Flushes any pending write operations and unmaps the buffer from host memory.
-    pub fn unmap(&self) {
-        self.map_context.lock().reset();
-        DynContext::buffer_unmap(&*self.context, &self.id, self.data.as_ref());
-    }
-
-    /// Destroy the associated native resources as soon as possible.
-    pub fn destroy(&self) {
-        DynContext::buffer_destroy(&*self.context, &self.id, self.data.as_ref());
-    }
-
-    /// Returns the length of the buffer allocation in bytes.
-    ///
-    /// This is always equal to the `size` that was specified when creating the buffer.
-    pub fn size(&self) -> BufferAddress {
-        self.size
-    }
-
-    /// Returns the allowed usages for this `Buffer`.
-    ///
-    /// This is always equal to the `usage` that was specified when creating the buffer.
-    pub fn usage(&self) -> BufferUsages {
-        self.usage
-    }
-}
-
-impl<'a> BufferSlice<'a> {
-    /// Map the buffer. Buffer is ready to map once the callback is called.
-    ///
-    /// For the callback to complete, either `queue.submit(..)`, `instance.poll_all(..)`, or `device.poll(..)`
-    /// must be called elsewhere in the runtime, possibly integrated into an event loop or run on a separate thread.
-    ///
-    /// The callback will be called on the thread that first calls the above functions after the gpu work
-    /// has completed. There are no restrictions on the code you can run in the callback, however on native the
-    /// call to the function will not complete until the callback returns, so prefer keeping callbacks short
-    /// and used to set flags, send messages, etc.
-    pub fn map_async(
-        &self,
-        mode: MapMode,
-        callback: impl FnOnce(Result<(), BufferAsyncError>) + WasmNotSend + 'static,
-    ) {
-        let mut mc = self.buffer.map_context.lock();
-        assert_eq!(
-            mc.initial_range,
-            0..0,
-            "Buffer {:?} is already mapped",
-            self.buffer.id
-        );
-        let end = match self.size {
-            Some(s) => self.offset + s.get(),
-            None => mc.total_size,
-        };
-        mc.initial_range = self.offset..end;
-
-        DynContext::buffer_map_async(
-            &*self.buffer.context,
-            &self.buffer.id,
-            self.buffer.data.as_ref(),
-            mode,
-            self.offset..end,
-            Box::new(callback),
-        )
-    }
-
-    /// Gain read-only access to the bytes of a [mapped] [`Buffer`].
-    ///
-    /// Return a [`BufferView`] referring to the buffer range represented by
-    /// `self`. See the documentation for [`BufferView`] for details.
-    ///
-    /// # Panics
-    ///
-    /// - This panics if the buffer to which `self` refers is not currently
-    ///   [mapped].
-    ///
-    /// - If you try to create overlapping views of a buffer, mutable or
-    ///   otherwise, `get_mapped_range` will panic.
-    ///
-    /// [mapped]: Buffer#mapping-buffers
-    pub fn get_mapped_range(&self) -> BufferView<'a> {
-        let end = self.buffer.map_context.lock().add(self.offset, self.size);
-        let data = DynContext::buffer_get_mapped_range(
-            &*self.buffer.context,
-            &self.buffer.id,
-            self.buffer.data.as_ref(),
-            self.offset..end,
-        );
-        BufferView { slice: *self, data }
-    }
-
-    /// Synchronously and immediately map a buffer for reading. If the buffer is not immediately mappable
-    /// through [`BufferDescriptor::mapped_at_creation`] or [`BufferSlice::map_async`], will fail.
-    ///
-    /// This is useful when targeting WebGPU and you want to pass mapped data directly to js.
-    /// Unlike `get_mapped_range` which unconditionally copies mapped data into the wasm heap,
-    /// this function directly hands you the ArrayBuffer that we mapped the data into in js.
-    ///
-    /// This is only available on WebGPU, on any other backends this will return `None`.
-    #[cfg(webgpu)]
-    pub fn get_mapped_range_as_array_buffer(&self) -> Option<js_sys::ArrayBuffer> {
-        self.buffer
-            .context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWebGpu>()
-            .map(|ctx| {
-                let buffer_data = crate::context::downcast_ref(self.buffer.data.as_ref());
-                let end = self.buffer.map_context.lock().add(self.offset, self.size);
-                ctx.buffer_get_mapped_range_as_array_buffer(buffer_data, self.offset..end)
-            })
-    }
-
-    /// Gain write access to the bytes of a [mapped] [`Buffer`].
-    ///
-    /// Return a [`BufferViewMut`] referring to the buffer range represented by
-    /// `self`. See the documentation for [`BufferViewMut`] for more details.
-    ///
-    /// # Panics
-    ///
-    /// - This panics if the buffer to which `self` refers is not currently
-    ///   [mapped].
-    ///
-    /// - If you try to create overlapping views of a buffer, mutable or
-    ///   otherwise, `get_mapped_range_mut` will panic.
-    ///
-    /// [mapped]: Buffer#mapping-buffers
-    pub fn get_mapped_range_mut(&self) -> BufferViewMut<'a> {
-        let end = self.buffer.map_context.lock().add(self.offset, self.size);
-        let data = DynContext::buffer_get_mapped_range(
-            &*self.buffer.context,
-            &self.buffer.id,
-            self.buffer.data.as_ref(),
-            self.offset..end,
-        );
-        BufferViewMut {
-            slice: *self,
-            data,
-            readable: self.buffer.usage.contains(BufferUsages::MAP_READ),
-        }
-    }
-}
-
-impl Drop for Buffer {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.buffer_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-impl Texture {
-    /// Returns the inner hal Texture using a callback. The hal texture will be `None` if the
-    /// backend type argument does not match with this wgpu Texture
-    ///
-    /// # Safety
-    ///
-    /// - The raw handle obtained from the hal Texture must not be manually destroyed
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Texture>) -> R, R>(
-        &self,
-        hal_texture_callback: F,
-    ) -> R {
-        let texture = self.data.as_ref().downcast_ref().unwrap();
-
-        if let Some(ctx) = self
-            .context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-        {
-            unsafe { ctx.texture_as_hal::<A, F, R>(texture, hal_texture_callback) }
-        } else {
-            hal_texture_callback(None)
-        }
-    }
-
-    /// Creates a view of this texture.
-    pub fn create_view(&self, desc: &TextureViewDescriptor<'_>) -> TextureView {
-        let (id, data) =
-            DynContext::texture_create_view(&*self.context, &self.id, self.data.as_ref(), desc);
-        TextureView {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Destroy the associated native resources as soon as possible.
-    pub fn destroy(&self) {
-        DynContext::texture_destroy(&*self.context, &self.id, self.data.as_ref());
-    }
-
-    /// Make an `ImageCopyTexture` representing the whole texture.
-    pub fn as_image_copy(&self) -> ImageCopyTexture<'_> {
-        ImageCopyTexture {
-            texture: self,
-            mip_level: 0,
-            origin: Origin3d::ZERO,
-            aspect: TextureAspect::All,
-        }
-    }
-
-    /// Returns the size of this `Texture`.
-    ///
-    /// This is always equal to the `size` that was specified when creating the texture.
-    pub fn size(&self) -> Extent3d {
-        self.descriptor.size
-    }
-
-    /// Returns the width of this `Texture`.
-    ///
-    /// This is always equal to the `size.width` that was specified when creating the texture.
-    pub fn width(&self) -> u32 {
-        self.descriptor.size.width
-    }
-
-    /// Returns the height of this `Texture`.
-    ///
-    /// This is always equal to the `size.height` that was specified when creating the texture.
-    pub fn height(&self) -> u32 {
-        self.descriptor.size.height
-    }
-
-    /// Returns the depth or layer count of this `Texture`.
-    ///
-    /// This is always equal to the `size.depth_or_array_layers` that was specified when creating the texture.
-    pub fn depth_or_array_layers(&self) -> u32 {
-        self.descriptor.size.depth_or_array_layers
-    }
-
-    /// Returns the mip_level_count of this `Texture`.
-    ///
-    /// This is always equal to the `mip_level_count` that was specified when creating the texture.
-    pub fn mip_level_count(&self) -> u32 {
-        self.descriptor.mip_level_count
-    }
-
-    /// Returns the sample_count of this `Texture`.
-    ///
-    /// This is always equal to the `sample_count` that was specified when creating the texture.
-    pub fn sample_count(&self) -> u32 {
-        self.descriptor.sample_count
-    }
-
-    /// Returns the dimension of this `Texture`.
-    ///
-    /// This is always equal to the `dimension` that was specified when creating the texture.
-    pub fn dimension(&self) -> TextureDimension {
-        self.descriptor.dimension
-    }
-
-    /// Returns the format of this `Texture`.
-    ///
-    /// This is always equal to the `format` that was specified when creating the texture.
-    pub fn format(&self) -> TextureFormat {
-        self.descriptor.format
-    }
-
-    /// Returns the allowed usages of this `Texture`.
-    ///
-    /// This is always equal to the `usage` that was specified when creating the texture.
-    pub fn usage(&self) -> TextureUsages {
-        self.descriptor.usage
-    }
-}
-
-impl Drop for Texture {
-    fn drop(&mut self) {
-        if self.owned && !thread::panicking() {
-            self.context.texture_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-impl Drop for TextureView {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.texture_view_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-impl CommandEncoder {
-    /// Finishes recording and returns a [`CommandBuffer`] that can be submitted for execution.
-    pub fn finish(mut self) -> CommandBuffer {
-        let (id, data) = DynContext::command_encoder_finish(
-            &*self.context,
-            self.id.take().unwrap(),
-            self.data.as_mut(),
-        );
-        CommandBuffer {
-            context: Arc::clone(&self.context),
-            id: Some(id),
-            data: Some(data),
-        }
-    }
-
-    /// Begins recording of a render pass.
-    ///
-    /// This function returns a [`RenderPass`] object which records a single render pass.
-    ///
-    /// As long as the returned  [`RenderPass`] has not ended,
-    /// any mutating operation on this command encoder causes an error and invalidates it.
-    /// Note that the `'encoder` lifetime relationship protects against this,
-    /// but it is possible to opt out of it by calling [`RenderPass::forget_lifetime`].
-    /// This can be useful for runtime handling of the encoder->pass
-    /// dependency e.g. when pass and encoder are stored in the same data structure.
-    pub fn begin_render_pass<'encoder>(
-        &'encoder mut self,
-        desc: &RenderPassDescriptor<'_>,
-    ) -> RenderPass<'encoder> {
-        let id = self.id.as_ref().unwrap();
-        let (id, data) = DynContext::command_encoder_begin_render_pass(
-            &*self.context,
-            id,
-            self.data.as_ref(),
-            desc,
-        );
-        RenderPass {
-            inner: RenderPassInner {
-                id,
-                data,
-                context: self.context.clone(),
-            },
-            encoder_guard: PhantomData,
-        }
-    }
-
-    /// Begins recording of a compute pass.
-    ///
-    /// This function returns a [`ComputePass`] object which records a single compute pass.
-    ///
-    /// As long as the returned  [`ComputePass`] has not ended,
-    /// any mutating operation on this command encoder causes an error and invalidates it.
-    /// Note that the `'encoder` lifetime relationship protects against this,
-    /// but it is possible to opt out of it by calling [`ComputePass::forget_lifetime`].
-    /// This can be useful for runtime handling of the encoder->pass
-    /// dependency e.g. when pass and encoder are stored in the same data structure.
-    pub fn begin_compute_pass<'encoder>(
-        &'encoder mut self,
-        desc: &ComputePassDescriptor<'_>,
-    ) -> ComputePass<'encoder> {
-        let id = self.id.as_ref().unwrap();
-        let (id, data) = DynContext::command_encoder_begin_compute_pass(
-            &*self.context,
-            id,
-            self.data.as_ref(),
-            desc,
-        );
-        ComputePass {
-            inner: ComputePassInner {
-                id,
-                data,
-                context: self.context.clone(),
-            },
-            encoder_guard: PhantomData,
-        }
-    }
-
-    /// Copy data from one buffer to another.
-    ///
-    /// # Panics
-    ///
-    /// - Buffer offsets or copy size not a multiple of [`COPY_BUFFER_ALIGNMENT`].
-    /// - Copy would overrun buffer.
-    /// - Copy within the same buffer.
-    pub fn copy_buffer_to_buffer(
-        &mut self,
-        source: &Buffer,
-        source_offset: BufferAddress,
-        destination: &Buffer,
-        destination_offset: BufferAddress,
-        copy_size: BufferAddress,
-    ) {
-        DynContext::command_encoder_copy_buffer_to_buffer(
-            &*self.context,
-            self.id.as_ref().unwrap(),
-            self.data.as_ref(),
-            &source.id,
-            source.data.as_ref(),
-            source_offset,
-            &destination.id,
-            destination.data.as_ref(),
-            destination_offset,
-            copy_size,
-        );
-    }
-
-    /// Copy data from a buffer to a texture.
-    pub fn copy_buffer_to_texture(
-        &mut self,
-        source: ImageCopyBuffer<'_>,
-        destination: ImageCopyTexture<'_>,
-        copy_size: Extent3d,
-    ) {
-        DynContext::command_encoder_copy_buffer_to_texture(
-            &*self.context,
-            self.id.as_ref().unwrap(),
-            self.data.as_ref(),
-            source,
-            destination,
-            copy_size,
-        );
-    }
-
-    /// Copy data from a texture to a buffer.
-    pub fn copy_texture_to_buffer(
-        &mut self,
-        source: ImageCopyTexture<'_>,
-        destination: ImageCopyBuffer<'_>,
-        copy_size: Extent3d,
-    ) {
-        DynContext::command_encoder_copy_texture_to_buffer(
-            &*self.context,
-            self.id.as_ref().unwrap(),
-            self.data.as_ref(),
-            source,
-            destination,
-            copy_size,
-        );
-    }
-
-    /// Copy data from one texture to another.
-    ///
-    /// # Panics
-    ///
-    /// - Textures are not the same type
-    /// - If a depth texture, or a multisampled texture, the entire texture must be copied
-    /// - Copy would overrun either texture
-    pub fn copy_texture_to_texture(
-        &mut self,
-        source: ImageCopyTexture<'_>,
-        destination: ImageCopyTexture<'_>,
-        copy_size: Extent3d,
-    ) {
-        DynContext::command_encoder_copy_texture_to_texture(
-            &*self.context,
-            self.id.as_ref().unwrap(),
-            self.data.as_ref(),
-            source,
-            destination,
-            copy_size,
-        );
-    }
-
-    /// Clears texture to zero.
-    ///
-    /// Note that unlike with clear_buffer, `COPY_DST` usage is not required.
-    ///
-    /// # Implementation notes
-    ///
-    /// - implemented either via buffer copies and render/depth target clear, path depends on texture usages
-    /// - behaves like texture zero init, but is performed immediately (clearing is *not* delayed via marking it as uninitialized)
-    ///
-    /// # Panics
-    ///
-    /// - `CLEAR_TEXTURE` extension not enabled
-    /// - Range is out of bounds
-    pub fn clear_texture(&mut self, texture: &Texture, subresource_range: &ImageSubresourceRange) {
-        DynContext::command_encoder_clear_texture(
-            &*self.context,
-            self.id.as_ref().unwrap(),
-            self.data.as_ref(),
-            texture,
-            subresource_range,
-        );
-    }
-
-    /// Clears buffer to zero.
-    ///
-    /// # Panics
-    ///
-    /// - Buffer does not have `COPY_DST` usage.
-    /// - Range is out of bounds
-    pub fn clear_buffer(
-        &mut self,
-        buffer: &Buffer,
-        offset: BufferAddress,
-        size: Option<BufferAddress>,
-    ) {
-        DynContext::command_encoder_clear_buffer(
-            &*self.context,
-            self.id.as_ref().unwrap(),
-            self.data.as_ref(),
-            buffer,
-            offset,
-            size,
-        );
-    }
-
-    /// Inserts debug marker.
-    pub fn insert_debug_marker(&mut self, label: &str) {
-        let id = self.id.as_ref().unwrap();
-        DynContext::command_encoder_insert_debug_marker(
-            &*self.context,
-            id,
-            self.data.as_ref(),
-            label,
-        );
-    }
-
-    /// Start record commands and group it into debug marker group.
-    pub fn push_debug_group(&mut self, label: &str) {
-        let id = self.id.as_ref().unwrap();
-        DynContext::command_encoder_push_debug_group(&*self.context, id, self.data.as_ref(), label);
-    }
-
-    /// Stops command recording and creates debug group.
-    pub fn pop_debug_group(&mut self) {
-        let id = self.id.as_ref().unwrap();
-        DynContext::command_encoder_pop_debug_group(&*self.context, id, self.data.as_ref());
-    }
-
-    /// Resolves a query set, writing the results into the supplied destination buffer.
-    ///
-    /// Occlusion and timestamp queries are 8 bytes each (see [`crate::QUERY_SIZE`]). For pipeline statistics queries,
-    /// see [`PipelineStatisticsTypes`] for more information.
-    pub fn resolve_query_set(
-        &mut self,
-        query_set: &QuerySet,
-        query_range: Range<u32>,
-        destination: &Buffer,
-        destination_offset: BufferAddress,
-    ) {
-        DynContext::command_encoder_resolve_query_set(
-            &*self.context,
-            self.id.as_ref().unwrap(),
-            self.data.as_ref(),
-            &query_set.id,
-            query_set.data.as_ref(),
-            query_range.start,
-            query_range.end - query_range.start,
-            &destination.id,
-            destination.data.as_ref(),
-            destination_offset,
-        )
-    }
-
-    /// Returns the inner hal CommandEncoder using a callback. The hal command encoder will be `None` if the
-    /// backend type argument does not match with this wgpu CommandEncoder
-    ///
-    /// This method will start the wgpu_core level command recording.
-    ///
-    /// # Safety
-    ///
-    /// - The raw handle obtained from the hal CommandEncoder must not be manually destroyed
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal_mut<
-        A: wgc::hal_api::HalApi,
-        F: FnOnce(Option<&mut A::CommandEncoder>) -> R,
-        R,
-    >(
-        &mut self,
-        hal_command_encoder_callback: F,
-    ) -> Option<R> {
-        use core::id::CommandEncoderId;
-
-        self.context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-            .map(|ctx| unsafe {
-                ctx.command_encoder_as_hal_mut::<A, F, R>(
-                    CommandEncoderId::from(self.id.unwrap()),
-                    hal_command_encoder_callback,
-                )
-            })
-    }
-}
-
-/// [`Features::TIMESTAMP_QUERY_INSIDE_ENCODERS`] must be enabled on the device in order to call these functions.
-impl CommandEncoder {
-    /// Issue a timestamp command at this point in the queue.
-    /// The timestamp will be written to the specified query set, at the specified index.
-    ///
-    /// Must be multiplied by [`Queue::get_timestamp_period`] to get
-    /// the value in nanoseconds. Absolute values have no meaning,
-    /// but timestamps can be subtracted to get the time it takes
-    /// for a string of operations to complete.
-    ///
-    /// Attention: Since commands within a command recorder may be reordered,
-    /// there is no strict guarantee that timestamps are taken after all commands
-    /// recorded so far and all before all commands recorded after.
-    /// This may depend both on the backend and the driver.
-    pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
-        DynContext::command_encoder_write_timestamp(
-            &*self.context,
-            self.id.as_ref().unwrap(),
-            self.data.as_mut(),
-            &query_set.id,
-            query_set.data.as_ref(),
-            query_index,
-        )
-    }
-}
-
-impl<'encoder> RenderPass<'encoder> {
-    /// Drops the lifetime relationship to the parent command encoder, making usage of
-    /// the encoder while this pass is recorded a run-time error instead.
-    ///
-    /// Attention: As long as the render pass has not been ended, any mutating operation on the parent
-    /// command encoder will cause a run-time error and invalidate it!
-    /// By default, the lifetime constraint prevents this, but it can be useful
-    /// to handle this at run time, such as when storing the pass and encoder in the same
-    /// data structure.
-    ///
-    /// This operation has no effect on pass recording.
-    /// It's a safe operation, since [`CommandEncoder`] is in a locked state as long as the pass is active
-    /// regardless of the lifetime constraint or its absence.
-    pub fn forget_lifetime(self) -> RenderPass<'static> {
-        RenderPass {
-            inner: self.inner,
-            encoder_guard: PhantomData,
-        }
-    }
-
-    /// Sets the active bind group for a given bind group index. The bind group layout
-    /// in the active pipeline when any `draw_*()` method is called must match the layout of
-    /// this bind group.
-    ///
-    /// If the bind group have dynamic offsets, provide them in binding order.
-    /// These offsets have to be aligned to [`Limits::min_uniform_buffer_offset_alignment`]
-    /// or [`Limits::min_storage_buffer_offset_alignment`] appropriately.
-    ///
-    /// Subsequent draw calls’ shader executions will be able to access data in these bind groups.
-    pub fn set_bind_group(
-        &mut self,
-        index: u32,
-        bind_group: &BindGroup,
-        offsets: &[DynamicOffset],
-    ) {
-        DynContext::render_pass_set_bind_group(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            index,
-            &bind_group.id,
-            bind_group.data.as_ref(),
-            offsets,
-        )
-    }
-
-    /// Sets the active render pipeline.
-    ///
-    /// Subsequent draw calls will exhibit the behavior defined by `pipeline`.
-    pub fn set_pipeline(&mut self, pipeline: &RenderPipeline) {
-        DynContext::render_pass_set_pipeline(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            &pipeline.id,
-            pipeline.data.as_ref(),
-        )
-    }
-
-    /// Sets the blend color as used by some of the blending modes.
-    ///
-    /// Subsequent blending tests will test against this value.
-    /// If this method has not been called, the blend constant defaults to [`Color::TRANSPARENT`]
-    /// (all components zero).
-    pub fn set_blend_constant(&mut self, color: Color) {
-        DynContext::render_pass_set_blend_constant(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            color,
-        )
-    }
-
-    /// Sets the active index buffer.
-    ///
-    /// Subsequent calls to [`draw_indexed`](RenderPass::draw_indexed) on this [`RenderPass`] will
-    /// use `buffer` as the source index buffer.
-    pub fn set_index_buffer(&mut self, buffer_slice: BufferSlice<'_>, index_format: IndexFormat) {
-        DynContext::render_pass_set_index_buffer(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            &buffer_slice.buffer.id,
-            buffer_slice.buffer.data.as_ref(),
-            index_format,
-            buffer_slice.offset,
-            buffer_slice.size,
-        )
-    }
-
-    /// Assign a vertex buffer to a slot.
-    ///
-    /// Subsequent calls to [`draw`] and [`draw_indexed`] on this
-    /// [`RenderPass`] will use `buffer` as one of the source vertex buffers.
-    ///
-    /// The `slot` refers to the index of the matching descriptor in
-    /// [`VertexState::buffers`].
-    ///
-    /// [`draw`]: RenderPass::draw
-    /// [`draw_indexed`]: RenderPass::draw_indexed
-    pub fn set_vertex_buffer(&mut self, slot: u32, buffer_slice: BufferSlice<'_>) {
-        DynContext::render_pass_set_vertex_buffer(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            slot,
-            &buffer_slice.buffer.id,
-            buffer_slice.buffer.data.as_ref(),
-            buffer_slice.offset,
-            buffer_slice.size,
-        )
-    }
-
-    /// Sets the scissor rectangle used during the rasterization stage.
-    /// After transformation into [viewport coordinates](https://www.w3.org/TR/webgpu/#viewport-coordinates).
-    ///
-    /// Subsequent draw calls will discard any fragments which fall outside the scissor rectangle.
-    /// If this method has not been called, the scissor rectangle defaults to the entire bounds of
-    /// the render targets.
-    ///
-    /// The function of the scissor rectangle resembles [`set_viewport()`](Self::set_viewport),
-    /// but it does not affect the coordinate system, only which fragments are discarded.
-    pub fn set_scissor_rect(&mut self, x: u32, y: u32, width: u32, height: u32) {
-        DynContext::render_pass_set_scissor_rect(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            x,
-            y,
-            width,
-            height,
-        );
-    }
-
-    /// Sets the viewport used during the rasterization stage to linearly map
-    /// from [normalized device coordinates](https://www.w3.org/TR/webgpu/#ndc) to [viewport coordinates](https://www.w3.org/TR/webgpu/#viewport-coordinates).
-    ///
-    /// Subsequent draw calls will only draw within this region.
-    /// If this method has not been called, the viewport defaults to the entire bounds of the render
-    /// targets.
-    pub fn set_viewport(&mut self, x: f32, y: f32, w: f32, h: f32, min_depth: f32, max_depth: f32) {
-        DynContext::render_pass_set_viewport(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            x,
-            y,
-            w,
-            h,
-            min_depth,
-            max_depth,
-        );
-    }
-
-    /// Sets the stencil reference.
-    ///
-    /// Subsequent stencil tests will test against this value.
-    /// If this method has not been called, the stencil reference value defaults to `0`.
-    pub fn set_stencil_reference(&mut self, reference: u32) {
-        DynContext::render_pass_set_stencil_reference(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            reference,
-        );
-    }
-
-    /// Inserts debug marker.
-    pub fn insert_debug_marker(&mut self, label: &str) {
-        DynContext::render_pass_insert_debug_marker(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            label,
-        );
-    }
-
-    /// Start record commands and group it into debug marker group.
-    pub fn push_debug_group(&mut self, label: &str) {
-        DynContext::render_pass_push_debug_group(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            label,
-        );
-    }
-
-    /// Stops command recording and creates debug group.
-    pub fn pop_debug_group(&mut self) {
-        DynContext::render_pass_pop_debug_group(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-        );
-    }
-
-    /// Draws primitives from the active vertex buffer(s).
-    ///
-    /// The active vertex buffer(s) can be set with [`RenderPass::set_vertex_buffer`].
-    /// Does not use an Index Buffer. If you need this see [`RenderPass::draw_indexed`]
-    ///
-    /// Panics if vertices Range is outside of the range of the vertices range of any set vertex buffer.
-    ///
-    /// vertices: The range of vertices to draw.
-    /// instances: Range of Instances to draw. Use 0..1 if instance buffers are not used.
-    /// E.g.of how its used internally
-    /// ```rust ignore
-    /// for instance_id in instance_range {
-    ///     for vertex_id in vertex_range {
-    ///         let vertex = vertex[vertex_id];
-    ///         vertex_shader(vertex, vertex_id, instance_id);
-    ///     }
-    /// }
-    /// ```
-    ///
-    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
-    /// It is not affected by changes to the state that are performed after it is called.
-    pub fn draw(&mut self, vertices: Range<u32>, instances: Range<u32>) {
-        DynContext::render_pass_draw(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            vertices,
-            instances,
-        )
-    }
-
-    /// Draws indexed primitives using the active index buffer and the active vertex buffers.
-    ///
-    /// The active index buffer can be set with [`RenderPass::set_index_buffer`]
-    /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
-    ///
-    /// Panics if indices Range is outside of the range of the indices range of any set index buffer.
-    ///
-    /// indices: The range of indices to draw.
-    /// base_vertex: value added to each index value before indexing into the vertex buffers.
-    /// instances: Range of Instances to draw. Use 0..1 if instance buffers are not used.
-    /// E.g.of how its used internally
-    /// ```rust ignore
-    /// for instance_id in instance_range {
-    ///     for index_index in index_range {
-    ///         let vertex_id = index_buffer[index_index];
-    ///         let adjusted_vertex_id = vertex_id + base_vertex;
-    ///         let vertex = vertex[adjusted_vertex_id];
-    ///         vertex_shader(vertex, adjusted_vertex_id, instance_id);
-    ///     }
-    /// }
-    /// ```
-    ///
-    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
-    /// It is not affected by changes to the state that are performed after it is called.
-    pub fn draw_indexed(&mut self, indices: Range<u32>, base_vertex: i32, instances: Range<u32>) {
-        DynContext::render_pass_draw_indexed(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            indices,
-            base_vertex,
-            instances,
-        );
-    }
-
-    /// Draws primitives from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
-    ///
-    /// This is like calling [`RenderPass::draw`] but the contents of the call are specified in the `indirect_buffer`.
-    /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
-    ///
-    /// Indirect drawing has some caveats depending on the features available. We are not currently able to validate
-    /// these and issue an error.
-    /// - If [`Features::INDIRECT_FIRST_INSTANCE`] is not present on the adapter,
-    ///   [`DrawIndirect::first_instance`](crate::util::DrawIndirectArgs::first_instance) will be ignored.
-    /// - If [`DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW`] is not present on the adapter,
-    ///   any use of `@builtin(vertex_index)` or `@builtin(instance_index)` in the vertex shader will have different values.
-    ///
-    /// See details on the individual flags for more information.
-    pub fn draw_indirect(&mut self, indirect_buffer: &Buffer, indirect_offset: BufferAddress) {
-        DynContext::render_pass_draw_indirect(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            &indirect_buffer.id,
-            indirect_buffer.data.as_ref(),
-            indirect_offset,
-        );
-    }
-
-    /// Draws indexed primitives using the active index buffer and the active vertex buffers,
-    /// based on the contents of the `indirect_buffer`.
-    ///
-    /// This is like calling [`RenderPass::draw_indexed`] but the contents of the call are specified in the `indirect_buffer`.
-    /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
-    ///
-    /// Indirect drawing has some caveats depending on the features available. We are not currently able to validate
-    /// these and issue an error.
-    /// - If [`Features::INDIRECT_FIRST_INSTANCE`] is not present on the adapter,
-    ///   [`DrawIndexedIndirect::first_instance`](crate::util::DrawIndexedIndirectArgs::first_instance) will be ignored.
-    /// - If [`DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW`] is not present on the adapter,
-    ///   any use of `@builtin(vertex_index)` or `@builtin(instance_index)` in the vertex shader will have different values.
-    ///
-    /// See details on the individual flags for more information.
-    pub fn draw_indexed_indirect(
-        &mut self,
-        indirect_buffer: &Buffer,
-        indirect_offset: BufferAddress,
-    ) {
-        DynContext::render_pass_draw_indexed_indirect(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            &indirect_buffer.id,
-            indirect_buffer.data.as_ref(),
-            indirect_offset,
-        );
-    }
-
-    /// Execute a [render bundle][RenderBundle], which is a set of pre-recorded commands
-    /// that can be run together.
-    ///
-    /// Commands in the bundle do not inherit this render pass's current render state, and after the
-    /// bundle has executed, the state is **cleared** (reset to defaults, not the previous state).
-    pub fn execute_bundles<'a, I: IntoIterator<Item = &'a RenderBundle>>(
-        &mut self,
-        render_bundles: I,
-    ) {
-        let mut render_bundles = render_bundles
-            .into_iter()
-            .map(|rb| (&rb.id, rb.data.as_ref()));
-
-        DynContext::render_pass_execute_bundles(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            &mut render_bundles,
-        )
-    }
-}
-
-/// [`Features::MULTI_DRAW_INDIRECT`] must be enabled on the device in order to call these functions.
-impl<'encoder> RenderPass<'encoder> {
-    /// Dispatches multiple draw calls from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
-    /// `count` draw calls are issued.
-    ///
-    /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
-    ///
-    /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
-    /// These draw structures are expected to be tightly packed.
-    ///
-    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
-    /// It is not affected by changes to the state that are performed after it is called.
-    pub fn multi_draw_indirect(
-        &mut self,
-        indirect_buffer: &Buffer,
-        indirect_offset: BufferAddress,
-        count: u32,
-    ) {
-        DynContext::render_pass_multi_draw_indirect(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            &indirect_buffer.id,
-            indirect_buffer.data.as_ref(),
-            indirect_offset,
-            count,
-        );
-    }
-
-    /// Dispatches multiple draw calls from the active index buffer and the active vertex buffers,
-    /// based on the contents of the `indirect_buffer`. `count` draw calls are issued.
-    ///
-    /// The active index buffer can be set with [`RenderPass::set_index_buffer`], while the active
-    /// vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
-    ///
-    /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
-    /// These draw structures are expected to be tightly packed.
-    ///
-    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
-    /// It is not affected by changes to the state that are performed after it is called.
-    pub fn multi_draw_indexed_indirect(
-        &mut self,
-        indirect_buffer: &Buffer,
-        indirect_offset: BufferAddress,
-        count: u32,
-    ) {
-        DynContext::render_pass_multi_draw_indexed_indirect(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            &indirect_buffer.id,
-            indirect_buffer.data.as_ref(),
-            indirect_offset,
-            count,
-        );
-    }
-}
-
-/// [`Features::MULTI_DRAW_INDIRECT_COUNT`] must be enabled on the device in order to call these functions.
-impl<'encoder> RenderPass<'encoder> {
-    /// Dispatches multiple draw calls from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
-    /// The count buffer is read to determine how many draws to issue.
-    ///
-    /// The indirect buffer must be long enough to account for `max_count` draws, however only `count`
-    /// draws will be read. If `count` is greater than `max_count`, `max_count` will be used.
-    ///
-    /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
-    ///
-    /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
-    /// These draw structures are expected to be tightly packed.
-    ///
-    /// The structure expected in `count_buffer` is the following:
-    ///
-    /// ```rust
-    /// #[repr(C)]
-    /// struct DrawIndirectCount {
-    ///     count: u32, // Number of draw calls to issue.
-    /// }
-    /// ```
-    ///
-    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
-    /// It is not affected by changes to the state that are performed after it is called.
-    pub fn multi_draw_indirect_count(
-        &mut self,
-        indirect_buffer: &Buffer,
-        indirect_offset: BufferAddress,
-        count_buffer: &Buffer,
-        count_offset: BufferAddress,
-        max_count: u32,
-    ) {
-        DynContext::render_pass_multi_draw_indirect_count(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            &indirect_buffer.id,
-            indirect_buffer.data.as_ref(),
-            indirect_offset,
-            &count_buffer.id,
-            count_buffer.data.as_ref(),
-            count_offset,
-            max_count,
-        );
-    }
-
-    /// Dispatches multiple draw calls from the active index buffer and the active vertex buffers,
-    /// based on the contents of the `indirect_buffer`. The count buffer is read to determine how many draws to issue.
-    ///
-    /// The indirect buffer must be long enough to account for `max_count` draws, however only `count`
-    /// draws will be read. If `count` is greater than `max_count`, `max_count` will be used.
-    ///
-    /// The active index buffer can be set with [`RenderPass::set_index_buffer`], while the active
-    /// vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
-    ///
-    ///
-    /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
-    ///
-    /// These draw structures are expected to be tightly packed.
-    ///
-    /// The structure expected in `count_buffer` is the following:
-    ///
-    /// ```rust
-    /// #[repr(C)]
-    /// struct DrawIndexedIndirectCount {
-    ///     count: u32, // Number of draw calls to issue.
-    /// }
-    /// ```
-    ///
-    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
-    /// It is not affected by changes to the state that are performed after it is called.
-    pub fn multi_draw_indexed_indirect_count(
-        &mut self,
-        indirect_buffer: &Buffer,
-        indirect_offset: BufferAddress,
-        count_buffer: &Buffer,
-        count_offset: BufferAddress,
-        max_count: u32,
-    ) {
-        DynContext::render_pass_multi_draw_indexed_indirect_count(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            &indirect_buffer.id,
-            indirect_buffer.data.as_ref(),
-            indirect_offset,
-            &count_buffer.id,
-            count_buffer.data.as_ref(),
-            count_offset,
-            max_count,
-        );
-    }
-}
-
-/// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions.
-impl<'encoder> RenderPass<'encoder> {
-    /// Set push constant data for subsequent draw calls.
-    ///
-    /// Write the bytes in `data` at offset `offset` within push constant
-    /// storage, all of which are accessible by all the pipeline stages in
-    /// `stages`, and no others.  Both `offset` and the length of `data` must be
-    /// multiples of [`PUSH_CONSTANT_ALIGNMENT`], which is always 4.
-    ///
-    /// For example, if `offset` is `4` and `data` is eight bytes long, this
-    /// call will write `data` to bytes `4..12` of push constant storage.
-    ///
-    /// # Stage matching
-    ///
-    /// Every byte in the affected range of push constant storage must be
-    /// accessible to exactly the same set of pipeline stages, which must match
-    /// `stages`. If there are two bytes of storage that are accessible by
-    /// different sets of pipeline stages - say, one is accessible by fragment
-    /// shaders, and the other is accessible by both fragment shaders and vertex
-    /// shaders - then no single `set_push_constants` call may affect both of
-    /// them; to write both, you must make multiple calls, each with the
-    /// appropriate `stages` value.
-    ///
-    /// Which pipeline stages may access a given byte is determined by the
-    /// pipeline's [`PushConstant`] global variable and (if it is a struct) its
-    /// members' offsets.
-    ///
-    /// For example, suppose you have twelve bytes of push constant storage,
-    /// where bytes `0..8` are accessed by the vertex shader, and bytes `4..12`
-    /// are accessed by the fragment shader. This means there are three byte
-    /// ranges each accessed by a different set of stages:
-    ///
-    /// - Bytes `0..4` are accessed only by the fragment shader.
-    ///
-    /// - Bytes `4..8` are accessed by both the fragment shader and the vertex shader.
-    ///
-    /// - Bytes `8..12` are accessed only by the vertex shader.
-    ///
-    /// To write all twelve bytes requires three `set_push_constants` calls, one
-    /// for each range, each passing the matching `stages` mask.
-    ///
-    /// [`PushConstant`]: https://docs.rs/naga/latest/naga/enum.StorageClass.html#variant.PushConstant
-    pub fn set_push_constants(&mut self, stages: ShaderStages, offset: u32, data: &[u8]) {
-        DynContext::render_pass_set_push_constants(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            stages,
-            offset,
-            data,
-        );
-    }
-}
-
-/// [`Features::TIMESTAMP_QUERY_INSIDE_PASSES`] must be enabled on the device in order to call these functions.
-impl<'encoder> RenderPass<'encoder> {
-    /// Issue a timestamp command at this point in the queue. The
-    /// timestamp will be written to the specified query set, at the specified index.
-    ///
-    /// Must be multiplied by [`Queue::get_timestamp_period`] to get
-    /// the value in nanoseconds. Absolute values have no meaning,
-    /// but timestamps can be subtracted to get the time it takes
-    /// for a string of operations to complete.
-    pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
-        DynContext::render_pass_write_timestamp(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            &query_set.id,
-            query_set.data.as_ref(),
-            query_index,
-        )
-    }
-}
-
-impl<'encoder> RenderPass<'encoder> {
-    /// Start a occlusion query on this render pass. It can be ended with
-    /// `end_occlusion_query`. Occlusion queries may not be nested.
-    pub fn begin_occlusion_query(&mut self, query_index: u32) {
-        DynContext::render_pass_begin_occlusion_query(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            query_index,
-        );
-    }
-
-    /// End the occlusion query on this render pass. It can be started with
-    /// `begin_occlusion_query`. Occlusion queries may not be nested.
-    pub fn end_occlusion_query(&mut self) {
-        DynContext::render_pass_end_occlusion_query(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-        );
-    }
-}
-
-/// [`Features::PIPELINE_STATISTICS_QUERY`] must be enabled on the device in order to call these functions.
-impl<'encoder> RenderPass<'encoder> {
-    /// Start a pipeline statistics query on this render pass. It can be ended with
-    /// `end_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
-    pub fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, query_index: u32) {
-        DynContext::render_pass_begin_pipeline_statistics_query(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            &query_set.id,
-            query_set.data.as_ref(),
-            query_index,
-        );
-    }
-
-    /// End the pipeline statistics query on this render pass. It can be started with
-    /// `begin_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
-    pub fn end_pipeline_statistics_query(&mut self) {
-        DynContext::render_pass_end_pipeline_statistics_query(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-        );
-    }
-}
-
-impl Drop for RenderPassInner {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .render_pass_end(&mut self.id, self.data.as_mut());
-        }
-    }
-}
-
-impl<'encoder> ComputePass<'encoder> {
-    /// Drops the lifetime relationship to the parent command encoder, making usage of
-    /// the encoder while this pass is recorded a run-time error instead.
-    ///
-    /// Attention: As long as the compute pass has not been ended, any mutating operation on the parent
-    /// command encoder will cause a run-time error and invalidate it!
-    /// By default, the lifetime constraint prevents this, but it can be useful
-    /// to handle this at run time, such as when storing the pass and encoder in the same
-    /// data structure.
-    ///
-    /// This operation has no effect on pass recording.
-    /// It's a safe operation, since [`CommandEncoder`] is in a locked state as long as the pass is active
-    /// regardless of the lifetime constraint or its absence.
-    pub fn forget_lifetime(self) -> ComputePass<'static> {
-        ComputePass {
-            inner: self.inner,
-            encoder_guard: PhantomData,
-        }
-    }
-
-    /// Sets the active bind group for a given bind group index. The bind group layout
-    /// in the active pipeline when the `dispatch()` function is called must match the layout of this bind group.
-    ///
-    /// If the bind group have dynamic offsets, provide them in the binding order.
-    /// These offsets have to be aligned to [`Limits::min_uniform_buffer_offset_alignment`]
-    /// or [`Limits::min_storage_buffer_offset_alignment`] appropriately.
-    pub fn set_bind_group(
-        &mut self,
-        index: u32,
-        bind_group: &BindGroup,
-        offsets: &[DynamicOffset],
-    ) {
-        DynContext::compute_pass_set_bind_group(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            index,
-            &bind_group.id,
-            bind_group.data.as_ref(),
-            offsets,
-        );
-    }
-
-    /// Sets the active compute pipeline.
-    pub fn set_pipeline(&mut self, pipeline: &ComputePipeline) {
-        DynContext::compute_pass_set_pipeline(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            &pipeline.id,
-            pipeline.data.as_ref(),
-        );
-    }
-
-    /// Inserts debug marker.
-    pub fn insert_debug_marker(&mut self, label: &str) {
-        DynContext::compute_pass_insert_debug_marker(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            label,
-        );
-    }
-
-    /// Start record commands and group it into debug marker group.
-    pub fn push_debug_group(&mut self, label: &str) {
-        DynContext::compute_pass_push_debug_group(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            label,
-        );
-    }
-
-    /// Stops command recording and creates debug group.
-    pub fn pop_debug_group(&mut self) {
-        DynContext::compute_pass_pop_debug_group(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-        );
-    }
-
-    /// Dispatches compute work operations.
-    ///
-    /// `x`, `y` and `z` denote the number of work groups to dispatch in each dimension.
-    pub fn dispatch_workgroups(&mut self, x: u32, y: u32, z: u32) {
-        DynContext::compute_pass_dispatch_workgroups(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            x,
-            y,
-            z,
-        );
-    }
-
-    /// Dispatches compute work operations, based on the contents of the `indirect_buffer`.
-    ///
-    /// The structure expected in `indirect_buffer` must conform to [`DispatchIndirectArgs`](crate::util::DispatchIndirectArgs).
-    pub fn dispatch_workgroups_indirect(
-        &mut self,
-        indirect_buffer: &Buffer,
-        indirect_offset: BufferAddress,
-    ) {
-        DynContext::compute_pass_dispatch_workgroups_indirect(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            &indirect_buffer.id,
-            indirect_buffer.data.as_ref(),
-            indirect_offset,
-        );
-    }
-}
-
-/// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions.
-impl<'encoder> ComputePass<'encoder> {
-    /// Set push constant data for subsequent dispatch calls.
-    ///
-    /// Write the bytes in `data` at offset `offset` within push constant
-    /// storage.  Both `offset` and the length of `data` must be
-    /// multiples of [`PUSH_CONSTANT_ALIGNMENT`], which is always 4.
-    ///
-    /// For example, if `offset` is `4` and `data` is eight bytes long, this
-    /// call will write `data` to bytes `4..12` of push constant storage.
-    pub fn set_push_constants(&mut self, offset: u32, data: &[u8]) {
-        DynContext::compute_pass_set_push_constants(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            offset,
-            data,
-        );
-    }
-}
-
-/// [`Features::TIMESTAMP_QUERY_INSIDE_PASSES`] must be enabled on the device in order to call these functions.
-impl<'encoder> ComputePass<'encoder> {
-    /// Issue a timestamp command at this point in the queue. The timestamp will be written to the specified query set, at the specified index.
-    ///
-    /// Must be multiplied by [`Queue::get_timestamp_period`] to get
-    /// the value in nanoseconds. Absolute values have no meaning,
-    /// but timestamps can be subtracted to get the time it takes
-    /// for a string of operations to complete.
-    pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
-        DynContext::compute_pass_write_timestamp(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            &query_set.id,
-            query_set.data.as_ref(),
-            query_index,
-        )
-    }
-}
-
-/// [`Features::PIPELINE_STATISTICS_QUERY`] must be enabled on the device in order to call these functions.
-impl<'encoder> ComputePass<'encoder> {
-    /// Start a pipeline statistics query on this compute pass. It can be ended with
-    /// `end_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
-    pub fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, query_index: u32) {
-        DynContext::compute_pass_begin_pipeline_statistics_query(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-            &query_set.id,
-            query_set.data.as_ref(),
-            query_index,
-        );
-    }
-
-    /// End the pipeline statistics query on this compute pass. It can be started with
-    /// `begin_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
-    pub fn end_pipeline_statistics_query(&mut self) {
-        DynContext::compute_pass_end_pipeline_statistics_query(
-            &*self.inner.context,
-            &mut self.inner.id,
-            self.inner.data.as_mut(),
-        );
-    }
-}
-
-impl Drop for ComputePassInner {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .compute_pass_end(&mut self.id, self.data.as_mut());
-        }
-    }
-}
-
-impl<'a> RenderBundleEncoder<'a> {
-    /// Finishes recording and returns a [`RenderBundle`] that can be executed in other render passes.
-    pub fn finish(self, desc: &RenderBundleDescriptor<'_>) -> RenderBundle {
-        let (id, data) =
-            DynContext::render_bundle_encoder_finish(&*self.context, self.id, self.data, desc);
-        RenderBundle {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Sets the active bind group for a given bind group index. The bind group layout
-    /// in the active pipeline when any `draw()` function is called must match the layout of this bind group.
-    ///
-    /// If the bind group have dynamic offsets, provide them in the binding order.
-    pub fn set_bind_group(
-        &mut self,
-        index: u32,
-        bind_group: &'a BindGroup,
-        offsets: &[DynamicOffset],
-    ) {
-        DynContext::render_bundle_encoder_set_bind_group(
-            &*self.parent.context,
-            &mut self.id,
-            self.data.as_mut(),
-            index,
-            &bind_group.id,
-            bind_group.data.as_ref(),
-            offsets,
-        )
-    }
-
-    /// Sets the active render pipeline.
-    ///
-    /// Subsequent draw calls will exhibit the behavior defined by `pipeline`.
-    pub fn set_pipeline(&mut self, pipeline: &'a RenderPipeline) {
-        DynContext::render_bundle_encoder_set_pipeline(
-            &*self.parent.context,
-            &mut self.id,
-            self.data.as_mut(),
-            &pipeline.id,
-            pipeline.data.as_ref(),
-        )
-    }
-
-    /// Sets the active index buffer.
-    ///
-    /// Subsequent calls to [`draw_indexed`](RenderBundleEncoder::draw_indexed) on this [`RenderBundleEncoder`] will
-    /// use `buffer` as the source index buffer.
-    pub fn set_index_buffer(&mut self, buffer_slice: BufferSlice<'a>, index_format: IndexFormat) {
-        DynContext::render_bundle_encoder_set_index_buffer(
-            &*self.parent.context,
-            &mut self.id,
-            self.data.as_mut(),
-            &buffer_slice.buffer.id,
-            buffer_slice.buffer.data.as_ref(),
-            index_format,
-            buffer_slice.offset,
-            buffer_slice.size,
-        )
-    }
-
-    /// Assign a vertex buffer to a slot.
-    ///
-    /// Subsequent calls to [`draw`] and [`draw_indexed`] on this
-    /// [`RenderBundleEncoder`] will use `buffer` as one of the source vertex buffers.
-    ///
-    /// The `slot` refers to the index of the matching descriptor in
-    /// [`VertexState::buffers`].
-    ///
-    /// [`draw`]: RenderBundleEncoder::draw
-    /// [`draw_indexed`]: RenderBundleEncoder::draw_indexed
-    pub fn set_vertex_buffer(&mut self, slot: u32, buffer_slice: BufferSlice<'a>) {
-        DynContext::render_bundle_encoder_set_vertex_buffer(
-            &*self.parent.context,
-            &mut self.id,
-            self.data.as_mut(),
-            slot,
-            &buffer_slice.buffer.id,
-            buffer_slice.buffer.data.as_ref(),
-            buffer_slice.offset,
-            buffer_slice.size,
-        )
-    }
-
-    /// Draws primitives from the active vertex buffer(s).
-    ///
-    /// The active vertex buffers can be set with [`RenderBundleEncoder::set_vertex_buffer`].
-    /// Does not use an Index Buffer. If you need this see [`RenderBundleEncoder::draw_indexed`]
-    ///
-    /// Panics if vertices Range is outside of the range of the vertices range of any set vertex buffer.
-    ///
-    /// vertices: The range of vertices to draw.
-    /// instances: Range of Instances to draw. Use 0..1 if instance buffers are not used.
-    /// E.g.of how its used internally
-    /// ```rust ignore
-    /// for instance_id in instance_range {
-    ///     for vertex_id in vertex_range {
-    ///         let vertex = vertex[vertex_id];
-    ///         vertex_shader(vertex, vertex_id, instance_id);
-    ///     }
-    /// }
-    /// ```
-    pub fn draw(&mut self, vertices: Range<u32>, instances: Range<u32>) {
-        DynContext::render_bundle_encoder_draw(
-            &*self.parent.context,
-            &mut self.id,
-            self.data.as_mut(),
-            vertices,
-            instances,
-        )
-    }
-
-    /// Draws indexed primitives using the active index buffer and the active vertex buffer(s).
-    ///
-    /// The active index buffer can be set with [`RenderBundleEncoder::set_index_buffer`].
-    /// The active vertex buffer(s) can be set with [`RenderBundleEncoder::set_vertex_buffer`].
-    ///
-    /// Panics if indices Range is outside of the range of the indices range of any set index buffer.
-    ///
-    /// indices: The range of indices to draw.
-    /// base_vertex: value added to each index value before indexing into the vertex buffers.
-    /// instances: Range of Instances to draw. Use 0..1 if instance buffers are not used.
-    /// E.g.of how its used internally
-    /// ```rust ignore
-    /// for instance_id in instance_range {
-    ///     for index_index in index_range {
-    ///         let vertex_id = index_buffer[index_index];
-    ///         let adjusted_vertex_id = vertex_id + base_vertex;
-    ///         let vertex = vertex[adjusted_vertex_id];
-    ///         vertex_shader(vertex, adjusted_vertex_id, instance_id);
-    ///     }
-    /// }
-    /// ```
-    pub fn draw_indexed(&mut self, indices: Range<u32>, base_vertex: i32, instances: Range<u32>) {
-        DynContext::render_bundle_encoder_draw_indexed(
-            &*self.parent.context,
-            &mut self.id,
-            self.data.as_mut(),
-            indices,
-            base_vertex,
-            instances,
-        );
-    }
-
-    /// Draws primitives from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
-    ///
-    /// The active vertex buffers can be set with [`RenderBundleEncoder::set_vertex_buffer`].
-    ///
-    /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
-    pub fn draw_indirect(&mut self, indirect_buffer: &'a Buffer, indirect_offset: BufferAddress) {
-        DynContext::render_bundle_encoder_draw_indirect(
-            &*self.parent.context,
-            &mut self.id,
-            self.data.as_mut(),
-            &indirect_buffer.id,
-            indirect_buffer.data.as_ref(),
-            indirect_offset,
-        );
-    }
-
-    /// Draws indexed primitives using the active index buffer and the active vertex buffers,
-    /// based on the contents of the `indirect_buffer`.
-    ///
-    /// The active index buffer can be set with [`RenderBundleEncoder::set_index_buffer`], while the active
-    /// vertex buffers can be set with [`RenderBundleEncoder::set_vertex_buffer`].
-    ///
-    /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
-    pub fn draw_indexed_indirect(
-        &mut self,
-        indirect_buffer: &'a Buffer,
-        indirect_offset: BufferAddress,
-    ) {
-        DynContext::render_bundle_encoder_draw_indexed_indirect(
-            &*self.parent.context,
-            &mut self.id,
-            self.data.as_mut(),
-            &indirect_buffer.id,
-            indirect_buffer.data.as_ref(),
-            indirect_offset,
-        );
-    }
-}
-
-/// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions.
-impl<'a> RenderBundleEncoder<'a> {
-    /// Set push constant data.
-    ///
-    /// Offset is measured in bytes, but must be a multiple of [`PUSH_CONSTANT_ALIGNMENT`].
-    ///
-    /// Data size must be a multiple of 4 and must have an alignment of 4.
-    /// For example, with an offset of 4 and an array of `[u8; 8]`, that will write to the range
-    /// of 4..12.
-    ///
-    /// For each byte in the range of push constant data written, the union of the stages of all push constant
-    /// ranges that covers that byte must be exactly `stages`. There's no good way of explaining this simply,
-    /// so here are some examples:
-    ///
-    /// ```text
-    /// For the given ranges:
-    /// - 0..4 Vertex
-    /// - 4..8 Fragment
-    /// ```
-    ///
-    /// You would need to upload this in two set_push_constants calls. First for the `Vertex` range, second for the `Fragment` range.
-    ///
-    /// ```text
-    /// For the given ranges:
-    /// - 0..8  Vertex
-    /// - 4..12 Fragment
-    /// ```
-    ///
-    /// You would need to upload this in three set_push_constants calls. First for the `Vertex` only range 0..4, second
-    /// for the `Vertex | Fragment` range 4..8, third for the `Fragment` range 8..12.
-    pub fn set_push_constants(&mut self, stages: ShaderStages, offset: u32, data: &[u8]) {
-        DynContext::render_bundle_encoder_set_push_constants(
-            &*self.parent.context,
-            &mut self.id,
-            self.data.as_mut(),
-            stages,
-            offset,
-            data,
-        );
-    }
-}
-
-/// A write-only view into a staging buffer.
-///
-/// Reading into this buffer won't yield the contents of the buffer from the
-/// GPU and is likely to be slow. Because of this, although [`AsMut`] is
-/// implemented for this type, [`AsRef`] is not.
-pub struct QueueWriteBufferView<'a> {
-    queue: &'a Queue,
-    buffer: &'a Buffer,
-    offset: BufferAddress,
-    inner: Box<dyn context::QueueWriteBuffer>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(QueueWriteBufferView<'_>: Send, Sync);
-
-impl Deref for QueueWriteBufferView<'_> {
-    type Target = [u8];
-
-    fn deref(&self) -> &Self::Target {
-        log::warn!("Reading from a QueueWriteBufferView won't yield the contents of the buffer and may be slow.");
-        self.inner.slice()
-    }
-}
-
-impl DerefMut for QueueWriteBufferView<'_> {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        self.inner.slice_mut()
-    }
-}
-
-impl<'a> AsMut<[u8]> for QueueWriteBufferView<'a> {
-    fn as_mut(&mut self) -> &mut [u8] {
-        self.inner.slice_mut()
-    }
-}
-
-impl<'a> Drop for QueueWriteBufferView<'a> {
-    fn drop(&mut self) {
-        DynContext::queue_write_staging_buffer(
-            &*self.queue.context,
-            &self.queue.id,
-            self.queue.data.as_ref(),
-            &self.buffer.id,
-            self.buffer.data.as_ref(),
-            self.offset,
-            &*self.inner,
-        );
-    }
-}
-
-impl Queue {
-    /// Schedule a data write into `buffer` starting at `offset`.
-    ///
-    /// This method fails if `data` overruns the size of `buffer` starting at `offset`.
-    ///
-    /// This does *not* submit the transfer to the GPU immediately. Calls to
-    /// `write_buffer` begin execution only on the next call to
-    /// [`Queue::submit`]. To get a set of scheduled transfers started
-    /// immediately, it's fine to call `submit` with no command buffers at all:
-    ///
-    /// ```no_run
-    /// # let queue: wgpu::Queue = todo!();
-    /// queue.submit([]);
-    /// ```
-    ///
-    /// However, `data` will be immediately copied into staging memory, so the
-    /// caller may discard it any time after this call completes.
-    ///
-    /// If possible, consider using [`Queue::write_buffer_with`] instead. That
-    /// method avoids an intermediate copy and is often able to transfer data
-    /// more efficiently than this one.
-    pub fn write_buffer(&self, buffer: &Buffer, offset: BufferAddress, data: &[u8]) {
-        DynContext::queue_write_buffer(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            &buffer.id,
-            buffer.data.as_ref(),
-            offset,
-            data,
-        )
-    }
-
-    /// Write to a buffer via a directly mapped staging buffer.
-    ///
-    /// Return a [`QueueWriteBufferView`] which, when dropped, schedules a copy
-    /// of its contents into `buffer` at `offset`. The returned view
-    /// dereferences to a `size`-byte long `&mut [u8]`, in which you should
-    /// store the data you would like written to `buffer`.
-    ///
-    /// This method may perform transfers faster than [`Queue::write_buffer`],
-    /// because the returned [`QueueWriteBufferView`] is actually the staging
-    /// buffer for the write, mapped into the caller's address space. Writing
-    /// your data directly into this staging buffer avoids the temporary
-    /// CPU-side buffer needed by `write_buffer`.
-    ///
-    /// Reading from the returned view is slow, and will not yield the current
-    /// contents of `buffer`.
-    ///
-    /// Note that dropping the [`QueueWriteBufferView`] does *not* submit the
-    /// transfer to the GPU immediately. The transfer begins only on the next
-    /// call to [`Queue::submit`] after the view is dropped. To get a set of
-    /// scheduled transfers started immediately, it's fine to call `submit` with
-    /// no command buffers at all:
-    ///
-    /// ```no_run
-    /// # let queue: wgpu::Queue = todo!();
-    /// queue.submit([]);
-    /// ```
-    ///
-    /// This method fails if `size` is greater than the size of `buffer` starting at `offset`.
-    #[must_use]
-    pub fn write_buffer_with<'a>(
-        &'a self,
-        buffer: &'a Buffer,
-        offset: BufferAddress,
-        size: BufferSize,
-    ) -> Option<QueueWriteBufferView<'a>> {
-        profiling::scope!("Queue::write_buffer_with");
-        DynContext::queue_validate_write_buffer(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            &buffer.id,
-            buffer.data.as_ref(),
-            offset,
-            size,
-        )?;
-        let staging_buffer = DynContext::queue_create_staging_buffer(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            size,
-        )?;
-        Some(QueueWriteBufferView {
-            queue: self,
-            buffer,
-            offset,
-            inner: staging_buffer,
-        })
-    }
-
-    /// Schedule a write of some data into a texture.
-    ///
-    /// * `data` contains the texels to be written, which must be in
-    ///   [the same format as the texture](TextureFormat).
-    /// * `data_layout` describes the memory layout of `data`, which does not necessarily
-    ///   have to have tightly packed rows.
-    /// * `texture` specifies the texture to write into, and the location within the
-    ///   texture (coordinate offset, mip level) that will be overwritten.
-    /// * `size` is the size, in texels, of the region to be written.
-    ///
-    /// This method fails if `size` overruns the size of `texture`, or if `data` is too short.
-    ///
-    /// This does *not* submit the transfer to the GPU immediately. Calls to
-    /// `write_texture` begin execution only on the next call to
-    /// [`Queue::submit`]. To get a set of scheduled transfers started
-    /// immediately, it's fine to call `submit` with no command buffers at all:
-    ///
-    /// ```no_run
-    /// # let queue: wgpu::Queue = todo!();
-    /// queue.submit([]);
-    /// ```
-    ///
-    /// However, `data` will be immediately copied into staging memory, so the
-    /// caller may discard it any time after this call completes.
-    pub fn write_texture(
-        &self,
-        texture: ImageCopyTexture<'_>,
-        data: &[u8],
-        data_layout: ImageDataLayout,
-        size: Extent3d,
-    ) {
-        DynContext::queue_write_texture(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            texture,
-            data,
-            data_layout,
-            size,
-        )
-    }
-
-    /// Schedule a copy of data from `image` into `texture`.
-    #[cfg(any(webgpu, webgl))]
-    pub fn copy_external_image_to_texture(
-        &self,
-        source: &wgt::ImageCopyExternalImage,
-        dest: ImageCopyTextureTagged<'_>,
-        size: Extent3d,
-    ) {
-        DynContext::queue_copy_external_image_to_texture(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            source,
-            dest,
-            size,
-        )
-    }
-
-    /// Submits a series of finished command buffers for execution.
-    pub fn submit<I: IntoIterator<Item = CommandBuffer>>(
-        &self,
-        command_buffers: I,
-    ) -> SubmissionIndex {
-        let mut command_buffers = command_buffers
-            .into_iter()
-            .map(|mut comb| (comb.id.take().unwrap(), comb.data.take().unwrap()));
-
-        let data = DynContext::queue_submit(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            &mut command_buffers,
-        );
-
-        SubmissionIndex(data)
-    }
-
-    /// Gets the amount of nanoseconds each tick of a timestamp query represents.
-    ///
-    /// Returns zero if timestamp queries are unsupported.
-    ///
-    /// Timestamp values are represented in nanosecond values on WebGPU, see `<https://gpuweb.github.io/gpuweb/#timestamp>`
-    /// Therefore, this is always 1.0 on the web, but on wgpu-core a manual conversion is required.
-    pub fn get_timestamp_period(&self) -> f32 {
-        DynContext::queue_get_timestamp_period(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Registers a callback when the previous call to submit finishes running on the gpu. This callback
-    /// being called implies that all mapped buffer callbacks which were registered before this call will
-    /// have been called.
-    ///
-    /// For the callback to complete, either `queue.submit(..)`, `instance.poll_all(..)`, or `device.poll(..)`
-    /// must be called elsewhere in the runtime, possibly integrated into an event loop or run on a separate thread.
-    ///
-    /// The callback will be called on the thread that first calls the above functions after the gpu work
-    /// has completed. There are no restrictions on the code you can run in the callback, however on native the
-    /// call to the function will not complete until the callback returns, so prefer keeping callbacks short
-    /// and used to set flags, send messages, etc.
-    pub fn on_submitted_work_done(&self, callback: impl FnOnce() + Send + 'static) {
-        DynContext::queue_on_submitted_work_done(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            Box::new(callback),
-        )
-    }
-}
-
-impl SurfaceTexture {
-    /// Schedule this texture to be presented on the owning surface.
-    ///
-    /// Needs to be called after any work on the texture is scheduled via [`Queue::submit`].
-    ///
-    /// # Platform dependent behavior
-    ///
-    /// On Wayland, `present` will attach a `wl_buffer` to the underlying `wl_surface` and commit the new surface
-    /// state. If it is desired to do things such as request a frame callback, scale the surface using the viewporter
-    /// or synchronize other double buffered state, then these operations should be done before the call to `present`.
-    pub fn present(mut self) {
-        self.presented = true;
-        DynContext::surface_present(
-            &*self.texture.context,
-            &self.texture.id,
-            // This call to as_ref is essential because we want the DynContext implementation to see the inner
-            // value of the Box (T::SurfaceOutputDetail), not the Box itself.
-            self.detail.as_ref(),
-        );
-    }
-}
-
-impl Drop for SurfaceTexture {
-    fn drop(&mut self) {
-        if !self.presented && !thread::panicking() {
-            DynContext::surface_texture_discard(
-                &*self.texture.context,
-                &self.texture.id,
-                // This call to as_ref is essential because we want the DynContext implementation to see the inner
-                // value of the Box (T::SurfaceOutputDetail), not the Box itself.
-                self.detail.as_ref(),
-            );
-        }
-    }
-}
-
-impl Surface<'_> {
-    /// Returns the capabilities of the surface when used with the given adapter.
-    ///
-    /// Returns specified values (see [`SurfaceCapabilities`]) if surface is incompatible with the adapter.
-    pub fn get_capabilities(&self, adapter: &Adapter) -> SurfaceCapabilities {
-        DynContext::surface_get_capabilities(
-            &*self.context,
-            &self.id,
-            self.surface_data.as_ref(),
-            &adapter.id,
-            adapter.data.as_ref(),
-        )
-    }
-
-    /// Return a default `SurfaceConfiguration` from width and height to use for the [`Surface`] with this adapter.
-    ///
-    /// Returns None if the surface isn't supported by this adapter
-    pub fn get_default_config(
-        &self,
-        adapter: &Adapter,
-        width: u32,
-        height: u32,
-    ) -> Option<SurfaceConfiguration> {
-        let caps = self.get_capabilities(adapter);
-        Some(SurfaceConfiguration {
-            usage: wgt::TextureUsages::RENDER_ATTACHMENT,
-            format: *caps.formats.first()?,
-            width,
-            height,
-            desired_maximum_frame_latency: 2,
-            present_mode: *caps.present_modes.first()?,
-            alpha_mode: wgt::CompositeAlphaMode::Auto,
-            view_formats: vec![],
-        })
-    }
-
-    /// Initializes [`Surface`] for presentation.
-    ///
-    /// # Panics
-    ///
-    /// - A old [`SurfaceTexture`] is still alive referencing an old surface.
-    /// - Texture format requested is unsupported on the surface.
-    /// - `config.width` or `config.height` is zero.
-    pub fn configure(&self, device: &Device, config: &SurfaceConfiguration) {
-        DynContext::surface_configure(
-            &*self.context,
-            &self.id,
-            self.surface_data.as_ref(),
-            &device.id,
-            device.data.as_ref(),
-            config,
-        );
-
-        let mut conf = self.config.lock();
-        *conf = Some(config.clone());
-    }
-
-    /// Returns the next texture to be presented by the swapchain for drawing.
-    ///
-    /// In order to present the [`SurfaceTexture`] returned by this method,
-    /// first a [`Queue::submit`] needs to be done with some work rendering to this texture.
-    /// Then [`SurfaceTexture::present`] needs to be called.
-    ///
-    /// If a SurfaceTexture referencing this surface is alive when the swapchain is recreated,
-    /// recreating the swapchain will panic.
-    pub fn get_current_texture(&self) -> Result<SurfaceTexture, SurfaceError> {
-        let (texture_id, texture_data, status, detail) = DynContext::surface_get_current_texture(
-            &*self.context,
-            &self.id,
-            self.surface_data.as_ref(),
-        );
-
-        let suboptimal = match status {
-            SurfaceStatus::Good => false,
-            SurfaceStatus::Suboptimal => true,
-            SurfaceStatus::Timeout => return Err(SurfaceError::Timeout),
-            SurfaceStatus::Outdated => return Err(SurfaceError::Outdated),
-            SurfaceStatus::Lost => return Err(SurfaceError::Lost),
-        };
-
-        let guard = self.config.lock();
-        let config = guard
-            .as_ref()
-            .expect("This surface has not been configured yet.");
-
-        let descriptor = TextureDescriptor {
-            label: None,
-            size: Extent3d {
-                width: config.width,
-                height: config.height,
-                depth_or_array_layers: 1,
-            },
-            format: config.format,
-            usage: config.usage,
-            mip_level_count: 1,
-            sample_count: 1,
-            dimension: TextureDimension::D2,
-            view_formats: &[],
-        };
-
-        texture_id
-            .zip(texture_data)
-            .map(|(id, data)| SurfaceTexture {
-                texture: Texture {
-                    context: Arc::clone(&self.context),
-                    id,
-                    data,
-                    owned: false,
-                    descriptor,
-                },
-                suboptimal,
-                presented: false,
-                detail,
-            })
-            .ok_or(SurfaceError::Lost)
-    }
-
-    /// Returns the inner hal Surface using a callback. The hal surface will be `None` if the
-    /// backend type argument does not match with this wgpu Surface
-    ///
-    /// # Safety
-    ///
-    /// - The raw handle obtained from the hal Surface must not be manually destroyed
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Surface>) -> R, R>(
-        &mut self,
-        hal_surface_callback: F,
-    ) -> Option<R> {
-        self.context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-            .map(|ctx| unsafe {
-                ctx.surface_as_hal::<A, F, R>(
-                    self.surface_data.downcast_ref().unwrap(),
-                    hal_surface_callback,
-                )
-            })
-    }
-}
-
-/// Opaque globally-unique identifier
-#[repr(transparent)]
-pub struct Id<T>(NonZeroU64, PhantomData<*mut T>);
-
-impl<T> Id<T> {
-    /// For testing use only. We provide no guarantees about the actual value of the ids.
-    #[doc(hidden)]
-    pub fn inner(&self) -> u64 {
-        self.0.get()
-    }
-}
-
-// SAFETY: `Id` is a bare `NonZeroU64`, the type parameter is a marker purely to avoid confusing Ids
-// returned for different types , so `Id` can safely implement Send and Sync.
-unsafe impl<T> Send for Id<T> {}
-
-// SAFETY: See the implementation for `Send`.
-unsafe impl<T> Sync for Id<T> {}
-
-impl<T> Clone for Id<T> {
-    fn clone(&self) -> Self {
-        *self
-    }
-}
-
-impl<T> Copy for Id<T> {}
-
-impl<T> fmt::Debug for Id<T> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_tuple("Id").field(&self.0).finish()
-    }
-}
-
-impl<T> PartialEq for Id<T> {
-    fn eq(&self, other: &Id<T>) -> bool {
-        self.0 == other.0
-    }
-}
-
-impl<T> Eq for Id<T> {}
-
-impl<T> PartialOrd for Id<T> {
-    fn partial_cmp(&self, other: &Id<T>) -> Option<Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl<T> Ord for Id<T> {
-    fn cmp(&self, other: &Id<T>) -> Ordering {
-        self.0.cmp(&other.0)
-    }
-}
-
-impl<T> std::hash::Hash for Id<T> {
-    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-        self.0.hash(state)
-    }
-}
-
-impl Adapter {
-    /// Returns a globally-unique identifier for this `Adapter`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl Device {
-    /// Returns a globally-unique identifier for this `Device`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl Queue {
-    /// Returns a globally-unique identifier for this `Queue`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl ShaderModule {
-    /// Returns a globally-unique identifier for this `ShaderModule`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl BindGroupLayout {
-    /// Returns a globally-unique identifier for this `BindGroupLayout`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl BindGroup {
-    /// Returns a globally-unique identifier for this `BindGroup`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl TextureView {
-    /// Returns a globally-unique identifier for this `TextureView`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-
-    /// Returns the inner hal TextureView using a callback. The hal texture will be `None` if the
-    /// backend type argument does not match with this wgpu Texture
-    ///
-    /// # Safety
-    ///
-    /// - The raw handle obtained from the hal TextureView must not be manually destroyed
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::TextureView>) -> R, R>(
-        &self,
-        hal_texture_view_callback: F,
-    ) -> R {
-        use core::id::TextureViewId;
-
-        let texture_view_id = TextureViewId::from(self.id);
-
-        if let Some(ctx) = self
-            .context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-        {
-            unsafe {
-                ctx.texture_view_as_hal::<A, F, R>(texture_view_id, hal_texture_view_callback)
-            }
-        } else {
-            hal_texture_view_callback(None)
-        }
-    }
-}
-
-impl Sampler {
-    /// Returns a globally-unique identifier for this `Sampler`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl Buffer {
-    /// Returns a globally-unique identifier for this `Buffer`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl Texture {
-    /// Returns a globally-unique identifier for this `Texture`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl QuerySet {
-    /// Returns a globally-unique identifier for this `QuerySet`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl PipelineLayout {
-    /// Returns a globally-unique identifier for this `PipelineLayout`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl RenderPipeline {
-    /// Returns a globally-unique identifier for this `RenderPipeline`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl ComputePipeline {
-    /// Returns a globally-unique identifier for this `ComputePipeline`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl RenderBundle {
-    /// Returns a globally-unique identifier for this `RenderBundle`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl Surface<'_> {
-    /// Returns a globally-unique identifier for this `Surface`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Surface<'_>> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-/// Type for the callback of uncaptured error handler
-pub trait UncapturedErrorHandler: Fn(Error) + Send + 'static {}
-impl<T> UncapturedErrorHandler for T where T: Fn(Error) + Send + 'static {}
-
-/// Error type
-#[derive(Debug)]
-pub enum Error {
-    /// Out of memory error
-    OutOfMemory {
-        /// Lower level source of the error.
-        #[cfg(send_sync)]
-        #[cfg_attr(docsrs, doc(cfg(all())))]
-        source: Box<dyn error::Error + Send + Sync + 'static>,
-        /// Lower level source of the error.
-        #[cfg(not(send_sync))]
-        #[cfg_attr(docsrs, doc(cfg(all())))]
-        source: Box<dyn error::Error + 'static>,
-    },
-    /// Validation error, signifying a bug in code or data
-    Validation {
-        /// Lower level source of the error.
-        #[cfg(send_sync)]
-        #[cfg_attr(docsrs, doc(cfg(all())))]
-        source: Box<dyn error::Error + Send + Sync + 'static>,
-        /// Lower level source of the error.
-        #[cfg(not(send_sync))]
-        #[cfg_attr(docsrs, doc(cfg(all())))]
-        source: Box<dyn error::Error + 'static>,
-        /// Description of the validation error.
-        description: String,
-    },
-    /// Internal error. Used for signalling any failures not explicitly expected by WebGPU.
-    ///
-    /// These could be due to internal implementation or system limits being reached.
-    Internal {
-        /// Lower level source of the error.
-        #[cfg(send_sync)]
-        #[cfg_attr(docsrs, doc(cfg(all())))]
-        source: Box<dyn error::Error + Send + Sync + 'static>,
-        /// Lower level source of the error.
-        #[cfg(not(send_sync))]
-        #[cfg_attr(docsrs, doc(cfg(all())))]
-        source: Box<dyn error::Error + 'static>,
-        /// Description of the internal GPU error.
-        description: String,
-    },
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Error: Send, Sync);
-
-impl error::Error for Error {
-    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
-        match self {
-            Error::OutOfMemory { source } => Some(source.as_ref()),
-            Error::Validation { source, .. } => Some(source.as_ref()),
-            Error::Internal { source, .. } => Some(source.as_ref()),
-        }
-    }
-}
-
-impl fmt::Display for Error {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            Error::OutOfMemory { .. } => f.write_str("Out of Memory"),
-            Error::Validation { description, .. } => f.write_str(description),
-            Error::Internal { description, .. } => f.write_str(description),
-        }
-    }
-}
-
-use send_sync::*;
-
-mod send_sync {
-    use std::any::Any;
-    use std::fmt;
-
-    use wgt::WasmNotSendSync;
-
-    pub trait AnyWasmNotSendSync: Any + WasmNotSendSync {
-        fn upcast_any_ref(&self) -> &dyn Any;
-    }
-    impl<T: Any + WasmNotSendSync> AnyWasmNotSendSync for T {
-        #[inline]
-        fn upcast_any_ref(&self) -> &dyn Any {
-            self
-        }
-    }
-
-    impl dyn AnyWasmNotSendSync + 'static {
-        #[inline]
-        pub fn downcast_ref<T: 'static>(&self) -> Option<&T> {
-            self.upcast_any_ref().downcast_ref::<T>()
-        }
-    }
-
-    impl fmt::Debug for dyn AnyWasmNotSendSync {
-        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-            f.debug_struct("Any").finish_non_exhaustive()
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::BufferSize;
-
-    #[test]
-    fn range_to_offset_size_works() {
-        assert_eq!(crate::range_to_offset_size(0..2), (0, BufferSize::new(2)));
-        assert_eq!(crate::range_to_offset_size(2..5), (2, BufferSize::new(3)));
-        assert_eq!(crate::range_to_offset_size(..), (0, None));
-        assert_eq!(crate::range_to_offset_size(21..), (21, None));
-        assert_eq!(crate::range_to_offset_size(0..), (0, None));
-        assert_eq!(crate::range_to_offset_size(..21), (0, BufferSize::new(21)));
-    }
-
-    #[test]
-    #[should_panic]
-    fn range_to_offset_size_panics_for_empty_range() {
-        crate::range_to_offset_size(123..123);
-    }
-
-    #[test]
-    #[should_panic]
-    fn range_to_offset_size_panics_for_unbounded_empty_range() {
-        crate::range_to_offset_size(..0);
-    }
-}
diff --git a/wgpu/src/send_sync.rs b/wgpu/src/send_sync.rs
new file mode 100644
index 0000000000..3842931716
--- /dev/null
+++ b/wgpu/src/send_sync.rs
@@ -0,0 +1,37 @@
+use std::any::Any;
+use std::fmt;
+
+use wgt::WasmNotSendSync;
+
+/// Combination of `Any` and `WasmNotSendSync`, usable as a trait object.
+///
+/// A `dyn AnyWasmNotSendSync` can be downcast back to its concrete type with
+/// `downcast_ref`, while still carrying the `WasmNotSendSync` bound.
+pub trait AnyWasmNotSendSync: Any + WasmNotSendSync {
+    /// Returns `self` as a plain `&dyn Any`, so the standard `Any` API can be used.
+    fn upcast_any_ref(&self) -> &dyn Any;
+}
+// Blanket impl: every type satisfying both bounds implements the trait.
+impl<T: Any + WasmNotSendSync> AnyWasmNotSendSync for T {
+    #[inline]
+    fn upcast_any_ref(&self) -> &dyn Any {
+        self
+    }
+}
+
+impl dyn AnyWasmNotSendSync + 'static {
+    /// Returns a reference to the underlying value if it is of type `T`, or
+    /// `None` otherwise. Forwards to `<dyn Any>::downcast_ref` through
+    /// `upcast_any_ref`.
+    #[inline]
+    pub fn downcast_ref<T: 'static>(&self) -> Option<&T> {
+        self.upcast_any_ref().downcast_ref::<T>()
+    }
+}
+
+// The concrete type is erased, so `Debug` prints only an opaque `Any { .. }`.
+impl fmt::Debug for dyn AnyWasmNotSendSync {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Any").finish_non_exhaustive()
+    }
+}
diff --git a/wgpu/src/util/mod.rs b/wgpu/src/util/mod.rs
index f52b82a9c1..ff4fb7ecf8 100644
--- a/wgpu/src/util/mod.rs
+++ b/wgpu/src/util/mod.rs
@@ -123,7 +123,7 @@ impl DownloadBuffer {
                     return;
                 }
 
-                let mapped_range = super::DynContext::buffer_get_mapped_range(
+                let mapped_range = crate::context::DynContext::buffer_get_mapped_range(
                     &*download.context,
                     &download.id,
                     download.data.as_ref(),

From c0e7c1ef945a7dd61c81fb951ea554213811aee0 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sat, 20 Jul 2024 21:20:05 +0200
Subject: [PATCH 065/226] Bump core MSRV to 1.76

---
 .github/workflows/ci.yml | 2 +-
 CHANGELOG.md             | 1 +
 Cargo.toml               | 2 +-
 README.md                | 4 ++--
 naga/Cargo.toml          | 2 +-
 naga/README.md           | 2 +-
 rust-toolchain.toml      | 2 +-
 wgpu-core/Cargo.toml     | 2 +-
 wgpu-hal/Cargo.toml      | 2 +-
 wgpu-types/Cargo.toml    | 2 +-
 10 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 426227adf1..a03a08f7ca 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -43,7 +43,7 @@ env:
   REPO_MSRV: "1.76"
   # This is the MSRV used by the `wgpu-core`, `wgpu-hal`, and `wgpu-types` crates,
   # to ensure that they can be used with firefox.
-  CORE_MSRV: "1.74"
+  CORE_MSRV: "1.76"
 
   #
   # Environment variables
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8f5f499740..66248e1752 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -45,6 +45,7 @@ Bottom level categories:
 
 - Fix profiling with `tracy`. By @waywardmonkeys in [#5988](https://github.com/gfx-rs/wgpu/pull/5988)
 - As a workaround for [issue #4905](https://github.com/gfx-rs/wgpu/issues/4905), `wgpu-core` is undocumented unless `--cfg wgpu_core_doc` feature is enabled. By @kpreid in [#5987](https://github.com/gfx-rs/wgpu/pull/5987)
+- Bump MSRV for `d3d12`/`naga`/`wgpu-core`/`wgpu-hal`/`wgpu-types`' to 1.76. By @wumpf in [#6003](https://github.com/gfx-rs/wgpu/pull/6003)
 
 ## 22.0.0 (2024-07-17)
 
diff --git a/Cargo.toml b/Cargo.toml
index 2b2ca766a4..4ea60eb59e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -42,7 +42,7 @@ default-members = [
 
 [workspace.package]
 edition = "2021"
-rust-version = "1.74"
+rust-version = "1.76"
 keywords = ["graphics"]
 license = "MIT OR Apache-2.0"
 homepage = "https://wgpu.rs/"
diff --git a/README.md b/README.md
index fcff011539..bdd587b573 100644
--- a/README.md
+++ b/README.md
@@ -120,8 +120,8 @@ On Linux, you can point to them using `LD_LIBRARY_PATH` environment.
 
 Due to complex dependants, we have two MSRV policies:
 
-- `d3d12`, `naga`, `wgpu-core`, `wgpu-hal`, and `wgpu-types`'s MSRV is **1.74**.
-- The rest of the workspace has an MSRV of **1.76**.
+- `d3d12`, `naga`, `wgpu-core`, `wgpu-hal`, and `wgpu-types`'s MSRV is **1.76**, but may be lower than the rest of the workspace in the future.
+- The rest of the workspace currently has an MSRV of **1.76** as well, but it may become higher than that of the crates listed above.
 
 It is enforced on CI (in "/.github/workflows/ci.yml") with the `CORE_MSRV` and `REPO_MSRV` variables.
 This version can only be upgraded in breaking releases, though we release a breaking version every three months.
diff --git a/naga/Cargo.toml b/naga/Cargo.toml
index 1bd14e9ee8..d6c543b567 100644
--- a/naga/Cargo.toml
+++ b/naga/Cargo.toml
@@ -9,7 +9,7 @@ keywords = ["shader", "SPIR-V", "GLSL", "MSL"]
 license = "MIT OR Apache-2.0"
 exclude = ["bin/**/*", "tests/**/*", "Cargo.lock", "target/**/*"]
 resolver = "2"
-rust-version = "1.74"
+rust-version = "1.76"
 autotests = false
 
 [[test]]
diff --git a/naga/README.md b/naga/README.md
index 0e07d40496..b5e98bb727 100644
--- a/naga/README.md
+++ b/naga/README.md
@@ -4,7 +4,7 @@
 [![Crates.io](https://img.shields.io/crates/v/naga.svg?label=naga)](https://crates.io/crates/naga)
 [![Docs.rs](https://docs.rs/naga/badge.svg)](https://docs.rs/naga)
 [![Build Status](https://github.com/gfx-rs/naga/workflows/pipeline/badge.svg)](https://github.com/gfx-rs/naga/actions)
-![MSRV](https://img.shields.io/badge/rustc-1.74+-blue.svg)
+![MSRV](https://img.shields.io/badge/rustc-1.76+-blue.svg)
 [![codecov.io](https://codecov.io/gh/gfx-rs/naga/branch/master/graph/badge.svg?token=9VOKYO8BM2)](https://codecov.io/gh/gfx-rs/naga)
 
 The shader translation library for the needs of [wgpu](https://github.com/gfx-rs/wgpu).
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
index aa10fa14eb..45bb8d6d51 100644
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,4 +1,4 @@
 [toolchain]
-channel = "1.76"                     # Needed for deno & cts_runner. Firefox's MSRV is 1.74
+channel = "1.76"
 components = ["rustfmt", "clippy"]
 targets = ["wasm32-unknown-unknown"]
diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml
index 2e645a5406..ca03b99e2b 100644
--- a/wgpu-core/Cargo.toml
+++ b/wgpu-core/Cargo.toml
@@ -13,7 +13,7 @@ license = "MIT OR Apache-2.0"
 # copy the crates it actually uses out of the workspace, so it's meaningful for
 # them to have less restrictive MSRVs individually than the workspace as a
 # whole, if their code permits. See `../README.md` for details.
-rust-version = "1.74"
+rust-version = "1.76"
 
 [package.metadata.docs.rs]
 all-features = true
diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml
index b8834dc705..07424c8f9d 100644
--- a/wgpu-hal/Cargo.toml
+++ b/wgpu-hal/Cargo.toml
@@ -13,7 +13,7 @@ license = "MIT OR Apache-2.0"
 # copy the crates it actually uses out of the workspace, so it's meaningful for
 # them to have less restrictive MSRVs individually than the workspace as a
 # whole, if their code permits. See `../README.md` for details.
-rust-version = "1.74"
+rust-version = "1.76"
 
 [package.metadata.docs.rs]
 # Ideally we would enable all the features.
diff --git a/wgpu-types/Cargo.toml b/wgpu-types/Cargo.toml
index 8c211e1839..6c8f284896 100644
--- a/wgpu-types/Cargo.toml
+++ b/wgpu-types/Cargo.toml
@@ -13,7 +13,7 @@ license = "MIT OR Apache-2.0"
 # copy the crates it actually uses out of the workspace, so it's meaningful for
 # them to have less restrictive MSRVs individually than the workspace as a
 # whole, if their code permits. See `../README.md` for details.
-rust-version = "1.74"
+rust-version = "1.76"
 
 [package.metadata.docs.rs]
 all-features = true

From 5a0e2187f0d4caf979a08c9257d97e63fda00e83 Mon Sep 17 00:00:00 2001
From: Vladas Zakrevskis <146100@gmail.com>
Date: Mon, 22 Jul 2024 04:12:28 +0300
Subject: [PATCH 066/226] Print requested and supported usages on
 UnsupportedUsage error (#6007)

* Print requested and supported usages on UnsupportedUsage error

* fmt

* changelog
---
 CHANGELOG.md                   | 1 +
 wgpu-core/src/device/global.rs | 5 ++++-
 wgpu-core/src/present.rs       | 7 +++++--
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 66248e1752..c781d3f604 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -46,6 +46,7 @@ Bottom level categories:
 - Fix profiling with `tracy`. By @waywardmonkeys in [#5988](https://github.com/gfx-rs/wgpu/pull/5988)
 - As a workaround for [issue #4905](https://github.com/gfx-rs/wgpu/issues/4905), `wgpu-core` is undocumented unless `--cfg wgpu_core_doc` feature is enabled. By @kpreid in [#5987](https://github.com/gfx-rs/wgpu/pull/5987)
 - Bump MSRV for `d3d12`/`naga`/`wgpu-core`/`wgpu-hal`/`wgpu-types`' to 1.76. By @wumpf in [#6003](https://github.com/gfx-rs/wgpu/pull/6003)
+- Print requested and supported usages on `UnsupportedUsage` error. By @VladasZ in [#6007](https://github.com/gfx-rs/wgpu/pull/6007)
 
 ## 22.0.0 (2024-07-17)
 
diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index ba2b94dd24..69a9ebf32c 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -2009,7 +2009,10 @@ impl Global {
                 config.composite_alpha_mode = new_alpha_mode;
             }
             if !caps.usage.contains(config.usage) {
-                return Err(E::UnsupportedUsage);
+                return Err(E::UnsupportedUsage {
+                    requested: config.usage,
+                    available: caps.usage,
+                });
             }
             if width == 0 || height == 0 {
                 return Err(E::ZeroArea);
diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs
index fa03387cb7..b59493f316 100644
--- a/wgpu-core/src/present.rs
+++ b/wgpu-core/src/present.rs
@@ -89,8 +89,11 @@ pub enum ConfigureSurfaceError {
         requested: wgt::CompositeAlphaMode,
         available: Vec<wgt::CompositeAlphaMode>,
     },
-    #[error("Requested usage is not supported")]
-    UnsupportedUsage,
+    #[error("Requested usage {requested:?} is not in the list of supported usages: {available:?}")]
+    UnsupportedUsage {
+        requested: hal::TextureUses,
+        available: hal::TextureUses,
+    },
     #[error("Gpu got stuck :(")]
     StuckGpu,
 }

From 101c996703ba5701cfe38efc677e76b9083592de Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 22 Jul 2024 00:20:29 -0400
Subject: [PATCH 067/226] build(deps): bump the patch-updates group with 15
 updates (#6008)

Bumps the patch-updates group with 15 updates:

| Package | From | To |
| --- | --- | --- |
| [bit-vec](https://github.com/contain-rs/bit-vec) | `0.7.0` | `0.8.0` |
| [libloading](https://github.com/nagisa/rust_libloading) | `0.8.4` | `0.8.5` |
| [tracy-client](https://github.com/nagisa/rust_tracy_client) | `0.17.0` | `0.17.1` |
| [thiserror](https://github.com/dtolnay/thiserror) | `1.0.62` | `1.0.63` |
| [bit-set](https://github.com/contain-rs/bit-set) | `0.6.0` | `0.8.0` |
| [glow](https://github.com/grovesNL/glow) | `0.13.1` | `0.14.0` |
| [tokio](https://github.com/tokio-rs/tokio) | `1.38.0` | `1.38.1` |
| [syn](https://github.com/dtolnay/syn) | `2.0.71` | `2.0.72` |
| [arrayref](https://github.com/droundy/arrayref) | `0.3.7` | `0.3.8` |
| [cc](https://github.com/rust-lang/cc-rs) | `1.1.5` | `1.1.6` |
| [thiserror-impl](https://github.com/dtolnay/thiserror) | `1.0.62` | `1.0.63` |
| [thread-id](https://github.com/ruuda/thread-id) | `4.2.1` | `4.2.2` |
| [tracy-client-sys](https://github.com/nagisa/rust_tracy_client) | `0.22.2` | `0.23.0` |
| [wayland-backend](https://github.com/smithay/wayland-rs) | `0.3.5` | `0.3.6` |
| [xcursor](https://github.com/esposm03/xcursor-rs) | `0.3.5` | `0.3.6` |


Updates `bit-vec` from 0.7.0 to 0.8.0
- [Changelog](https://github.com/contain-rs/bit-vec/blob/master/RELEASES.md)
- [Commits](https://github.com/contain-rs/bit-vec/commits)

Updates `libloading` from 0.8.4 to 0.8.5
- [Commits](https://github.com/nagisa/rust_libloading/compare/0.8.4...0.8.5)

Updates `tracy-client` from 0.17.0 to 0.17.1
- [Commits](https://github.com/nagisa/rust_tracy_client/compare/tracy-client-v0.17.0...tracy-client-v0.17.1)

Updates `thiserror` from 1.0.62 to 1.0.63
- [Release notes](https://github.com/dtolnay/thiserror/releases)
- [Commits](https://github.com/dtolnay/thiserror/compare/1.0.62...1.0.63)

Updates `bit-set` from 0.6.0 to 0.8.0
- [Release notes](https://github.com/contain-rs/bit-set/releases)
- [Changelog](https://github.com/contain-rs/bit-set/blob/master/RELEASES.md)
- [Commits](https://github.com/contain-rs/bit-set/commits)

Updates `glow` from 0.13.1 to 0.14.0
- [Commits](https://github.com/grovesNL/glow/commits)

Updates `tokio` from 1.38.0 to 1.38.1
- [Release notes](https://github.com/tokio-rs/tokio/releases)
- [Commits](https://github.com/tokio-rs/tokio/compare/tokio-1.38.0...tokio-1.38.1)

Updates `syn` from 2.0.71 to 2.0.72
- [Release notes](https://github.com/dtolnay/syn/releases)
- [Commits](https://github.com/dtolnay/syn/compare/2.0.71...2.0.72)

Updates `arrayref` from 0.3.7 to 0.3.8
- [Commits](https://github.com/droundy/arrayref/commits)

Updates `cc` from 1.1.5 to 1.1.6
- [Release notes](https://github.com/rust-lang/cc-rs/releases)
- [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md)
- [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.1.5...cc-v1.1.6)

Updates `thiserror-impl` from 1.0.62 to 1.0.63
- [Release notes](https://github.com/dtolnay/thiserror/releases)
- [Commits](https://github.com/dtolnay/thiserror/compare/1.0.62...1.0.63)

Updates `thread-id` from 4.2.1 to 4.2.2
- [Changelog](https://github.com/ruuda/thread-id/blob/master/changelog.md)
- [Commits](https://github.com/ruuda/thread-id/compare/v4.2.1...v4.2.2)

Updates `tracy-client-sys` from 0.22.2 to 0.23.0
- [Commits](https://github.com/nagisa/rust_tracy_client/compare/tracy-client-sys-v0.22.2...tracy-client-sys-v0.23.0)

Updates `wayland-backend` from 0.3.5 to 0.3.6
- [Release notes](https://github.com/smithay/wayland-rs/releases)
- [Changelog](https://github.com/Smithay/wayland-rs/blob/master/historical_changelog.md)
- [Commits](https://github.com/smithay/wayland-rs/commits)

Updates `xcursor` from 0.3.5 to 0.3.6
- [Commits](https://github.com/esposm03/xcursor-rs/commits)

---
updated-dependencies:
- dependency-name: bit-vec
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: patch-updates
- dependency-name: libloading
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: tracy-client
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: thiserror
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: bit-set
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: patch-updates
- dependency-name: glow
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: patch-updates
- dependency-name: tokio
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: syn
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: arrayref
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: cc
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: thiserror-impl
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: thread-id
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: tracy-client-sys
  dependency-type: indirect
  update-type: version-update:semver-minor
  dependency-group: patch-updates
- dependency-name: wayland-backend
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: xcursor
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 Cargo.lock           | 128 +++++++++++++++++++++----------------------
 Cargo.toml           |   8 +--
 naga/Cargo.toml      |   4 +-
 wgpu-core/Cargo.toml |   2 +-
 wgpu-hal/Cargo.toml  |   4 +-
 5 files changed, 73 insertions(+), 73 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index b36c3efc98..8b4e604f4e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -186,7 +186,7 @@ dependencies = [
  "argh_shared",
  "proc-macro2",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -200,9 +200,9 @@ dependencies = [
 
 [[package]]
 name = "arrayref"
-version = "0.3.7"
+version = "0.3.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545"
+checksum = "9d151e35f61089500b617991b791fc8bfd237ae50cd5950803758a179b41e67a"
 
 [[package]]
 name = "arrayvec"
@@ -231,7 +231,7 @@ version = "0.38.0+1.3.281"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0bb44936d800fea8f016d7f2311c6a4f97aebd5dc86f09906139ec848cf3a46f"
 dependencies = [
- "libloading 0.8.4",
+ "libloading 0.8.5",
 ]
 
 [[package]]
@@ -242,7 +242,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -308,18 +308,18 @@ dependencies = [
 
 [[package]]
 name = "bit-set"
-version = "0.6.0"
+version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f0481a0e032742109b1133a095184ee93d88f3dc9e0d28a5d033dc77a073f44f"
+checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
 dependencies = [
  "bit-vec",
 ]
 
 [[package]]
 name = "bit-vec"
-version = "0.7.0"
+version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d2c54ff287cfc0a34f38a6b832ea1bd8e448a330b3e40a50859e6488bee07f22"
+checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
 
 [[package]]
 name = "bitflags"
@@ -385,7 +385,7 @@ checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -448,9 +448,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
 
 [[package]]
 name = "cc"
-version = "1.1.5"
+version = "1.1.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "324c74f2155653c90b04f25b2a47a8a631360cb908f92a772695f430c7e31052"
+checksum = "2aba8f4e9906c7ce3c73463f62a7f0c65183ada1a2d47e397cc8810827f9694f"
 dependencies = [
  "jobserver",
  "libc",
@@ -541,7 +541,7 @@ dependencies = [
  "heck 0.5.0",
  "proc-macro2",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -886,7 +886,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f"
 dependencies = [
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -920,7 +920,7 @@ name = "d3d12"
 version = "22.0.0"
 dependencies = [
  "bitflags 2.6.0",
- "libloading 0.8.4",
+ "libloading 0.7.4",
  "winapi",
 ]
 
@@ -1033,7 +1033,7 @@ dependencies = [
  "quote",
  "strum",
  "strum_macros",
- "syn 2.0.71",
+ "syn 2.0.72",
  "thiserror",
 ]
 
@@ -1106,7 +1106,7 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -1119,7 +1119,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustc_version 0.4.0",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -1140,7 +1140,7 @@ version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "330c60081dcc4c72131f8eb70510f1ac07223e5d4163db481a04a0befcffa412"
 dependencies = [
- "libloading 0.8.4",
+ "libloading 0.7.4",
 ]
 
 [[package]]
@@ -1207,7 +1207,7 @@ checksum = "b36f2ddfca91251bed7f931f24b192e4eaf0a0e0fa70cf81cfb1416a1973620e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -1353,7 +1353,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -1478,7 +1478,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -1573,9 +1573,9 @@ checksum = "779ae4bf7e8421cf91c0b3b64e7e8b40b862fba4d393f59150042de7c4965a94"
 
 [[package]]
 name = "glow"
-version = "0.13.1"
+version = "0.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bd348e04c43b32574f2de31c8bb397d96c9fcfa1371bd4ca6d8bdc464ab121b1"
+checksum = "f865cbd94bd355b89611211e49508da98a1fce0ad755c1e8448fb96711b24528"
 dependencies = [
  "js-sys",
  "slotmap",
@@ -1747,7 +1747,7 @@ dependencies = [
  "bitflags 2.6.0",
  "com",
  "libc",
- "libloading 0.8.4",
+ "libloading 0.7.4",
  "thiserror",
  "widestring",
  "winapi",
@@ -1955,7 +1955,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6aae1df220ece3c0ada96b8153459b67eebe9ae9212258bb0134ae60416fdf76"
 dependencies = [
  "libc",
- "libloading 0.8.4",
+ "libloading 0.8.5",
  "pkg-config",
 ]
 
@@ -2009,12 +2009,12 @@ dependencies = [
 
 [[package]]
 name = "libloading"
-version = "0.8.4"
+version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d"
+checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
 dependencies = [
  "cfg-if",
- "windows-targets 0.52.5",
+ "windows-targets 0.48.5",
 ]
 
 [[package]]
@@ -2455,7 +2455,7 @@ dependencies = [
  "proc-macro-crate 3.1.0",
  "proc-macro2",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -2656,7 +2656,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -2795,7 +2795,7 @@ checksum = "07c277e4e643ef00c1233393c673f655e3672cf7eb3ba08a00bdd0ea59139b5f"
 dependencies = [
  "proc-macro-rules-macros",
  "proc-macro2",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -2807,7 +2807,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -3160,7 +3160,7 @@ checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -3427,7 +3427,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustversion",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -3443,9 +3443,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.71"
+version = "2.0.72"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462"
+checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -3463,29 +3463,29 @@ dependencies = [
 
 [[package]]
 name = "thiserror"
-version = "1.0.62"
+version = "1.0.63"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2675633b1499176c2dff06b0856a27976a8f9d436737b4cf4f312d4d91d8bbb"
+checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
 dependencies = [
  "thiserror-impl",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "1.0.62"
+version = "1.0.63"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d20468752b09f49e909e55a5d338caa8bedf615594e9d80bc4c565d30faf798c"
+checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
 name = "thread-id"
-version = "4.2.1"
+version = "4.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f0ec81c46e9eb50deaa257be2f148adf052d1fb7701cfd55ccfab2525280b70b"
+checksum = "cfe8f25bbdd100db7e1d34acf7fd2dc59c4bf8f7483f505eaa7d4f12f76cc0ea"
 dependencies = [
  "libc",
  "winapi",
@@ -3587,9 +3587,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
 [[package]]
 name = "tokio"
-version = "1.38.0"
+version = "1.38.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a"
+checksum = "eb2caba9f80616f438e09748d5acda951967e1ea58508ef53d9c6402485a46df"
 dependencies = [
  "backtrace",
  "bytes",
@@ -3612,7 +3612,7 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -3694,9 +3694,9 @@ dependencies = [
 
 [[package]]
 name = "tracy-client"
-version = "0.17.0"
+version = "0.17.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "59fb931a64ff88984f86d3e9bcd1ae8843aa7fe44dd0f8097527bc172351741d"
+checksum = "63de1e1d4115534008d8fd5788b39324d6f58fc707849090533828619351d855"
 dependencies = [
  "loom",
  "once_cell",
@@ -3705,9 +3705,9 @@ dependencies = [
 
 [[package]]
 name = "tracy-client-sys"
-version = "0.22.2"
+version = "0.23.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9d104d610dfa9dd154535102cc9c6164ae1fa37842bc2d9e83f9ac82b0ae0882"
+checksum = "98b98232a2447ce0a58f9a0bfb5f5e39647b5c597c994b63945fcccd1306fafb"
 dependencies = [
  "cc",
 ]
@@ -3921,7 +3921,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
  "wasm-bindgen-shared",
 ]
 
@@ -3955,7 +3955,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
@@ -3988,21 +3988,21 @@ checksum = "b7f89739351a2e03cb94beb799d47fb2cac01759b40ec441f7de39b00cbf7ef0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
 name = "wayland-backend"
-version = "0.3.5"
+version = "0.3.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "269c04f203640d0da2092d1b8d89a2d081714ae3ac2f1b53e99f205740517198"
+checksum = "f90e11ce2ca99c97b940ee83edbae9da2d56a08f9ea8158550fd77fa31722993"
 dependencies = [
  "cc",
  "downcast-rs",
  "rustix",
  "scoped-tls",
  "smallvec",
- "wayland-sys 0.31.3",
+ "wayland-sys 0.31.4",
 ]
 
 [[package]]
@@ -4173,9 +4173,9 @@ dependencies = [
 
 [[package]]
 name = "wayland-sys"
-version = "0.31.3"
+version = "0.31.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a6754825230fa5b27bafaa28c30b3c9e72c55530581220cef401fa422c0fae7"
+checksum = "43676fe2daf68754ecf1d72026e4e6c15483198b5d24e888b74d3f22f887a148"
 dependencies = [
  "dlib",
  "log",
@@ -4368,7 +4368,7 @@ version = "22.0.0"
 dependencies = [
  "heck 0.5.0",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -4863,7 +4863,7 @@ dependencies = [
  "as-raw-xcb-connection",
  "gethostname",
  "libc",
- "libloading 0.8.4",
+ "libloading 0.8.5",
  "once_cell",
  "rustix",
  "x11rb-protocol",
@@ -4877,9 +4877,9 @@ checksum = "ec107c4503ea0b4a98ef47356329af139c0a4f7750e621cf2973cd3385ebcb3d"
 
 [[package]]
 name = "xcursor"
-version = "0.3.5"
+version = "0.3.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a0ccd7b4a5345edfcd0c3535718a4e9ff7798ffc536bb5b5a0e26ff84732911"
+checksum = "d491ee231a51ae64a5b762114c3ac2104b967aadba1de45c86ca42cf051513b7"
 
 [[package]]
 name = "xkbcommon-dl"
@@ -4923,5 +4923,5 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.71",
+ "syn 2.0.72",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 4ea60eb59e..51fe42197e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -73,7 +73,7 @@ version = "22.0.0"
 anyhow = "1.0.86"
 arrayvec = "0.7"
 bincode = "1"
-bit-vec = "0.7"
+bit-vec = "0.8"
 bitflags = "2"
 bytemuck = { version = "1.16", features = ["derive"] }
 cfg_aliases = "0.1"
@@ -145,7 +145,7 @@ gpu-alloc = "0.6"
 gpu-descriptor = "0.3"
 
 # DX dependencies
-bit-set = "0.6"
+bit-set = "0.8"
 gpu-allocator = { version = "0.26", default-features = false, features = [
     "d3d12",
     "public-winapi",
@@ -157,7 +157,7 @@ hassle-rs = "0.11.0"
 
 # Gles dependencies
 khronos-egl = "6"
-glow = "0.13.1"
+glow = "0.14.0"
 glutin = "0.29.1"
 
 # wasm32 dependencies
@@ -177,7 +177,7 @@ deno_url = "0.143.0"
 deno_web = "0.174.0"
 deno_webidl = "0.143.0"
 deno_webgpu = { version = "0.118.0", path = "./deno_webgpu" }
-tokio = "1.38.0"
+tokio = "1.38.1"
 termcolor = "1.4.1"
 
 [patch."https://github.com/gfx-rs/naga"]
diff --git a/naga/Cargo.toml b/naga/Cargo.toml
index d6c543b567..2d54de8c65 100644
--- a/naga/Cargo.toml
+++ b/naga/Cargo.toml
@@ -60,7 +60,7 @@ compact = []
 [dependencies]
 arbitrary = { version = "1.3", features = ["derive"], optional = true }
 bitflags = "2.6"
-bit-set = "0.6"
+bit-set = "0.8"
 termcolor = { version = "1.4.1" }
 # remove termcolor dep when updating to the next version of codespan-reporting
 # termcolor minimum version was wrong and was fixed in
@@ -70,7 +70,7 @@ rustc-hash = "1.1.0"
 indexmap = { version = "2", features = ["std"] }
 log = "0.4"
 spirv = { version = "0.3", optional = true }
-thiserror = "1.0.62"
+thiserror = "1.0.63"
 serde = { version = "1.0.204", features = ["derive"], optional = true }
 petgraph = { version = "0.6", optional = true }
 pp-rs = { version = "0.2.1", optional = true }
diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml
index ca03b99e2b..d6fe534629 100644
--- a/wgpu-core/Cargo.toml
+++ b/wgpu-core/Cargo.toml
@@ -104,7 +104,7 @@ dx12 = ["hal/dx12"]
 
 [dependencies]
 arrayvec = "0.7"
-bit-vec = "0.7"
+bit-vec = "0.8"
 bitflags = "2"
 bytemuck = { version = "1.16", optional = true }
 document-features.workspace = true
diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml
index 07424c8f9d..a54332fef6 100644
--- a/wgpu-hal/Cargo.toml
+++ b/wgpu-hal/Cargo.toml
@@ -120,7 +120,7 @@ rustc-hash = "1.1"
 log = "0.4"
 
 # backend: Gles
-glow = { version = "0.13.1", optional = true }
+glow = { version = "0.14.0", optional = true }
 
 [dependencies.wgt]
 package = "wgpu-types"
@@ -145,7 +145,7 @@ libloading = { version = ">=0.7, <0.9", optional = true }
 
 [target.'cfg(windows)'.dependencies]
 # backend: Dx12
-bit-set = { version = "0.6", optional = true }
+bit-set = { version = "0.8", optional = true }
 range-alloc = { version = "0.1", optional = true }
 gpu-allocator = { version = "0.27", default-features = false, features = [
     "d3d12",

From 205f1e3ab60a4c8c6b6f901803eb9cfc3a5b62f3 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Mon, 22 Jul 2024 11:21:06 +0200
Subject: [PATCH 068/226] [wgpu-core] fix length of copy in
 `queue_write_texture` #2

Follow-up to 6f16ea460ab437173e14d2f5f3584ca7e1c9841d & 7e112ca4c0686c9626be4c8ddd113e954a2dab21.
---
 tests/tests/write_texture.rs      | 45 +++++++++++++++-
 wgpu-core/src/command/transfer.rs | 14 ++---
 wgpu-core/src/device/queue.rs     | 87 ++++++++++++++-----------------
 3 files changed, 88 insertions(+), 58 deletions(-)

diff --git a/tests/tests/write_texture.rs b/tests/tests/write_texture.rs
index f8d99d6d14..fbb0485918 100644
--- a/tests/tests/write_texture.rs
+++ b/tests/tests/write_texture.rs
@@ -32,7 +32,7 @@ static WRITE_TEXTURE_SUBSET_2D: GpuTestConfiguration =
                 origin: wgpu::Origin3d::ZERO,
                 aspect: wgpu::TextureAspect::All,
             },
-            bytemuck::cast_slice(&data),
+            &data,
             wgpu::ImageDataLayout {
                 offset: 0,
                 bytes_per_row: Some(size),
@@ -127,7 +127,7 @@ static WRITE_TEXTURE_SUBSET_3D: GpuTestConfiguration =
                 origin: wgpu::Origin3d::ZERO,
                 aspect: wgpu::TextureAspect::All,
             },
-            bytemuck::cast_slice(&data),
+            &data,
             wgpu::ImageDataLayout {
                 offset: 0,
                 bytes_per_row: Some(size),
@@ -191,3 +191,44 @@ static WRITE_TEXTURE_SUBSET_3D: GpuTestConfiguration =
             assert_eq!(*byte, 0);
         }
     });
+
+#[gpu_test]
+static WRITE_TEXTURE_NO_OOB: GpuTestConfiguration =
+    GpuTestConfiguration::new().run_async(|ctx| async move {
+        let size = 256;
+
+        let tex = ctx.device.create_texture(&wgpu::TextureDescriptor {
+            label: None,
+            dimension: wgpu::TextureDimension::D2,
+            size: wgpu::Extent3d {
+                width: size,
+                height: size,
+                depth_or_array_layers: 1,
+            },
+            format: wgpu::TextureFormat::R8Uint,
+            usage: wgpu::TextureUsages::COPY_DST,
+            mip_level_count: 1,
+            sample_count: 1,
+            view_formats: &[],
+        });
+        let data = vec![1u8; size as usize * 2 + 100]; // the 100 extra bytes check that we don't attempt to copy OOB (out of bounds) internally
+        ctx.queue.write_texture(
+            wgpu::ImageCopyTexture {
+                texture: &tex,
+                mip_level: 0,
+                origin: wgpu::Origin3d::ZERO,
+                aspect: wgpu::TextureAspect::All,
+            },
+            &data,
+            wgpu::ImageDataLayout {
+                offset: 0,
+                bytes_per_row: Some(size),
+                rows_per_image: Some(size),
+            },
+            wgpu::Extent3d {
+                width: size,
+                height: 2,
+                depth_or_array_layers: 1,
+            },
+        );
+    });
diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs
index 4379777eb5..0e4c21f999 100644
--- a/wgpu-core/src/command/transfer.rs
+++ b/wgpu-core/src/command/transfer.rs
@@ -225,7 +225,7 @@ pub(crate) fn validate_linear_texture_data(
     // the copy size before calling this function (for example via `validate_texture_copy_range`).
     let copy_width = copy_size.width as BufferAddress;
     let copy_height = copy_size.height as BufferAddress;
-    let copy_depth = copy_size.depth_or_array_layers as BufferAddress;
+    let depth_or_array_layers = copy_size.depth_or_array_layers as BufferAddress;
 
     let offset = layout.offset;
 
@@ -253,19 +253,19 @@ pub(crate) fn validate_linear_texture_data(
         }
         bytes_per_row
     } else {
-        if copy_depth > 1 || height_in_blocks > 1 {
+        if depth_or_array_layers > 1 || height_in_blocks > 1 {
             return Err(TransferError::UnspecifiedBytesPerRow);
         }
         0
     };
-    let block_rows_per_image = if let Some(rows_per_image) = layout.rows_per_image {
+    let rows_per_image = if let Some(rows_per_image) = layout.rows_per_image {
         let rows_per_image = rows_per_image as BufferAddress;
         if rows_per_image < height_in_blocks {
             return Err(TransferError::InvalidRowsPerImage);
         }
         rows_per_image
     } else {
-        if copy_depth > 1 {
+        if depth_or_array_layers > 1 {
             return Err(TransferError::UnspecifiedRowsPerImage);
         }
         0
@@ -287,12 +287,12 @@ pub(crate) fn validate_linear_texture_data(
         }
     }
 
-    let bytes_per_image = bytes_per_row * block_rows_per_image;
+    let bytes_per_image = bytes_per_row * rows_per_image;
 
-    let required_bytes_in_copy = if copy_depth == 0 {
+    let required_bytes_in_copy = if depth_or_array_layers == 0 {
         0
     } else {
-        let mut required_bytes_in_copy = bytes_per_image * (copy_depth - 1);
+        let mut required_bytes_in_copy = bytes_per_image * (depth_or_array_layers - 1);
         if height_in_blocks > 0 {
             required_bytes_in_copy += bytes_per_row * (height_in_blocks - 1) + bytes_in_last_row;
         }
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 220085f8f7..833d6c2c95 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -666,7 +666,7 @@ impl Global {
 
         // Note: `_source_bytes_per_array_layer` is ignored since we
         // have a staging copy, and it can have a different value.
-        let (_, _source_bytes_per_array_layer) = validate_linear_texture_data(
+        let (required_bytes_in_copy, _source_bytes_per_array_layer) = validate_linear_texture_data(
             data_layout,
             dst.desc.format,
             destination.aspect,
@@ -682,32 +682,6 @@ impl Global {
                 .map_err(TransferError::from)?;
         }
 
-        let (block_width, block_height) = dst.desc.format.block_dimensions();
-        let width_blocks = size.width / block_width;
-        let height_blocks = size.height / block_height;
-
-        let block_rows_per_image = data_layout.rows_per_image.unwrap_or(
-            // doesn't really matter because we need this only if we copy
-            // more than one layer, and then we validate for this being not
-            // None
-            height_blocks,
-        );
-
-        let block_size = dst
-            .desc
-            .format
-            .block_copy_size(Some(destination.aspect))
-            .unwrap();
-        let bytes_per_row_alignment =
-            get_lowest_common_denom(device.alignments.buffer_copy_pitch.get() as u32, block_size);
-        let stage_bytes_per_row =
-            wgt::math::align_to(block_size * width_blocks, bytes_per_row_alignment);
-
-        let block_rows_in_copy =
-            (size.depth_or_array_layers - 1) * block_rows_per_image + height_blocks;
-        let stage_size =
-            wgt::BufferSize::new(stage_bytes_per_row as u64 * block_rows_in_copy as u64).unwrap();
-
         let mut pending_writes = device.pending_writes.lock();
         let encoder = pending_writes.activate();
 
@@ -763,33 +737,47 @@ impl Global {
 
         let dst_raw = dst.try_raw(&snatch_guard)?;
 
-        let bytes_per_row = data_layout
-            .bytes_per_row
-            .unwrap_or(width_blocks * block_size);
+        let (block_width, block_height) = dst.desc.format.block_dimensions();
+        let width_in_blocks = size.width / block_width;
+        let height_in_blocks = size.height / block_height;
+
+        let block_size = dst
+            .desc
+            .format
+            .block_copy_size(Some(destination.aspect))
+            .unwrap();
+        let bytes_in_last_row = width_in_blocks * block_size;
+
+        let bytes_per_row = data_layout.bytes_per_row.unwrap_or(bytes_in_last_row);
+        let rows_per_image = data_layout.rows_per_image.unwrap_or(height_in_blocks);
+
+        let bytes_per_row_alignment =
+            get_lowest_common_denom(device.alignments.buffer_copy_pitch.get() as u32, block_size);
+        let stage_bytes_per_row = wgt::math::align_to(bytes_in_last_row, bytes_per_row_alignment);
 
         // Platform validation requires that the staging buffer always be
         // freed, even if an error occurs. All paths from here must call
         // `device.pending_writes.consume`.
-        let mut staging_buffer = StagingBuffer::new(device, stage_size)?;
-
-        if stage_bytes_per_row == bytes_per_row {
+        let staging_buffer = if stage_bytes_per_row == bytes_per_row {
             profiling::scope!("copy aligned");
             // Fast path if the data is already being aligned optimally.
-            unsafe {
-                staging_buffer.write_with_offset(
-                    data,
-                    data_layout.offset as isize,
-                    0,
-                    (data.len() as u64 - data_layout.offset) as usize,
-                );
-            }
+            let stage_size = wgt::BufferSize::new(required_bytes_in_copy).unwrap();
+            let mut staging_buffer = StagingBuffer::new(device, stage_size)?;
+            staging_buffer.write(&data[data_layout.offset as usize..]);
+            staging_buffer
         } else {
             profiling::scope!("copy chunked");
             // Copy row by row into the optimal alignment.
+            let block_rows_in_copy =
+                (size.depth_or_array_layers - 1) * rows_per_image + height_in_blocks;
+            let stage_size =
+                wgt::BufferSize::new(stage_bytes_per_row as u64 * block_rows_in_copy as u64)
+                    .unwrap();
+            let mut staging_buffer = StagingBuffer::new(device, stage_size)?;
             let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize;
             for layer in 0..size.depth_or_array_layers {
-                let rows_offset = layer * block_rows_per_image;
-                for row in rows_offset..rows_offset + height_blocks {
+                let rows_offset = layer * rows_per_image;
+                for row in rows_offset..rows_offset + height_in_blocks {
                     let src_offset = data_layout.offset as u32 + row * bytes_per_row;
                     let dst_offset = row * stage_bytes_per_row;
                     unsafe {
@@ -802,20 +790,21 @@ impl Global {
                     }
                 }
             }
-        }
+            staging_buffer
+        };
 
         let staging_buffer = staging_buffer.flush();
 
-        let regions = (0..array_layer_count).map(|rel_array_layer| {
+        let regions = (0..array_layer_count).map(|array_layer_offset| {
             let mut texture_base = dst_base.clone();
-            texture_base.array_layer += rel_array_layer;
+            texture_base.array_layer += array_layer_offset;
             hal::BufferTextureCopy {
                 buffer_layout: wgt::ImageDataLayout {
-                    offset: rel_array_layer as u64
-                        * block_rows_per_image as u64
+                    offset: array_layer_offset as u64
+                        * rows_per_image as u64
                         * stage_bytes_per_row as u64,
                     bytes_per_row: Some(stage_bytes_per_row),
-                    rows_per_image: Some(block_rows_per_image),
+                    rows_per_image: Some(rows_per_image),
                 },
                 texture_base,
                 size: hal_copy_size,

From 34d492a647d00b1a1a32322a2c9dab7ce933d1be Mon Sep 17 00:00:00 2001
From: Nicolas Silva <nical@fastmail.com>
Date: Tue, 23 Jul 2024 15:25:42 +0200
Subject: [PATCH 069/226] Reexport InternalCounters, HalCounters and
 CoreCounters in wgpu

---
 wgpu/src/lib.rs | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs
index e8c33ab583..9e0f4c42b1 100644
--- a/wgpu/src/lib.rs
+++ b/wgpu/src/lib.rs
@@ -58,19 +58,19 @@ pub use wgt::{
     AdapterInfo, AddressMode, AstcBlock, AstcChannel, Backend, Backends, BindGroupLayoutEntry,
     BindingType, BlendComponent, BlendFactor, BlendOperation, BlendState, BufferAddress,
     BufferBindingType, BufferSize, BufferUsages, Color, ColorTargetState, ColorWrites,
-    CommandBufferDescriptor, CompareFunction, CompositeAlphaMode, DepthBiasState,
+    CommandBufferDescriptor, CompareFunction, CompositeAlphaMode, CoreCounters, DepthBiasState,
     DepthStencilState, DeviceLostReason, DeviceType, DownlevelCapabilities, DownlevelFlags,
     Dx12Compiler, DynamicOffset, Extent3d, Face, Features, FilterMode, FrontFace,
-    Gles3MinorVersion, ImageDataLayout, ImageSubresourceRange, IndexFormat, InstanceDescriptor,
-    InstanceFlags, Limits, MaintainResult, MemoryHints, MultisampleState, Origin2d, Origin3d,
-    PipelineStatisticsTypes, PolygonMode, PowerPreference, PredefinedColorSpace, PresentMode,
-    PresentationTimestamp, PrimitiveState, PrimitiveTopology, PushConstantRange, QueryType,
-    RenderBundleDepthStencil, SamplerBindingType, SamplerBorderColor, ShaderLocation, ShaderModel,
-    ShaderStages, StencilFaceState, StencilOperation, StencilState, StorageTextureAccess,
-    SurfaceCapabilities, SurfaceStatus, TextureAspect, TextureDimension, TextureFormat,
-    TextureFormatFeatureFlags, TextureFormatFeatures, TextureSampleType, TextureUsages,
-    TextureViewDimension, VertexAttribute, VertexFormat, VertexStepMode, WasmNotSend,
-    WasmNotSendSync, WasmNotSync, COPY_BUFFER_ALIGNMENT, COPY_BYTES_PER_ROW_ALIGNMENT,
+    Gles3MinorVersion, HalCounters, ImageDataLayout, ImageSubresourceRange, IndexFormat,
+    InstanceDescriptor, InstanceFlags, InternalCounters, Limits, MaintainResult, MemoryHints,
+    MultisampleState, Origin2d, Origin3d, PipelineStatisticsTypes, PolygonMode, PowerPreference,
+    PredefinedColorSpace, PresentMode, PresentationTimestamp, PrimitiveState, PrimitiveTopology,
+    PushConstantRange, QueryType, RenderBundleDepthStencil, SamplerBindingType, SamplerBorderColor,
+    ShaderLocation, ShaderModel, ShaderStages, StencilFaceState, StencilOperation, StencilState,
+    StorageTextureAccess, SurfaceCapabilities, SurfaceStatus, TextureAspect, TextureDimension,
+    TextureFormat, TextureFormatFeatureFlags, TextureFormatFeatures, TextureSampleType,
+    TextureUsages, TextureViewDimension, VertexAttribute, VertexFormat, VertexStepMode,
+    WasmNotSend, WasmNotSendSync, WasmNotSync, COPY_BUFFER_ALIGNMENT, COPY_BYTES_PER_ROW_ALIGNMENT,
     MAP_ALIGNMENT, PUSH_CONSTANT_ALIGNMENT, QUERY_RESOLVE_BUFFER_ALIGNMENT, QUERY_SET_MAX_QUERIES,
     QUERY_SIZE, VERTEX_STRIDE_ALIGNMENT,
 };

From ebb011fc6b24925e9475e36766ba05a88b434ac3 Mon Sep 17 00:00:00 2001
From: Jim Blandy <jimb@red-bean.com>
Date: Mon, 22 Jul 2024 13:15:37 -0700
Subject: [PATCH 070/226] [core] Use `ManuallyDrop` for
 `wgpu_core::device::Queue::raw`.

Change the field `wgpu_core::device::Queue::raw` from an
`Option<A::Queue>` to a `std::mem::ManuallyDrop<A::Queue>`. Replace
various `.as_ref().unwrap()` chains with calls to a new accessor
function `Queue::raw`.

An `Option` is misleading, as this field is always populated during
the lifetime of a `Queue`. Instead, we simply have a field whose value
needs to be moved in `<Queue as Drop>::drop`; `ManuallyDrop` is the
Rust idiom for this situation.
---
 wgpu-core/src/device/queue.rs    | 34 +++++++++++++++++++++-----------
 wgpu-core/src/device/resource.rs |  2 +-
 wgpu-core/src/instance.rs        |  6 +-----
 wgpu-core/src/present.rs         |  8 +-------
 4 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 833d6c2c95..f5bc296534 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -30,7 +30,7 @@ use smallvec::SmallVec;
 
 use std::{
     iter,
-    mem::{self},
+    mem::{self, ManuallyDrop},
     ptr::NonNull,
     sync::{atomic::Ordering, Arc},
 };
@@ -39,10 +39,23 @@ use thiserror::Error;
 use super::Device;
 
 pub struct Queue<A: HalApi> {
-    pub(crate) raw: Option<A::Queue>,
+    raw: ManuallyDrop<A::Queue>,
     pub(crate) device: Arc<Device<A>>,
 }
 
+impl<A: HalApi> Queue<A> {
+    pub(crate) fn new(device: Arc<Device<A>>, raw: A::Queue) -> Self {
+        Queue {
+            raw: ManuallyDrop::new(raw),
+            device,
+        }
+    }
+
+    pub(crate) fn raw(&self) -> &A::Queue {
+        &self.raw
+    }
+}
+
 crate::impl_resource_type!(Queue);
 // TODO: https://github.com/gfx-rs/wgpu/issues/4014
 impl<A: HalApi> Labeled for Queue<A> {
@@ -56,7 +69,8 @@ crate::impl_storage_item!(Queue);
 impl<A: HalApi> Drop for Queue<A> {
     fn drop(&mut self) {
         resource_log!("Drop {}", self.error_ident());
-        let queue = self.raw.take().unwrap();
+        // SAFETY: we never access `self.raw` beyond this point.
+        let queue = unsafe { std::mem::ManuallyDrop::take(&mut self.raw) };
         self.device.release_queue(queue);
     }
 }
@@ -1272,11 +1286,9 @@ impl Global {
                 }
             }
 
-            if let Some(pending_execution) = pending_writes.pre_submit(
-                &device.command_allocator,
-                device.raw(),
-                queue.raw.as_ref().unwrap(),
-            )? {
+            if let Some(pending_execution) =
+                pending_writes.pre_submit(&device.command_allocator, device.raw(), queue.raw())?
+            {
                 active_executions.insert(0, pending_execution);
             }
 
@@ -1298,9 +1310,7 @@ impl Global {
 
                 unsafe {
                     queue
-                        .raw
-                        .as_ref()
-                        .unwrap()
+                        .raw()
                         .submit(
                             &hal_command_buffers,
                             &submit_surface_textures,
@@ -1356,7 +1366,7 @@ impl Global {
     ) -> Result<f32, InvalidQueue> {
         let hub = A::hub(self);
         match hub.queues.get(queue_id) {
-            Ok(queue) => Ok(unsafe { queue.raw.as_ref().unwrap().get_timestamp_period() }),
+            Ok(queue) => Ok(unsafe { queue.raw().get_timestamp_period() }),
             Err(_) => Err(InvalidQueue),
         }
     }
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index e0f2ddfe57..4a063fbf2f 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -1601,7 +1601,7 @@ impl<A: HalApi> Device<A> {
 
         let encoder = self
             .command_allocator
-            .acquire_encoder(self.raw(), queue.raw.as_ref().unwrap())?;
+            .acquire_encoder(self.raw(), queue.raw())?;
 
         Ok(command::CommandBuffer::new(
             encoder,
diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index a16fb0a29f..ee50bd949f 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -293,11 +293,7 @@ impl<A: HalApi> Adapter<A> {
             instance_flags,
         ) {
             let device = Arc::new(device);
-            let queue = Queue {
-                device: device.clone(),
-                raw: Some(hal_device.queue),
-            };
-            let queue = Arc::new(queue);
+            let queue = Arc::new(Queue::new(device.clone(), hal_device.queue));
             device.set_queue(&queue);
             return Ok((device, queue));
         }
diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs
index b59493f316..7a2200eae1 100644
--- a/wgpu-core/src/present.rs
+++ b/wgpu-core/src/present.rs
@@ -326,13 +326,7 @@ impl Global {
                             log::error!("Presented frame is from a different surface");
                             Err(hal::SurfaceError::Lost)
                         } else {
-                            unsafe {
-                                queue
-                                    .raw
-                                    .as_ref()
-                                    .unwrap()
-                                    .present(suf.unwrap(), raw.take().unwrap())
-                            }
+                            unsafe { queue.raw().present(suf.unwrap(), raw.take().unwrap()) }
                         }
                     }
                     _ => unreachable!(),

From b350ca432b67391f936e7a4fc296983c1b6b2c0c Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Mon, 22 Jul 2024 16:35:34 -0400
Subject: [PATCH 071/226] style: use uppercase for `SAFETY` comments

---
 wgpu-hal/src/vulkan/instance.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/wgpu-hal/src/vulkan/instance.rs b/wgpu-hal/src/vulkan/instance.rs
index ec720f3788..f27cef55fa 100644
--- a/wgpu-hal/src/vulkan/instance.rs
+++ b/wgpu-hal/src/vulkan/instance.rs
@@ -950,7 +950,7 @@ impl crate::Surface for super::Surface {
         device: &super::Device,
         config: &crate::SurfaceConfiguration,
     ) -> Result<(), crate::SurfaceError> {
-        // Safety: `configure`'s contract guarantees there are no resources derived from the swapchain in use.
+        // SAFETY: `configure`'s contract guarantees there are no resources derived from the swapchain in use.
         let mut swap_chain = self.swapchain.write();
         let old = swap_chain
             .take()
@@ -964,7 +964,7 @@ impl crate::Surface for super::Surface {
 
     unsafe fn unconfigure(&self, device: &super::Device) {
         if let Some(sc) = self.swapchain.write().take() {
-            // Safety: `unconfigure`'s contract guarantees there are no resources derived from the swapchain in use.
+            // SAFETY: `unconfigure`'s contract guarantees there are no resources derived from the swapchain in use.
             let swapchain = unsafe { sc.release_resources(&device.shared.raw) };
             unsafe { swapchain.functor.destroy_swapchain(swapchain.raw, None) };
         }

From 667096491de04a6128a928789285b11bb38779dd Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Mon, 22 Jul 2024 17:10:08 -0400
Subject: [PATCH 072/226] style: remove trailing colons in `Safety` section
 names

---
 wgpu-hal/src/gles/emscripten.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wgpu-hal/src/gles/emscripten.rs b/wgpu-hal/src/gles/emscripten.rs
index 7372dbd369..8174614f02 100644
--- a/wgpu-hal/src/gles/emscripten.rs
+++ b/wgpu-hal/src/gles/emscripten.rs
@@ -11,7 +11,7 @@ extern "C" {
 ///
 /// returns true on success
 ///
-/// # Safety:
+/// # Safety
 ///
 /// - opengl context MUST BE current
 /// - extension_name_null_terminated argument must be a valid string with null terminator.

From 6d7975eb3b443f6ecc9c81495abdd555eaf18eec Mon Sep 17 00:00:00 2001
From: Imbris <imbrisf@gmail.com>
Date: Thu, 25 Apr 2024 00:23:41 -0400
Subject: [PATCH 073/226] [naga hlsl-out glsl-out] Work around backend
 loop/switch bugs.

Introduce a new module, `naga::back::continue_forward`, containing
shared code for rendering Naga `Continue` statements as backend
`break` statements and assignments to introduced `bool` locals.
See the module's documentation for details.

- [hlsl-out] Transform degenerate single body switches into `do-while`
  loops. Properly render `Continue` statements enclosed by
  `Switch` statements enclosed by `Loop` statements.

- [glsl-out] Transform degenerate single body switches into `do-while`
  loops.

Improve `naga xtask validate spv` error message.

Fixes #4485.
Fixes #4514.
---
 CHANGELOG.md                                  |   5 +
 naga/src/back/continue_forward.rs             | 311 ++++++++++++++++++
 naga/src/back/glsl/mod.rs                     | 128 +++++--
 naga/src/back/hlsl/mod.rs                     |   1 +
 naga/src/back/hlsl/writer.rs                  | 255 ++++++++------
 naga/src/back/mod.rs                          |   3 +
 naga/tests/in/control-flow.wgsl               |  93 ++++++
 .../out/glsl/control-flow.main.Compute.glsl   | 121 ++++++-
 naga/tests/out/hlsl/control-flow.hlsl         | 154 ++++++++-
 naga/tests/out/msl/control-flow.msl           | 108 ++++++
 naga/tests/out/spv/control-flow.spvasm        | 251 ++++++++++----
 naga/tests/out/wgsl/control-flow.wgsl         |  86 +++++
 naga/xtask/src/validate.rs                    |  18 +-
 tests/src/init.rs                             |  17 +-
 tests/src/params.rs                           |  11 +
 tests/src/run.rs                              |   3 +-
 tests/tests/create_surface_error.rs           |   2 +-
 tests/tests/device.rs                         |   2 +-
 tests/tests/regression/issue_4485.rs          | 106 ++++++
 tests/tests/regression/issue_4485.wgsl        | 108 ++++++
 tests/tests/regression/issue_4514.rs          | 106 ++++++
 tests/tests/regression/issue_4514.wgsl        |  68 ++++
 tests/tests/root.rs                           |   2 +
 23 files changed, 1743 insertions(+), 216 deletions(-)
 create mode 100644 naga/src/back/continue_forward.rs
 create mode 100644 tests/tests/regression/issue_4485.rs
 create mode 100644 tests/tests/regression/issue_4485.wgsl
 create mode 100644 tests/tests/regression/issue_4514.rs
 create mode 100644 tests/tests/regression/issue_4514.wgsl

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c781d3f604..c9eccafcda 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -296,6 +296,11 @@ This release fixes the validation errors whenever a surface is used with the vul
 
 -  Fix regression on OpenGL (EGL) where non-sRGB still used sRGB [#5642](https://github.com/gfx-rs/wgpu/pull/5642)
 
+#### Naga
+
+- Work around shader consumers that have bugs handling `switch` statements with a single body for all cases. These are now written as `do {} while(false);` loops in hlsl-out and glsl-out. By @Imberflur in [#5654](https://github.com/gfx-rs/wgpu/pull/5654)
+- In hlsl-out, defer `continue` statements in switches by setting a flag and breaking from the switch. This allows such constructs to work with FXC which does not support `continue` within a switch. By @Imberflur in [#5654](https://github.com/gfx-rs/wgpu/pull/5654)
+
 ## v0.20.0 (2024-04-28)
 
 ### Major Changes
diff --git a/naga/src/back/continue_forward.rs b/naga/src/back/continue_forward.rs
new file mode 100644
index 0000000000..cecb93a837
--- /dev/null
+++ b/naga/src/back/continue_forward.rs
@@ -0,0 +1,311 @@
+//! Workarounds for platform bugs and limitations in switches and loops.
+//!
+//! In these docs, we use CamelCase links for Naga IR concepts, and ordinary
+//! `code` formatting for HLSL or GLSL concepts.
+//!
+//! ## Avoiding `continue` within `switch`
+//!
+//! As described in <https://github.com/gfx-rs/wgpu/issues/4485>, the FXC HLSL
+//! compiler doesn't allow `continue` statements within `switch` statements, but
+//! Naga IR does. We work around this by introducing synthetic boolean local
+//! variables and branches.
+//!
+//! Specifically:
+//!
+//! - We generate code for [`Continue`] statements within [`SwitchCase`]s that
+//!   sets an introduced `bool` local to `true` and does a `break`, jumping to
+//!   immediately after the generated `switch`.
+//!
+//! - When generating code for a [`Switch`] statement, we conservatively assume
+//!   it might contain such a [`Continue`] statement, so:
+//!
+//!   - If it's the outermost such [`Switch`] within a [`Loop`], we declare the
+//!     `bool` local ahead of the switch, initialized to `false`. Immediately
+//!     after the `switch`, we check the local and do a `continue` if it's set.
+//!
+//!   - If the [`Switch`] is nested within other [`Switch`]es, then after the
+//!     generated `switch`, we check the local (which we know was declared
+//!     before the surrounding `switch`) and do a `break` if it's set.
+//!
+//!   - As an optimization, we only generate the check of the local if a
+//!     [`Continue`] statement is encountered within the [`Switch`]. This may
+//!     help drivers more easily identify that the `bool` is unused.
+//!
+//! So while we "weaken" the [`Continue`] statement by rendering it as a `break`
+//! statement, we also place checks immediately at the locations to which those
+//! `break` statements will jump, until we can be sure we've reached the
+//! intended target of the original [`Continue`].
+//!
+//! In the case of nested [`Loop`] and [`Switch`] statements, there may be
+//! multiple introduced `bool` locals in scope, but there's no problem knowing
+//! which one to operate on. At any point, there is at most one [`Loop`]
+//! statement that could be targeted by a [`Continue`] statement, so the correct
+//! `bool` local to set and test is always the one introduced for the innermost
+//! enclosing [`Loop`]'s outermost [`Switch`].
+//!
+//! ## Avoiding single body `switch` statements
+//!
+//! As described in <https://github.com/gfx-rs/wgpu/issues/4514>, some language
+//! front ends miscompile `switch` statements where all cases branch to the same
+//! body. Our HLSL and GLSL backends render [`Switch`] statements with a single
+//! [`SwitchCase`] as `do {} while(false);` loops.
+//!
+//! However, this rewriting introduces a new loop that could "capture"
+//! `continue` statements in its body. To avoid doing so, we apply the
+//! [`Continue`]-to-`break` transformation described above.
+//!
+//! [`Continue`]: crate::Statement::Continue
+//! [`Loop`]: crate::Statement::Loop
+//! [`Switch`]: crate::Statement::Switch
+//! [`SwitchCase`]: crate::SwitchCase
+
+use crate::proc::Namer;
+use std::rc::Rc;
+
+/// A summary of the code surrounding a statement.
+enum Nesting {
+    /// Currently nested in at least one [`Loop`] statement.
+    ///
+    /// [`Continue`] should apply to the innermost loop.
+    ///
+    /// When this entry is on the top of the stack:
+    ///
+    /// * When entering an inner [`Loop`] statement, push a [`Loop`][nl] state
+    ///   onto the stack.
+    ///
+    /// * When entering a nested [`Switch`] statement, push a [`Switch`][ns]
+    ///   state onto the stack with a new variable name. Before the generated
+    ///   `switch`, introduce a `bool` local with that name, initialized to
+    ///   `false`.
+    ///
+    /// When exiting the [`Loop`] for which this entry was pushed, pop it from
+    /// the stack.
+    ///
+    /// [`Continue`]: crate::Statement::Continue
+    /// [`Loop`]: crate::Statement::Loop
+    /// [`Switch`]: crate::Statement::Switch
+    /// [ns]: Nesting::Switch
+    /// [nl]: Nesting::Loop
+    Loop,
+
+    /// Currently nested in at least one [`Switch`] that may need to forward
+    /// [`Continue`]s.
+    ///
+    /// This includes [`Switch`]es rendered as `do {} while(false)` loops, but
+    /// doesn't need to include regular [`Switch`]es in backends that can
+    /// support `continue` within switches.
+    ///
+    /// [`Continue`] should be forwarded to the innermost surrounding [`Loop`].
+    ///
+    /// When this entry is on the top of the stack:
+    ///
+    /// * When entering a nested [`Loop`], push a [`Loop`][nl] state onto the
+    ///   stack.
+    ///
+    /// * When entering a nested [`Switch`], push a [`Switch`][ns] state onto
+    ///   the stack with a clone of the introduced `bool` variable's name.
+    ///
+    /// * When encountering a [`Continue`] statement, render it as code to set
+    ///   the introduced `bool` local (whose name is held in [`variable`]) to
+    ///   `true`, and then `break`. Set [`continue_encountered`] to `true` to
+    ///   record that the [`Switch`] contains a [`Continue`].
+    ///
+    /// * When exiting this [`Switch`], pop its entry from the stack. If
+    ///   [`continue_encountered`] is set, then we have rendered [`Continue`]
+    ///   statements as `break` statements that jump to this point. Generate
+    ///   code to check `variable`, and if it is `true`:
+    ///
+    ///     * If there is another [`Switch`][ns] left on top of the stack, set
+    ///       its `continue_encountered` flag, and generate a `break`. (Both
+    ///       [`Switch`][ns]es are within the same [`Loop`] and share the same
+    ///       introduced variable, so there's no need to set another flag to
+    ///       continue to exit the `switch`es.)
+    ///
+    ///     * Otherwise, `continue`.
+    ///
+    /// When we exit the [`Switch`] for which this entry was pushed, pop it.
+    ///
+    /// [`Continue`]: crate::Statement::Continue
+    /// [`Loop`]: crate::Statement::Loop
+    /// [`Switch`]: crate::Statement::Switch
+    /// [`variable`]: Nesting::Switch::variable
+    /// [`continue_encountered`]: Nesting::Switch::continue_encountered
+    /// [ns]: Nesting::Switch
+    /// [nl]: Nesting::Loop
+    Switch {
+        variable: Rc<String>,
+
+        /// Set if we've generated code for a [`Continue`] statement with this
+        /// entry on the top of the stack.
+        ///
+        /// If this is still clear when we finish rendering the [`Switch`], then
+        /// we know we don't need to generate branch forwarding code. Omitting
+        /// that may make it easier for drivers to tell that the `bool` we
+        /// introduced ahead of the [`Switch`] is actually unused.
+        ///
+        /// [`Continue`]: crate::Statement::Continue
+        /// [`Switch`]: crate::Statement::Switch
+        continue_encountered: bool,
+    },
+}
+
+/// A micro-IR for code a backend should generate after a [`Switch`].
+///
+/// [`Switch`]: crate::Statement::Switch
+pub(super) enum ExitControlFlow {
+    None,
+    /// Emit `if (continue_variable) { continue; }`
+    Continue {
+        variable: Rc<String>,
+    },
+    /// Emit `if (continue_variable) { break; }`
+    ///
+    /// Used after a [`Switch`] to exit from an enclosing [`Switch`].
+    ///
+    /// After the enclosing switch, its associated check will consult this same
+    /// variable, see that it is set, and exit early.
+    ///
+    /// [`Switch`]: crate::Statement::Switch
+    Break {
+        variable: Rc<String>,
+    },
+}
+
+/// Utility for tracking nesting of loops and switches to orchestrate forwarding
+/// of continue statements inside of a switch to the enclosing loop.
+///
+/// See [module docs](self) for why we need this.
+#[derive(Default)]
+pub(super) struct ContinueCtx {
+    stack: Vec<Nesting>,
+}
+
+impl ContinueCtx {
+    /// Resets internal state.
+    ///
+    /// Use this to reuse memory between writing sessions.
+    pub fn clear(&mut self) {
+        self.stack.clear();
+    }
+
+    /// Updates internal state to record entering a [`Loop`] statement.
+    ///
+    /// [`Loop`]: crate::Statement::Loop
+    pub fn enter_loop(&mut self) {
+        self.stack.push(Nesting::Loop);
+    }
+
+    /// Updates internal state to record exiting a [`Loop`] statement.
+    ///
+    /// [`Loop`]: crate::Statement::Loop
+    pub fn exit_loop(&mut self) {
+        if !matches!(self.stack.pop(), Some(Nesting::Loop)) {
+            unreachable!("ContinueCtx stack out of sync");
+        }
+    }
+
+    /// Updates internal state to record entering a [`Switch`] statement.
+    ///
+    /// Return `Some(variable)` if this [`Switch`] is nested within a [`Loop`],
+    /// and the caller should introduce a new `bool` local variable named
+    /// `variable` above the `switch`, for forwarding [`Continue`] statements.
+    ///
+    /// `variable` is guaranteed not to conflict with any names used by the
+    /// program itself.
+    ///
+    /// [`Continue`]: crate::Statement::Continue
+    /// [`Loop`]: crate::Statement::Loop
+    /// [`Switch`]: crate::Statement::Switch
+    pub fn enter_switch(&mut self, namer: &mut Namer) -> Option<Rc<String>> {
+        match self.stack.last() {
+            // If the stack is empty, we are not in a loop, so we don't need to
+            // forward continue statements within this `Switch`. We can leave
+            // the stack empty.
+            None => None,
+            Some(&Nesting::Loop { .. }) => {
+                let variable = Rc::new(namer.call("should_continue"));
+                self.stack.push(Nesting::Switch {
+                    variable: Rc::clone(&variable),
+                    continue_encountered: false,
+                });
+                Some(variable)
+            }
+            Some(&Nesting::Switch { ref variable, .. }) => {
+                self.stack.push(Nesting::Switch {
+                    variable: Rc::clone(variable),
+                    continue_encountered: false,
+                });
+                // We have already declared the variable before some enclosing
+                // `Switch`.
+                None
+            }
+        }
+    }
+
+    /// Update internal state to record leaving a [`Switch`] statement.
+    ///
+    /// Return an [`ExitControlFlow`] value indicating what code should be
+    /// introduced after the generated `switch` to forward continues.
+    ///
+    /// [`Switch`]: crate::Statement::Switch
+    pub fn exit_switch(&mut self) -> ExitControlFlow {
+        match self.stack.pop() {
+            // This doesn't indicate a problem: we don't start pushing entries
+            // for `Switch` statements unless we have an enclosing `Loop`.
+            None => ExitControlFlow::None,
+            Some(Nesting::Loop { .. }) => {
+                unreachable!("Unexpected loop state when exiting switch");
+            }
+            Some(Nesting::Switch {
+                variable,
+                continue_encountered: inner_continue,
+            }) => {
+                if !inner_continue {
+                    // No `Continue` statement was encountered, so we didn't
+                    // introduce any `break`s jumping to this point.
+                    ExitControlFlow::None
+                } else if let Some(&mut Nesting::Switch {
+                    continue_encountered: ref mut outer_continue,
+                    ..
+                }) = self.stack.last_mut()
+                {
+                    // This is nested in another `Switch`. Propagate upwards
+                    // that there is a continue statement present.
+                    *outer_continue = true;
+                    ExitControlFlow::Break { variable }
+                } else {
+                    ExitControlFlow::Continue { variable }
+                }
+            }
+        }
+    }
+
+    /// Determine what to generate for a [`Continue`] statement.
+    ///
+    /// If we can generate an ordinary `continue` statement, return `None`.
+    ///
+    /// Otherwise, we're enclosed by a [`Switch`] that is itself enclosed by a
+    /// [`Loop`]. Return `Some(variable)` to indicate that the [`Continue`]
+    /// should be rendered as setting `variable` to `true`, and then doing a
+    /// `break`.
+    ///
+    /// This also notes that we've encountered a [`Continue`] statement, so that
+    /// we can generate the right code to forward the branch following the
+    /// enclosing `switch`.
+    ///
+    /// [`Continue`]: crate::Statement::Continue
+    /// [`Loop`]: crate::Statement::Loop
+    /// [`Switch`]: crate::Statement::Switch
+    pub fn continue_encountered(&mut self) -> Option<&str> {
+        if let Some(&mut Nesting::Switch {
+            ref variable,
+            ref mut continue_encountered,
+        }) = self.stack.last_mut()
+        {
+            *continue_encountered = true;
+            Some(variable)
+        } else {
+            None
+        }
+    }
+}
diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs
index bc2d2a90d8..7ad1f3c597 100644
--- a/naga/src/back/glsl/mod.rs
+++ b/naga/src/back/glsl/mod.rs
@@ -545,6 +545,11 @@ pub struct Writer<'a, W> {
     named_expressions: crate::NamedExpressions,
     /// Set of expressions that need to be baked to avoid unnecessary repetition in output
     need_bake_expressions: back::NeedBakeExpressions,
+    /// Information about nesting of loops and switches.
+    ///
+    /// Used for forwarding continue statements in switches that have been
+    /// transformed to `do {} while(false);` loops.
+    continue_ctx: back::continue_forward::ContinueCtx,
     /// How many views to render to, if doing multiview rendering.
     multiview: Option<std::num::NonZeroU32>,
     /// Mapping of varying variables to their location. Needed for reflections.
@@ -619,6 +624,7 @@ impl<'a, W: Write> Writer<'a, W> {
             block_id: IdGenerator::default(),
             named_expressions: Default::default(),
             need_bake_expressions: Default::default(),
+            continue_ctx: back::continue_forward::ContinueCtx::default(),
             varying: Default::default(),
         };
 
@@ -2082,42 +2088,94 @@ impl<'a, W: Write> Writer<'a, W> {
                 selector,
                 ref cases,
             } => {
-                // Start the switch
-                write!(self.out, "{level}")?;
-                write!(self.out, "switch(")?;
-                self.write_expr(selector, ctx)?;
-                writeln!(self.out, ") {{")?;
-
-                // Write all cases
                 let l2 = level.next();
-                for case in cases {
-                    match case.value {
-                        crate::SwitchValue::I32(value) => write!(self.out, "{l2}case {value}:")?,
-                        crate::SwitchValue::U32(value) => write!(self.out, "{l2}case {value}u:")?,
-                        crate::SwitchValue::Default => write!(self.out, "{l2}default:")?,
-                    }
+                // Some GLSL consumers may not handle switches with a single
+                // body correctly: See wgpu#4514. Write such switch statements
+                // as a `do {} while(false);` loop instead.
+                //
+                // Since doing so may inadvertently capture `continue`
+                // statements in the switch body, we must apply continue
+                // forwarding. See the `naga::back::continue_forward` module
+                // docs for details.
+                let one_body = cases
+                    .iter()
+                    .rev()
+                    .skip(1)
+                    .all(|case| case.fall_through && case.body.is_empty());
+                if one_body {
+                    // Unlike HLSL, in GLSL `continue_ctx` only needs to know
+                    // about [`Switch`] statements that are being rendered as
+                    // `do-while` loops.
+                    if let Some(variable) = self.continue_ctx.enter_switch(&mut self.namer) {
+                        writeln!(self.out, "{level}bool {variable} = false;",)?;
+                    };
+                    writeln!(self.out, "{level}do {{")?;
+                    // Note: Expressions have no side-effects so we don't need to emit selector expression.
 
-                    let write_block_braces = !(case.fall_through && case.body.is_empty());
-                    if write_block_braces {
-                        writeln!(self.out, " {{")?;
-                    } else {
-                        writeln!(self.out)?;
+                    // Body
+                    if let Some(case) = cases.last() {
+                        for sta in case.body.iter() {
+                            self.write_stmt(sta, ctx, l2)?;
+                        }
                     }
-
-                    for sta in case.body.iter() {
-                        self.write_stmt(sta, ctx, l2.next())?;
+                    // End do-while
+                    writeln!(self.out, "{level}}} while(false);")?;
+
+                    // Handle any forwarded continue statements.
+                    use back::continue_forward::ExitControlFlow;
+                    let op = match self.continue_ctx.exit_switch() {
+                        ExitControlFlow::None => None,
+                        ExitControlFlow::Continue { variable } => Some(("continue", variable)),
+                        ExitControlFlow::Break { variable } => Some(("break", variable)),
+                    };
+                    if let Some((control_flow, variable)) = op {
+                        writeln!(self.out, "{level}if ({variable}) {{")?;
+                        writeln!(self.out, "{l2}{control_flow};")?;
+                        writeln!(self.out, "{level}}}")?;
                     }
+                } else {
+                    // Start the switch
+                    write!(self.out, "{level}")?;
+                    write!(self.out, "switch(")?;
+                    self.write_expr(selector, ctx)?;
+                    writeln!(self.out, ") {{")?;
+
+                    // Write all cases
+                    for case in cases {
+                        match case.value {
+                            crate::SwitchValue::I32(value) => {
+                                write!(self.out, "{l2}case {value}:")?
+                            }
+                            crate::SwitchValue::U32(value) => {
+                                write!(self.out, "{l2}case {value}u:")?
+                            }
+                            crate::SwitchValue::Default => write!(self.out, "{l2}default:")?,
+                        }
 
-                    if !case.fall_through && case.body.last().map_or(true, |s| !s.is_terminator()) {
-                        writeln!(self.out, "{}break;", l2.next())?;
-                    }
+                        let write_block_braces = !(case.fall_through && case.body.is_empty());
+                        if write_block_braces {
+                            writeln!(self.out, " {{")?;
+                        } else {
+                            writeln!(self.out)?;
+                        }
+
+                        for sta in case.body.iter() {
+                            self.write_stmt(sta, ctx, l2.next())?;
+                        }
+
+                        if !case.fall_through
+                            && case.body.last().map_or(true, |s| !s.is_terminator())
+                        {
+                            writeln!(self.out, "{}break;", l2.next())?;
+                        }
 
-                    if write_block_braces {
-                        writeln!(self.out, "{l2}}}")?;
+                        if write_block_braces {
+                            writeln!(self.out, "{l2}}}")?;
+                        }
                     }
-                }
 
-                writeln!(self.out, "{level}}}")?
+                    writeln!(self.out, "{level}}}")?
+                }
             }
             // Loops in naga IR are based on wgsl loops, glsl can emulate the behaviour by using a
             // while true loop and appending the continuing block to the body resulting on:
@@ -2134,6 +2192,7 @@ impl<'a, W: Write> Writer<'a, W> {
                 ref continuing,
                 break_if,
             } => {
+                self.continue_ctx.enter_loop();
                 if !continuing.is_empty() || break_if.is_some() {
                     let gate_name = self.namer.call("loop_init");
                     writeln!(self.out, "{level}bool {gate_name} = true;")?;
@@ -2159,7 +2218,8 @@ impl<'a, W: Write> Writer<'a, W> {
                 for sta in body {
                     self.write_stmt(sta, ctx, level.next())?;
                 }
-                writeln!(self.out, "{level}}}")?
+                writeln!(self.out, "{level}}}")?;
+                self.continue_ctx.exit_loop();
             }
             // Break, continue and return as written as in C
             // `break;`
@@ -2169,8 +2229,14 @@ impl<'a, W: Write> Writer<'a, W> {
             }
             // `continue;`
             Statement::Continue => {
-                write!(self.out, "{level}")?;
-                writeln!(self.out, "continue;")?
+                // Sometimes we must render a `Continue` statement as a `break`.
+                // See the docs for the `back::continue_forward` module.
+                if let Some(variable) = self.continue_ctx.continue_encountered() {
+                    writeln!(self.out, "{level}{variable} = true;",)?;
+                    writeln!(self.out, "{level}break;")?
+                } else {
+                    writeln!(self.out, "{level}continue;")?
+                }
             }
             // `return expr;`, `expr` is optional
             Statement::Return { value } => {
diff --git a/naga/src/back/hlsl/mod.rs b/naga/src/back/hlsl/mod.rs
index 49ff07ebf2..d28b387bf7 100644
--- a/naga/src/back/hlsl/mod.rs
+++ b/naga/src/back/hlsl/mod.rs
@@ -327,6 +327,7 @@ pub struct Writer<'a, W> {
     /// Set of expressions that have associated temporary variables
     named_expressions: crate::NamedExpressions,
     wrapped: Wrapped,
+    continue_ctx: back::continue_forward::ContinueCtx,
 
     /// A reference to some part of a global variable, lowered to a series of
     /// byte offset calculations.
diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs
index afa12cccab..982bf0cfea 100644
--- a/naga/src/back/hlsl/writer.rs
+++ b/naga/src/back/hlsl/writer.rs
@@ -104,6 +104,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
             entry_point_io: Vec::new(),
             named_expressions: crate::NamedExpressions::default(),
             wrapped: super::Wrapped::default(),
+            continue_ctx: back::continue_forward::ContinueCtx::default(),
             temp_access_chain: Vec::new(),
             need_bake_expressions: Default::default(),
         }
@@ -122,6 +123,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
         self.entry_point_io.clear();
         self.named_expressions.clear();
         self.wrapped.clear();
+        self.continue_ctx.clear();
         self.need_bake_expressions.clear();
     }
 
@@ -1439,6 +1441,151 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
         self.write_barrier(crate::Barrier::WORK_GROUP, level)
     }
 
+    /// Writes a `Switch` statement, applying continue forwarding (see `back::continue_forward`).
+    fn write_switch(
+        &mut self,
+        module: &Module,
+        func_ctx: &back::FunctionCtx<'_>,
+        level: back::Level,
+        selector: Handle<crate::Expression>,
+        cases: &[crate::SwitchCase],
+    ) -> BackendResult {
+        // Indentation one and two levels deeper than the statement itself.
+        let indent_level_1 = level.next();
+        let indent_level_2 = indent_level_1.next();
+
+        // Declare the flag variable if `enter_switch` returns one; see `back::continue_forward`.
+        if let Some(variable) = self.continue_ctx.enter_switch(&mut self.namer) {
+            writeln!(self.out, "{level}bool {variable} = false;",)?;
+        };
+
+        // Check if there is only one body, by seeing if all except the last case are fall through
+        // with empty bodies. FXC doesn't handle these switches correctly, so
+        // we generate a `do {} while(false);` loop instead. There must be a default case, so there
+        // is no need to check if one of the cases would have matched.
+        let one_body = cases
+            .iter()
+            .rev()
+            .skip(1)
+            .all(|case| case.fall_through && case.body.is_empty());
+        if one_body {
+            // Start the do-while
+            writeln!(self.out, "{level}do {{")?;
+            // Note: Expressions have no side-effects so we don't need to emit selector expression.
+
+            // Body
+            if let Some(case) = cases.last() {
+                for sta in case.body.iter() {
+                    self.write_stmt(module, sta, func_ctx, indent_level_1)?;
+                }
+            }
+            // End do-while
+            writeln!(self.out, "{level}}} while(false);")?;
+        } else {
+            // Start the switch
+            write!(self.out, "{level}")?;
+            write!(self.out, "switch(")?;
+            self.write_expr(module, selector, func_ctx)?;
+            writeln!(self.out, ") {{")?;
+
+            for (i, case) in cases.iter().enumerate() {
+                match case.value {
+                    crate::SwitchValue::I32(value) => {
+                        write!(self.out, "{indent_level_1}case {value}:")?
+                    }
+                    crate::SwitchValue::U32(value) => {
+                        write!(self.out, "{indent_level_1}case {value}u:")?
+                    }
+                    crate::SwitchValue::Default => write!(self.out, "{indent_level_1}default:")?,
+                }
+
+                // The new block is not only stylistic, it plays a role here:
+                // We might end up having to write the same case body
+                // multiple times due to FXC not supporting fallthrough.
+                // Therefore, some `Expression`s written by `Statement::Emit`
+                // will end up having the same name (`_expr<handle_index>`).
+                // So we need to put each case in its own scope.
+                let write_block_braces = !(case.fall_through && case.body.is_empty());
+                if write_block_braces {
+                    writeln!(self.out, " {{")?;
+                } else {
+                    writeln!(self.out)?;
+                }
+
+                // Although FXC does support a series of case clauses before
+                // a block[^yes], it does not support fallthrough from a
+                // non-empty case block to the next[^no]. If this case has a
+                // non-empty body with a fallthrough, emulate that by
+                // duplicating the bodies of all the cases it would fall
+                // into as extensions of this case's own body. This makes
+                // the HLSL output potentially quadratic in the size of the
+                // Naga IR.
+                //
+                // [^yes]: ```hlsl
+                // case 1:
+                // case 2: do_stuff()
+                // ```
+                // [^no]: ```hlsl
+                // case 1: do_this();
+                // case 2: do_that();
+                // ```
+                if case.fall_through && !case.body.is_empty() {
+                    let curr_len = i + 1;
+                    let end_case_idx = curr_len
+                        + cases
+                            .iter()
+                            .skip(curr_len)
+                            .position(|case| !case.fall_through)
+                            .unwrap();
+                    let indent_level_3 = indent_level_2.next();
+                    for case in &cases[i..=end_case_idx] {
+                        writeln!(self.out, "{indent_level_2}{{")?;
+                        let prev_len = self.named_expressions.len();
+                        for sta in case.body.iter() {
+                            self.write_stmt(module, sta, func_ctx, indent_level_3)?;
+                        }
+                        // Clear all named expressions that were previously inserted by the statements in the block
+                        self.named_expressions.truncate(prev_len);
+                        writeln!(self.out, "{indent_level_2}}}")?;
+                    }
+
+                    let last_case = &cases[end_case_idx];
+                    if last_case.body.last().map_or(true, |s| !s.is_terminator()) {
+                        writeln!(self.out, "{indent_level_2}break;")?;
+                    }
+                } else {
+                    for sta in case.body.iter() {
+                        self.write_stmt(module, sta, func_ctx, indent_level_2)?;
+                    }
+                    if !case.fall_through && case.body.last().map_or(true, |s| !s.is_terminator()) {
+                        writeln!(self.out, "{indent_level_2}break;")?;
+                    }
+                }
+
+                if write_block_braces {
+                    writeln!(self.out, "{indent_level_1}}}")?;
+                }
+            }
+
+            writeln!(self.out, "{level}}}")?;
+        }
+
+        // After the switch, test the flag and emit the `continue` or `break` that `exit_switch` asks for.
+        use back::continue_forward::ExitControlFlow;
+        let op = match self.continue_ctx.exit_switch() {
+            ExitControlFlow::None => None,
+            ExitControlFlow::Continue { variable } => Some(("continue", variable)),
+            ExitControlFlow::Break { variable } => Some(("break", variable)),
+        };
+        if let Some((control_flow, variable)) = op {
+            writeln!(self.out, "{level}if ({variable}) {{")?;
+            writeln!(self.out, "{indent_level_1}{control_flow};")?;
+            writeln!(self.out, "{level}}}")?;
+        }
+
+        Ok(())
+    }
+
     /// Helper method used to write statements
     ///
     /// # Notes
@@ -1882,6 +2029,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                 ref continuing,
                 break_if,
             } => {
+                self.continue_ctx.enter_loop();
                 let l2 = level.next();
                 if !continuing.is_empty() || break_if.is_some() {
                     let gate_name = self.namer.call("loop_init");
@@ -1908,10 +2056,18 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                 for sta in body.iter() {
                     self.write_stmt(module, sta, func_ctx, l2)?;
                 }
-                writeln!(self.out, "{level}}}")?
+                writeln!(self.out, "{level}}}")?;
+                self.continue_ctx.exit_loop();
             }
             Statement::Break => writeln!(self.out, "{level}break;")?,
-            Statement::Continue => writeln!(self.out, "{level}continue;")?,
+            Statement::Continue => {
+                if let Some(variable) = self.continue_ctx.continue_encountered() {
+                    writeln!(self.out, "{level}{variable} = true;")?;
+                    writeln!(self.out, "{level}break;")?
+                } else {
+                    writeln!(self.out, "{level}continue;")?
+                }
+            }
             Statement::Barrier(barrier) => {
                 self.write_barrier(barrier, level)?;
             }
@@ -2063,100 +2219,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                 selector,
                 ref cases,
             } => {
-                // Start the switch
-                write!(self.out, "{level}")?;
-                write!(self.out, "switch(")?;
-                self.write_expr(module, selector, func_ctx)?;
-                writeln!(self.out, ") {{")?;
-
-                // Write all cases
-                let indent_level_1 = level.next();
-                let indent_level_2 = indent_level_1.next();
-
-                for (i, case) in cases.iter().enumerate() {
-                    match case.value {
-                        crate::SwitchValue::I32(value) => {
-                            write!(self.out, "{indent_level_1}case {value}:")?
-                        }
-                        crate::SwitchValue::U32(value) => {
-                            write!(self.out, "{indent_level_1}case {value}u:")?
-                        }
-                        crate::SwitchValue::Default => {
-                            write!(self.out, "{indent_level_1}default:")?
-                        }
-                    }
-
-                    // The new block is not only stylistic, it plays a role here:
-                    // We might end up having to write the same case body
-                    // multiple times due to FXC not supporting fallthrough.
-                    // Therefore, some `Expression`s written by `Statement::Emit`
-                    // will end up having the same name (`_expr<handle_index>`).
-                    // So we need to put each case in its own scope.
-                    let write_block_braces = !(case.fall_through && case.body.is_empty());
-                    if write_block_braces {
-                        writeln!(self.out, " {{")?;
-                    } else {
-                        writeln!(self.out)?;
-                    }
-
-                    // Although FXC does support a series of case clauses before
-                    // a block[^yes], it does not support fallthrough from a
-                    // non-empty case block to the next[^no]. If this case has a
-                    // non-empty body with a fallthrough, emulate that by
-                    // duplicating the bodies of all the cases it would fall
-                    // into as extensions of this case's own body. This makes
-                    // the HLSL output potentially quadratic in the size of the
-                    // Naga IR.
-                    //
-                    // [^yes]: ```hlsl
-                    // case 1:
-                    // case 2: do_stuff()
-                    // ```
-                    // [^no]: ```hlsl
-                    // case 1: do_this();
-                    // case 2: do_that();
-                    // ```
-                    if case.fall_through && !case.body.is_empty() {
-                        let curr_len = i + 1;
-                        let end_case_idx = curr_len
-                            + cases
-                                .iter()
-                                .skip(curr_len)
-                                .position(|case| !case.fall_through)
-                                .unwrap();
-                        let indent_level_3 = indent_level_2.next();
-                        for case in &cases[i..=end_case_idx] {
-                            writeln!(self.out, "{indent_level_2}{{")?;
-                            let prev_len = self.named_expressions.len();
-                            for sta in case.body.iter() {
-                                self.write_stmt(module, sta, func_ctx, indent_level_3)?;
-                            }
-                            // Clear all named expressions that were previously inserted by the statements in the block
-                            self.named_expressions.truncate(prev_len);
-                            writeln!(self.out, "{indent_level_2}}}")?;
-                        }
-
-                        let last_case = &cases[end_case_idx];
-                        if last_case.body.last().map_or(true, |s| !s.is_terminator()) {
-                            writeln!(self.out, "{indent_level_2}break;")?;
-                        }
-                    } else {
-                        for sta in case.body.iter() {
-                            self.write_stmt(module, sta, func_ctx, indent_level_2)?;
-                        }
-                        if !case.fall_through
-                            && case.body.last().map_or(true, |s| !s.is_terminator())
-                        {
-                            writeln!(self.out, "{indent_level_2}break;")?;
-                        }
-                    }
-
-                    if write_block_braces {
-                        writeln!(self.out, "{indent_level_1}}}")?;
-                    }
-                }
-
-                writeln!(self.out, "{level}}}")?
+                self.write_switch(module, func_ctx, level, selector, cases)?;
             }
             Statement::RayQuery { .. } => unreachable!(),
             Statement::SubgroupBallot { result, predicate } => {
diff --git a/naga/src/back/mod.rs b/naga/src/back/mod.rs
index cd9496e3ff..43d88a437d 100644
--- a/naga/src/back/mod.rs
+++ b/naga/src/back/mod.rs
@@ -19,6 +19,9 @@ pub mod wgsl;
 #[cfg(any(hlsl_out, msl_out, spv_out, glsl_out))]
 pub mod pipeline_constants;
 
+#[cfg(any(hlsl_out, glsl_out))]
+mod continue_forward;
+
 /// Names of vector components.
 pub const COMPONENTS: &[char] = &['x', 'y', 'z', 'w'];
 /// Indent for backends.
diff --git a/naga/tests/in/control-flow.wgsl b/naga/tests/in/control-flow.wgsl
index 5a0ef1cbbf..a25c899a44 100644
--- a/naga/tests/in/control-flow.wgsl
+++ b/naga/tests/in/control-flow.wgsl
@@ -88,3 +88,96 @@ fn loop_switch_continue(x: i32) {
         }
     }
 }
+
+fn loop_switch_continue_nesting(x: i32, y: i32, z: i32) {
+    loop {
+        switch x {
+            case 1: {
+                continue;
+            }
+            case 2: {
+                switch y {
+                    case 1: {
+                        continue;
+                    }
+                    default: {
+                        loop {
+                            switch z {
+                                case 1: {
+                                    continue;
+                                }
+                                default: {}
+                            }
+                        }
+                    }
+                }
+            }
+            default: {}
+        }
+
+
+        // Degenerate switch with continue
+        switch y {
+            default: {
+                continue;
+            }
+        }
+    }
+
+    // In separate loop to avoid spv validation error:
+    // See https://github.com/gfx-rs/wgpu/issues/5658
+    loop {
+        // Nested degenerate switch with continue
+        switch y {
+            case 1, default: {
+                switch z {
+                    default: {
+                        continue;
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Cases with some of the loop nested switches not containing continues.
+// See `continue_forward` module in `naga`.
+fn loop_switch_omit_continue_variable_checks(x: i32, y: i32, z: i32, w: i32) {
+    // switch in loop with no continues, we expect checks after the switch
+    // statement to not be generated
+    var pos: i32 = 0;
+    loop {
+        switch x {
+            case 1: {
+                pos = 1;
+            }
+            default: {}
+        }
+        // check here can be omitted
+    }
+
+    loop {
+        switch x {
+            case 1: {}
+            case 2: {
+                switch y {
+                    case 1: {
+                        continue;
+                    }
+                    default: {
+                        switch z {
+                            case 1: {
+                                pos = 2;
+                            }
+                            default: {}
+                        }
+                        // check here can be omitted
+                    }
+                }
+                // check needs to be generated here
+            }
+            default: {}
+        }
+        // check needs to be generated here
+    }
+}
diff --git a/naga/tests/out/glsl/control-flow.main.Compute.glsl b/naga/tests/out/glsl/control-flow.main.Compute.glsl
index b877f9cb69..391fca84f4 100644
--- a/naga/tests/out/glsl/control-flow.main.Compute.glsl
+++ b/naga/tests/out/glsl/control-flow.main.Compute.glsl
@@ -7,11 +7,9 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 
 
 void switch_default_break(int i) {
-    switch(i) {
-        default: {
-            break;
-        }
-    }
+    do {
+        break;
+    } while(false);
 }
 
 void switch_case_break() {
@@ -40,6 +38,110 @@ void loop_switch_continue(int x) {
     return;
 }
 
+void loop_switch_continue_nesting(int x_1, int y, int z) {
+    while(true) {
+        switch(x_1) {
+            case 1: {
+                continue;
+            }
+            case 2: {
+                switch(y) {
+                    case 1: {
+                        continue;
+                    }
+                    default: {
+                        while(true) {
+                            switch(z) {
+                                case 1: {
+                                    continue;
+                                }
+                                default: {
+                                    break;
+                                }
+                            }
+                        }
+                        break;
+                    }
+                }
+                break;
+            }
+            default: {
+                break;
+            }
+        }
+        bool should_continue = false;
+        do {
+            should_continue = true;
+            break;
+        } while(false);
+        if (should_continue) {
+            continue;
+        }
+    }
+    while(true) {
+        bool should_continue_1 = false;
+        do {
+            do {
+                should_continue_1 = true;
+                break;
+            } while(false);
+            if (should_continue_1) {
+                break;
+            }
+        } while(false);
+        if (should_continue_1) {
+            continue;
+        }
+    }
+    return;
+}
+
+void loop_switch_omit_continue_variable_checks(int x_2, int y_1, int z_1, int w) {
+    int pos_1 = 0;
+    while(true) {
+        switch(x_2) {
+            case 1: {
+                pos_1 = 1;
+                break;
+            }
+            default: {
+                break;
+            }
+        }
+    }
+    while(true) {
+        switch(x_2) {
+            case 1: {
+                break;
+            }
+            case 2: {
+                switch(y_1) {
+                    case 1: {
+                        continue;
+                    }
+                    default: {
+                        switch(z_1) {
+                            case 1: {
+                                pos_1 = 2;
+                                break;
+                            }
+                            default: {
+                                break;
+                            }
+                        }
+                        break;
+                    }
+                }
+                break;
+            }
+            default: {
+                break;
+            }
+        }
+    }
+    return;
+}
+
 void main() {
     uvec3 global_id = gl_GlobalInvocationID;
     int pos = 0;
@@ -47,12 +149,9 @@ void main() {
     barrier();
     memoryBarrierShared();
     barrier();
-    switch(1) {
-        default: {
-            pos = 1;
-            break;
-        }
-    }
+    do {
+        pos = 1;
+    } while(false);
     int _e4 = pos;
     switch(_e4) {
         case 1: {
diff --git a/naga/tests/out/hlsl/control-flow.hlsl b/naga/tests/out/hlsl/control-flow.hlsl
index 1e253add21..2438858a8a 100644
--- a/naga/tests/out/hlsl/control-flow.hlsl
+++ b/naga/tests/out/hlsl/control-flow.hlsl
@@ -1,10 +1,8 @@
 void switch_default_break(int i)
 {
-    switch(i) {
-        default: {
-            break;
-        }
-    }
+    do {
+        break;
+    } while(false);
 }
 
 void switch_case_break()
@@ -23,14 +21,149 @@ void switch_case_break()
 void loop_switch_continue(int x)
 {
     while(true) {
+        bool should_continue = false;
         switch(x) {
             case 1: {
-                continue;
+                should_continue = true;
+                break;
             }
             default: {
                 break;
             }
         }
+        if (should_continue) {
+            continue;
+        }
+    }
+    return;
+}
+
+void loop_switch_continue_nesting(int x_1, int y, int z)
+{
+    while(true) {
+        bool should_continue_1 = false;
+        switch(x_1) {
+            case 1: {
+                should_continue_1 = true;
+                break;
+            }
+            case 2: {
+                switch(y) {
+                    case 1: {
+                        should_continue_1 = true;
+                        break;
+                    }
+                    default: {
+                        while(true) {
+                            bool should_continue_2 = false;
+                            switch(z) {
+                                case 1: {
+                                    should_continue_2 = true;
+                                    break;
+                                }
+                                default: {
+                                    break;
+                                }
+                            }
+                            if (should_continue_2) {
+                                continue;
+                            }
+                        }
+                        break;
+                    }
+                }
+                if (should_continue_1) {
+                    break;
+                }
+                break;
+            }
+            default: {
+                break;
+            }
+        }
+        if (should_continue_1) {
+            continue;
+        }
+        bool should_continue_3 = false;
+        do {
+            should_continue_3 = true;
+            break;
+        } while(false);
+        if (should_continue_3) {
+            continue;
+        }
+    }
+    while(true) {
+        bool should_continue_4 = false;
+        do {
+            do {
+                should_continue_4 = true;
+                break;
+            } while(false);
+            if (should_continue_4) {
+                break;
+            }
+        } while(false);
+        if (should_continue_4) {
+            continue;
+        }
+    }
+    return;
+}
+
+void loop_switch_omit_continue_variable_checks(int x_2, int y_1, int z_1, int w)
+{
+    int pos_1 = 0;
+
+    while(true) {
+        bool should_continue_5 = false;
+        switch(x_2) {
+            case 1: {
+                pos_1 = 1;
+                break;
+            }
+            default: {
+                break;
+            }
+        }
+    }
+    while(true) {
+        bool should_continue_6 = false;
+        switch(x_2) {
+            case 1: {
+                break;
+            }
+            case 2: {
+                switch(y_1) {
+                    case 1: {
+                        should_continue_6 = true;
+                        break;
+                    }
+                    default: {
+                        switch(z_1) {
+                            case 1: {
+                                pos_1 = 2;
+                                break;
+                            }
+                            default: {
+                                break;
+                            }
+                        }
+                        break;
+                    }
+                }
+                if (should_continue_6) {
+                    break;
+                }
+                break;
+            }
+            default: {
+                break;
+            }
+        }
+        if (should_continue_6) {
+            continue;
+        }
     }
     return;
 }
@@ -42,12 +175,9 @@ void main(uint3 global_id : SV_DispatchThreadID)
 
     DeviceMemoryBarrierWithGroupSync();
     GroupMemoryBarrierWithGroupSync();
-    switch(1) {
-        default: {
-            pos = 1;
-            break;
-        }
-    }
+    do {
+        pos = 1;
+    } while(false);
     int _e4 = pos;
     switch(_e4) {
         case 1: {
diff --git a/naga/tests/out/msl/control-flow.msl b/naga/tests/out/msl/control-flow.msl
index 0d0e082e41..11771693aa 100644
--- a/naga/tests/out/msl/control-flow.msl
+++ b/naga/tests/out/msl/control-flow.msl
@@ -44,6 +44,114 @@ void loop_switch_continue(
     return;
 }
 
+void loop_switch_continue_nesting(
+    int x_1,
+    int y,
+    int z
+) {
+    while(true) {
+        switch(x_1) {
+            case 1: {
+                continue;
+            }
+            case 2: {
+                switch(y) {
+                    case 1: {
+                        continue;
+                    }
+                    default: {
+                        while(true) {
+                            switch(z) {
+                                case 1: {
+                                    continue;
+                                }
+                                default: {
+                                    break;
+                                }
+                            }
+                        }
+                        break;
+                    }
+                }
+                break;
+            }
+            default: {
+                break;
+            }
+        }
+        switch(y) {
+            default: {
+                continue;
+            }
+        }
+    }
+    while(true) {
+        switch(y) {
+            case 1:
+            default: {
+                switch(z) {
+                    default: {
+                        continue;
+                    }
+                }
+                break;
+            }
+        }
+    }
+    return;
+}
+
+void loop_switch_omit_continue_variable_checks(
+    int x_2,
+    int y_1,
+    int z_1,
+    int w
+) {
+    int pos_1 = 0;
+    while(true) {
+        switch(x_2) {
+            case 1: {
+                pos_1 = 1;
+                break;
+            }
+            default: {
+                break;
+            }
+        }
+    }
+    while(true) {
+        switch(x_2) {
+            case 1: {
+                break;
+            }
+            case 2: {
+                switch(y_1) {
+                    case 1: {
+                        continue;
+                    }
+                    default: {
+                        switch(z_1) {
+                            case 1: {
+                                pos_1 = 2;
+                                break;
+                            }
+                            default: {
+                                break;
+                            }
+                        }
+                        break;
+                    }
+                }
+                break;
+            }
+            default: {
+                break;
+            }
+        }
+    }
+    return;
+}
+
 struct main_Input {
 };
 kernel void main_(
diff --git a/naga/tests/out/spv/control-flow.spvasm b/naga/tests/out/spv/control-flow.spvasm
index 2fc9337cfe..f3c3644b4f 100644
--- a/naga/tests/out/spv/control-flow.spvasm
+++ b/naga/tests/out/spv/control-flow.spvasm
@@ -1,13 +1,13 @@
 ; SPIR-V
 ; Version: 1.1
 ; Generator: rspirv
-; Bound: 69
+; Bound: 134
 OpCapability Shader
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
-OpEntryPoint GLCompute %36 "main" %33
-OpExecutionMode %36 LocalSize 1 1 1
-OpDecorate %33 BuiltIn GlobalInvocationId
+OpEntryPoint GLCompute %104 "main" %101
+OpExecutionMode %104 LocalSize 1 1 1
+OpDecorate %101 BuiltIn GlobalInvocationId
 %2 = OpTypeVoid
 %4 = OpTypeInt 32 0
 %3 = OpTypeVector %4 3
@@ -15,19 +15,21 @@ OpDecorate %33 BuiltIn GlobalInvocationId
 %9 = OpTypeFunction %2 %5
 %15 = OpTypeFunction %2
 %16 = OpConstant  %5  0
-%34 = OpTypePointer Input %3
-%33 = OpVariable  %34  Input
-%37 = OpConstant  %5  1
-%38 = OpConstant  %5  2
-%39 = OpConstant  %5  3
-%40 = OpConstant  %5  4
-%41 = OpConstant  %4  0
-%43 = OpTypePointer Function %5
-%44 = OpConstantNull  %5
-%46 = OpConstant  %4  2
-%47 = OpConstant  %4  1
-%48 = OpConstant  %4  72
-%49 = OpConstant  %4  264
+%37 = OpTypeFunction %2 %5 %5 %5
+%73 = OpTypeFunction %2 %5 %5 %5 %5
+%74 = OpConstant  %5  1
+%75 = OpConstant  %5  2
+%77 = OpTypePointer Function %5
+%102 = OpTypePointer Input %3
+%101 = OpVariable  %102  Input
+%105 = OpConstant  %5  3
+%106 = OpConstant  %5  4
+%107 = OpConstant  %4  0
+%109 = OpConstantNull  %5
+%111 = OpConstant  %4  2
+%112 = OpConstant  %4  1
+%113 = OpConstant  %4  72
+%114 = OpConstant  %4  264
 %8 = OpFunction  %2  None %9
 %7 = OpFunctionParameter  %5
 %6 = OpLabel
@@ -76,63 +78,198 @@ OpBranch %25
 %26 = OpLabel
 OpReturn
 OpFunctionEnd
-%36 = OpFunction  %2  None %15
+%36 = OpFunction  %2  None %37
+%33 = OpFunctionParameter  %5
+%34 = OpFunctionParameter  %5
+%35 = OpFunctionParameter  %5
 %32 = OpLabel
-%42 = OpVariable  %43  Function %44
-%35 = OpLoad  %3  %33
-OpBranch %45
+OpBranch %38
+%38 = OpLabel
+OpBranch %39
+%39 = OpLabel
+OpLoopMerge %40 %42 None
+OpBranch %41
+%41 = OpLabel
+OpSelectionMerge %43 None
+OpSwitch %33 %46 1 %44 2 %45
+%44 = OpLabel
+OpBranch %42
 %45 = OpLabel
-OpControlBarrier %46 %47 %48
-OpControlBarrier %46 %46 %49
-OpSelectionMerge %50 None
-OpSwitch %37 %51
-%51 = OpLabel
-OpStore %42 %37
+OpSelectionMerge %47 None
+OpSwitch %34 %49 1 %48
+%48 = OpLabel
+OpBranch %42
+%49 = OpLabel
 OpBranch %50
 %50 = OpLabel
-%52 = OpLoad  %5  %42
-OpSelectionMerge %53 None
-OpSwitch %52 %58 1 %54 2 %55 3 %56 4 %56 5 %57 6 %58
-%54 = OpLabel
-OpStore %42 %16
-OpBranch %53
+OpLoopMerge %51 %53 None
+OpBranch %52
+%52 = OpLabel
+OpSelectionMerge %54 None
+OpSwitch %35 %56 1 %55
 %55 = OpLabel
-OpStore %42 %37
 OpBranch %53
 %56 = OpLabel
-OpStore %42 %38
-OpBranch %53
-%57 = OpLabel
-OpStore %42 %39
-OpBranch %53
-%58 = OpLabel
-OpStore %42 %40
+OpBranch %54
+%54 = OpLabel
 OpBranch %53
 %53 = OpLabel
-OpSelectionMerge %59 None
-OpSwitch %41 %61 0 %60
-%60 = OpLabel
-OpBranch %59
-%61 = OpLabel
+OpBranch %50
+%51 = OpLabel
+OpBranch %47
+%47 = OpLabel
+OpBranch %43
+%46 = OpLabel
+OpBranch %43
+%43 = OpLabel
+OpSelectionMerge %57 None
+OpSwitch %34 %58
+%58 = OpLabel
+OpBranch %42
+%57 = OpLabel
+OpBranch %42
+%42 = OpLabel
+OpBranch %39
+%40 = OpLabel
 OpBranch %59
 %59 = OpLabel
-%62 = OpLoad  %5  %42
+OpLoopMerge %60 %62 None
+OpBranch %61
+%61 = OpLabel
 OpSelectionMerge %63 None
-OpSwitch %62 %68 1 %64 2 %65 3 %66 4 %67
+OpSwitch %34 %64 1 %64
 %64 = OpLabel
-OpStore %42 %16
-OpBranch %63
-%65 = OpLabel
-OpStore %42 %37
-OpReturn
+OpSelectionMerge %65 None
+OpSwitch %35 %66
 %66 = OpLabel
-OpStore %42 %38
+OpBranch %62
+%65 = OpLabel
+OpBranch %63
+%63 = OpLabel
+OpBranch %62
+%62 = OpLabel
+OpBranch %59
+%60 = OpLabel
 OpReturn
+OpFunctionEnd
+%72 = OpFunction  %2  None %73
+%68 = OpFunctionParameter  %5
+%69 = OpFunctionParameter  %5
+%70 = OpFunctionParameter  %5
+%71 = OpFunctionParameter  %5
 %67 = OpLabel
+%76 = OpVariable  %77  Function %16
+OpBranch %78
+%78 = OpLabel
+OpBranch %79
+%79 = OpLabel
+OpLoopMerge %80 %82 None
+OpBranch %81
+%81 = OpLabel
+OpSelectionMerge %83 None
+OpSwitch %68 %85 1 %84
+%84 = OpLabel
+OpStore %76 %74
+OpBranch %83
+%85 = OpLabel
+OpBranch %83
+%83 = OpLabel
+OpBranch %82
+%82 = OpLabel
+OpBranch %79
+%80 = OpLabel
+OpBranch %86
+%86 = OpLabel
+OpLoopMerge %87 %89 None
+OpBranch %88
+%88 = OpLabel
+OpSelectionMerge %90 None
+OpSwitch %68 %93 1 %91 2 %92
+%91 = OpLabel
+OpBranch %90
+%92 = OpLabel
+OpSelectionMerge %94 None
+OpSwitch %69 %96 1 %95
+%95 = OpLabel
+OpBranch %89
+%96 = OpLabel
+OpSelectionMerge %97 None
+OpSwitch %70 %99 1 %98
+%98 = OpLabel
+OpStore %76 %75
+OpBranch %97
+%99 = OpLabel
+OpBranch %97
+%97 = OpLabel
+OpBranch %94
+%94 = OpLabel
+OpBranch %90
+%93 = OpLabel
+OpBranch %90
+%90 = OpLabel
+OpBranch %89
+%89 = OpLabel
+OpBranch %86
+%87 = OpLabel
 OpReturn
-%68 = OpLabel
-OpStore %42 %39
+OpFunctionEnd
+%104 = OpFunction  %2  None %15
+%100 = OpLabel
+%108 = OpVariable  %77  Function %109
+%103 = OpLoad  %3  %101
+OpBranch %110
+%110 = OpLabel
+OpControlBarrier %111 %112 %113
+OpControlBarrier %111 %111 %114
+OpSelectionMerge %115 None
+OpSwitch %74 %116
+%116 = OpLabel
+OpStore %108 %74
+OpBranch %115
+%115 = OpLabel
+%117 = OpLoad  %5  %108
+OpSelectionMerge %118 None
+OpSwitch %117 %123 1 %119 2 %120 3 %121 4 %121 5 %122 6 %123
+%119 = OpLabel
+OpStore %108 %16
+OpBranch %118
+%120 = OpLabel
+OpStore %108 %74
+OpBranch %118
+%121 = OpLabel
+OpStore %108 %75
+OpBranch %118
+%122 = OpLabel
+OpStore %108 %105
+OpBranch %118
+%123 = OpLabel
+OpStore %108 %106
+OpBranch %118
+%118 = OpLabel
+OpSelectionMerge %124 None
+OpSwitch %107 %126 0 %125
+%125 = OpLabel
+OpBranch %124
+%126 = OpLabel
+OpBranch %124
+%124 = OpLabel
+%127 = OpLoad  %5  %108
+OpSelectionMerge %128 None
+OpSwitch %127 %133 1 %129 2 %130 3 %131 4 %132
+%129 = OpLabel
+OpStore %108 %16
+OpBranch %128
+%130 = OpLabel
+OpStore %108 %74
 OpReturn
-%63 = OpLabel
+%131 = OpLabel
+OpStore %108 %75
+OpReturn
+%132 = OpLabel
+OpReturn
+%133 = OpLabel
+OpStore %108 %105
+OpReturn
+%128 = OpLabel
 OpReturn
 OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/wgsl/control-flow.wgsl b/naga/tests/out/wgsl/control-flow.wgsl
index dcc3f90365..ad071af58a 100644
--- a/naga/tests/out/wgsl/control-flow.wgsl
+++ b/naga/tests/out/wgsl/control-flow.wgsl
@@ -30,6 +30,92 @@ fn loop_switch_continue(x: i32) {
     return;
 }
 
+fn loop_switch_continue_nesting(x_1: i32, y: i32, z: i32) {
+    loop {
+        switch x_1 {
+            case 1: {
+                continue;
+            }
+            case 2: {
+                switch y {
+                    case 1: {
+                        continue;
+                    }
+                    default: {
+                        loop {
+                            switch z {
+                                case 1: {
+                                    continue;
+                                }
+                                default: {
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            default: {
+            }
+        }
+        switch y {
+            default: {
+                continue;
+            }
+        }
+    }
+    loop {
+        switch y {
+            case 1, default: {
+                switch z {
+                    default: {
+                        continue;
+                    }
+                }
+            }
+        }
+    }
+    return;
+}
+
+fn loop_switch_omit_continue_variable_checks(x_2: i32, y_1: i32, z_1: i32, w: i32) {
+    var pos_1: i32 = 0i;
+
+    loop {
+        switch x_2 {
+            case 1: {
+                pos_1 = 1i;
+            }
+            default: {
+            }
+        }
+    }
+    loop {
+        switch x_2 {
+            case 1: {
+            }
+            case 2: {
+                switch y_1 {
+                    case 1: {
+                        continue;
+                    }
+                    default: {
+                        switch z_1 {
+                            case 1: {
+                                pos_1 = 2i;
+                            }
+                            default: {
+                            }
+                        }
+                    }
+                }
+            }
+            default: {
+            }
+        }
+    }
+    return;
+}
+
 @compute @workgroup_size(1, 1, 1) 
 fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
     var pos: i32;
diff --git a/naga/xtask/src/validate.rs b/naga/xtask/src/validate.rs
index d90ee8d84a..fa330f0a96 100644
--- a/naga/xtask/src/validate.rs
+++ b/naga/xtask/src/validate.rs
@@ -208,7 +208,10 @@ fn validate_spirv(path: &Path, spirv_as: &str, spirv_val: &str) -> anyhow::Resul
         buf
     };
     let expected_header_prefix = "; Version: ";
-    let Some(version) = second_line.strip_prefix(expected_header_prefix) else {
+    let Some(version) = second_line
+        .strip_prefix(expected_header_prefix)
+        .map(str::trim)
+    else {
         bail!("no {expected_header_prefix:?} header found in {path:?}");
     };
     let file = open_file(path)?;
@@ -222,7 +225,18 @@ fn validate_spirv(path: &Path, spirv_as: &str, spirv_val: &str) -> anyhow::Resul
     let child = spirv_as_cmd
         .spawn()
         .with_context(|| format!("failed to spawn {spirv_as_cmd:?}"))?;
-    EasyCommand::new(spirv_val, |cmd| cmd.stdin(child.stdout.unwrap())).success()
+    let error_message = || {
+        format!(
+            "Failed to validate {path:?}.
+Note: Labels and line numbers will not match the input file.
+      Use this command to view the corresponding spvasm:
+      '{spirv_as} --target-env spv{version} {} -o - | spirv-dis'\n",
+            path.display(),
+        )
+    };
+    EasyCommand::new(spirv_val, |cmd| cmd.stdin(child.stdout.unwrap()))
+        .success()
+        .with_context(error_message)
 }
 
 fn validate_metal(path: &Path, xcrun: &str) -> anyhow::Result<()> {
diff --git a/tests/src/init.rs b/tests/src/init.rs
index 3a11b3abe3..140bb202fc 100644
--- a/tests/src/init.rs
+++ b/tests/src/init.rs
@@ -11,7 +11,7 @@ pub fn init_logger() {
 }
 
 /// Initialize a wgpu instance with the options from the environment.
-pub fn initialize_instance() -> Instance {
+pub fn initialize_instance(force_fxc: bool) -> Instance {
     // We ignore `WGPU_BACKEND` for now, merely using test filtering to only run a single backend's tests.
     //
     // We can potentially work support back into the test runner in the future, but as the adapters are matched up
@@ -27,7 +27,13 @@ pub fn initialize_instance() -> Instance {
     } else {
         Backends::all()
     };
-    let dx12_shader_compiler = wgpu::util::dx12_shader_compiler_from_env().unwrap_or_default();
+    // Some tests need to force a fallback to FXC in order to specifically test workarounds for
+    // FXC behavior.
+    let dx12_shader_compiler = if force_fxc {
+        wgpu::Dx12Compiler::Fxc
+    } else {
+        wgpu::util::dx12_shader_compiler_from_env().unwrap_or_default()
+    };
     let gles_minor_version = wgpu::util::gles_minor_version_from_env().unwrap_or_default();
     Instance::new(wgpu::InstanceDescriptor {
         backends,
@@ -38,8 +44,11 @@ pub fn initialize_instance() -> Instance {
 }
 
 /// Initialize a wgpu adapter, taking the `n`th adapter from the instance.
-pub async fn initialize_adapter(adapter_index: usize) -> (Instance, Adapter, Option<SurfaceGuard>) {
-    let instance = initialize_instance();
+pub async fn initialize_adapter(
+    adapter_index: usize,
+    force_fxc: bool,
+) -> (Instance, Adapter, Option<SurfaceGuard>) {
+    let instance = initialize_instance(force_fxc);
     #[allow(unused_variables)]
     let surface: Option<wgpu::Surface>;
     let surface_guard: Option<SurfaceGuard>;
diff --git a/tests/src/params.rs b/tests/src/params.rs
index 2f54e65bbb..e5d50a4859 100644
--- a/tests/src/params.rs
+++ b/tests/src/params.rs
@@ -19,6 +19,11 @@ pub struct TestParameters {
     pub required_downlevel_caps: DownlevelCapabilities,
     pub required_limits: Limits,
 
+    /// On Dx12, specifically test against the Fxc compiler.
+    ///
+    /// For testing workarounds to Fxc bugs.
+    pub force_fxc: bool,
+
     /// Conditions under which this test should be skipped.
     pub skips: Vec<FailureCase>,
 
@@ -32,6 +37,7 @@ impl Default for TestParameters {
             required_features: Features::empty(),
             required_downlevel_caps: LOWEST_DOWNLEVEL_PROPERTIES,
             required_limits: Limits::downlevel_webgl2_defaults(),
+            force_fxc: false,
             skips: Vec::new(),
             failures: Vec::new(),
         }
@@ -63,6 +69,11 @@ impl TestParameters {
         self
     }
 
+    pub fn force_fxc(mut self, force_fxc: bool) -> Self {
+        self.force_fxc = force_fxc;
+        self
+    }
+
     /// Mark the test as always failing, but not to be skipped.
     pub fn expect_fail(mut self, when: FailureCase) -> Self {
         self.failures.push(when);
diff --git a/tests/src/run.rs b/tests/src/run.rs
index 82ddb93399..82c1d34e69 100644
--- a/tests/src/run.rs
+++ b/tests/src/run.rs
@@ -42,7 +42,8 @@ pub async fn execute_test(
 
     let _test_guard = isolation::OneTestPerProcessGuard::new();
 
-    let (instance, adapter, _surface_guard) = initialize_adapter(adapter_index).await;
+    let (instance, adapter, _surface_guard) =
+        initialize_adapter(adapter_index, config.params.force_fxc).await;
 
     let adapter_info = adapter.get_info();
     let adapter_downlevel_capabilities = adapter.get_downlevel_capabilities();
diff --git a/tests/tests/create_surface_error.rs b/tests/tests/create_surface_error.rs
index 87aeb15726..e3b48cb757 100644
--- a/tests/tests/create_surface_error.rs
+++ b/tests/tests/create_surface_error.rs
@@ -6,7 +6,7 @@
 #[wasm_bindgen_test::wasm_bindgen_test]
 fn canvas_get_context_returned_null() {
     // Not using the normal testing infrastructure because that goes straight to creating the canvas for us.
-    let instance = wgpu_test::initialize_instance();
+    let instance = wgpu_test::initialize_instance(false);
     // Create canvas
     let canvas = wgpu_test::initialize_html_canvas();
 
diff --git a/tests/tests/device.rs b/tests/tests/device.rs
index f932faa2f1..ae463cca46 100644
--- a/tests/tests/device.rs
+++ b/tests/tests/device.rs
@@ -107,7 +107,7 @@ static REQUEST_DEVICE_ERROR_MESSAGE_NATIVE: GpuTestConfiguration =
 async fn request_device_error_message() {
     // Not using initialize_test() because that doesn't let us catch the error
     // nor .await anything
-    let (_instance, adapter, _surface_guard) = wgpu_test::initialize_adapter(0).await;
+    let (_instance, adapter, _surface_guard) = wgpu_test::initialize_adapter(0, false).await;
 
     let device_error = adapter
         .request_device(
diff --git a/tests/tests/regression/issue_4485.rs b/tests/tests/regression/issue_4485.rs
new file mode 100644
index 0000000000..101712fe02
--- /dev/null
+++ b/tests/tests/regression/issue_4485.rs
@@ -0,0 +1,106 @@
+use wgpu_test::{gpu_test, image, GpuTestConfiguration, TestParameters, TestingContext};
+
+/// FXC doesn't accept `continue` inside a switch. Instead we store a flag for whether
+/// the loop should continue that is checked after the switch.
+///
+/// See <https://github.com/gfx-rs/wgpu/issues/4485>.
+///
+/// The shader will fail to compile on Dx12 with FXC without this fix.
+///
+/// This also tests that shaders generated with this fix execute correctly.
+#[gpu_test]
+static CONTINUE_SWITCH: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(TestParameters::default().force_fxc(true))
+    .run_async(|ctx| async move { test_impl(&ctx).await });
+
+async fn test_impl(ctx: &TestingContext) {
+    const TEXTURE_HEIGHT: u32 = 2;
+    const TEXTURE_WIDTH: u32 = 2;
+    const BUFFER_SIZE: usize = (TEXTURE_WIDTH * TEXTURE_HEIGHT * 4) as usize;
+
+    let texture = ctx.device.create_texture(&wgpu::TextureDescriptor {
+        label: Some("Offscreen texture"),
+        size: wgpu::Extent3d {
+            width: TEXTURE_WIDTH,
+            height: TEXTURE_HEIGHT,
+            depth_or_array_layers: 1,
+        },
+        mip_level_count: 1,
+        sample_count: 1,
+        dimension: wgpu::TextureDimension::D2,
+        format: wgpu::TextureFormat::Rgba8Unorm,
+        usage: wgpu::TextureUsages::COPY_SRC | wgpu::TextureUsages::RENDER_ATTACHMENT,
+        view_formats: &[],
+    });
+    let texture_view = texture.create_view(&wgpu::TextureViewDescriptor::default());
+
+    let shader = ctx
+        .device
+        .create_shader_module(wgpu::include_wgsl!("issue_4485.wgsl"));
+
+    let pipeline = ctx
+        .device
+        .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
+            label: Some("Pipeline"),
+            layout: None,
+            vertex: wgpu::VertexState {
+                module: &shader,
+                entry_point: "vs_main",
+                compilation_options: Default::default(),
+                buffers: &[],
+            },
+            primitive: wgpu::PrimitiveState::default(),
+            depth_stencil: None,
+            multisample: wgpu::MultisampleState::default(),
+            fragment: Some(wgpu::FragmentState {
+                module: &shader,
+                entry_point: "fs_main",
+                compilation_options: Default::default(),
+                targets: &[Some(wgpu::ColorTargetState {
+                    format: wgpu::TextureFormat::Rgba8Unorm,
+                    blend: None,
+                    write_mask: wgpu::ColorWrites::ALL,
+                })],
+            }),
+            multiview: None,
+            cache: None,
+        });
+
+    let readback_buffer = image::ReadbackBuffers::new(&ctx.device, &texture);
+    {
+        let mut encoder = ctx
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
+        {
+            let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+                label: Some("Renderpass"),
+                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+                    view: &texture_view,
+                    resolve_target: None,
+                    ops: wgpu::Operations {
+                        // Important: this isn't the color expected below
+                        load: wgpu::LoadOp::Clear(wgpu::Color {
+                            r: 0.0,
+                            g: 0.0,
+                            b: 0.0,
+                            a: 0.0,
+                        }),
+                        store: wgpu::StoreOp::Store,
+                    },
+                })],
+                depth_stencil_attachment: None,
+                timestamp_writes: None,
+                occlusion_query_set: None,
+            });
+            render_pass.set_pipeline(&pipeline);
+            render_pass.draw(0..3, 0..1);
+        }
+        readback_buffer.copy_from(&ctx.device, &mut encoder, &texture);
+        ctx.queue.submit(Some(encoder.finish()));
+    }
+
+    let expected_data = [255; BUFFER_SIZE];
+    readback_buffer
+        .assert_buffer_contents(ctx, &expected_data)
+        .await;
+}
diff --git a/tests/tests/regression/issue_4485.wgsl b/tests/tests/regression/issue_4485.wgsl
new file mode 100644
index 0000000000..e72ed6d1ea
--- /dev/null
+++ b/tests/tests/regression/issue_4485.wgsl
@@ -0,0 +1,108 @@
+// meant to be called with 3 vertex indices: 0, 1, 2
+// draws one large triangle over the clip space like this:
+// (the asterisks represent the clip space bounds)
+//-1,1           1,1
+// ---------------------------------
+// |              *              .
+// |              *           .
+// |              *        .
+// |              *      .
+// |              *    .
+// |              * .
+// |***************
+// |            . 1,-1
+// |          .
+// |       .
+// |     .
+// |   .
+// |.
+@vertex
+fn vs_main(@builtin(vertex_index) vertex_index: u32) ->  @builtin(position) vec4<f32> {
+    let x = i32(vertex_index) / 2;
+    let y = i32(vertex_index) & 1;
+    return vec4<f32>(
+        f32(x) * 4.0 - 1.0,
+        1.0 - f32(y) * 4.0,
+        0.0, 1.0
+    );
+}
+
+@fragment
+fn fs_main() -> @location(0) vec4<f32> {
+    var x = 0.0;
+    loop {
+        if x != 0.0 { break; }
+        x = 0.5;
+        // Compiled to a do-while in hlsl and glsl,
+        // we want to confirm that continue applies to outer loop.
+        switch 0 {
+            default {
+                x = 1.0;
+                continue;
+            }
+        }
+        x = 0.0;
+    }
+    // expect x == 1.0
+
+    var y = 0.0;
+    loop {
+        if y != 0.0 { break; }
+        y = 0.5;
+        switch 1 {
+            case 0 {
+                continue;
+            }
+            default {}
+        }
+        // test that loop doesn't continue after the switch when the continue case wasn't executed
+        y = 1.0;
+        break;
+    }
+    // expect y == 1.0
+
+    var z = 0.0;
+    loop {
+        if z != 0.0 { break; }
+        switch 0 {
+            case 0 {
+                z = 0.5;
+            }
+            default {
+                z = 0.5;
+            }
+        }
+        // test that loop doesn't continue after the switch that contains no continue statements
+        z = 1.0;
+    }
+    // expect z == 1.0
+
+    var w = 0.0;
+    loop {
+        if w != 0.0 { break; }
+        switch 0 {
+            case 0 {
+                loop {
+                    // continue in loop->switch->loop->switch->switch should affect inner loop
+                    switch 1 {
+                        case 0 {}
+                        default {
+                            switch 0 {
+                                default { continue; }
+                            }
+                        }
+                    }
+                    w = 0.5;
+                    continuing { break if true; } // `continue` lands here, ending the inner loop
+                }
+            }
+            default {
+                w = 0.5;
+            }
+        }
+        if w == 0.0 { w = 1.0; }
+    }
+    // expect w == 1.0
+
+    return vec4<f32>(x, y, z, w);
+}
diff --git a/tests/tests/regression/issue_4514.rs b/tests/tests/regression/issue_4514.rs
new file mode 100644
index 0000000000..f447f879bf
--- /dev/null
+++ b/tests/tests/regression/issue_4514.rs
@@ -0,0 +1,106 @@
+use wgpu_test::{gpu_test, image, GpuTestConfiguration, TestParameters, TestingContext};
+
+/// FXC and potentially some glsl consumers have a bug when handling switch statements on a constant
+/// with just a default case. (not sure if the constant part is relevant)
+/// See <https://github.com/gfx-rs/wgpu/issues/4514>.
+///
+/// This test will fail on Dx12 with FXC if this issue is not worked around.
+///
+/// So far no specific buggy glsl consumers have been identified and it isn't known whether the
+/// bug is avoided there.
+#[gpu_test]
+static DEGENERATE_SWITCH: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(TestParameters::default().force_fxc(true))
+    .run_async(|ctx| async move { test_impl(&ctx).await });
+
+async fn test_impl(ctx: &TestingContext) {
+    const TEXTURE_HEIGHT: u32 = 2;
+    const TEXTURE_WIDTH: u32 = 2;
+    const BUFFER_SIZE: usize = (TEXTURE_WIDTH * TEXTURE_HEIGHT * 4) as usize;
+
+    let texture = ctx.device.create_texture(&wgpu::TextureDescriptor {
+        label: Some("Offscreen texture"),
+        size: wgpu::Extent3d {
+            width: TEXTURE_WIDTH,
+            height: TEXTURE_HEIGHT,
+            depth_or_array_layers: 1,
+        },
+        mip_level_count: 1,
+        sample_count: 1,
+        dimension: wgpu::TextureDimension::D2,
+        format: wgpu::TextureFormat::Rgba8Unorm,
+        usage: wgpu::TextureUsages::COPY_SRC | wgpu::TextureUsages::RENDER_ATTACHMENT,
+        view_formats: &[],
+    });
+    let texture_view = texture.create_view(&wgpu::TextureViewDescriptor::default());
+
+    let shader = ctx
+        .device
+        .create_shader_module(wgpu::include_wgsl!("issue_4514.wgsl"));
+
+    let pipeline = ctx
+        .device
+        .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
+            label: Some("Pipeline"),
+            layout: None,
+            vertex: wgpu::VertexState {
+                module: &shader,
+                entry_point: "vs_main",
+                compilation_options: Default::default(),
+                buffers: &[],
+            },
+            primitive: wgpu::PrimitiveState::default(),
+            depth_stencil: None,
+            multisample: wgpu::MultisampleState::default(),
+            fragment: Some(wgpu::FragmentState {
+                module: &shader,
+                entry_point: "fs_main",
+                compilation_options: Default::default(),
+                targets: &[Some(wgpu::ColorTargetState {
+                    format: wgpu::TextureFormat::Rgba8Unorm,
+                    blend: None,
+                    write_mask: wgpu::ColorWrites::ALL,
+                })],
+            }),
+            multiview: None,
+            cache: None,
+        });
+
+    let readback_buffer = image::ReadbackBuffers::new(&ctx.device, &texture);
+    {
+        let mut encoder = ctx
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
+        {
+            let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+                label: Some("Renderpass"),
+                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+                    view: &texture_view,
+                    resolve_target: None,
+                    ops: wgpu::Operations {
+                        // Important: this isn't the color expected below
+                        load: wgpu::LoadOp::Clear(wgpu::Color {
+                            r: 0.0,
+                            g: 0.0,
+                            b: 0.0,
+                            a: 0.0,
+                        }),
+                        store: wgpu::StoreOp::Store,
+                    },
+                })],
+                depth_stencil_attachment: None,
+                timestamp_writes: None,
+                occlusion_query_set: None,
+            });
+            render_pass.set_pipeline(&pipeline);
+            render_pass.draw(0..3, 0..1);
+        }
+        readback_buffer.copy_from(&ctx.device, &mut encoder, &texture);
+        ctx.queue.submit(Some(encoder.finish()));
+    }
+
+    let expected_data = [255; BUFFER_SIZE];
+    readback_buffer
+        .assert_buffer_contents(ctx, &expected_data)
+        .await;
+}
diff --git a/tests/tests/regression/issue_4514.wgsl b/tests/tests/regression/issue_4514.wgsl
new file mode 100644
index 0000000000..d4bd2f80c0
--- /dev/null
+++ b/tests/tests/regression/issue_4514.wgsl
@@ -0,0 +1,68 @@
+// meant to be called with 3 vertex indices: 0, 1, 2
+// draws one large triangle over the clip space like this:
+// (the asterisks represent the clip space bounds)
+//-1,1           1,1
+// ---------------------------------
+// |              *              .
+// |              *           .
+// |              *        .
+// |              *      .
+// |              *    .
+// |              * .
+// |***************
+// |            . 1,-1
+// |          .
+// |       .
+// |     .
+// |   .
+// |.
+@vertex
+fn vs_main(@builtin(vertex_index) vertex_index: u32) ->  @builtin(position) vec4<f32> {
+    let x = i32(vertex_index) / 2;
+    let y = i32(vertex_index) & 1;
+    return vec4<f32>(
+        f32(x) * 4.0 - 1.0,
+        1.0 - f32(y) * 4.0,
+        0.0, 1.0
+    );
+}
+
+
+@fragment
+fn fs_main(@builtin(position) coord_in: vec4<f32>) -> @location(0) vec4<f32> {
+    var x = 0.0;
+    // Succeeds on FXC without workaround.
+    switch i32(coord_in.x) {
+        default {
+            x = 1.0;
+        }
+    }
+    var y = 0.0;
+    // Fails on FXC without workaround.
+    // (even if we adjust switch above to give different x values based on the input coord)
+    switch i32(x * 30.0) {
+        default {
+            y = 1.0;
+        }
+    }
+    var z = 0.0;
+    // Multiple cases with a single body also fails on FXC without a workaround.
+    switch 0 {
+        case 0, 2, default {
+            z = 1.0;
+        }
+    }
+
+    var w = 0.0;
+    // Succeeds on FXC without workaround.
+    switch 0 {
+        case 0 {
+            w = 1.0;
+        }
+        default {
+            w = 1.0;
+        }
+    }
+
+    return vec4<f32>(x, y, z, w);
+}
diff --git a/tests/tests/root.rs b/tests/tests/root.rs
index 384cfcf78f..df0dce5fed 100644
--- a/tests/tests/root.rs
+++ b/tests/tests/root.rs
@@ -3,6 +3,8 @@ mod regression {
     mod issue_3457;
     mod issue_4024;
     mod issue_4122;
+    mod issue_4485;
+    mod issue_4514;
     mod issue_5553;
 }
 

From 0aca442d1563504773d683c3d57d78cda4d3123c Mon Sep 17 00:00:00 2001
From: Bruce Mitchener <bruce.mitchener@gmail.com>
Date: Wed, 24 Jul 2024 13:55:10 +0700
Subject: [PATCH 074/226] typos: More precise config, remove refs to "implace"
 (#6018)

The config can be made more precise so as to not accidentally
ignore some issues due to case (in-)sensitivity and searching for
substrings with `extend-words`.

Additionally, we can also check hidden configuration directories
such as `.github`.

The usage of `implace_it` went away some time ago, but not all
references were removed.
---
 .github/dependabot.yml     |  2 +-
 typos.toml                 | 20 +++++++++++++++-----
 wgpu-hal/src/vulkan/mod.rs |  2 --
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 3958eade2c..edfc210ef8 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -7,7 +7,7 @@ updates:
       interval: weekly
     # This allows dependabot to update _all_ lockfile packages.
     #
-    # These will be grouped into the existing group update PRs, so shoudn't generate additional jobs.
+    # These will be grouped into the existing group update PRs, so shouldn't generate additional jobs.
     allow:
       # Allow both direct and indirect updates for all packages
       - dependency-type: "all"
diff --git a/typos.toml b/typos.toml
index cb33d95bd9..47406a8074 100644
--- a/typos.toml
+++ b/typos.toml
@@ -1,5 +1,8 @@
 [files]
+# Include .github, .cargo, etc.
+ignore-hidden = false
 extend-exclude = [
+    '/.git',
     # spirv-asm isn't real source code
     '*.spvasm',
     'etc/big-picture.xml',
@@ -13,15 +16,22 @@ extend-exclude = [
 [default.extend-words]
 # Things that aren't typos
 lod = "lod"
-inout = "inout"
-derivate = "derivate"
-implace = "implace"
-Ded = "Ded"           # This shows up in "ANDed"
-pn = "pn"             # used as a normal name in debug-symbol-terrain.wgsl
 
 # Usernames
 Healthire = "Healthire"
 REASY = "REASY"
 
 [type.rust.extend-identifiers]
+ANDed = "ANDed"
 D3DCOLORtoUBYTE4 = "D3DCOLORtoUBYTE4"
+Derivate = "Derivate"
+inout = "inout"
+
+[type.wgsl]
+extend-glob = ["*.wgsl"]
+
+[type.wgsl.extend-identifiers]
+pn = "pn"
+
+[type.yaml.extend-words]
+dota = "dota"
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index f0d881614c..d4be64548a 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -7,7 +7,6 @@ Ash expects slices, which we don't generally have available.
 We cope with this requirement by the combination of the following ways:
   - temporarily allocating `Vec` on heap, where overhead is permitted
   - growing temporary local storage
-  - using `implace_it` on iterators
 
 ## Framebuffers and Render passes
 
@@ -714,7 +713,6 @@ impl Temp {
         self.marker.clear();
         self.buffer_barriers.clear();
         self.image_barriers.clear();
-        //see also - https://github.com/NotIntMan/inplace_it/issues/8
     }
 
     fn make_c_str(&mut self, name: &str) -> &CStr {

From 9b680e69971dddd2fc890d0a7bfd50c95d2d0f86 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Wed, 24 Jul 2024 08:56:14 +0200
Subject: [PATCH 075/226] Undo doing fewer bench iterations under `cfg(test)`
 (#6021)

---
 benches/benches/computepass.rs | 6 ------
 benches/benches/renderpass.rs  | 4 ----
 2 files changed, 10 deletions(-)

diff --git a/benches/benches/computepass.rs b/benches/benches/computepass.rs
index 6ddbf55620..c42e16a136 100644
--- a/benches/benches/computepass.rs
+++ b/benches/benches/computepass.rs
@@ -10,20 +10,14 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator};
 
 use crate::DeviceState;
 
-#[cfg(not(test))]
 const DISPATCH_COUNT: usize = 10_000;
-#[cfg(test)]
-const DISPATCH_COUNT: usize = 8; // Running with up to 8 threads.
 
 // Currently bindless is _much_ slower than with regularly resources,
 // since wgpu needs to issues barriers for all resources between each dispatch for all read/write textures & buffers.
 // This is in fact so slow that it makes the benchmark unusable when we use the same amount of
 // resources as the regular benchmark.
 // For details see https://github.com/gfx-rs/wgpu/issues/5766
-#[cfg(not(test))]
 const DISPATCH_COUNT_BINDLESS: usize = 1_000;
-#[cfg(test)]
-const DISPATCH_COUNT_BINDLESS: usize = 8; // Running with up to 8 threads.
 
 // Must match the number of textures in the computepass.wgsl shader
 const TEXTURES_PER_DISPATCH: usize = 2;
diff --git a/benches/benches/renderpass.rs b/benches/benches/renderpass.rs
index 9a204c0f79..37387b4fdf 100644
--- a/benches/benches/renderpass.rs
+++ b/benches/benches/renderpass.rs
@@ -10,10 +10,6 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator};
 
 use crate::DeviceState;
 
-#[cfg(test)]
-const DRAW_COUNT: usize = 8; // Running with up to 8 threads.
-
-#[cfg(not(test))]
 const DRAW_COUNT: usize = 10_000;
 
 // Must match the number of textures in the renderpass.wgsl shader

From 7b2e08fb94f0dd93236f9b050cc4f5538d473d96 Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Wed, 24 Jul 2024 02:56:45 -0400
Subject: [PATCH 076/226] refactor: satisfy
 `clippy::missing_transmute_annotations` (#6024)

* refactor(metal): satisfy `clippy::missing_transmute_annotations`

* refactor(gles): satisfy `clippy::missing_transmute_annotations`

* refactor(metal): `metal::Surface::view`: use `ptr::cast` instead of `as`
---
 wgpu-core/src/instance.rs     | 17 ++++++++++++++---
 wgpu-hal/src/gles/device.rs   | 16 ++++++----------
 wgpu-hal/src/metal/surface.rs | 15 ++++++++++++---
 3 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index ee50bd949f..cd38942187 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -577,9 +577,20 @@ impl Global {
             metal: Some(self.instance.metal.as_ref().map_or(
                 Err(CreateSurfaceError::BackendNotEnabled(Backend::Metal)),
                 |inst| {
-                    // we don't want to link to metal-rs for this
-                    #[allow(clippy::transmute_ptr_to_ref)]
-                    Ok(inst.create_surface_from_layer(unsafe { std::mem::transmute(layer) }))
+                    let layer = layer.cast();
+                    // SAFETY: We do this cast and deref. (rather than using `metal` to get the
+                    // object we want) to avoid direct coupling on the `metal` crate.
+                    //
+                    // To wit, this pointer…
+                    //
+                    // - …is properly aligned.
+                    // - …is dereferenceable to a `MetalLayerRef` as an invariant of the `metal`
+                    //   field.
+                    // - …points to an _initialized_ `MetalLayerRef`.
+                    // - …is only ever aliased via an immutable reference that lives within this
+                    //   lexical scope.
+                    let layer = unsafe { &*layer };
+                    Ok(inst.create_surface_from_layer(layer))
                 },
             )?),
             #[cfg(dx12)]
diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs
index 67d0a29713..4f187709a7 100644
--- a/wgpu-hal/src/gles/device.rs
+++ b/wgpu-hal/src/gles/device.rs
@@ -9,8 +9,6 @@ use std::{
 };
 
 use arrayvec::ArrayVec;
-#[cfg(native)]
-use std::mem;
 use std::sync::atomic::Ordering;
 
 type ShaderStage<'a> = (
@@ -178,9 +176,7 @@ impl super::Device {
         let raw = unsafe { gl.create_shader(target) }.unwrap();
         #[cfg(native)]
         if gl.supports_debug() {
-            //TODO: remove all transmutes from `object_label`
-            // https://github.com/grovesNL/glow/issues/186
-            let name = unsafe { mem::transmute(raw) };
+            let name = raw.0.get();
             unsafe { gl.object_label(glow::SHADER, name, label) };
         }
 
@@ -366,7 +362,7 @@ impl super::Device {
         #[cfg(native)]
         if let Some(label) = label {
             if private_caps.contains(PrivateCapabilities::DEBUG_FNS) {
-                let name = unsafe { mem::transmute(program) };
+                let name = program.0.get();
                 unsafe { gl.object_label(glow::PROGRAM, name, Some(label)) };
             }
         }
@@ -621,7 +617,7 @@ impl crate::Device for super::Device {
                 .private_caps
                 .contains(PrivateCapabilities::DEBUG_FNS)
             {
-                let name = unsafe { mem::transmute(raw) };
+                let name = raw.map_or(0, |buf| buf.0.get());
                 unsafe { gl.object_label(glow::BUFFER, name, Some(label)) };
             }
         }
@@ -768,7 +764,7 @@ impl crate::Device for super::Device {
                     .private_caps
                     .contains(PrivateCapabilities::DEBUG_FNS)
                 {
-                    let name = unsafe { mem::transmute(raw) };
+                    let name = raw.0.get();
                     unsafe { gl.object_label(glow::RENDERBUFFER, name, Some(label)) };
                 }
             }
@@ -936,7 +932,7 @@ impl crate::Device for super::Device {
                     .private_caps
                     .contains(PrivateCapabilities::DEBUG_FNS)
                 {
-                    let name = unsafe { mem::transmute(raw) };
+                    let name = raw.0.get();
                     unsafe { gl.object_label(glow::TEXTURE, name, Some(label)) };
                 }
             }
@@ -1088,7 +1084,7 @@ impl crate::Device for super::Device {
                 .private_caps
                 .contains(PrivateCapabilities::DEBUG_FNS)
             {
-                let name = unsafe { mem::transmute(raw) };
+                let name = raw.0.get();
                 unsafe { gl.object_label(glow::SAMPLER, name, Some(label)) };
             }
         }
diff --git a/wgpu-hal/src/metal/surface.rs b/wgpu-hal/src/metal/surface.rs
index 1a11056609..b0ea55e9fe 100644
--- a/wgpu-hal/src/metal/surface.rs
+++ b/wgpu-hal/src/metal/surface.rs
@@ -1,6 +1,6 @@
 #![allow(clippy::let_unit_value)] // `let () =` being used to constrain result type
 
-use std::{mem, os::raw::c_void, ptr::NonNull, sync::Once, thread};
+use std::{os::raw::c_void, ptr::NonNull, sync::Once, thread};
 
 use core_graphics_types::{
     base::CGFloat,
@@ -82,10 +82,19 @@ impl super::Surface {
         view: *mut c_void,
         delegate: Option<&HalManagedMetalLayerDelegate>,
     ) -> Self {
-        let view = view as *mut Object;
+        let view = view.cast::<Object>();
         let render_layer = {
             let layer = unsafe { Self::get_metal_layer(view, delegate) };
-            unsafe { mem::transmute::<_, &metal::MetalLayerRef>(layer) }
+            let layer = layer.cast::<metal::MetalLayerRef>();
+            // SAFETY: This pointer…
+            //
+            // - …is properly aligned.
+            // - …is dereferenceable to a `MetalLayerRef` as an invariant of the `metal`
+            //   field.
+            // - …points to an _initialized_ `MetalLayerRef`.
+            // - …is only ever aliased via an immutable reference that lives within this
+            //   lexical scope.
+            unsafe { &*layer }
         }
         .to_owned();
         let _: *mut c_void = msg_send![view, retain];

From 4af1991569784abcac3456328c6682bf84784b10 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 24 Jul 2024 09:19:51 +0200
Subject: [PATCH 077/226] build(deps): bump crate-ci/typos from 1.23.2 to
 1.23.3 (#6028)

Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.23.2 to 1.23.3.
- [Release notes](https://github.com/crate-ci/typos/releases)
- [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md)
- [Commits](https://github.com/crate-ci/typos/compare/v1.23.2...v1.23.3)

---
updated-dependencies:
- dependency-name: crate-ci/typos
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a03a08f7ca..edf8501c57 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -631,7 +631,7 @@ jobs:
           cargo fmt --manifest-path xtask/Cargo.toml -- --check
 
       - name: Check for typos
-        uses: crate-ci/typos@v1.23.2
+        uses: crate-ci/typos@v1.23.3
 
   check-cts-runner:
     # runtime is normally 2 minutes

From e216566e48502a32b4ca1f5b2db49515aa7b80db Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Mon, 29 Jan 2024 21:41:55 -0500
Subject: [PATCH 078/226] feat(shader)!: make `ProgrammableStage::entry_point`
 optional

---
 benches/benches/computepass.rs                |  4 ++--
 benches/benches/renderpass.rs                 |  8 +++----
 examples/src/boids/mod.rs                     |  6 ++---
 examples/src/bunnymark/mod.rs                 |  4 ++--
 examples/src/conservative_raster/mod.rs       | 16 +++++++-------
 examples/src/cube/mod.rs                      |  8 +++----
 examples/src/hello_compute/mod.rs             |  2 +-
 examples/src/hello_synchronization/mod.rs     |  4 ++--
 examples/src/hello_triangle/mod.rs            |  4 ++--
 examples/src/hello_workgroups/mod.rs          |  2 +-
 examples/src/mipmap/mod.rs                    |  8 +++----
 examples/src/msaa_line/mod.rs                 |  4 ++--
 examples/src/render_to_texture/mod.rs         |  4 ++--
 examples/src/repeated_compute/mod.rs          |  2 +-
 examples/src/shadow/mod.rs                    |  8 +++----
 examples/src/skybox/mod.rs                    |  8 +++----
 examples/src/srgb_blend/mod.rs                |  4 ++--
 examples/src/stencil_triangles/mod.rs         |  8 +++----
 examples/src/storage_texture/mod.rs           |  2 +-
 examples/src/texture_arrays/mod.rs            |  4 ++--
 examples/src/timestamp_queries/mod.rs         |  6 ++---
 examples/src/uniform_values/mod.rs            |  4 ++--
 examples/src/water/mod.rs                     |  8 +++----
 tests/src/image.rs                            |  2 +-
 tests/tests/bgra8unorm_storage.rs             |  2 +-
 tests/tests/bind_group_layout_dedup.rs        | 10 ++++-----
 tests/tests/buffer.rs                         |  4 ++--
 tests/tests/compute_pass_ownership.rs         |  2 +-
 tests/tests/device.rs                         | 10 ++++-----
 tests/tests/mem_leaks.rs                      |  4 ++--
 tests/tests/nv12_texture/mod.rs               |  4 ++--
 tests/tests/occlusion_query/mod.rs            |  2 +-
 tests/tests/partially_bounded_arrays/mod.rs   |  2 +-
 tests/tests/pipeline.rs                       |  4 ++--
 tests/tests/pipeline_cache.rs                 |  4 ++--
 tests/tests/push_constants.rs                 |  2 +-
 tests/tests/regression/issue_3349.rs          |  4 ++--
 tests/tests/regression/issue_3457.rs          |  8 +++----
 tests/tests/regression/issue_5553.rs          |  4 ++--
 tests/tests/render_pass_ownership.rs          |  4 ++--
 tests/tests/scissor_tests/mod.rs              |  4 ++--
 tests/tests/shader/mod.rs                     |  2 +-
 tests/tests/shader/zero_init_workgroup_mem.rs |  4 ++--
 tests/tests/shader_primitive_index/mod.rs     |  4 ++--
 tests/tests/shader_view_format/mod.rs         |  5 +++--
 tests/tests/subgroup_operations/mod.rs        |  2 +-
 tests/tests/vertex_formats/mod.rs             |  4 ++--
 tests/tests/vertex_indices/mod.rs             |  6 ++---
 wgpu/src/api/compute_pipeline.rs              | 11 +++++++---
 wgpu/src/api/render_pipeline.rs               | 22 ++++++++++++++-----
 wgpu/src/backend/webgpu.rs                    | 12 +++++++---
 wgpu/src/backend/wgpu_core.rs                 |  6 ++---
 52 files changed, 152 insertions(+), 130 deletions(-)

diff --git a/benches/benches/computepass.rs b/benches/benches/computepass.rs
index c42e16a136..9a69eb46eb 100644
--- a/benches/benches/computepass.rs
+++ b/benches/benches/computepass.rs
@@ -236,7 +236,7 @@ impl ComputepassState {
                     label: Some("Compute Pipeline"),
                     layout: Some(&pipeline_layout),
                     module: &sm,
-                    entry_point: "cs_main",
+                    entry_point: Some("cs_main"),
                     compilation_options: wgpu::PipelineCompilationOptions::default(),
                     cache: None,
                 });
@@ -331,7 +331,7 @@ impl ComputepassState {
                         label: Some("Compute Pipeline bindless"),
                         layout: Some(&bindless_pipeline_layout),
                         module: &bindless_sm,
-                        entry_point: "cs_main",
+                        entry_point: Some("cs_main"),
                         compilation_options: wgpu::PipelineCompilationOptions::default(),
                         cache: None,
                     });
diff --git a/benches/benches/renderpass.rs b/benches/benches/renderpass.rs
index 37387b4fdf..f31fc07580 100644
--- a/benches/benches/renderpass.rs
+++ b/benches/benches/renderpass.rs
@@ -182,7 +182,7 @@ impl RenderpassState {
                     layout: Some(&pipeline_layout),
                     vertex: wgpu::VertexState {
                         module: &sm,
-                        entry_point: "vs_main",
+                        entry_point: Some("vs_main"),
                         buffers: &vertex_buffer_layouts,
                         compilation_options: wgpu::PipelineCompilationOptions::default(),
                     },
@@ -199,7 +199,7 @@ impl RenderpassState {
                     multisample: wgpu::MultisampleState::default(),
                     fragment: Some(wgpu::FragmentState {
                         module: &sm,
-                        entry_point: "fs_main",
+                        entry_point: Some("fs_main"),
                         targets: &[Some(wgpu::ColorTargetState {
                             format: wgpu::TextureFormat::Rgba8UnormSrgb,
                             blend: None,
@@ -280,7 +280,7 @@ impl RenderpassState {
                     layout: Some(&bindless_pipeline_layout),
                     vertex: wgpu::VertexState {
                         module: &bindless_shader_module,
-                        entry_point: "vs_main",
+                        entry_point: Some("vs_main"),
                         buffers: &vertex_buffer_layouts,
                         compilation_options: wgpu::PipelineCompilationOptions::default(),
                     },
@@ -297,7 +297,7 @@ impl RenderpassState {
                     multisample: wgpu::MultisampleState::default(),
                     fragment: Some(wgpu::FragmentState {
                         module: &bindless_shader_module,
-                        entry_point: "fs_main",
+                        entry_point: Some("fs_main"),
                         targets: &[Some(wgpu::ColorTargetState {
                             format: wgpu::TextureFormat::Rgba8UnormSrgb,
                             blend: None,
diff --git a/examples/src/boids/mod.rs b/examples/src/boids/mod.rs
index 7b1b8f0bc3..8c3581824b 100644
--- a/examples/src/boids/mod.rs
+++ b/examples/src/boids/mod.rs
@@ -131,7 +131,7 @@ impl crate::framework::Example for Example {
             layout: Some(&render_pipeline_layout),
             vertex: wgpu::VertexState {
                 module: &draw_shader,
-                entry_point: "main_vs",
+                entry_point: Some("main_vs"),
                 compilation_options: Default::default(),
                 buffers: &[
                     wgpu::VertexBufferLayout {
@@ -148,7 +148,7 @@ impl crate::framework::Example for Example {
             },
             fragment: Some(wgpu::FragmentState {
                 module: &draw_shader,
-                entry_point: "main_fs",
+                entry_point: Some("main_fs"),
                 compilation_options: Default::default(),
                 targets: &[Some(config.view_formats[0].into())],
             }),
@@ -165,7 +165,7 @@ impl crate::framework::Example for Example {
             label: Some("Compute pipeline"),
             layout: Some(&compute_pipeline_layout),
             module: &compute_shader,
-            entry_point: "main",
+            entry_point: Some("main"),
             compilation_options: Default::default(),
             cache: None,
         });
diff --git a/examples/src/bunnymark/mod.rs b/examples/src/bunnymark/mod.rs
index b5b33b54d5..54bdc2a941 100644
--- a/examples/src/bunnymark/mod.rs
+++ b/examples/src/bunnymark/mod.rs
@@ -202,13 +202,13 @@ impl crate::framework::Example for Example {
             layout: Some(&pipeline_layout),
             vertex: wgpu::VertexState {
                 module: &shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &[],
             },
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(wgpu::ColorTargetState {
                     format: config.view_formats[0],
diff --git a/examples/src/conservative_raster/mod.rs b/examples/src/conservative_raster/mod.rs
index 116ed8623b..d029134756 100644
--- a/examples/src/conservative_raster/mod.rs
+++ b/examples/src/conservative_raster/mod.rs
@@ -96,13 +96,13 @@ impl crate::framework::Example for Example {
                 layout: Some(&pipeline_layout_empty),
                 vertex: wgpu::VertexState {
                     module: &shader_triangle_and_lines,
-                    entry_point: "vs_main",
+                    entry_point: Some("vs_main"),
                     compilation_options: Default::default(),
                     buffers: &[],
                 },
                 fragment: Some(wgpu::FragmentState {
                     module: &shader_triangle_and_lines,
-                    entry_point: "fs_main_red",
+                    entry_point: Some("fs_main_red"),
                     compilation_options: Default::default(),
                     targets: &[Some(RENDER_TARGET_FORMAT.into())],
                 }),
@@ -122,13 +122,13 @@ impl crate::framework::Example for Example {
                 layout: Some(&pipeline_layout_empty),
                 vertex: wgpu::VertexState {
                     module: &shader_triangle_and_lines,
-                    entry_point: "vs_main",
+                    entry_point: Some("vs_main"),
                     compilation_options: Default::default(),
                     buffers: &[],
                 },
                 fragment: Some(wgpu::FragmentState {
                     module: &shader_triangle_and_lines,
-                    entry_point: "fs_main_blue",
+                    entry_point: Some("fs_main_blue"),
                     compilation_options: Default::default(),
                     targets: &[Some(RENDER_TARGET_FORMAT.into())],
                 }),
@@ -149,13 +149,13 @@ impl crate::framework::Example for Example {
                     layout: Some(&pipeline_layout_empty),
                     vertex: wgpu::VertexState {
                         module: &shader_triangle_and_lines,
-                        entry_point: "vs_main",
+                        entry_point: Some("vs_main"),
                         compilation_options: Default::default(),
                         buffers: &[],
                     },
                     fragment: Some(wgpu::FragmentState {
                         module: &shader_triangle_and_lines,
-                        entry_point: "fs_main_white",
+                        entry_point: Some("fs_main_white"),
                         compilation_options: Default::default(),
                         targets: &[Some(config.view_formats[0].into())],
                     }),
@@ -213,13 +213,13 @@ impl crate::framework::Example for Example {
                     layout: Some(&pipeline_layout),
                     vertex: wgpu::VertexState {
                         module: &shader,
-                        entry_point: "vs_main",
+                        entry_point: Some("vs_main"),
                         compilation_options: Default::default(),
                         buffers: &[],
                     },
                     fragment: Some(wgpu::FragmentState {
                         module: &shader,
-                        entry_point: "fs_main",
+                        entry_point: Some("fs_main"),
                         compilation_options: Default::default(),
                         targets: &[Some(config.view_formats[0].into())],
                     }),
diff --git a/examples/src/cube/mod.rs b/examples/src/cube/mod.rs
index 9828157e57..608fae0088 100644
--- a/examples/src/cube/mod.rs
+++ b/examples/src/cube/mod.rs
@@ -243,13 +243,13 @@ impl crate::framework::Example for Example {
             layout: Some(&pipeline_layout),
             vertex: wgpu::VertexState {
                 module: &shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &vertex_buffers,
             },
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(config.view_formats[0].into())],
             }),
@@ -272,13 +272,13 @@ impl crate::framework::Example for Example {
                 layout: Some(&pipeline_layout),
                 vertex: wgpu::VertexState {
                     module: &shader,
-                    entry_point: "vs_main",
+                    entry_point: Some("vs_main"),
                     compilation_options: Default::default(),
                     buffers: &vertex_buffers,
                 },
                 fragment: Some(wgpu::FragmentState {
                     module: &shader,
-                    entry_point: "fs_wire",
+                    entry_point: Some("fs_wire"),
                     compilation_options: Default::default(),
                     targets: &[Some(wgpu::ColorTargetState {
                         format: config.view_formats[0],
diff --git a/examples/src/hello_compute/mod.rs b/examples/src/hello_compute/mod.rs
index fb23e13955..7f3c3f05bf 100644
--- a/examples/src/hello_compute/mod.rs
+++ b/examples/src/hello_compute/mod.rs
@@ -109,7 +109,7 @@ async fn execute_gpu_inner(
         label: None,
         layout: None,
         module: &cs_module,
-        entry_point: "main",
+        entry_point: Some("main"),
         compilation_options: Default::default(),
         cache: None,
     });
diff --git a/examples/src/hello_synchronization/mod.rs b/examples/src/hello_synchronization/mod.rs
index d98f1bb8d4..397af48c98 100644
--- a/examples/src/hello_synchronization/mod.rs
+++ b/examples/src/hello_synchronization/mod.rs
@@ -103,7 +103,7 @@ async fn execute(
         label: None,
         layout: Some(&pipeline_layout),
         module: &shaders_module,
-        entry_point: "patient_main",
+        entry_point: Some("patient_main"),
         compilation_options: Default::default(),
         cache: None,
     });
@@ -111,7 +111,7 @@ async fn execute(
         label: None,
         layout: Some(&pipeline_layout),
         module: &shaders_module,
-        entry_point: "hasty_main",
+        entry_point: Some("hasty_main"),
         compilation_options: Default::default(),
         cache: None,
     });
diff --git a/examples/src/hello_triangle/mod.rs b/examples/src/hello_triangle/mod.rs
index 41c0583506..7c82d49cf0 100644
--- a/examples/src/hello_triangle/mod.rs
+++ b/examples/src/hello_triangle/mod.rs
@@ -59,13 +59,13 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
         layout: Some(&pipeline_layout),
         vertex: wgpu::VertexState {
             module: &shader,
-            entry_point: "vs_main",
+            entry_point: Some("vs_main"),
             buffers: &[],
             compilation_options: Default::default(),
         },
         fragment: Some(wgpu::FragmentState {
             module: &shader,
-            entry_point: "fs_main",
+            entry_point: Some("fs_main"),
             compilation_options: Default::default(),
             targets: &[Some(swapchain_format.into())],
         }),
diff --git a/examples/src/hello_workgroups/mod.rs b/examples/src/hello_workgroups/mod.rs
index 0184981c05..3260aa8628 100644
--- a/examples/src/hello_workgroups/mod.rs
+++ b/examples/src/hello_workgroups/mod.rs
@@ -110,7 +110,7 @@ async fn run() {
         label: None,
         layout: Some(&pipeline_layout),
         module: &shader,
-        entry_point: "main",
+        entry_point: Some("main"),
         compilation_options: Default::default(),
         cache: None,
     });
diff --git a/examples/src/mipmap/mod.rs b/examples/src/mipmap/mod.rs
index 3e9250c702..33e23a474a 100644
--- a/examples/src/mipmap/mod.rs
+++ b/examples/src/mipmap/mod.rs
@@ -92,13 +92,13 @@ impl Example {
             layout: None,
             vertex: wgpu::VertexState {
                 module: &shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &[],
             },
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(TEXTURE_FORMAT.into())],
             }),
@@ -292,13 +292,13 @@ impl crate::framework::Example for Example {
             layout: None,
             vertex: wgpu::VertexState {
                 module: &shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &[],
             },
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(config.view_formats[0].into())],
             }),
diff --git a/examples/src/msaa_line/mod.rs b/examples/src/msaa_line/mod.rs
index 46bb743e99..e57a4461ab 100644
--- a/examples/src/msaa_line/mod.rs
+++ b/examples/src/msaa_line/mod.rs
@@ -53,7 +53,7 @@ impl Example {
             layout: Some(pipeline_layout),
             vertex: wgpu::VertexState {
                 module: shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &[wgpu::VertexBufferLayout {
                     array_stride: std::mem::size_of::<Vertex>() as wgpu::BufferAddress,
@@ -63,7 +63,7 @@ impl Example {
             },
             fragment: Some(wgpu::FragmentState {
                 module: shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(config.view_formats[0].into())],
             }),
diff --git a/examples/src/render_to_texture/mod.rs b/examples/src/render_to_texture/mod.rs
index c0922bc2ec..1d6f488d52 100644
--- a/examples/src/render_to_texture/mod.rs
+++ b/examples/src/render_to_texture/mod.rs
@@ -59,13 +59,13 @@ async fn run(_path: Option<String>) {
         layout: None,
         vertex: wgpu::VertexState {
             module: &shader,
-            entry_point: "vs_main",
+            entry_point: Some("vs_main"),
             compilation_options: Default::default(),
             buffers: &[],
         },
         fragment: Some(wgpu::FragmentState {
             module: &shader,
-            entry_point: "fs_main",
+            entry_point: Some("fs_main"),
             compilation_options: Default::default(),
             targets: &[Some(wgpu::TextureFormat::Rgba8UnormSrgb.into())],
         }),
diff --git a/examples/src/repeated_compute/mod.rs b/examples/src/repeated_compute/mod.rs
index 330b930f6f..5dac9ce7c2 100644
--- a/examples/src/repeated_compute/mod.rs
+++ b/examples/src/repeated_compute/mod.rs
@@ -245,7 +245,7 @@ impl WgpuContext {
             label: None,
             layout: Some(&pipeline_layout),
             module: &shader,
-            entry_point: "main",
+            entry_point: Some("main"),
             compilation_options: Default::default(),
             cache: None,
         });
diff --git a/examples/src/shadow/mod.rs b/examples/src/shadow/mod.rs
index b2c27f5892..7047ab598c 100644
--- a/examples/src/shadow/mod.rs
+++ b/examples/src/shadow/mod.rs
@@ -499,7 +499,7 @@ impl crate::framework::Example for Example {
                 layout: Some(&pipeline_layout),
                 vertex: wgpu::VertexState {
                     module: &shader,
-                    entry_point: "vs_bake",
+                    entry_point: Some("vs_bake"),
                     compilation_options: Default::default(),
                     buffers: &[vb_desc.clone()],
                 },
@@ -633,17 +633,17 @@ impl crate::framework::Example for Example {
                 layout: Some(&pipeline_layout),
                 vertex: wgpu::VertexState {
                     module: &shader,
-                    entry_point: "vs_main",
+                    entry_point: Some("vs_main"),
                     compilation_options: Default::default(),
                     buffers: &[vb_desc],
                 },
                 fragment: Some(wgpu::FragmentState {
                     module: &shader,
-                    entry_point: if supports_storage_resources {
+                    entry_point: Some(if supports_storage_resources {
                         "fs_main"
                     } else {
                         "fs_main_without_storage"
-                    },
+                    }),
                     compilation_options: Default::default(),
                     targets: &[Some(config.view_formats[0].into())],
                 }),
diff --git a/examples/src/skybox/mod.rs b/examples/src/skybox/mod.rs
index e526feedae..fd5532e6d1 100644
--- a/examples/src/skybox/mod.rs
+++ b/examples/src/skybox/mod.rs
@@ -198,13 +198,13 @@ impl crate::framework::Example for Example {
             layout: Some(&pipeline_layout),
             vertex: wgpu::VertexState {
                 module: &shader,
-                entry_point: "vs_sky",
+                entry_point: Some("vs_sky"),
                 compilation_options: Default::default(),
                 buffers: &[],
             },
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fs_sky",
+                entry_point: Some("fs_sky"),
                 compilation_options: Default::default(),
                 targets: &[Some(config.view_formats[0].into())],
             }),
@@ -228,7 +228,7 @@ impl crate::framework::Example for Example {
             layout: Some(&pipeline_layout),
             vertex: wgpu::VertexState {
                 module: &shader,
-                entry_point: "vs_entity",
+                entry_point: Some("vs_entity"),
                 compilation_options: Default::default(),
                 buffers: &[wgpu::VertexBufferLayout {
                     array_stride: std::mem::size_of::<Vertex>() as wgpu::BufferAddress,
@@ -238,7 +238,7 @@ impl crate::framework::Example for Example {
             },
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fs_entity",
+                entry_point: Some("fs_entity"),
                 compilation_options: Default::default(),
                 targets: &[Some(config.view_formats[0].into())],
             }),
diff --git a/examples/src/srgb_blend/mod.rs b/examples/src/srgb_blend/mod.rs
index 314fc92df2..63e5e79cb5 100644
--- a/examples/src/srgb_blend/mod.rs
+++ b/examples/src/srgb_blend/mod.rs
@@ -130,13 +130,13 @@ impl<const SRGB: bool> crate::framework::Example for Example<SRGB> {
             layout: Some(&pipeline_layout),
             vertex: wgpu::VertexState {
                 module: &shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &vertex_buffers,
             },
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(wgpu::ColorTargetState {
                     format: config.view_formats[0],
diff --git a/examples/src/stencil_triangles/mod.rs b/examples/src/stencil_triangles/mod.rs
index 8d638d20d1..d497eccc32 100644
--- a/examples/src/stencil_triangles/mod.rs
+++ b/examples/src/stencil_triangles/mod.rs
@@ -73,13 +73,13 @@ impl crate::framework::Example for Example {
             layout: Some(&pipeline_layout),
             vertex: wgpu::VertexState {
                 module: &shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &vertex_buffers,
             },
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(wgpu::ColorTargetState {
                     format: config.view_formats[0],
@@ -114,13 +114,13 @@ impl crate::framework::Example for Example {
             layout: Some(&pipeline_layout),
             vertex: wgpu::VertexState {
                 module: &shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &vertex_buffers,
             },
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(config.view_formats[0].into())],
             }),
diff --git a/examples/src/storage_texture/mod.rs b/examples/src/storage_texture/mod.rs
index d6a06d6e2f..76b95d09dd 100644
--- a/examples/src/storage_texture/mod.rs
+++ b/examples/src/storage_texture/mod.rs
@@ -100,7 +100,7 @@ async fn run(_path: Option<String>) {
         label: None,
         layout: Some(&pipeline_layout),
         module: &shader,
-        entry_point: "main",
+        entry_point: Some("main"),
         compilation_options: Default::default(),
         cache: None,
     });
diff --git a/examples/src/texture_arrays/mod.rs b/examples/src/texture_arrays/mod.rs
index b0f474b957..785b461802 100644
--- a/examples/src/texture_arrays/mod.rs
+++ b/examples/src/texture_arrays/mod.rs
@@ -320,7 +320,7 @@ impl crate::framework::Example for Example {
             layout: Some(&pipeline_layout),
             vertex: wgpu::VertexState {
                 module: &base_shader_module,
-                entry_point: "vert_main",
+                entry_point: Some("vert_main"),
                 compilation_options: Default::default(),
                 buffers: &[wgpu::VertexBufferLayout {
                     array_stride: vertex_size as wgpu::BufferAddress,
@@ -330,7 +330,7 @@ impl crate::framework::Example for Example {
             },
             fragment: Some(wgpu::FragmentState {
                 module: fragment_shader_module,
-                entry_point: fragment_entry_point,
+                entry_point: Some(fragment_entry_point),
                 compilation_options: Default::default(),
                 targets: &[Some(config.view_formats[0].into())],
             }),
diff --git a/examples/src/timestamp_queries/mod.rs b/examples/src/timestamp_queries/mod.rs
index d712762cfd..3edcd7b83c 100644
--- a/examples/src/timestamp_queries/mod.rs
+++ b/examples/src/timestamp_queries/mod.rs
@@ -298,7 +298,7 @@ fn compute_pass(
         label: None,
         layout: None,
         module,
-        entry_point: "main_cs",
+        entry_point: Some("main_cs"),
         compilation_options: Default::default(),
         cache: None,
     });
@@ -354,13 +354,13 @@ fn render_pass(
         layout: Some(&pipeline_layout),
         vertex: wgpu::VertexState {
             module,
-            entry_point: "vs_main",
+            entry_point: Some("vs_main"),
             compilation_options: Default::default(),
             buffers: &[],
         },
         fragment: Some(wgpu::FragmentState {
             module,
-            entry_point: "fs_main",
+            entry_point: Some("fs_main"),
             compilation_options: Default::default(),
             targets: &[Some(format.into())],
         }),
diff --git a/examples/src/uniform_values/mod.rs b/examples/src/uniform_values/mod.rs
index 629aba4328..f275853ba2 100644
--- a/examples/src/uniform_values/mod.rs
+++ b/examples/src/uniform_values/mod.rs
@@ -179,13 +179,13 @@ impl WgpuContext {
             layout: Some(&pipeline_layout),
             vertex: wgpu::VertexState {
                 module: &shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &[],
             },
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(swapchain_format.into())],
             }),
diff --git a/examples/src/water/mod.rs b/examples/src/water/mod.rs
index b21ec70c4d..6b4943d45e 100644
--- a/examples/src/water/mod.rs
+++ b/examples/src/water/mod.rs
@@ -511,7 +511,7 @@ impl crate::framework::Example for Example {
             // Vertex shader and input buffers
             vertex: wgpu::VertexState {
                 module: &water_module,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 // Layout of our vertices. This should match the structs
                 // which are uploaded to the GPU. This should also be
@@ -527,7 +527,7 @@ impl crate::framework::Example for Example {
             // Fragment shader and output targets
             fragment: Some(wgpu::FragmentState {
                 module: &water_module,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 // Describes how the colour will be interpolated
                 // and assigned to the output attachment.
@@ -584,7 +584,7 @@ impl crate::framework::Example for Example {
             layout: Some(&terrain_pipeline_layout),
             vertex: wgpu::VertexState {
                 module: &terrain_module,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &[wgpu::VertexBufferLayout {
                     array_stride: terrain_vertex_size as wgpu::BufferAddress,
@@ -594,7 +594,7 @@ impl crate::framework::Example for Example {
             },
             fragment: Some(wgpu::FragmentState {
                 module: &terrain_module,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(config.view_formats[0].into())],
             }),
diff --git a/tests/src/image.rs b/tests/src/image.rs
index 19bbc1a913..e72d3ee442 100644
--- a/tests/src/image.rs
+++ b/tests/src/image.rs
@@ -368,7 +368,7 @@ fn copy_via_compute(
         label: Some("pipeline read"),
         layout: Some(&pll),
         module: &sm,
-        entry_point: "copy_texture_to_buffer",
+        entry_point: Some("copy_texture_to_buffer"),
         compilation_options: Default::default(),
         cache: None,
     });
diff --git a/tests/tests/bgra8unorm_storage.rs b/tests/tests/bgra8unorm_storage.rs
index 7bc117f097..0859473b2f 100644
--- a/tests/tests/bgra8unorm_storage.rs
+++ b/tests/tests/bgra8unorm_storage.rs
@@ -95,7 +95,7 @@ static BGRA8_UNORM_STORAGE: GpuTestConfiguration = GpuTestConfiguration::new()
         let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
             label: None,
             layout: Some(&pl),
-            entry_point: "main",
+            entry_point: Some("main"),
             compilation_options: Default::default(),
             module: &module,
             cache: None,
diff --git a/tests/tests/bind_group_layout_dedup.rs b/tests/tests/bind_group_layout_dedup.rs
index e4262ea215..5c38779f13 100644
--- a/tests/tests/bind_group_layout_dedup.rs
+++ b/tests/tests/bind_group_layout_dedup.rs
@@ -89,7 +89,7 @@ async fn bgl_dedupe(ctx: TestingContext) {
             label: None,
             layout: Some(&pipeline_layout),
             module: &module,
-            entry_point: "no_resources",
+            entry_point: Some("no_resources"),
             compilation_options: Default::default(),
             cache: None,
         };
@@ -219,7 +219,7 @@ fn bgl_dedupe_with_dropped_user_handle(ctx: TestingContext) {
             label: None,
             layout: Some(&pipeline_layout),
             module: &module,
-            entry_point: "no_resources",
+            entry_point: Some("no_resources"),
             compilation_options: Default::default(),
             cache: None,
         });
@@ -266,7 +266,7 @@ fn bgl_dedupe_derived(ctx: TestingContext) {
             label: None,
             layout: None,
             module: &module,
-            entry_point: "resources",
+            entry_point: Some("resources"),
             compilation_options: Default::default(),
             cache: None,
         });
@@ -338,7 +338,7 @@ fn separate_programs_have_incompatible_derived_bgls(ctx: TestingContext) {
         label: None,
         layout: None,
         module: &module,
-        entry_point: "resources",
+        entry_point: Some("resources"),
         compilation_options: Default::default(),
         cache: None,
     };
@@ -405,7 +405,7 @@ fn derived_bgls_incompatible_with_regular_bgls(ctx: TestingContext) {
             label: None,
             layout: None,
             module: &module,
-            entry_point: "resources",
+            entry_point: Some("resources"),
             compilation_options: Default::default(),
             cache: None,
         });
diff --git a/tests/tests/buffer.rs b/tests/tests/buffer.rs
index e2316daadc..77bc9e0640 100644
--- a/tests/tests/buffer.rs
+++ b/tests/tests/buffer.rs
@@ -225,7 +225,7 @@ static MINIMUM_BUFFER_BINDING_SIZE_LAYOUT: GpuTestConfiguration = GpuTestConfigu
                         label: None,
                         layout: Some(&pipeline_layout),
                         module: &shader_module,
-                        entry_point: "main",
+                        entry_point: Some("main"),
                         compilation_options: Default::default(),
                         cache: None,
                     });
@@ -297,7 +297,7 @@ static MINIMUM_BUFFER_BINDING_SIZE_DISPATCH: GpuTestConfiguration = GpuTestConfi
                 label: None,
                 layout: Some(&pipeline_layout),
                 module: &shader_module,
-                entry_point: "main",
+                entry_point: Some("main"),
                 compilation_options: Default::default(),
                 cache: None,
             });
diff --git a/tests/tests/compute_pass_ownership.rs b/tests/tests/compute_pass_ownership.rs
index 5c0971c6d9..80f81f4d81 100644
--- a/tests/tests/compute_pass_ownership.rs
+++ b/tests/tests/compute_pass_ownership.rs
@@ -317,7 +317,7 @@ fn resource_setup(ctx: &TestingContext) -> ResourceSetup {
             label: Some("pipeline"),
             layout: Some(&pipeline_layout),
             module: &sm,
-            entry_point: "main",
+            entry_point: Some("main"),
             compilation_options: Default::default(),
             cache: None,
         });
diff --git a/tests/tests/device.rs b/tests/tests/device.rs
index ae463cca46..a577379c20 100644
--- a/tests/tests/device.rs
+++ b/tests/tests/device.rs
@@ -533,7 +533,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne
                         layout: None,
                         vertex: wgpu::VertexState {
                             module: &shader_module,
-                            entry_point: "",
+                            entry_point: Some(""),
                             compilation_options: Default::default(),
                             buffers: &[],
                         },
@@ -557,7 +557,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne
                         label: None,
                         layout: None,
                         module: &shader_module,
-                        entry_point: "",
+                        entry_point: None,
                         compilation_options: Default::default(),
                         cache: None,
                     });
@@ -574,7 +574,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne
                         label: None,
                         layout: None,
                         module: &shader_module,
-                        entry_point: "",
+                        entry_point: None,
                         compilation_options: Default::default(),
                         cache: None,
                     });
@@ -823,7 +823,7 @@ static DIFFERENT_BGL_ORDER_BW_SHADER_AND_API: GpuTestConfiguration = GpuTestConf
             .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
                 fragment: Some(wgpu::FragmentState {
                     module: &trivial_shaders_with_some_reversed_bindings,
-                    entry_point: "fs_main",
+                    entry_point: Some("fs_main"),
                     compilation_options: Default::default(),
                     targets: &[Some(wgt::ColorTargetState {
                         format: wgt::TextureFormat::Bgra8Unorm,
@@ -837,7 +837,7 @@ static DIFFERENT_BGL_ORDER_BW_SHADER_AND_API: GpuTestConfiguration = GpuTestConf
                 label: None,
                 vertex: wgpu::VertexState {
                     module: &trivial_shaders_with_some_reversed_bindings,
-                    entry_point: "vs_main",
+                    entry_point: Some("vs_main"),
                     compilation_options: Default::default(),
                     buffers: &[],
                 },
diff --git a/tests/tests/mem_leaks.rs b/tests/tests/mem_leaks.rs
index 3c59aec036..c0840f63fb 100644
--- a/tests/tests/mem_leaks.rs
+++ b/tests/tests/mem_leaks.rs
@@ -96,7 +96,7 @@ async fn draw_test_with_reports(
             vertex: wgpu::VertexState {
                 buffers: &[],
                 module: &shader,
-                entry_point: "vs_main_builtin",
+                entry_point: Some("vs_main_builtin"),
                 compilation_options: Default::default(),
             },
             primitive: wgpu::PrimitiveState::default(),
@@ -104,7 +104,7 @@ async fn draw_test_with_reports(
             multisample: wgpu::MultisampleState::default(),
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(wgpu::ColorTargetState {
                     format: wgpu::TextureFormat::Rgba8Unorm,
diff --git a/tests/tests/nv12_texture/mod.rs b/tests/tests/nv12_texture/mod.rs
index 6b5a4e0c6b..2f149d0148 100644
--- a/tests/tests/nv12_texture/mod.rs
+++ b/tests/tests/nv12_texture/mod.rs
@@ -23,13 +23,13 @@ static NV12_TEXTURE_CREATION_SAMPLING: GpuTestConfiguration = GpuTestConfigurati
                 layout: None,
                 vertex: wgpu::VertexState {
                     module: &shader,
-                    entry_point: "vs_main",
+                    entry_point: Some("vs_main"),
                     compilation_options: Default::default(),
                     buffers: &[],
                 },
                 fragment: Some(wgpu::FragmentState {
                     module: &shader,
-                    entry_point: "fs_main",
+                    entry_point: Some("fs_main"),
                     compilation_options: Default::default(),
                     targets: &[Some(target_format.into())],
                 }),
diff --git a/tests/tests/occlusion_query/mod.rs b/tests/tests/occlusion_query/mod.rs
index a888320e28..a9b1f12649 100644
--- a/tests/tests/occlusion_query/mod.rs
+++ b/tests/tests/occlusion_query/mod.rs
@@ -36,7 +36,7 @@ static OCCLUSION_QUERY: GpuTestConfiguration = GpuTestConfiguration::new()
                 layout: None,
                 vertex: wgpu::VertexState {
                     module: &shader,
-                    entry_point: "vs_main",
+                    entry_point: Some("vs_main"),
                     compilation_options: Default::default(),
                     buffers: &[],
                 },
diff --git a/tests/tests/partially_bounded_arrays/mod.rs b/tests/tests/partially_bounded_arrays/mod.rs
index 83f9cee382..195fd88dd4 100644
--- a/tests/tests/partially_bounded_arrays/mod.rs
+++ b/tests/tests/partially_bounded_arrays/mod.rs
@@ -68,7 +68,7 @@ static PARTIALLY_BOUNDED_ARRAY: GpuTestConfiguration = GpuTestConfiguration::new
             label: None,
             layout: Some(&pipeline_layout),
             module: &cs_module,
-            entry_point: "main",
+            entry_point: Some("main"),
             compilation_options: Default::default(),
             cache: None,
         });
diff --git a/tests/tests/pipeline.rs b/tests/tests/pipeline.rs
index 99d0e8da4a..3cf8d13dfe 100644
--- a/tests/tests/pipeline.rs
+++ b/tests/tests/pipeline.rs
@@ -29,7 +29,7 @@ static PIPELINE_DEFAULT_LAYOUT_BAD_MODULE: GpuTestConfiguration = GpuTestConfigu
                             label: Some("mandelbrot compute pipeline"),
                             layout: None,
                             module: &module,
-                            entry_point: "doesn't exist",
+                            entry_point: Some("doesn't exist"),
                             compilation_options: Default::default(),
                             cache: None,
                         });
@@ -66,7 +66,7 @@ static NO_TARGETLESS_RENDER: GpuTestConfiguration = GpuTestConfiguration::new()
                                 module: &ctx
                                     .device
                                     .create_shader_module(TRIVIAL_VERTEX_SHADER_DESC),
-                                entry_point: "main",
+                                entry_point: Some("main"),
                                 compilation_options: Default::default(),
                                 buffers: &[],
                             },
diff --git a/tests/tests/pipeline_cache.rs b/tests/tests/pipeline_cache.rs
index 58dae4694f..67e9e68270 100644
--- a/tests/tests/pipeline_cache.rs
+++ b/tests/tests/pipeline_cache.rs
@@ -113,7 +113,7 @@ async fn pipeline_cache_test(ctx: TestingContext) {
                 label: Some("pipeline"),
                 layout: Some(&pipeline_layout),
                 module: &sm,
-                entry_point: "main",
+                entry_point: Some("main"),
                 compilation_options: Default::default(),
                 cache: Some(&first_cache),
             });
@@ -136,7 +136,7 @@ async fn pipeline_cache_test(ctx: TestingContext) {
             label: Some("pipeline"),
             layout: Some(&pipeline_layout),
             module: &sm,
-            entry_point: "main",
+            entry_point: Some("main"),
             compilation_options: Default::default(),
             cache: Some(&second_cache),
         });
diff --git a/tests/tests/push_constants.rs b/tests/tests/push_constants.rs
index a18207bef6..905578d533 100644
--- a/tests/tests/push_constants.rs
+++ b/tests/tests/push_constants.rs
@@ -102,7 +102,7 @@ async fn partial_update_test(ctx: TestingContext) {
             label: Some("pipeline"),
             layout: Some(&pipeline_layout),
             module: &sm,
-            entry_point: "main",
+            entry_point: Some("main"),
             compilation_options: Default::default(),
             cache: None,
         });
diff --git a/tests/tests/regression/issue_3349.rs b/tests/tests/regression/issue_3349.rs
index 35d35e5bdf..21929bd9b7 100644
--- a/tests/tests/regression/issue_3349.rs
+++ b/tests/tests/regression/issue_3349.rs
@@ -101,13 +101,13 @@ async fn multi_stage_data_binding_test(ctx: TestingContext) {
             layout: Some(&pll),
             vertex: wgpu::VertexState {
                 module: &vs_sm,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &[],
             },
             fragment: Some(wgpu::FragmentState {
                 module: &fs_sm,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(wgpu::ColorTargetState {
                     format: wgpu::TextureFormat::Rgba8Unorm,
diff --git a/tests/tests/regression/issue_3457.rs b/tests/tests/regression/issue_3457.rs
index f0f7e64636..386b5c34bb 100644
--- a/tests/tests/regression/issue_3457.rs
+++ b/tests/tests/regression/issue_3457.rs
@@ -51,7 +51,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration =
                 layout: Some(&pipeline_layout),
                 vertex: VertexState {
                     module: &module,
-                    entry_point: "double_buffer_vert",
+                    entry_point: Some("double_buffer_vert"),
                     compilation_options: Default::default(),
                     buffers: &[
                         VertexBufferLayout {
@@ -71,7 +71,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration =
                 multisample: MultisampleState::default(),
                 fragment: Some(FragmentState {
                     module: &module,
-                    entry_point: "double_buffer_frag",
+                    entry_point: Some("double_buffer_frag"),
                     compilation_options: Default::default(),
                     targets: &[Some(ColorTargetState {
                         format: TextureFormat::Rgba8Unorm,
@@ -90,7 +90,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration =
                 layout: Some(&pipeline_layout),
                 vertex: VertexState {
                     module: &module,
-                    entry_point: "single_buffer_vert",
+                    entry_point: Some("single_buffer_vert"),
                     compilation_options: Default::default(),
                     buffers: &[VertexBufferLayout {
                         array_stride: 16,
@@ -103,7 +103,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration =
                 multisample: MultisampleState::default(),
                 fragment: Some(FragmentState {
                     module: &module,
-                    entry_point: "single_buffer_frag",
+                    entry_point: Some("single_buffer_frag"),
                     compilation_options: Default::default(),
                     targets: &[Some(ColorTargetState {
                         format: TextureFormat::Rgba8Unorm,
diff --git a/tests/tests/regression/issue_5553.rs b/tests/tests/regression/issue_5553.rs
index 19247eec1c..6debb03485 100644
--- a/tests/tests/regression/issue_5553.rs
+++ b/tests/tests/regression/issue_5553.rs
@@ -30,7 +30,7 @@ static ALLOW_INPUT_NOT_CONSUMED: GpuTestConfiguration =
                 layout: Some(&pipeline_layout),
                 vertex: VertexState {
                     module: &module,
-                    entry_point: "vs_main",
+                    entry_point: Some("vs_main"),
                     compilation_options: Default::default(),
                     buffers: &[],
                 },
@@ -39,7 +39,7 @@ static ALLOW_INPUT_NOT_CONSUMED: GpuTestConfiguration =
                 multisample: MultisampleState::default(),
                 fragment: Some(FragmentState {
                     module: &module,
-                    entry_point: "fs_main",
+                    entry_point: Some("fs_main"),
                     compilation_options: Default::default(),
                     targets: &[Some(ColorTargetState {
                         format: TextureFormat::Rgba8Unorm,
diff --git a/tests/tests/render_pass_ownership.rs b/tests/tests/render_pass_ownership.rs
index 95fc0fbdc9..502375e736 100644
--- a/tests/tests/render_pass_ownership.rs
+++ b/tests/tests/render_pass_ownership.rs
@@ -498,7 +498,7 @@ fn resource_setup(ctx: &TestingContext) -> ResourceSetup {
             layout: Some(&pipeline_layout),
             vertex: wgpu::VertexState {
                 module: &sm,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &[wgpu::VertexBufferLayout {
                     array_stride: 4,
@@ -508,7 +508,7 @@ fn resource_setup(ctx: &TestingContext) -> ResourceSetup {
             },
             fragment: Some(wgpu::FragmentState {
                 module: &sm,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(target_format.into())],
             }),
diff --git a/tests/tests/scissor_tests/mod.rs b/tests/tests/scissor_tests/mod.rs
index 3f1e7df135..583be021f3 100644
--- a/tests/tests/scissor_tests/mod.rs
+++ b/tests/tests/scissor_tests/mod.rs
@@ -43,7 +43,7 @@ async fn scissor_test_impl(
             layout: None,
             vertex: wgpu::VertexState {
                 module: &shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &[],
             },
@@ -52,7 +52,7 @@ async fn scissor_test_impl(
             multisample: wgpu::MultisampleState::default(),
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(wgpu::ColorTargetState {
                     format: wgpu::TextureFormat::Rgba8Unorm,
diff --git a/tests/tests/shader/mod.rs b/tests/tests/shader/mod.rs
index f5c2d4c96b..7d6ed7aaaa 100644
--- a/tests/tests/shader/mod.rs
+++ b/tests/tests/shader/mod.rs
@@ -314,7 +314,7 @@ async fn shader_input_output_test(
                 label: Some(&format!("pipeline {test_name}")),
                 layout: Some(&pll),
                 module: &sm,
-                entry_point: "cs_main",
+                entry_point: Some("cs_main"),
                 compilation_options: Default::default(),
                 cache: None,
             });
diff --git a/tests/tests/shader/zero_init_workgroup_mem.rs b/tests/tests/shader/zero_init_workgroup_mem.rs
index eb774f7b35..beacb4fcc8 100644
--- a/tests/tests/shader/zero_init_workgroup_mem.rs
+++ b/tests/tests/shader/zero_init_workgroup_mem.rs
@@ -79,7 +79,7 @@ static ZERO_INIT_WORKGROUP_MEMORY: GpuTestConfiguration = GpuTestConfiguration::
                 label: Some("pipeline read"),
                 layout: Some(&pll),
                 module: &sm,
-                entry_point: "read",
+                entry_point: Some("read"),
                 compilation_options: Default::default(),
                 cache: None,
             });
@@ -90,7 +90,7 @@ static ZERO_INIT_WORKGROUP_MEMORY: GpuTestConfiguration = GpuTestConfiguration::
                 label: Some("pipeline write"),
                 layout: None,
                 module: &sm,
-                entry_point: "write",
+                entry_point: Some("write"),
                 compilation_options: Default::default(),
                 cache: None,
             });
diff --git a/tests/tests/shader_primitive_index/mod.rs b/tests/tests/shader_primitive_index/mod.rs
index 9972f81aa1..10708a24a2 100644
--- a/tests/tests/shader_primitive_index/mod.rs
+++ b/tests/tests/shader_primitive_index/mod.rs
@@ -121,7 +121,7 @@ async fn pulling_common(
             layout: None,
             vertex: wgpu::VertexState {
                 module: &shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &[wgpu::VertexBufferLayout {
                     array_stride: 8,
@@ -138,7 +138,7 @@ async fn pulling_common(
             multisample: wgpu::MultisampleState::default(),
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(wgpu::ColorTargetState {
                     format: wgpu::TextureFormat::Rgba8Unorm,
diff --git a/tests/tests/shader_view_format/mod.rs b/tests/tests/shader_view_format/mod.rs
index d34b8d851d..b2bc0426eb 100644
--- a/tests/tests/shader_view_format/mod.rs
+++ b/tests/tests/shader_view_format/mod.rs
@@ -92,13 +92,14 @@ async fn reinterpret(
             layout: None,
             vertex: wgpu::VertexState {
                 module: shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
+
                 compilation_options: Default::default(),
                 buffers: &[],
             },
             fragment: Some(wgpu::FragmentState {
                 module: shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(src_format.into())],
             }),
diff --git a/tests/tests/subgroup_operations/mod.rs b/tests/tests/subgroup_operations/mod.rs
index 7d0aec8241..7696fb78df 100644
--- a/tests/tests/subgroup_operations/mod.rs
+++ b/tests/tests/subgroup_operations/mod.rs
@@ -73,7 +73,7 @@ static SUBGROUP_OPERATIONS: GpuTestConfiguration = GpuTestConfiguration::new()
             label: None,
             layout: Some(&pipeline_layout),
             module: &cs_module,
-            entry_point: "main",
+            entry_point: Some("main"),
             compilation_options: Default::default(),
             cache: None,
         });
diff --git a/tests/tests/vertex_formats/mod.rs b/tests/tests/vertex_formats/mod.rs
index 1d6aca5968..60ef177efa 100644
--- a/tests/tests/vertex_formats/mod.rs
+++ b/tests/tests/vertex_formats/mod.rs
@@ -250,7 +250,7 @@ async fn vertex_formats_common(ctx: TestingContext, tests: &[Test<'_>]) {
                     attributes: test.attributes,
                 }],
                 module: &shader,
-                entry_point: test.entry_point,
+                entry_point: Some(test.entry_point),
                 compilation_options: Default::default(),
             },
             primitive: wgpu::PrimitiveState::default(),
@@ -258,7 +258,7 @@ async fn vertex_formats_common(ctx: TestingContext, tests: &[Test<'_>]) {
             multisample: wgpu::MultisampleState::default(),
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fragment_main",
+                entry_point: Some("fragment_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(wgpu::ColorTargetState {
                     format: wgpu::TextureFormat::Rgba8Unorm,
diff --git a/tests/tests/vertex_indices/mod.rs b/tests/tests/vertex_indices/mod.rs
index dcc2ca82f5..5a847d0fbb 100644
--- a/tests/tests/vertex_indices/mod.rs
+++ b/tests/tests/vertex_indices/mod.rs
@@ -259,7 +259,7 @@ async fn vertex_index_common(ctx: TestingContext) {
         vertex: wgpu::VertexState {
             buffers: &[],
             module: &shader,
-            entry_point: "vs_main_builtin",
+            entry_point: Some("vs_main_builtin"),
             compilation_options: Default::default(),
         },
         primitive: wgpu::PrimitiveState::default(),
@@ -267,7 +267,7 @@ async fn vertex_index_common(ctx: TestingContext) {
         multisample: wgpu::MultisampleState::default(),
         fragment: Some(wgpu::FragmentState {
             module: &shader,
-            entry_point: "fs_main",
+            entry_point: Some("fs_main"),
             compilation_options: Default::default(),
             targets: &[Some(wgpu::ColorTargetState {
                 format: wgpu::TextureFormat::Rgba8Unorm,
@@ -280,7 +280,7 @@ async fn vertex_index_common(ctx: TestingContext) {
     };
     let builtin_pipeline = ctx.device.create_render_pipeline(&pipeline_desc);
 
-    pipeline_desc.vertex.entry_point = "vs_main_buffers";
+    pipeline_desc.vertex.entry_point = Some("vs_main_buffers");
     pipeline_desc.vertex.buffers = &[
         wgpu::VertexBufferLayout {
             array_stride: 4,
diff --git a/wgpu/src/api/compute_pipeline.rs b/wgpu/src/api/compute_pipeline.rs
index d226dd5500..ea2de4b8b2 100644
--- a/wgpu/src/api/compute_pipeline.rs
+++ b/wgpu/src/api/compute_pipeline.rs
@@ -62,9 +62,14 @@ pub struct ComputePipelineDescriptor<'a> {
     pub layout: Option<&'a PipelineLayout>,
     /// The compiled shader module for this stage.
     pub module: &'a ShaderModule,
-    /// The name of the entry point in the compiled shader. There must be a function with this name
-    /// and no return value in the shader.
-    pub entry_point: &'a str,
+    /// The name of the entry point in the compiled shader to use.
+    ///
+    /// If [`Some`], there must be a compute shader entry point with this name in `module`.
+    /// Otherwise, expect exactly one compute shader entry point in `module`, which will be
+    /// selected.
+    // NOTE: keep phrasing in sync. with `FragmentState::entry_point`
+    // NOTE: keep phrasing in sync. with `VertexState::entry_point`
+    pub entry_point: Option<&'a str>,
     /// Advanced options for when this pipeline is compiled
     ///
     /// This implements `Default`, and for most users can be set to `Default::default()`
diff --git a/wgpu/src/api/render_pipeline.rs b/wgpu/src/api/render_pipeline.rs
index 2b81aa95a7..7e74127167 100644
--- a/wgpu/src/api/render_pipeline.rs
+++ b/wgpu/src/api/render_pipeline.rs
@@ -73,9 +73,14 @@ static_assertions::assert_impl_all!(VertexBufferLayout<'_>: Send, Sync);
 pub struct VertexState<'a> {
     /// The compiled shader module for this stage.
     pub module: &'a ShaderModule,
-    /// The name of the entry point in the compiled shader. There must be a function with this name
-    /// in the shader.
-    pub entry_point: &'a str,
+    /// The name of the entry point in the compiled shader to use.
+    ///
+    /// If [`Some`], there must be a vertex-stage shader entry point with this name in `module`.
+    /// Otherwise, expect exactly one vertex-stage entry point in `module`, which will be
+    /// selected.
+    // NOTE: keep phrasing in sync. with `ComputePipelineDescriptor::entry_point`
+    // NOTE: keep phrasing in sync. with `FragmentState::entry_point`
+    pub entry_point: Option<&'a str>,
     /// Advanced options for when this pipeline is compiled
     ///
     /// This implements `Default`, and for most users can be set to `Default::default()`
@@ -96,9 +101,14 @@ static_assertions::assert_impl_all!(VertexState<'_>: Send, Sync);
 pub struct FragmentState<'a> {
     /// The compiled shader module for this stage.
     pub module: &'a ShaderModule,
-    /// The name of the entry point in the compiled shader. There must be a function with this name
-    /// in the shader.
-    pub entry_point: &'a str,
+    /// The name of the entry point in the compiled shader to use.
+    ///
+    /// If [`Some`], there must be a fragment-stage shader entry point with this name in `module`.
+    /// Otherwise, expect exactly one fragment-stage entry point in `module`, which will be
+    /// selected.
+    // NOTE: keep phrasing in sync. with `ComputePipelineDescriptor::entry_point`
+    // NOTE: keep phrasing in sync. with `VertexState::entry_point`
+    pub entry_point: Option<&'a str>,
     /// Advanced options for when this pipeline is compiled
     ///
     /// This implements `Default`, and for most users can be set to `Default::default()`
diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs
index be3d9b42cd..573db58a83 100644
--- a/wgpu/src/backend/webgpu.rs
+++ b/wgpu/src/backend/webgpu.rs
@@ -1880,7 +1880,9 @@ impl crate::context::Context for ContextWebGpu {
             &mapped_vertex_state,
             desc.vertex.compilation_options.constants,
         );
-        mapped_vertex_state.entry_point(desc.vertex.entry_point);
+        if let Some(ep) = desc.vertex.entry_point {
+            mapped_vertex_state.entry_point(ep);
+        }
 
         let buffers = desc
             .vertex
@@ -1957,7 +1959,9 @@ impl crate::context::Context for ContextWebGpu {
             let mut mapped_fragment_desc =
                 webgpu_sys::GpuFragmentState::new(&module.0.module, &targets);
             insert_constants_map(&mapped_vertex_state, frag.compilation_options.constants);
-            mapped_fragment_desc.entry_point(frag.entry_point);
+            if let Some(ep) = frag.entry_point {
+                mapped_fragment_desc.entry_point(ep);
+            }
             mapped_desc.fragment(&mapped_fragment_desc);
         }
 
@@ -1984,7 +1988,9 @@ impl crate::context::Context for ContextWebGpu {
         let mut mapped_compute_stage =
             webgpu_sys::GpuProgrammableStage::new(&shader_module.0.module);
         insert_constants_map(&mapped_compute_stage, desc.compilation_options.constants);
-        mapped_compute_stage.entry_point(desc.entry_point);
+        if let Some(ep) = desc.entry_point {
+            mapped_compute_stage.entry_point(ep);
+        }
         let auto_layout = wasm_bindgen::JsValue::from(webgpu_sys::GpuAutoLayoutMode::Auto);
         let mut mapped_desc = webgpu_sys::GpuComputePipelineDescriptor::new(
             &match desc.layout {
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 7491d01557..b7560268e9 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -1174,7 +1174,7 @@ impl crate::Context for ContextWgpuCore {
             vertex: pipe::VertexState {
                 stage: pipe::ProgrammableStageDescriptor {
                     module: desc.vertex.module.id.into(),
-                    entry_point: Some(Borrowed(desc.vertex.entry_point)),
+                    entry_point: desc.vertex.entry_point.map(Borrowed),
                     constants: Borrowed(desc.vertex.compilation_options.constants),
                     zero_initialize_workgroup_memory: desc
                         .vertex
@@ -1189,7 +1189,7 @@ impl crate::Context for ContextWgpuCore {
             fragment: desc.fragment.as_ref().map(|frag| pipe::FragmentState {
                 stage: pipe::ProgrammableStageDescriptor {
                     module: frag.module.id.into(),
-                    entry_point: Some(Borrowed(frag.entry_point)),
+                    entry_point: frag.entry_point.map(Borrowed),
                     constants: Borrowed(frag.compilation_options.constants),
                     zero_initialize_workgroup_memory: frag
                         .compilation_options
@@ -1234,7 +1234,7 @@ impl crate::Context for ContextWgpuCore {
             layout: desc.layout.map(|l| l.id.into()),
             stage: pipe::ProgrammableStageDescriptor {
                 module: desc.module.id.into(),
-                entry_point: Some(Borrowed(desc.entry_point)),
+                entry_point: desc.entry_point.map(Borrowed),
                 constants: Borrowed(desc.compilation_options.constants),
                 zero_initialize_workgroup_memory: desc
                     .compilation_options

From 7446790354562b2d439cc4d604ce097763488bd9 Mon Sep 17 00:00:00 2001
From: Nicolas Silva <nical@fastmail.com>
Date: Wed, 24 Jul 2024 13:58:09 +0200
Subject: [PATCH 079/226] Fix a few entry_point parameters

---
 tests/tests/regression/issue_4485.rs | 4 ++--
 tests/tests/regression/issue_4514.rs | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/tests/regression/issue_4485.rs b/tests/tests/regression/issue_4485.rs
index 101712fe02..4944afe49f 100644
--- a/tests/tests/regression/issue_4485.rs
+++ b/tests/tests/regression/issue_4485.rs
@@ -45,7 +45,7 @@ async fn test_impl(ctx: &TestingContext) {
             layout: None,
             vertex: wgpu::VertexState {
                 module: &shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &[],
             },
@@ -54,7 +54,7 @@ async fn test_impl(ctx: &TestingContext) {
             multisample: wgpu::MultisampleState::default(),
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(wgpu::ColorTargetState {
                     format: wgpu::TextureFormat::Rgba8Unorm,
diff --git a/tests/tests/regression/issue_4514.rs b/tests/tests/regression/issue_4514.rs
index f447f879bf..b3609ff9ad 100644
--- a/tests/tests/regression/issue_4514.rs
+++ b/tests/tests/regression/issue_4514.rs
@@ -45,7 +45,7 @@ async fn test_impl(ctx: &TestingContext) {
             layout: None,
             vertex: wgpu::VertexState {
                 module: &shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &[],
             },
@@ -54,7 +54,7 @@ async fn test_impl(ctx: &TestingContext) {
             multisample: wgpu::MultisampleState::default(),
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(wgpu::ColorTargetState {
                     format: wgpu::TextureFormat::Rgba8Unorm,

From 86507f33cd303cfcd81446fd989e5adf65d298dd Mon Sep 17 00:00:00 2001
From: Nicolas Silva <nical@fastmail.com>
Date: Wed, 24 Jul 2024 13:40:32 +0200
Subject: [PATCH 080/226] Reduce the number of iterations in benchmarks to a
 small number when running on CI

---
 .github/workflows/ci.yml       |  1 +
 benches/benches/computepass.rs | 87 ++++++++++++++++++++++------------
 benches/benches/renderpass.rs  | 74 ++++++++++++++++++-----------
 3 files changed, 103 insertions(+), 59 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index edf8501c57..70a83b51d7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -59,6 +59,7 @@ env:
   RUSTDOCFLAGS: -D warnings
   WASM_BINDGEN_TEST_TIMEOUT: 300 # 5 minutes
   CACHE_SUFFIX: c # cache busting
+  WGPU_TESTING: true
 
 # We distinguish the following kinds of builds:
 # - native: build for the same target as we compile on
diff --git a/benches/benches/computepass.rs b/benches/benches/computepass.rs
index 9a69eb46eb..2af1413605 100644
--- a/benches/benches/computepass.rs
+++ b/benches/benches/computepass.rs
@@ -10,24 +10,36 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator};
 
 use crate::DeviceState;
 
-const DISPATCH_COUNT: usize = 10_000;
+fn dispatch_count() -> usize {
+    // On CI we only want to run a very lightweight version of the benchmark
+    // to ensure that it does not break.
+    if std::env::var("WGPU_TESTING").is_ok() {
+        8
+    } else {
+        10_000
+    }
+}
 
 // Currently bindless is _much_ slower than with regularly resources,
 // since wgpu needs to issues barriers for all resources between each dispatch for all read/write textures & buffers.
 // This is in fact so slow that it makes the benchmark unusable when we use the same amount of
 // resources as the regular benchmark.
 // For details see https://github.com/gfx-rs/wgpu/issues/5766
-const DISPATCH_COUNT_BINDLESS: usize = 1_000;
+fn dispatch_count_bindless() -> usize {
+    // On CI we only want to run a very lightweight version of the benchmark
+    // to ensure that it does not break.
+    if std::env::var("WGPU_TESTING").is_ok() {
+        8
+    } else {
+        1_000
+    }
+}
 
 // Must match the number of textures in the computepass.wgsl shader
 const TEXTURES_PER_DISPATCH: usize = 2;
 const STORAGE_TEXTURES_PER_DISPATCH: usize = 2;
 const STORAGE_BUFFERS_PER_DISPATCH: usize = 2;
 
-const TEXTURE_COUNT: usize = DISPATCH_COUNT * TEXTURES_PER_DISPATCH;
-const STORAGE_TEXTURE_COUNT: usize = DISPATCH_COUNT * STORAGE_TEXTURES_PER_DISPATCH;
-const STORAGE_BUFFER_COUNT: usize = DISPATCH_COUNT * STORAGE_BUFFERS_PER_DISPATCH;
-
 const BUFFER_SIZE: u64 = 16;
 
 struct ComputepassState {
@@ -45,6 +57,12 @@ impl ComputepassState {
     fn new() -> Self {
         let device_state = DeviceState::new();
 
+        let dispatch_count = dispatch_count();
+        let dispatch_count_bindless = dispatch_count_bindless();
+        let texture_count = dispatch_count * TEXTURES_PER_DISPATCH;
+        let storage_buffer_count = dispatch_count * STORAGE_BUFFERS_PER_DISPATCH;
+        let storage_texture_count = dispatch_count * STORAGE_TEXTURES_PER_DISPATCH;
+
         let supports_bindless = device_state.device.features().contains(
             wgpu::Features::BUFFER_BINDING_ARRAY
                 | wgpu::Features::TEXTURE_BINDING_ARRAY
@@ -106,8 +124,8 @@ impl ComputepassState {
                     entries: &bind_group_layout_entries,
                 });
 
-        let mut texture_views = Vec::with_capacity(TEXTURE_COUNT);
-        for i in 0..TEXTURE_COUNT {
+        let mut texture_views = Vec::with_capacity(texture_count);
+        for i in 0..texture_count {
             let texture = device_state
                 .device
                 .create_texture(&wgpu::TextureDescriptor {
@@ -132,8 +150,8 @@ impl ComputepassState {
         random.shuffle(&mut texture_views);
         let texture_view_refs: Vec<_> = texture_views.iter().collect();
 
-        let mut storage_texture_views = Vec::with_capacity(STORAGE_TEXTURE_COUNT);
-        for i in 0..TEXTURE_COUNT {
+        let mut storage_texture_views = Vec::with_capacity(storage_texture_count);
+        for i in 0..storage_texture_count {
             let texture = device_state
                 .device
                 .create_texture(&wgpu::TextureDescriptor {
@@ -158,8 +176,8 @@ impl ComputepassState {
         random.shuffle(&mut storage_texture_views);
         let storage_texture_view_refs: Vec<_> = storage_texture_views.iter().collect();
 
-        let mut storage_buffers = Vec::with_capacity(STORAGE_BUFFER_COUNT);
-        for i in 0..STORAGE_BUFFER_COUNT {
+        let mut storage_buffers = Vec::with_capacity(storage_buffer_count);
+        for i in 0..storage_buffer_count {
             storage_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor {
                 label: Some(&format!("Buffer {i}")),
                 size: BUFFER_SIZE,
@@ -173,8 +191,8 @@ impl ComputepassState {
             .map(|b| b.as_entire_buffer_binding())
             .collect();
 
-        let mut bind_groups = Vec::with_capacity(DISPATCH_COUNT);
-        for dispatch_idx in 0..DISPATCH_COUNT {
+        let mut bind_groups = Vec::with_capacity(dispatch_count);
+        for dispatch_idx in 0..dispatch_count {
             let mut entries = Vec::with_capacity(TEXTURES_PER_DISPATCH);
             for tex_idx in 0..TEXTURES_PER_DISPATCH {
                 entries.push(wgpu::BindGroupEntry {
@@ -258,7 +276,7 @@ impl ComputepassState {
                                     view_dimension: wgpu::TextureViewDimension::D2,
                                     multisampled: false,
                                 },
-                                count: Some(NonZeroU32::new(TEXTURE_COUNT as u32).unwrap()),
+                                count: Some(NonZeroU32::new(texture_count as u32).unwrap()),
                             },
                             wgpu::BindGroupLayoutEntry {
                                 binding: 1,
@@ -268,7 +286,7 @@ impl ComputepassState {
                                     format: wgpu::TextureFormat::R32Float,
                                     view_dimension: wgpu::TextureViewDimension::D2,
                                 },
-                                count: Some(NonZeroU32::new(STORAGE_TEXTURE_COUNT as u32).unwrap()),
+                                count: Some(NonZeroU32::new(storage_texture_count as u32).unwrap()),
                             },
                             wgpu::BindGroupLayoutEntry {
                                 binding: 2,
@@ -278,7 +296,7 @@ impl ComputepassState {
                                     has_dynamic_offset: false,
                                     min_binding_size: std::num::NonZeroU64::new(BUFFER_SIZE),
                                 },
-                                count: Some(NonZeroU32::new(STORAGE_BUFFER_COUNT as u32).unwrap()),
+                                count: Some(NonZeroU32::new(storage_buffer_count as u32).unwrap()),
                             },
                         ],
                     });
@@ -293,19 +311,19 @@ impl ComputepassState {
                             wgpu::BindGroupEntry {
                                 binding: 0,
                                 resource: wgpu::BindingResource::TextureViewArray(
-                                    &texture_view_refs[..DISPATCH_COUNT_BINDLESS],
+                                    &texture_view_refs[..dispatch_count_bindless],
                                 ),
                             },
                             wgpu::BindGroupEntry {
                                 binding: 1,
                                 resource: wgpu::BindingResource::TextureViewArray(
-                                    &storage_texture_view_refs[..DISPATCH_COUNT_BINDLESS],
+                                    &storage_texture_view_refs[..dispatch_count_bindless],
                                 ),
                             },
                             wgpu::BindGroupEntry {
                                 binding: 2,
                                 resource: wgpu::BindingResource::BufferArray(
-                                    &storage_buffer_bindings[..DISPATCH_COUNT_BINDLESS],
+                                    &storage_buffer_bindings[..dispatch_count_bindless],
                                 ),
                             },
                         ],
@@ -354,7 +372,8 @@ impl ComputepassState {
     fn run_subpass(&self, pass_number: usize, total_passes: usize) -> wgpu::CommandBuffer {
         profiling::scope!("Computepass", &format!("Pass {pass_number}/{total_passes}"));
 
-        let dispatch_per_pass = DISPATCH_COUNT / total_passes;
+        let dispatch_count = dispatch_count();
+        let dispatch_per_pass = dispatch_count / total_passes;
 
         let mut encoder = self
             .device_state
@@ -379,7 +398,7 @@ impl ComputepassState {
         encoder.finish()
     }
 
-    fn run_bindless_pass(&self) -> wgpu::CommandBuffer {
+    fn run_bindless_pass(&self, dispatch_count_bindless: usize) -> wgpu::CommandBuffer {
         profiling::scope!("Bindless Computepass");
 
         let mut encoder = self
@@ -394,7 +413,7 @@ impl ComputepassState {
 
         compute_pass.set_pipeline(self.bindless_pipeline.as_ref().unwrap());
         compute_pass.set_bind_group(0, self.bindless_bind_group.as_ref().unwrap(), &[]);
-        for _ in 0..DISPATCH_COUNT_BINDLESS {
+        for _ in 0..dispatch_count_bindless {
             compute_pass.dispatch_workgroups(1, 1, 1);
         }
 
@@ -407,13 +426,19 @@ impl ComputepassState {
 fn run_bench(ctx: &mut Criterion) {
     let state = Lazy::new(ComputepassState::new);
 
+    let dispatch_count = dispatch_count();
+    let dispatch_count_bindless = dispatch_count_bindless();
+    let texture_count = dispatch_count * TEXTURES_PER_DISPATCH;
+    let storage_buffer_count = dispatch_count * STORAGE_BUFFERS_PER_DISPATCH;
+    let storage_texture_count = dispatch_count * STORAGE_TEXTURES_PER_DISPATCH;
+
     // Test 10k dispatch calls split up into 1, 2, 4, and 8 computepasses
     let mut group = ctx.benchmark_group("Computepass: Single Threaded");
-    group.throughput(Throughput::Elements(DISPATCH_COUNT as _));
+    group.throughput(Throughput::Elements(dispatch_count as _));
 
     for time_submit in [false, true] {
         for cpasses in [1, 2, 4, 8] {
-            let dispatch_per_pass = DISPATCH_COUNT / cpasses;
+            let dispatch_per_pass = dispatch_count / cpasses;
 
             let label = if time_submit {
                 "Submit Time"
@@ -466,10 +491,10 @@ fn run_bench(ctx: &mut Criterion) {
 
     // Test 10k dispatch calls split up over 2, 4, and 8 threads.
     let mut group = ctx.benchmark_group("Computepass: Multi Threaded");
-    group.throughput(Throughput::Elements(DISPATCH_COUNT as _));
+    group.throughput(Throughput::Elements(dispatch_count as _));
 
     for threads in [2, 4, 8] {
-        let dispatch_per_pass = DISPATCH_COUNT / threads;
+        let dispatch_per_pass = dispatch_count / threads;
         group.bench_function(
             &format!("{threads} threads x {dispatch_per_pass} dispatch"),
             |b| {
@@ -510,9 +535,9 @@ fn run_bench(ctx: &mut Criterion) {
 
     // Test 10k dispatch calls split up over 1, 2, 4, and 8 threads.
     let mut group = ctx.benchmark_group("Computepass: Bindless");
-    group.throughput(Throughput::Elements(DISPATCH_COUNT_BINDLESS as _));
+    group.throughput(Throughput::Elements(dispatch_count_bindless as _));
 
-    group.bench_function(&format!("{DISPATCH_COUNT_BINDLESS} dispatch"), |b| {
+    group.bench_function(&format!("{dispatch_count_bindless} dispatch"), |b| {
         Lazy::force(&state);
 
         b.iter_custom(|iters| {
@@ -535,7 +560,7 @@ fn run_bench(ctx: &mut Criterion) {
 
                 let start = Instant::now();
 
-                let buffer = state.run_bindless_pass();
+                let buffer = state.run_bindless_pass(dispatch_count_bindless);
 
                 duration += start.elapsed();
 
@@ -551,7 +576,7 @@ fn run_bench(ctx: &mut Criterion) {
     ctx.bench_function(
         &format!(
             "Computepass: Empty Submit with {} Resources",
-            TEXTURE_COUNT + STORAGE_TEXTURE_COUNT + STORAGE_BUFFER_COUNT
+            texture_count + storage_texture_count + storage_buffer_count
         ),
         |b| {
             Lazy::force(&state);
diff --git a/benches/benches/renderpass.rs b/benches/benches/renderpass.rs
index f31fc07580..7f2e14116e 100644
--- a/benches/benches/renderpass.rs
+++ b/benches/benches/renderpass.rs
@@ -10,14 +10,19 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator};
 
 use crate::DeviceState;
 
-const DRAW_COUNT: usize = 10_000;
+fn draw_count() -> usize {
+    // On CI we only want to run a very lightweight version of the benchmark
+    // to ensure that it does not break.
+    if std::env::var("WGPU_TESTING").is_ok() {
+        8
+    } else {
+        10_000
+    }
+}
 
 // Must match the number of textures in the renderpass.wgsl shader
 const TEXTURES_PER_DRAW: usize = 7;
 const VERTEX_BUFFERS_PER_DRAW: usize = 2;
-const VERTEX_BUFFER_COUNT: usize = DRAW_COUNT * VERTEX_BUFFERS_PER_DRAW;
-
-const TEXTURE_COUNT: usize = DRAW_COUNT * TEXTURES_PER_DRAW;
 
 struct RenderpassState {
     device_state: DeviceState,
@@ -37,6 +42,10 @@ impl RenderpassState {
     fn new() -> Self {
         let device_state = DeviceState::new();
 
+        let draw_count = draw_count();
+        let vertex_buffer_count = draw_count * VERTEX_BUFFERS_PER_DRAW;
+        let texture_count = draw_count * TEXTURES_PER_DRAW;
+
         let supports_bindless = device_state.device.features().contains(
             wgpu::Features::TEXTURE_BINDING_ARRAY
                 | wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING,
@@ -44,7 +53,7 @@ impl RenderpassState {
             .device
             .limits()
             .max_sampled_textures_per_shader_stage
-            >= TEXTURE_COUNT as _;
+            >= texture_count as _;
 
         // Performance gets considerably worse if the resources are shuffled.
         //
@@ -74,8 +83,8 @@ impl RenderpassState {
                     entries: &bind_group_layout_entries,
                 });
 
-        let mut texture_views = Vec::with_capacity(TEXTURE_COUNT);
-        for i in 0..TEXTURE_COUNT {
+        let mut texture_views = Vec::with_capacity(texture_count);
+        for i in 0..texture_count {
             let texture = device_state
                 .device
                 .create_texture(&wgpu::TextureDescriptor {
@@ -101,8 +110,8 @@ impl RenderpassState {
 
         let texture_view_refs: Vec<_> = texture_views.iter().collect();
 
-        let mut bind_groups = Vec::with_capacity(DRAW_COUNT);
-        for draw_idx in 0..DRAW_COUNT {
+        let mut bind_groups = Vec::with_capacity(draw_count);
+        for draw_idx in 0..draw_count {
             let mut entries = Vec::with_capacity(TEXTURES_PER_DRAW);
             for tex_idx in 0..TEXTURES_PER_DRAW {
                 entries.push(wgpu::BindGroupEntry {
@@ -138,8 +147,8 @@ impl RenderpassState {
                     push_constant_ranges: &[],
                 });
 
-        let mut vertex_buffers = Vec::with_capacity(VERTEX_BUFFER_COUNT);
-        for _ in 0..VERTEX_BUFFER_COUNT {
+        let mut vertex_buffers = Vec::with_capacity(vertex_buffer_count);
+        for _ in 0..vertex_buffer_count {
             vertex_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor {
                 label: None,
                 size: 3 * 16,
@@ -149,8 +158,8 @@ impl RenderpassState {
         }
         random.shuffle(&mut vertex_buffers);
 
-        let mut index_buffers = Vec::with_capacity(DRAW_COUNT);
-        for _ in 0..DRAW_COUNT {
+        let mut index_buffers = Vec::with_capacity(draw_count);
+        for _ in 0..draw_count {
             index_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor {
                 label: None,
                 size: 3 * 4,
@@ -246,7 +255,7 @@ impl RenderpassState {
                                 view_dimension: wgpu::TextureViewDimension::D2,
                                 multisampled: false,
                             },
-                            count: Some(NonZeroU32::new(TEXTURE_COUNT as u32).unwrap()),
+                            count: Some(NonZeroU32::new(texture_count as u32).unwrap()),
                         }],
                     });
 
@@ -324,10 +333,15 @@ impl RenderpassState {
         }
     }
 
-    fn run_subpass(&self, pass_number: usize, total_passes: usize) -> wgpu::CommandBuffer {
+    fn run_subpass(
+        &self,
+        pass_number: usize,
+        total_passes: usize,
+        draw_count: usize,
+    ) -> wgpu::CommandBuffer {
         profiling::scope!("Renderpass", &format!("Pass {pass_number}/{total_passes}"));
 
-        let draws_per_pass = DRAW_COUNT / total_passes;
+        let draws_per_pass = draw_count / total_passes;
 
         let mut encoder = self
             .device_state
@@ -372,7 +386,7 @@ impl RenderpassState {
         encoder.finish()
     }
 
-    fn run_bindless_pass(&self) -> wgpu::CommandBuffer {
+    fn run_bindless_pass(&self, draw_count: usize) -> wgpu::CommandBuffer {
         profiling::scope!("Bindless Renderpass");
 
         let mut encoder = self
@@ -402,7 +416,7 @@ impl RenderpassState {
         }
         render_pass.set_index_buffer(self.index_buffers[0].slice(..), wgpu::IndexFormat::Uint32);
 
-        for draw_idx in 0..DRAW_COUNT {
+        for draw_idx in 0..draw_count {
             render_pass.draw_indexed(0..3, 0, draw_idx as u32..draw_idx as u32 + 1);
         }
 
@@ -415,13 +429,17 @@ impl RenderpassState {
 fn run_bench(ctx: &mut Criterion) {
     let state = Lazy::new(RenderpassState::new);
 
+    let draw_count = draw_count();
+    let vertex_buffer_count = draw_count * VERTEX_BUFFERS_PER_DRAW;
+    let texture_count = draw_count * TEXTURES_PER_DRAW;
+
     // Test 10k draw calls split up into 1, 2, 4, and 8 renderpasses
     let mut group = ctx.benchmark_group("Renderpass: Single Threaded");
-    group.throughput(Throughput::Elements(DRAW_COUNT as _));
+    group.throughput(Throughput::Elements(draw_count as _));
 
     for time_submit in [false, true] {
         for rpasses in [1, 2, 4, 8] {
-            let draws_per_pass = DRAW_COUNT / rpasses;
+            let draws_per_pass = draw_count / rpasses;
 
             let label = if time_submit {
                 "Submit Time"
@@ -451,7 +469,7 @@ fn run_bench(ctx: &mut Criterion) {
 
                             let mut buffers: Vec<wgpu::CommandBuffer> = Vec::with_capacity(rpasses);
                             for i in 0..rpasses {
-                                buffers.push(state.run_subpass(i, rpasses));
+                                buffers.push(state.run_subpass(i, rpasses, draw_count));
                             }
 
                             if time_submit {
@@ -479,10 +497,10 @@ fn run_bench(ctx: &mut Criterion) {
 
     // Test 10k draw calls split up over 2, 4, and 8 threads.
     let mut group = ctx.benchmark_group("Renderpass: Multi Threaded");
-    group.throughput(Throughput::Elements(DRAW_COUNT as _));
+    group.throughput(Throughput::Elements(draw_count as _));
 
     for threads in [2, 4, 8] {
-        let draws_per_pass = DRAW_COUNT / threads;
+        let draws_per_pass = draw_count / threads;
         group.bench_function(
             &format!("{threads} threads x {draws_per_pass} draws"),
             |b| {
@@ -505,7 +523,7 @@ fn run_bench(ctx: &mut Criterion) {
 
                         let buffers = (0..threads)
                             .into_par_iter()
-                            .map(|i| state.run_subpass(i, threads))
+                            .map(|i| state.run_subpass(i, threads, draw_count))
                             .collect::<Vec<_>>();
 
                         duration += start.elapsed();
@@ -523,9 +541,9 @@ fn run_bench(ctx: &mut Criterion) {
 
     // Test 10k draw calls split up over 1, 2, 4, and 8 threads.
     let mut group = ctx.benchmark_group("Renderpass: Bindless");
-    group.throughput(Throughput::Elements(DRAW_COUNT as _));
+    group.throughput(Throughput::Elements(draw_count as _));
 
-    group.bench_function(&format!("{DRAW_COUNT} draws"), |b| {
+    group.bench_function(&format!("{draw_count} draws"), |b| {
         Lazy::force(&state);
 
         b.iter_custom(|iters| {
@@ -543,7 +561,7 @@ fn run_bench(ctx: &mut Criterion) {
 
                 let start = Instant::now();
 
-                let buffer = state.run_bindless_pass();
+                let buffer = state.run_bindless_pass(draw_count);
 
                 duration += start.elapsed();
 
@@ -559,7 +577,7 @@ fn run_bench(ctx: &mut Criterion) {
     ctx.bench_function(
         &format!(
             "Renderpass: Empty Submit with {} Resources",
-            TEXTURE_COUNT + VERTEX_BUFFER_COUNT
+            texture_count + vertex_buffer_count
         ),
         |b| {
             Lazy::force(&state);

From 380387e8e2c566118f6ff8886e2bbdbe074fbfbc Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Fri, 19 Jan 2024 14:07:22 -0500
Subject: [PATCH 081/226] refactor(const_eval): derive `Debug` for
 component-wise `enum`s

---
 naga/src/proc/constant_evaluator.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/naga/src/proc/constant_evaluator.rs b/naga/src/proc/constant_evaluator.rs
index b5c821f412..6f70e13a2a 100644
--- a/naga/src/proc/constant_evaluator.rs
+++ b/naga/src/proc/constant_evaluator.rs
@@ -27,6 +27,7 @@ macro_rules! gen_component_wise_extractor {
         scalar_kinds: [$( $scalar_kind:ident ),* $(,)?],
     ) => {
         /// A subset of [`Literal`]s intended to be used for implementing numeric built-ins.
+        #[derive(Debug)]
         enum $target<const N: usize> {
             $(
                 #[doc = concat!(

From 3650f90079d853b22a0879de5fdaa22e007d77d9 Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Fri, 19 Jan 2024 14:07:22 -0500
Subject: [PATCH 082/226] refactor(const_eval): derive `PartialEq` for testing
 in component-wise `enum`s

---
 naga/src/proc/constant_evaluator.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/naga/src/proc/constant_evaluator.rs b/naga/src/proc/constant_evaluator.rs
index 6f70e13a2a..cd7bb9f34b 100644
--- a/naga/src/proc/constant_evaluator.rs
+++ b/naga/src/proc/constant_evaluator.rs
@@ -28,6 +28,7 @@ macro_rules! gen_component_wise_extractor {
     ) => {
         /// A subset of [`Literal`]s intended to be used for implementing numeric built-ins.
         #[derive(Debug)]
+        #[cfg_attr(test, derive(PartialEq))]
         enum $target<const N: usize> {
             $(
                 #[doc = concat!(

From c5fce7b433a17fab9bc73d8295b52c95ca58869f Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Fri, 19 Jan 2024 14:04:57 -0500
Subject: [PATCH 083/226] refactor(naga): rename `MathFunction::FindMsb` to
 `FirstLeadingBit`

---
 naga/src/back/glsl/mod.rs         | 8 +++++---
 naga/src/back/hlsl/writer.rs      | 2 +-
 naga/src/back/msl/writer.rs       | 6 +++---
 naga/src/back/spv/block.rs        | 4 ++--
 naga/src/back/wgsl/writer.rs      | 2 +-
 naga/src/front/glsl/builtins.rs   | 8 +++++---
 naga/src/front/spv/mod.rs         | 2 +-
 naga/src/front/wgsl/parse/conv.rs | 2 +-
 naga/src/lib.rs                   | 2 +-
 naga/src/proc/mod.rs              | 2 +-
 naga/src/proc/typifier.rs         | 2 +-
 naga/src/valid/expression.rs      | 2 +-
 12 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs
index 7ad1f3c597..9ea6eed91a 100644
--- a/naga/src/back/glsl/mod.rs
+++ b/naga/src/back/glsl/mod.rs
@@ -3648,7 +3648,7 @@ impl<'a, W: Write> Writer<'a, W> {
                         return Ok(());
                     }
                     Mf::FindLsb => "findLSB",
-                    Mf::FindMsb => "findMSB",
+                    Mf::FirstLeadingBit => "findMSB",
                     // data packing
                     Mf::Pack4x8snorm => "packSnorm4x8",
                     Mf::Pack4x8unorm => "packUnorm4x8",
@@ -3722,8 +3722,10 @@ impl<'a, W: Write> Writer<'a, W> {
 
                 // Some GLSL functions always return signed integers (like findMSB),
                 // so they need to be cast to uint if the argument is also an uint.
-                let ret_might_need_int_to_uint =
-                    matches!(fun, Mf::FindLsb | Mf::FindMsb | Mf::CountOneBits | Mf::Abs);
+                let ret_might_need_int_to_uint = matches!(
+                    fun,
+                    Mf::FindLsb | Mf::FirstLeadingBit | Mf::CountOneBits | Mf::Abs
+                );
 
                 // Some GLSL functions only accept signed integers (like abs),
                 // so they need their argument cast from uint to int.
diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs
index 982bf0cfea..7965e6492b 100644
--- a/naga/src/back/hlsl/writer.rs
+++ b/naga/src/back/hlsl/writer.rs
@@ -3064,7 +3064,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                     Mf::CountOneBits => Function::MissingIntOverload("countbits"),
                     Mf::ReverseBits => Function::MissingIntOverload("reversebits"),
                     Mf::FindLsb => Function::MissingIntReturnType("firstbitlow"),
-                    Mf::FindMsb => Function::MissingIntReturnType("firstbithigh"),
+                    Mf::FirstLeadingBit => Function::MissingIntReturnType("firstbithigh"),
                     Mf::ExtractBits => Function::Regular(EXTRACT_BITS_FUNCTION),
                     Mf::InsertBits => Function::Regular(INSERT_BITS_FUNCTION),
                     // Data Packing
diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs
index 7ec22009bd..fccc92a1db 100644
--- a/naga/src/back/msl/writer.rs
+++ b/naga/src/back/msl/writer.rs
@@ -1876,7 +1876,7 @@ impl<W: Write> Writer<W> {
                     Mf::ExtractBits => "",
                     Mf::InsertBits => "",
                     Mf::FindLsb => "",
-                    Mf::FindMsb => "",
+                    Mf::FirstLeadingBit => "",
                     // data packing
                     Mf::Pack4x8snorm => "pack_float_to_snorm4x8",
                     Mf::Pack4x8unorm => "pack_float_to_unorm4x8",
@@ -1928,7 +1928,7 @@ impl<W: Write> Writer<W> {
                         self.put_expression(arg, context, true)?;
                         write!(self.out, ") + 1) % {constant}) - 1)")?;
                     }
-                    Mf::FindMsb => {
+                    Mf::FirstLeadingBit => {
                         let inner = context.resolve_type(arg);
                         let scalar = inner.scalar().unwrap();
                         let constant = scalar.width * 8 - 1;
@@ -2702,7 +2702,7 @@ impl<W: Write> Writer<W> {
                             }
                         }
                     }
-                    crate::MathFunction::FindMsb
+                    crate::MathFunction::FirstLeadingBit
                     | crate::MathFunction::Pack4xI8
                     | crate::MathFunction::Pack4xU8
                     | crate::MathFunction::Unpack4xI8
diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs
index 33f892aa45..932e27cceb 100644
--- a/naga/src/back/spv/block.rs
+++ b/naga/src/back/spv/block.rs
@@ -1184,12 +1184,12 @@ impl<'w> BlockContext<'w> {
                         ))
                     }
                     Mf::FindLsb => MathOp::Ext(spirv::GLOp::FindILsb),
-                    Mf::FindMsb => {
+                    Mf::FirstLeadingBit => {
                         if arg_ty.scalar_width() == Some(4) {
                             let thing = match arg_scalar_kind {
                                 Some(crate::ScalarKind::Uint) => spirv::GLOp::FindUMsb,
                                 Some(crate::ScalarKind::Sint) => spirv::GLOp::FindSMsb,
-                                other => unimplemented!("Unexpected findMSB({:?})", other),
+                                other => unimplemented!("Unexpected firstLeadingBit({:?})", other),
                             };
                             MathOp::Ext(thing)
                         } else {
diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs
index 8cd37830ec..1b3597bcba 100644
--- a/naga/src/back/wgsl/writer.rs
+++ b/naga/src/back/wgsl/writer.rs
@@ -1711,7 +1711,7 @@ impl<W: Write> Writer<W> {
                     Mf::ExtractBits => Function::Regular("extractBits"),
                     Mf::InsertBits => Function::Regular("insertBits"),
                     Mf::FindLsb => Function::Regular("firstTrailingBit"),
-                    Mf::FindMsb => Function::Regular("firstLeadingBit"),
+                    Mf::FirstLeadingBit => Function::Regular("firstLeadingBit"),
                     // data packing
                     Mf::Pack4x8snorm => Function::Regular("pack4x8snorm"),
                     Mf::Pack4x8unorm => Function::Regular("pack4x8unorm"),
diff --git a/naga/src/front/glsl/builtins.rs b/naga/src/front/glsl/builtins.rs
index cbb9b99387..b0e921b79c 100644
--- a/naga/src/front/glsl/builtins.rs
+++ b/naga/src/front/glsl/builtins.rs
@@ -647,7 +647,7 @@ fn inject_standard_builtins(
                 "bitfieldExtract" => MathFunction::ExtractBits,
                 "bitfieldInsert" => MathFunction::InsertBits,
                 "findLSB" => MathFunction::FindLsb,
-                "findMSB" => MathFunction::FindMsb,
+                "findMSB" => MathFunction::FirstLeadingBit,
                 _ => unreachable!(),
             };
 
@@ -696,7 +696,9 @@ fn inject_standard_builtins(
                 let mc = if scalar.kind == Sk::Uint {
                     match mc {
                         MacroCall::MathFunction(MathFunction::FindLsb) => MacroCall::FindLsbUint,
-                        MacroCall::MathFunction(MathFunction::FindMsb) => MacroCall::FindMsbUint,
+                        MacroCall::MathFunction(MathFunction::FirstLeadingBit) => {
+                            MacroCall::FindMsbUint
+                        }
                         mc => mc,
                     }
                 } else {
@@ -1788,7 +1790,7 @@ impl MacroCall {
             mc @ (MacroCall::FindLsbUint | MacroCall::FindMsbUint) => {
                 let fun = match mc {
                     MacroCall::FindLsbUint => MathFunction::FindLsb,
-                    MacroCall::FindMsbUint => MathFunction::FindMsb,
+                    MacroCall::FindMsbUint => MathFunction::FirstLeadingBit,
                     _ => unreachable!(),
                 };
                 let res = ctx.add_expression(
diff --git a/naga/src/front/spv/mod.rs b/naga/src/front/spv/mod.rs
index d154712b20..ac048203e4 100644
--- a/naga/src/front/spv/mod.rs
+++ b/naga/src/front/spv/mod.rs
@@ -3027,7 +3027,7 @@ impl<I: Iterator<Item = u32>> Frontend<I> {
                         Glo::UnpackUnorm2x16 => Mf::Unpack2x16unorm,
                         Glo::UnpackSnorm2x16 => Mf::Unpack2x16snorm,
                         Glo::FindILsb => Mf::FindLsb,
-                        Glo::FindUMsb | Glo::FindSMsb => Mf::FindMsb,
+                        Glo::FindUMsb | Glo::FindSMsb => Mf::FirstLeadingBit,
                         // TODO: https://github.com/gfx-rs/naga/issues/2526
                         Glo::Modf | Glo::Frexp => return Err(Error::UnsupportedExtInst(inst_id)),
                         Glo::IMix
diff --git a/naga/src/front/wgsl/parse/conv.rs b/naga/src/front/wgsl/parse/conv.rs
index 49b15dfa83..2cb676a80d 100644
--- a/naga/src/front/wgsl/parse/conv.rs
+++ b/naga/src/front/wgsl/parse/conv.rs
@@ -236,7 +236,7 @@ pub fn map_standard_fun(word: &str) -> Option<crate::MathFunction> {
         "extractBits" => Mf::ExtractBits,
         "insertBits" => Mf::InsertBits,
         "firstTrailingBit" => Mf::FindLsb,
-        "firstLeadingBit" => Mf::FindMsb,
+        "firstLeadingBit" => Mf::FirstLeadingBit,
         // data packing
         "pack4x8snorm" => Mf::Pack4x8snorm,
         "pack4x8unorm" => Mf::Pack4x8unorm,
diff --git a/naga/src/lib.rs b/naga/src/lib.rs
index 8ed7527922..8a52df81bb 100644
--- a/naga/src/lib.rs
+++ b/naga/src/lib.rs
@@ -1199,7 +1199,7 @@ pub enum MathFunction {
     ExtractBits,
     InsertBits,
     FindLsb,
-    FindMsb,
+    FirstLeadingBit,
     // data packing
     Pack4x8snorm,
     Pack4x8unorm,
diff --git a/naga/src/proc/mod.rs b/naga/src/proc/mod.rs
index 86d2b11f25..6b514af18b 100644
--- a/naga/src/proc/mod.rs
+++ b/naga/src/proc/mod.rs
@@ -485,7 +485,7 @@ impl super::MathFunction {
             Self::ExtractBits => 3,
             Self::InsertBits => 4,
             Self::FindLsb => 1,
-            Self::FindMsb => 1,
+            Self::FirstLeadingBit => 1,
             // data packing
             Self::Pack4x8snorm => 1,
             Self::Pack4x8unorm => 1,
diff --git a/naga/src/proc/typifier.rs b/naga/src/proc/typifier.rs
index 0a02900c4a..23295cc0e2 100644
--- a/naga/src/proc/typifier.rs
+++ b/naga/src/proc/typifier.rs
@@ -789,7 +789,7 @@ impl<'a> ResolveContext<'a> {
                     Mf::ExtractBits |
                     Mf::InsertBits |
                     Mf::FindLsb |
-                    Mf::FindMsb => match *res_arg.inner_with(types)  {
+                    Mf::FirstLeadingBit => match *res_arg.inner_with(types)  {
                         Ti::Scalar(scalar @ crate::Scalar {
                             kind: crate::ScalarKind::Sint | crate::ScalarKind::Uint,
                             ..
diff --git a/naga/src/valid/expression.rs b/naga/src/valid/expression.rs
index 89bceae061..bd90c8ad2d 100644
--- a/naga/src/valid/expression.rs
+++ b/naga/src/valid/expression.rs
@@ -1350,7 +1350,7 @@ impl super::Validator {
                     | Mf::CountTrailingZeros
                     | Mf::CountOneBits
                     | Mf::ReverseBits
-                    | Mf::FindMsb
+                    | Mf::FirstLeadingBit
                     | Mf::FindLsb => {
                         if arg1_ty.is_some() || arg2_ty.is_some() || arg3_ty.is_some() {
                             return Err(ExpressionError::WrongArgumentCount(fun));

From 5b44baa8c834ea86ab8ff89848e13c4d39f46fbb Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Fri, 19 Jan 2024 14:07:22 -0500
Subject: [PATCH 084/226] feat(const_eval): impl. `firstLeadingBit`

---
 naga/src/proc/constant_evaluator.rs           |  91 ++++++++++++
 .../glsl/math-functions.main.Fragment.glsl    |   6 +-
 naga/tests/out/hlsl/math-functions.hlsl       |   6 +-
 naga/tests/out/msl/math-functions.msl         |   8 +-
 naga/tests/out/spv/math-functions.spvasm      | 136 +++++++++---------
 naga/tests/out/wgsl/math-functions.wgsl       |   6 +-
 6 files changed, 166 insertions(+), 87 deletions(-)

diff --git a/naga/src/proc/constant_evaluator.rs b/naga/src/proc/constant_evaluator.rs
index cd7bb9f34b..344e2ddba2 100644
--- a/naga/src/proc/constant_evaluator.rs
+++ b/naga/src/proc/constant_evaluator.rs
@@ -1233,6 +1233,9 @@ impl<'a> ConstantEvaluator<'a> {
             crate::MathFunction::ReverseBits => {
                 component_wise_concrete_int!(self, span, [arg], |e| { Ok([e.reverse_bits()]) })
             }
+            crate::MathFunction::FirstLeadingBit => {
+                component_wise_concrete_int(self, span, [arg], |ci| Ok(first_leading_bit(ci)))
+            }
 
             fun => Err(ConstantEvaluatorError::NotImplemented(format!(
                 "{fun:?} built-in function"
@@ -2098,6 +2101,94 @@ impl<'a> ConstantEvaluator<'a> {
     }
 }
 
+fn first_leading_bit(concrete_int: ConcreteInt<1>) -> ConcreteInt<1> {
+    // NOTE: Bit indices for this built-in start at 0 at the "right" (or LSB). For example, 1 means
+    // the least significant bit is set. Therefore, an input of 1 would return a right-to-left bit
+    // index of 0.
+    let rtl_to_ltr_bit_idx = |e: u32| -> u32 {
+        match e {
+            idx @ 0..=31 => 31 - idx,
+            32 => u32::MAX,
+            _ => unreachable!(),
+        }
+    };
+    match concrete_int {
+        ConcreteInt::I32([e]) => ConcreteInt::I32([{
+            let rtl_bit_index = if e.is_negative() {
+                e.leading_ones()
+            } else {
+                e.leading_zeros()
+            };
+            rtl_to_ltr_bit_idx(rtl_bit_index) as i32
+        }]),
+        ConcreteInt::U32([e]) => ConcreteInt::U32([rtl_to_ltr_bit_idx(e.leading_zeros())]),
+    }
+}
+
+#[test]
+fn first_leading_bit_smoke() {
+    assert_eq!(
+        first_leading_bit(ConcreteInt::I32([-1])),
+        ConcreteInt::I32([-1])
+    );
+    assert_eq!(
+        first_leading_bit(ConcreteInt::I32([0])),
+        ConcreteInt::I32([-1])
+    );
+    assert_eq!(
+        first_leading_bit(ConcreteInt::I32([1])),
+        ConcreteInt::I32([0])
+    );
+    assert_eq!(
+        first_leading_bit(ConcreteInt::I32([-2])),
+        ConcreteInt::I32([0])
+    );
+    assert_eq!(
+        first_leading_bit(ConcreteInt::I32([1234 + 4567])),
+        ConcreteInt::I32([12])
+    );
+    assert_eq!(
+        first_leading_bit(ConcreteInt::I32([i32::MAX])),
+        ConcreteInt::I32([30])
+    );
+    assert_eq!(
+        first_leading_bit(ConcreteInt::I32([i32::MIN])),
+        ConcreteInt::I32([30])
+    );
+    // NOTE: Ignore the sign bit, which is a separate (above) case.
+    for idx in 0..(32 - 1) {
+        assert_eq!(
+            first_leading_bit(ConcreteInt::I32([1 << idx])),
+            ConcreteInt::I32([idx])
+        );
+    }
+    for idx in 1..(32 - 1) {
+        assert_eq!(
+            first_leading_bit(ConcreteInt::I32([-(1 << idx)])),
+            ConcreteInt::I32([idx - 1])
+        );
+    }
+
+    assert_eq!(
+        first_leading_bit(ConcreteInt::U32([0])),
+        ConcreteInt::U32([u32::MAX])
+    );
+    assert_eq!(
+        first_leading_bit(ConcreteInt::U32([1])),
+        ConcreteInt::U32([0])
+    );
+    assert_eq!(
+        first_leading_bit(ConcreteInt::U32([u32::MAX])),
+        ConcreteInt::U32([31])
+    );
+    for idx in 0..32 {
+        assert_eq!(
+            first_leading_bit(ConcreteInt::U32([1 << idx])),
+            ConcreteInt::U32([idx])
+        )
+    }
+}
+
 /// Trait for conversions of abstract values to concrete types.
 trait TryFromAbstract<T>: Sized {
     /// Convert an abstract literal `value` to `Self`.
diff --git a/naga/tests/out/glsl/math-functions.main.Fragment.glsl b/naga/tests/out/glsl/math-functions.main.Fragment.glsl
index 7f91571dcc..c10dddff0f 100644
--- a/naga/tests/out/glsl/math-functions.main.Fragment.glsl
+++ b/naga/tests/out/glsl/math-functions.main.Fragment.glsl
@@ -65,10 +65,8 @@ void main() {
     ivec4 sign_b = ivec4(-1, -1, -1, -1);
     vec4 sign_d = vec4(-1.0, -1.0, -1.0, -1.0);
     int const_dot = ( + ivec2(0).x * ivec2(0).x + ivec2(0).y * ivec2(0).y);
-    uint first_leading_bit_abs = uint(findMSB(0u));
-    int flb_a = findMSB(-1);
-    ivec2 flb_b = findMSB(ivec2(-1));
-    uvec2 flb_c = uvec2(findMSB(uvec2(1u)));
+    ivec2 flb_b = ivec2(-1, -1);
+    uvec2 flb_c = uvec2(0u, 0u);
     int ftb_a = findLSB(-1);
     uint ftb_b = uint(findLSB(1u));
     ivec2 ftb_c = findLSB(ivec2(-1));
diff --git a/naga/tests/out/hlsl/math-functions.hlsl b/naga/tests/out/hlsl/math-functions.hlsl
index c1a771c25d..14d1e9e188 100644
--- a/naga/tests/out/hlsl/math-functions.hlsl
+++ b/naga/tests/out/hlsl/math-functions.hlsl
@@ -79,10 +79,8 @@ void main()
     int4 sign_b = int4(-1, -1, -1, -1);
     float4 sign_d = float4(-1.0, -1.0, -1.0, -1.0);
     int const_dot = dot(ZeroValueint2(), ZeroValueint2());
-    uint first_leading_bit_abs = firstbithigh(0u);
-    int flb_a = asint(firstbithigh(-1));
-    int2 flb_b = asint(firstbithigh((-1).xx));
-    uint2 flb_c = firstbithigh((1u).xx);
+    int2 flb_b = int2(-1, -1);
+    uint2 flb_c = uint2(0u, 0u);
     int ftb_a = asint(firstbitlow(-1));
     uint ftb_b = firstbitlow(1u);
     int2 ftb_c = asint(firstbitlow((-1).xx));
diff --git a/naga/tests/out/msl/math-functions.msl b/naga/tests/out/msl/math-functions.msl
index 0e6a5b24dc..271472978a 100644
--- a/naga/tests/out/msl/math-functions.msl
+++ b/naga/tests/out/msl/math-functions.msl
@@ -67,12 +67,8 @@ fragment void main_(
     metal::int4 sign_b = metal::int4(-1, -1, -1, -1);
     metal::float4 sign_d = metal::float4(-1.0, -1.0, -1.0, -1.0);
     int const_dot = ( + metal::int2 {}.x * metal::int2 {}.x + metal::int2 {}.y * metal::int2 {}.y);
-    uint first_leading_bit_abs = metal::select(31 - metal::clz(0u), uint(-1), 0u == 0 || 0u == -1);
-    int flb_a = metal::select(31 - metal::clz(metal::select(-1, ~-1, -1 < 0)), int(-1), -1 == 0 || -1 == -1);
-    metal::int2 _e29 = metal::int2(-1);
-    metal::int2 flb_b = metal::select(31 - metal::clz(metal::select(_e29, ~_e29, _e29 < 0)), int2(-1), _e29 == 0 || _e29 == -1);
-    metal::uint2 _e32 = metal::uint2(1u);
-    metal::uint2 flb_c = metal::select(31 - metal::clz(_e32), uint2(-1), _e32 == 0 || _e32 == -1);
+    metal::int2 flb_b = metal::int2(-1, -1);
+    metal::uint2 flb_c = metal::uint2(0u, 0u);
     int ftb_a = (((metal::ctz(-1) + 1) % 33) - 1);
     uint ftb_b = (((metal::ctz(1u) + 1) % 33) - 1);
     metal::int2 ftb_c = (((metal::ctz(metal::int2(-1)) + 1) % 33) - 1);
diff --git a/naga/tests/out/spv/math-functions.spvasm b/naga/tests/out/spv/math-functions.spvasm
index 6e07c6d7a6..2207934cc9 100644
--- a/naga/tests/out/spv/math-functions.spvasm
+++ b/naga/tests/out/spv/math-functions.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.1
 ; Generator: rspirv
-; Bound: 96
+; Bound: 94
 OpCapability Shader
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
@@ -40,77 +40,75 @@ OpMemberDecorate %15 1 Offset 16
 %24 = OpConstant  %4  -1.0
 %25 = OpConstantComposite  %3  %24 %24 %24 %24
 %26 = OpConstantNull  %7
-%27 = OpConstant  %9  0
+%27 = OpConstant  %9  4294967295
 %28 = OpConstantComposite  %7  %22 %22
-%29 = OpConstant  %9  1
+%29 = OpConstant  %9  0
 %30 = OpConstantComposite  %8  %29 %29
-%31 = OpConstant  %9  32
-%32 = OpConstant  %6  32
-%33 = OpConstant  %6  0
-%34 = OpConstantComposite  %8  %31 %31
-%35 = OpConstantComposite  %7  %32 %32
-%36 = OpConstantComposite  %8  %27 %27
-%37 = OpConstantComposite  %7  %33 %33
-%38 = OpConstant  %9  31
-%39 = OpConstantComposite  %8  %38 %38
-%40 = OpConstant  %6  2
-%41 = OpConstant  %4  2.0
-%42 = OpConstantComposite  %10  %19 %41
-%43 = OpConstant  %6  3
-%44 = OpConstant  %6  4
-%45 = OpConstantComposite  %7  %43 %44
-%46 = OpConstant  %4  1.5
-%47 = OpConstantComposite  %10  %46 %46
-%48 = OpConstantComposite  %3  %46 %46 %46 %46
-%55 = OpConstantComposite  %3  %19 %19 %19 %19
-%58 = OpConstantNull  %6
+%31 = OpConstant  %9  1
+%32 = OpConstantComposite  %7  %22 %22
+%33 = OpConstantComposite  %8  %31 %31
+%34 = OpConstant  %9  32
+%35 = OpConstant  %6  32
+%36 = OpConstant  %6  0
+%37 = OpConstantComposite  %8  %34 %34
+%38 = OpConstantComposite  %7  %35 %35
+%39 = OpConstantComposite  %7  %36 %36
+%40 = OpConstant  %9  31
+%41 = OpConstantComposite  %8  %40 %40
+%42 = OpConstant  %6  2
+%43 = OpConstant  %4  2.0
+%44 = OpConstantComposite  %10  %19 %43
+%45 = OpConstant  %6  3
+%46 = OpConstant  %6  4
+%47 = OpConstantComposite  %7  %45 %46
+%48 = OpConstant  %4  1.5
+%49 = OpConstantComposite  %10  %48 %48
+%50 = OpConstantComposite  %3  %48 %48 %48 %48
+%57 = OpConstantComposite  %3  %19 %19 %19 %19
+%60 = OpConstantNull  %6
 %17 = OpFunction  %2  None %18
 %16 = OpLabel
-OpBranch %49
-%49 = OpLabel
-%50 = OpExtInst  %4  %1 Degrees %19
-%51 = OpExtInst  %4  %1 Radians %19
-%52 = OpExtInst  %3  %1 Degrees %21
-%53 = OpExtInst  %3  %1 Radians %21
-%54 = OpExtInst  %3  %1 FClamp %21 %21 %55
-%56 = OpExtInst  %3  %1 Refract %21 %21 %19
-%59 = OpCompositeExtract  %6  %26 0
-%60 = OpCompositeExtract  %6  %26 0
-%61 = OpIMul  %6  %59 %60
-%62 = OpIAdd  %6  %58 %61
-%63 = OpCompositeExtract  %6  %26 1
-%64 = OpCompositeExtract  %6  %26 1
-%65 = OpIMul  %6  %63 %64
-%57 = OpIAdd  %6  %62 %65
-%66 = OpExtInst  %9  %1 FindUMsb %27
-%67 = OpExtInst  %6  %1 FindSMsb %22
-%68 = OpExtInst  %7  %1 FindSMsb %28
-%69 = OpExtInst  %8  %1 FindUMsb %30
-%70 = OpExtInst  %6  %1 FindILsb %22
-%71 = OpExtInst  %9  %1 FindILsb %29
-%72 = OpExtInst  %7  %1 FindILsb %28
-%73 = OpExtInst  %8  %1 FindILsb %30
-%74 = OpExtInst  %4  %1 Ldexp %19 %40
-%75 = OpExtInst  %10  %1 Ldexp %42 %45
-%76 = OpExtInst  %11  %1 ModfStruct %46
-%77 = OpExtInst  %11  %1 ModfStruct %46
-%78 = OpCompositeExtract  %4  %77 0
-%79 = OpExtInst  %11  %1 ModfStruct %46
-%80 = OpCompositeExtract  %4  %79 1
-%81 = OpExtInst  %12  %1 ModfStruct %47
-%82 = OpExtInst  %13  %1 ModfStruct %48
-%83 = OpCompositeExtract  %3  %82 1
-%84 = OpCompositeExtract  %4  %83 0
-%85 = OpExtInst  %12  %1 ModfStruct %47
-%86 = OpCompositeExtract  %10  %85 0
-%87 = OpCompositeExtract  %4  %86 1
-%88 = OpExtInst  %14  %1 FrexpStruct %46
-%89 = OpExtInst  %14  %1 FrexpStruct %46
-%90 = OpCompositeExtract  %4  %89 0
-%91 = OpExtInst  %14  %1 FrexpStruct %46
-%92 = OpCompositeExtract  %6  %91 1
-%93 = OpExtInst  %15  %1 FrexpStruct %48
-%94 = OpCompositeExtract  %5  %93 1
-%95 = OpCompositeExtract  %6  %94 0
+OpBranch %51
+%51 = OpLabel
+%52 = OpExtInst  %4  %1 Degrees %19
+%53 = OpExtInst  %4  %1 Radians %19
+%54 = OpExtInst  %3  %1 Degrees %21
+%55 = OpExtInst  %3  %1 Radians %21
+%56 = OpExtInst  %3  %1 FClamp %21 %21 %57
+%58 = OpExtInst  %3  %1 Refract %21 %21 %19
+%61 = OpCompositeExtract  %6  %26 0
+%62 = OpCompositeExtract  %6  %26 0
+%63 = OpIMul  %6  %61 %62
+%64 = OpIAdd  %6  %60 %63
+%65 = OpCompositeExtract  %6  %26 1
+%66 = OpCompositeExtract  %6  %26 1
+%67 = OpIMul  %6  %65 %66
+%59 = OpIAdd  %6  %64 %67
+%68 = OpExtInst  %6  %1 FindILsb %22
+%69 = OpExtInst  %9  %1 FindILsb %31
+%70 = OpExtInst  %7  %1 FindILsb %32
+%71 = OpExtInst  %8  %1 FindILsb %33
+%72 = OpExtInst  %4  %1 Ldexp %19 %42
+%73 = OpExtInst  %10  %1 Ldexp %44 %47
+%74 = OpExtInst  %11  %1 ModfStruct %48
+%75 = OpExtInst  %11  %1 ModfStruct %48
+%76 = OpCompositeExtract  %4  %75 0
+%77 = OpExtInst  %11  %1 ModfStruct %48
+%78 = OpCompositeExtract  %4  %77 1
+%79 = OpExtInst  %12  %1 ModfStruct %49
+%80 = OpExtInst  %13  %1 ModfStruct %50
+%81 = OpCompositeExtract  %3  %80 1
+%82 = OpCompositeExtract  %4  %81 0
+%83 = OpExtInst  %12  %1 ModfStruct %49
+%84 = OpCompositeExtract  %10  %83 0
+%85 = OpCompositeExtract  %4  %84 1
+%86 = OpExtInst  %14  %1 FrexpStruct %48
+%87 = OpExtInst  %14  %1 FrexpStruct %48
+%88 = OpCompositeExtract  %4  %87 0
+%89 = OpExtInst  %14  %1 FrexpStruct %48
+%90 = OpCompositeExtract  %6  %89 1
+%91 = OpExtInst  %15  %1 FrexpStruct %50
+%92 = OpCompositeExtract  %5  %91 1
+%93 = OpCompositeExtract  %6  %92 0
 OpReturn
 OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/wgsl/math-functions.wgsl b/naga/tests/out/wgsl/math-functions.wgsl
index 228248b3ce..c97ce6a4a2 100644
--- a/naga/tests/out/wgsl/math-functions.wgsl
+++ b/naga/tests/out/wgsl/math-functions.wgsl
@@ -10,10 +10,8 @@ fn main() {
     let sign_b = vec4<i32>(-1i, -1i, -1i, -1i);
     let sign_d = vec4<f32>(-1f, -1f, -1f, -1f);
     let const_dot = dot(vec2<i32>(), vec2<i32>());
-    let first_leading_bit_abs = firstLeadingBit(0u);
-    let flb_a = firstLeadingBit(-1i);
-    let flb_b = firstLeadingBit(vec2(-1i));
-    let flb_c = firstLeadingBit(vec2(1u));
+    let flb_b = vec2<i32>(-1i, -1i);
+    let flb_c = vec2<u32>(0u, 0u);
     let ftb_a = firstTrailingBit(-1i);
     let ftb_b = firstTrailingBit(1u);
     let ftb_c = firstTrailingBit(vec2(-1i));

From 2f7c87f7af53b664c569f6152382d8102029478e Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Fri, 19 Jan 2024 14:15:49 -0500
Subject: [PATCH 085/226] refactor(naga): rename `MathFunction::FindLsb` to
 `FirstTrailingBit`

---
 naga/src/back/glsl/mod.rs         | 4 ++--
 naga/src/back/hlsl/writer.rs      | 2 +-
 naga/src/back/msl/writer.rs       | 4 ++--
 naga/src/back/spv/block.rs        | 2 +-
 naga/src/back/wgsl/writer.rs      | 2 +-
 naga/src/front/glsl/builtins.rs   | 8 +++++---
 naga/src/front/spv/mod.rs         | 2 +-
 naga/src/front/wgsl/parse/conv.rs | 2 +-
 naga/src/lib.rs                   | 2 +-
 naga/src/proc/mod.rs              | 2 +-
 naga/src/proc/typifier.rs         | 2 +-
 naga/src/valid/expression.rs      | 2 +-
 12 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs
index 9ea6eed91a..fe70480544 100644
--- a/naga/src/back/glsl/mod.rs
+++ b/naga/src/back/glsl/mod.rs
@@ -3647,7 +3647,7 @@ impl<'a, W: Write> Writer<'a, W> {
 
                         return Ok(());
                     }
-                    Mf::FindLsb => "findLSB",
+                    Mf::FirstTrailingBit => "findLSB",
                     Mf::FirstLeadingBit => "findMSB",
                     // data packing
                     Mf::Pack4x8snorm => "packSnorm4x8",
@@ -3724,7 +3724,7 @@ impl<'a, W: Write> Writer<'a, W> {
                 // so they need to be cast to uint if the argument is also an uint.
                 let ret_might_need_int_to_uint = matches!(
                     fun,
-                    Mf::FindLsb | Mf::FirstLeadingBit | Mf::CountOneBits | Mf::Abs
+                    Mf::FirstTrailingBit | Mf::FirstLeadingBit | Mf::CountOneBits | Mf::Abs
                 );
 
                 // Some GLSL functions only accept signed integers (like abs),
diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs
index 7965e6492b..85d943e850 100644
--- a/naga/src/back/hlsl/writer.rs
+++ b/naga/src/back/hlsl/writer.rs
@@ -3063,7 +3063,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                     Mf::CountLeadingZeros => Function::CountLeadingZeros,
                     Mf::CountOneBits => Function::MissingIntOverload("countbits"),
                     Mf::ReverseBits => Function::MissingIntOverload("reversebits"),
-                    Mf::FindLsb => Function::MissingIntReturnType("firstbitlow"),
+                    Mf::FirstTrailingBit => Function::MissingIntReturnType("firstbitlow"),
                     Mf::FirstLeadingBit => Function::MissingIntReturnType("firstbithigh"),
                     Mf::ExtractBits => Function::Regular(EXTRACT_BITS_FUNCTION),
                     Mf::InsertBits => Function::Regular(INSERT_BITS_FUNCTION),
diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs
index fccc92a1db..b112bb369a 100644
--- a/naga/src/back/msl/writer.rs
+++ b/naga/src/back/msl/writer.rs
@@ -1875,7 +1875,7 @@ impl<W: Write> Writer<W> {
                     Mf::ReverseBits => "reverse_bits",
                     Mf::ExtractBits => "",
                     Mf::InsertBits => "",
-                    Mf::FindLsb => "",
+                    Mf::FirstTrailingBit => "",
                     Mf::FirstLeadingBit => "",
                     // data packing
                     Mf::Pack4x8snorm => "pack_float_to_snorm4x8",
@@ -1920,7 +1920,7 @@ impl<W: Write> Writer<W> {
                         self.put_expression(arg1.unwrap(), context, false)?;
                         write!(self.out, ")")?;
                     }
-                    Mf::FindLsb => {
+                    Mf::FirstTrailingBit => {
                         let scalar = context.resolve_type(arg).scalar().unwrap();
                         let constant = scalar.width * 8 + 1;
 
diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs
index 932e27cceb..9fb9485860 100644
--- a/naga/src/back/spv/block.rs
+++ b/naga/src/back/spv/block.rs
@@ -1183,7 +1183,7 @@ impl<'w> BlockContext<'w> {
                             count_id,
                         ))
                     }
-                    Mf::FindLsb => MathOp::Ext(spirv::GLOp::FindILsb),
+                    Mf::FirstTrailingBit => MathOp::Ext(spirv::GLOp::FindILsb),
                     Mf::FirstLeadingBit => {
                         if arg_ty.scalar_width() == Some(4) {
                             let thing = match arg_scalar_kind {
diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs
index 1b3597bcba..6a069113eb 100644
--- a/naga/src/back/wgsl/writer.rs
+++ b/naga/src/back/wgsl/writer.rs
@@ -1710,7 +1710,7 @@ impl<W: Write> Writer<W> {
                     Mf::ReverseBits => Function::Regular("reverseBits"),
                     Mf::ExtractBits => Function::Regular("extractBits"),
                     Mf::InsertBits => Function::Regular("insertBits"),
-                    Mf::FindLsb => Function::Regular("firstTrailingBit"),
+                    Mf::FirstTrailingBit => Function::Regular("firstTrailingBit"),
                     Mf::FirstLeadingBit => Function::Regular("firstLeadingBit"),
                     // data packing
                     Mf::Pack4x8snorm => Function::Regular("pack4x8snorm"),
diff --git a/naga/src/front/glsl/builtins.rs b/naga/src/front/glsl/builtins.rs
index b0e921b79c..f76ce7754a 100644
--- a/naga/src/front/glsl/builtins.rs
+++ b/naga/src/front/glsl/builtins.rs
@@ -646,7 +646,7 @@ fn inject_standard_builtins(
                 "bitfieldReverse" => MathFunction::ReverseBits,
                 "bitfieldExtract" => MathFunction::ExtractBits,
                 "bitfieldInsert" => MathFunction::InsertBits,
-                "findLSB" => MathFunction::FindLsb,
+                "findLSB" => MathFunction::FirstTrailingBit,
                 "findMSB" => MathFunction::FirstLeadingBit,
                 _ => unreachable!(),
             };
@@ -695,7 +695,9 @@ fn inject_standard_builtins(
                 // we need to cast the return type of findLsb / findMsb
                 let mc = if scalar.kind == Sk::Uint {
                     match mc {
-                        MacroCall::MathFunction(MathFunction::FindLsb) => MacroCall::FindLsbUint,
+                        MacroCall::MathFunction(MathFunction::FirstTrailingBit) => {
+                            MacroCall::FindLsbUint
+                        }
                         MacroCall::MathFunction(MathFunction::FirstLeadingBit) => {
                             MacroCall::FindMsbUint
                         }
@@ -1789,7 +1791,7 @@ impl MacroCall {
             )?,
             mc @ (MacroCall::FindLsbUint | MacroCall::FindMsbUint) => {
                 let fun = match mc {
-                    MacroCall::FindLsbUint => MathFunction::FindLsb,
+                    MacroCall::FindLsbUint => MathFunction::FirstTrailingBit,
                     MacroCall::FindMsbUint => MathFunction::FirstLeadingBit,
                     _ => unreachable!(),
                 };
diff --git a/naga/src/front/spv/mod.rs b/naga/src/front/spv/mod.rs
index ac048203e4..809aff7674 100644
--- a/naga/src/front/spv/mod.rs
+++ b/naga/src/front/spv/mod.rs
@@ -3026,7 +3026,7 @@ impl<I: Iterator<Item = u32>> Frontend<I> {
                         Glo::UnpackHalf2x16 => Mf::Unpack2x16float,
                         Glo::UnpackUnorm2x16 => Mf::Unpack2x16unorm,
                         Glo::UnpackSnorm2x16 => Mf::Unpack2x16snorm,
-                        Glo::FindILsb => Mf::FindLsb,
+                        Glo::FindILsb => Mf::FirstTrailingBit,
                         Glo::FindUMsb | Glo::FindSMsb => Mf::FirstLeadingBit,
                         // TODO: https://github.com/gfx-rs/naga/issues/2526
                         Glo::Modf | Glo::Frexp => return Err(Error::UnsupportedExtInst(inst_id)),
diff --git a/naga/src/front/wgsl/parse/conv.rs b/naga/src/front/wgsl/parse/conv.rs
index 2cb676a80d..80f05db59a 100644
--- a/naga/src/front/wgsl/parse/conv.rs
+++ b/naga/src/front/wgsl/parse/conv.rs
@@ -235,7 +235,7 @@ pub fn map_standard_fun(word: &str) -> Option<crate::MathFunction> {
         "reverseBits" => Mf::ReverseBits,
         "extractBits" => Mf::ExtractBits,
         "insertBits" => Mf::InsertBits,
-        "firstTrailingBit" => Mf::FindLsb,
+        "firstTrailingBit" => Mf::FirstTrailingBit,
         "firstLeadingBit" => Mf::FirstLeadingBit,
         // data packing
         "pack4x8snorm" => Mf::Pack4x8snorm,
diff --git a/naga/src/lib.rs b/naga/src/lib.rs
index 8a52df81bb..94edec9159 100644
--- a/naga/src/lib.rs
+++ b/naga/src/lib.rs
@@ -1198,7 +1198,7 @@ pub enum MathFunction {
     ReverseBits,
     ExtractBits,
     InsertBits,
-    FindLsb,
+    FirstTrailingBit,
     FirstLeadingBit,
     // data packing
     Pack4x8snorm,
diff --git a/naga/src/proc/mod.rs b/naga/src/proc/mod.rs
index 6b514af18b..41273c5c72 100644
--- a/naga/src/proc/mod.rs
+++ b/naga/src/proc/mod.rs
@@ -484,7 +484,7 @@ impl super::MathFunction {
             Self::ReverseBits => 1,
             Self::ExtractBits => 3,
             Self::InsertBits => 4,
-            Self::FindLsb => 1,
+            Self::FirstTrailingBit => 1,
             Self::FirstLeadingBit => 1,
             // data packing
             Self::Pack4x8snorm => 1,
diff --git a/naga/src/proc/typifier.rs b/naga/src/proc/typifier.rs
index 23295cc0e2..d8af0cd236 100644
--- a/naga/src/proc/typifier.rs
+++ b/naga/src/proc/typifier.rs
@@ -788,7 +788,7 @@ impl<'a> ResolveContext<'a> {
                     Mf::ReverseBits |
                     Mf::ExtractBits |
                     Mf::InsertBits |
-                    Mf::FindLsb |
+                    Mf::FirstTrailingBit |
                     Mf::FirstLeadingBit => match *res_arg.inner_with(types)  {
                         Ti::Scalar(scalar @ crate::Scalar {
                             kind: crate::ScalarKind::Sint | crate::ScalarKind::Uint,
diff --git a/naga/src/valid/expression.rs b/naga/src/valid/expression.rs
index bd90c8ad2d..116560bb61 100644
--- a/naga/src/valid/expression.rs
+++ b/naga/src/valid/expression.rs
@@ -1351,7 +1351,7 @@ impl super::Validator {
                     | Mf::CountOneBits
                     | Mf::ReverseBits
                     | Mf::FirstLeadingBit
-                    | Mf::FindLsb => {
+                    | Mf::FirstTrailingBit => {
                         if arg1_ty.is_some() || arg2_ty.is_some() || arg3_ty.is_some() {
                             return Err(ExpressionError::WrongArgumentCount(fun));
                         }

From a220fcfc5709e81c1c245672efc3515ad9b514dd Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Fri, 26 Jan 2024 13:06:20 -0500
Subject: [PATCH 086/226] feat(const_eval): impl. `firstTrailingBit`

---
 naga/src/proc/constant_evaluator.rs           |  83 ++++++++++++
 .../glsl/math-functions.main.Fragment.glsl    |   6 +-
 naga/tests/out/hlsl/math-functions.hlsl       |   6 +-
 naga/tests/out/msl/math-functions.msl         |   6 +-
 naga/tests/out/spv/math-functions.spvasm      | 121 +++++++++---------
 naga/tests/out/wgsl/math-functions.wgsl       |   6 +-
 6 files changed, 148 insertions(+), 80 deletions(-)

diff --git a/naga/src/proc/constant_evaluator.rs b/naga/src/proc/constant_evaluator.rs
index 344e2ddba2..deaa9c93c7 100644
--- a/naga/src/proc/constant_evaluator.rs
+++ b/naga/src/proc/constant_evaluator.rs
@@ -1233,6 +1233,9 @@ impl<'a> ConstantEvaluator<'a> {
             crate::MathFunction::ReverseBits => {
                 component_wise_concrete_int!(self, span, [arg], |e| { Ok([e.reverse_bits()]) })
             }
+            crate::MathFunction::FirstTrailingBit => {
+                component_wise_concrete_int(self, span, [arg], |ci| Ok(first_trailing_bit(ci)))
+            }
             crate::MathFunction::FirstLeadingBit => {
                 component_wise_concrete_int(self, span, [arg], |ci| Ok(first_leading_bit(ci)))
             }
@@ -2101,6 +2104,86 @@ impl<'a> ConstantEvaluator<'a> {
     }
 }
 
+fn first_trailing_bit(concrete_int: ConcreteInt<1>) -> ConcreteInt<1> {
+    // NOTE: Bit indices for this built-in start at 0 at the "right" (or LSB). For example, a value
+    // of 1 means the least significant bit is set. Therefore, an input of 1 would return a
+    // right-to-left bit index of 0, and an input of `0x[80 00…]` would return 31.
+    let trailing_zeros_to_bit_idx = |e: u32| -> u32 {
+        match e {
+            idx @ 0..=31 => idx,
+            32 => u32::MAX,
+            _ => unreachable!(),
+        }
+    };
+    match concrete_int {
+        ConcreteInt::U32([e]) => ConcreteInt::U32([trailing_zeros_to_bit_idx(e.trailing_zeros())]),
+        ConcreteInt::I32([e]) => {
+            ConcreteInt::I32([trailing_zeros_to_bit_idx(e.trailing_zeros()) as i32])
+        }
+    }
+}
+
+#[test]
+fn first_trailing_bit_smoke() {
+    assert_eq!(
+        first_trailing_bit(ConcreteInt::I32([0])),
+        ConcreteInt::I32([-1])
+    );
+    assert_eq!(
+        first_trailing_bit(ConcreteInt::I32([1])),
+        ConcreteInt::I32([0])
+    );
+    assert_eq!(
+        first_trailing_bit(ConcreteInt::I32([2])),
+        ConcreteInt::I32([1])
+    );
+    assert_eq!(
+        first_trailing_bit(ConcreteInt::I32([-1])),
+        ConcreteInt::I32([0]),
+    );
+    assert_eq!(
+        first_trailing_bit(ConcreteInt::I32([i32::MIN])),
+        ConcreteInt::I32([31]),
+    );
+    assert_eq!(
+        first_trailing_bit(ConcreteInt::I32([i32::MAX])),
+        ConcreteInt::I32([0]),
+    );
+    for idx in 0..32 {
+        assert_eq!(
+            first_trailing_bit(ConcreteInt::I32([1 << idx])),
+            ConcreteInt::I32([idx])
+        )
+    }
+
+    assert_eq!(
+        first_trailing_bit(ConcreteInt::U32([0])),
+        ConcreteInt::U32([u32::MAX])
+    );
+    assert_eq!(
+        first_trailing_bit(ConcreteInt::U32([1])),
+        ConcreteInt::U32([0])
+    );
+    assert_eq!(
+        first_trailing_bit(ConcreteInt::U32([2])),
+        ConcreteInt::U32([1])
+    );
+    assert_eq!(
+        first_trailing_bit(ConcreteInt::U32([1 << 31])),
+        ConcreteInt::U32([31]),
+    );
+    assert_eq!(
+        first_trailing_bit(ConcreteInt::U32([u32::MAX])),
+        ConcreteInt::U32([0]),
+    );
+    for idx in 0..32 {
+        assert_eq!(
+            first_trailing_bit(ConcreteInt::U32([1 << idx])),
+            ConcreteInt::U32([idx])
+        )
+    }
+}
+
 fn first_leading_bit(concrete_int: ConcreteInt<1>) -> ConcreteInt<1> {
     // NOTE: Bit indices for this built-in start at 0 at the "right" (or LSB). For example, 1 means
     // the least significant bit is set. Therefore, an input of 1 would return a right-to-left bit
diff --git a/naga/tests/out/glsl/math-functions.main.Fragment.glsl b/naga/tests/out/glsl/math-functions.main.Fragment.glsl
index c10dddff0f..4ab85269e1 100644
--- a/naga/tests/out/glsl/math-functions.main.Fragment.glsl
+++ b/naga/tests/out/glsl/math-functions.main.Fragment.glsl
@@ -67,10 +67,8 @@ void main() {
     int const_dot = ( + ivec2(0).x * ivec2(0).x + ivec2(0).y * ivec2(0).y);
     ivec2 flb_b = ivec2(-1, -1);
     uvec2 flb_c = uvec2(0u, 0u);
-    int ftb_a = findLSB(-1);
-    uint ftb_b = uint(findLSB(1u));
-    ivec2 ftb_c = findLSB(ivec2(-1));
-    uvec2 ftb_d = uvec2(findLSB(uvec2(1u)));
+    ivec2 ftb_c = ivec2(0, 0);
+    uvec2 ftb_d = uvec2(0u, 0u);
     uvec2 ctz_e = uvec2(32u, 32u);
     ivec2 ctz_f = ivec2(32, 32);
     uvec2 ctz_g = uvec2(0u, 0u);
diff --git a/naga/tests/out/hlsl/math-functions.hlsl b/naga/tests/out/hlsl/math-functions.hlsl
index 14d1e9e188..a02b2b1280 100644
--- a/naga/tests/out/hlsl/math-functions.hlsl
+++ b/naga/tests/out/hlsl/math-functions.hlsl
@@ -81,10 +81,8 @@ void main()
     int const_dot = dot(ZeroValueint2(), ZeroValueint2());
     int2 flb_b = int2(-1, -1);
     uint2 flb_c = uint2(0u, 0u);
-    int ftb_a = asint(firstbitlow(-1));
-    uint ftb_b = firstbitlow(1u);
-    int2 ftb_c = asint(firstbitlow((-1).xx));
-    uint2 ftb_d = firstbitlow((1u).xx);
+    int2 ftb_c = int2(0, 0);
+    uint2 ftb_d = uint2(0u, 0u);
     uint2 ctz_e = uint2(32u, 32u);
     int2 ctz_f = int2(32, 32);
     uint2 ctz_g = uint2(0u, 0u);
diff --git a/naga/tests/out/msl/math-functions.msl b/naga/tests/out/msl/math-functions.msl
index 271472978a..559002c39b 100644
--- a/naga/tests/out/msl/math-functions.msl
+++ b/naga/tests/out/msl/math-functions.msl
@@ -69,10 +69,8 @@ fragment void main_(
     int const_dot = ( + metal::int2 {}.x * metal::int2 {}.x + metal::int2 {}.y * metal::int2 {}.y);
     metal::int2 flb_b = metal::int2(-1, -1);
     metal::uint2 flb_c = metal::uint2(0u, 0u);
-    int ftb_a = (((metal::ctz(-1) + 1) % 33) - 1);
-    uint ftb_b = (((metal::ctz(1u) + 1) % 33) - 1);
-    metal::int2 ftb_c = (((metal::ctz(metal::int2(-1)) + 1) % 33) - 1);
-    metal::uint2 ftb_d = (((metal::ctz(metal::uint2(1u)) + 1) % 33) - 1);
+    metal::int2 ftb_c = metal::int2(0, 0);
+    metal::uint2 ftb_d = metal::uint2(0u, 0u);
     metal::uint2 ctz_e = metal::uint2(32u, 32u);
     metal::int2 ctz_f = metal::int2(32, 32);
     metal::uint2 ctz_g = metal::uint2(0u, 0u);
diff --git a/naga/tests/out/spv/math-functions.spvasm b/naga/tests/out/spv/math-functions.spvasm
index 2207934cc9..366857f91f 100644
--- a/naga/tests/out/spv/math-functions.spvasm
+++ b/naga/tests/out/spv/math-functions.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.1
 ; Generator: rspirv
-; Bound: 94
+; Bound: 87
 OpCapability Shader
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
@@ -44,71 +44,64 @@ OpMemberDecorate %15 1 Offset 16
 %28 = OpConstantComposite  %7  %22 %22
 %29 = OpConstant  %9  0
 %30 = OpConstantComposite  %8  %29 %29
-%31 = OpConstant  %9  1
-%32 = OpConstantComposite  %7  %22 %22
-%33 = OpConstantComposite  %8  %31 %31
-%34 = OpConstant  %9  32
-%35 = OpConstant  %6  32
-%36 = OpConstant  %6  0
-%37 = OpConstantComposite  %8  %34 %34
-%38 = OpConstantComposite  %7  %35 %35
-%39 = OpConstantComposite  %7  %36 %36
-%40 = OpConstant  %9  31
-%41 = OpConstantComposite  %8  %40 %40
-%42 = OpConstant  %6  2
-%43 = OpConstant  %4  2.0
-%44 = OpConstantComposite  %10  %19 %43
-%45 = OpConstant  %6  3
-%46 = OpConstant  %6  4
-%47 = OpConstantComposite  %7  %45 %46
-%48 = OpConstant  %4  1.5
-%49 = OpConstantComposite  %10  %48 %48
-%50 = OpConstantComposite  %3  %48 %48 %48 %48
-%57 = OpConstantComposite  %3  %19 %19 %19 %19
-%60 = OpConstantNull  %6
+%31 = OpConstant  %6  0
+%32 = OpConstantComposite  %7  %31 %31
+%33 = OpConstant  %9  32
+%34 = OpConstant  %6  32
+%35 = OpConstantComposite  %8  %33 %33
+%36 = OpConstantComposite  %7  %34 %34
+%37 = OpConstant  %9  31
+%38 = OpConstantComposite  %8  %37 %37
+%39 = OpConstant  %6  2
+%40 = OpConstant  %4  2.0
+%41 = OpConstantComposite  %10  %19 %40
+%42 = OpConstant  %6  3
+%43 = OpConstant  %6  4
+%44 = OpConstantComposite  %7  %42 %43
+%45 = OpConstant  %4  1.5
+%46 = OpConstantComposite  %10  %45 %45
+%47 = OpConstantComposite  %3  %45 %45 %45 %45
+%54 = OpConstantComposite  %3  %19 %19 %19 %19
+%57 = OpConstantNull  %6
 %17 = OpFunction  %2  None %18
 %16 = OpLabel
-OpBranch %51
-%51 = OpLabel
-%52 = OpExtInst  %4  %1 Degrees %19
-%53 = OpExtInst  %4  %1 Radians %19
-%54 = OpExtInst  %3  %1 Degrees %21
-%55 = OpExtInst  %3  %1 Radians %21
-%56 = OpExtInst  %3  %1 FClamp %21 %21 %57
-%58 = OpExtInst  %3  %1 Refract %21 %21 %19
-%61 = OpCompositeExtract  %6  %26 0
-%62 = OpCompositeExtract  %6  %26 0
-%63 = OpIMul  %6  %61 %62
-%64 = OpIAdd  %6  %60 %63
-%65 = OpCompositeExtract  %6  %26 1
-%66 = OpCompositeExtract  %6  %26 1
-%67 = OpIMul  %6  %65 %66
-%59 = OpIAdd  %6  %64 %67
-%68 = OpExtInst  %6  %1 FindILsb %22
-%69 = OpExtInst  %9  %1 FindILsb %31
-%70 = OpExtInst  %7  %1 FindILsb %32
-%71 = OpExtInst  %8  %1 FindILsb %33
-%72 = OpExtInst  %4  %1 Ldexp %19 %42
-%73 = OpExtInst  %10  %1 Ldexp %44 %47
-%74 = OpExtInst  %11  %1 ModfStruct %48
-%75 = OpExtInst  %11  %1 ModfStruct %48
-%76 = OpCompositeExtract  %4  %75 0
-%77 = OpExtInst  %11  %1 ModfStruct %48
+OpBranch %48
+%48 = OpLabel
+%49 = OpExtInst  %4  %1 Degrees %19
+%50 = OpExtInst  %4  %1 Radians %19
+%51 = OpExtInst  %3  %1 Degrees %21
+%52 = OpExtInst  %3  %1 Radians %21
+%53 = OpExtInst  %3  %1 FClamp %21 %21 %54
+%55 = OpExtInst  %3  %1 Refract %21 %21 %19
+%58 = OpCompositeExtract  %6  %26 0
+%59 = OpCompositeExtract  %6  %26 0
+%60 = OpIMul  %6  %58 %59
+%61 = OpIAdd  %6  %57 %60
+%62 = OpCompositeExtract  %6  %26 1
+%63 = OpCompositeExtract  %6  %26 1
+%64 = OpIMul  %6  %62 %63
+%56 = OpIAdd  %6  %61 %64
+%65 = OpExtInst  %4  %1 Ldexp %19 %39
+%66 = OpExtInst  %10  %1 Ldexp %41 %44
+%67 = OpExtInst  %11  %1 ModfStruct %45
+%68 = OpExtInst  %11  %1 ModfStruct %45
+%69 = OpCompositeExtract  %4  %68 0
+%70 = OpExtInst  %11  %1 ModfStruct %45
+%71 = OpCompositeExtract  %4  %70 1
+%72 = OpExtInst  %12  %1 ModfStruct %46
+%73 = OpExtInst  %13  %1 ModfStruct %47
+%74 = OpCompositeExtract  %3  %73 1
+%75 = OpCompositeExtract  %4  %74 0
+%76 = OpExtInst  %12  %1 ModfStruct %46
+%77 = OpCompositeExtract  %10  %76 0
 %78 = OpCompositeExtract  %4  %77 1
-%79 = OpExtInst  %12  %1 ModfStruct %49
-%80 = OpExtInst  %13  %1 ModfStruct %50
-%81 = OpCompositeExtract  %3  %80 1
-%82 = OpCompositeExtract  %4  %81 0
-%83 = OpExtInst  %12  %1 ModfStruct %49
-%84 = OpCompositeExtract  %10  %83 0
-%85 = OpCompositeExtract  %4  %84 1
-%86 = OpExtInst  %14  %1 FrexpStruct %48
-%87 = OpExtInst  %14  %1 FrexpStruct %48
-%88 = OpCompositeExtract  %4  %87 0
-%89 = OpExtInst  %14  %1 FrexpStruct %48
-%90 = OpCompositeExtract  %6  %89 1
-%91 = OpExtInst  %15  %1 FrexpStruct %50
-%92 = OpCompositeExtract  %5  %91 1
-%93 = OpCompositeExtract  %6  %92 0
+%79 = OpExtInst  %14  %1 FrexpStruct %45
+%80 = OpExtInst  %14  %1 FrexpStruct %45
+%81 = OpCompositeExtract  %4  %80 0
+%82 = OpExtInst  %14  %1 FrexpStruct %45
+%83 = OpCompositeExtract  %6  %82 1
+%84 = OpExtInst  %15  %1 FrexpStruct %47
+%85 = OpCompositeExtract  %5  %84 1
+%86 = OpCompositeExtract  %6  %85 0
 OpReturn
 OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/wgsl/math-functions.wgsl b/naga/tests/out/wgsl/math-functions.wgsl
index c97ce6a4a2..2271bb9cb0 100644
--- a/naga/tests/out/wgsl/math-functions.wgsl
+++ b/naga/tests/out/wgsl/math-functions.wgsl
@@ -12,10 +12,8 @@ fn main() {
     let const_dot = dot(vec2<i32>(), vec2<i32>());
     let flb_b = vec2<i32>(-1i, -1i);
     let flb_c = vec2<u32>(0u, 0u);
-    let ftb_a = firstTrailingBit(-1i);
-    let ftb_b = firstTrailingBit(1u);
-    let ftb_c = firstTrailingBit(vec2(-1i));
-    let ftb_d = firstTrailingBit(vec2(1u));
+    let ftb_c = vec2<i32>(0i, 0i);
+    let ftb_d = vec2<u32>(0u, 0u);
     let ctz_e = vec2<u32>(32u, 32u);
     let ctz_f = vec2<i32>(32i, 32i);
     let ctz_g = vec2<u32>(0u, 0u);

From fa93676991ddc06ccafa9b7a313080184423b249 Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Tue, 23 Jul 2024 12:45:48 -0400
Subject: [PATCH 087/226] docs(CHANGELOG): add entry for const. eval of
 `first{Leading,Trailing}Bit`

---
 CHANGELOG.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c9eccafcda..9567c74f0f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -39,6 +39,12 @@ Bottom level categories:
 
 ## Unreleased
 
+### New Features
+
+#### Naga
+
+* Support constant evaluation for `firstLeadingBit` and `firstTrailingBit` numeric built-ins in WGSL. Front-ends that translate to these built-ins also benefit from constant evaluation. By @ErichDonGubler in [#5101](https://github.com/gfx-rs/wgpu/pull/5101).
+
 ### Bug Fixes
 
 #### General

From 62333a573e07a62108921412565cc80466415069 Mon Sep 17 00:00:00 2001
From: renshuncui <renshun@111.com>
Date: Wed, 24 Jul 2024 23:48:51 +0900
Subject: [PATCH 088/226] chore: fix some comments (#6033)

Signed-off-by: renshuncui <renshun@111.com>
Co-authored-by: Erich Gubler <erichdongubler@gmail.com>
---
 examples/src/hello_synchronization/README.md | 2 +-
 naga/src/back/glsl/features.rs               | 2 +-
 naga/src/back/glsl/mod.rs                    | 2 +-
 naga/src/back/msl/writer.rs                  | 2 +-
 naga/src/front/mod.rs                        | 2 +-
 naga/src/front/spv/mod.rs                    | 2 +-
 naga/src/lib.rs                              | 2 +-
 wgpu-types/src/lib.rs                        | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/examples/src/hello_synchronization/README.md b/examples/src/hello_synchronization/README.md
index 5750801f14..5367213eec 100644
--- a/examples/src/hello_synchronization/README.md
+++ b/examples/src/hello_synchronization/README.md
@@ -2,7 +2,7 @@
 
 This example is 
 1. A small demonstration of the importance of synchronization.
-2. How basic synchronization you can understand from the CPU is preformed on the GPU.
+2. How basic synchronization you can understand from the CPU is performed on the GPU.
 
 ## To Run
 
diff --git a/naga/src/back/glsl/features.rs b/naga/src/back/glsl/features.rs
index 0478e01351..b6ad1738fe 100644
--- a/naga/src/back/glsl/features.rs
+++ b/naga/src/back/glsl/features.rs
@@ -447,7 +447,7 @@ impl<'a, W> Writer<'a, W> {
             ..
         } = self;
 
-        // Loop trough all expressions in both functions and the entry point
+        // Loop through all expressions in both functions and the entry point
         // to check for needed features
         for (expressions, info) in module
             .functions
diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs
index fe70480544..99b0fc7150 100644
--- a/naga/src/back/glsl/mod.rs
+++ b/naga/src/back/glsl/mod.rs
@@ -1875,7 +1875,7 @@ impl<'a, W: Write> Writer<'a, W> {
         // with different precedences from applying earlier.
         write!(self.out, "(")?;
 
-        // Cycle trough all the components of the vector
+        // Cycle through all the components of the vector
         for index in 0..size {
             let component = back::COMPONENTS[index];
             // Write the addition to the previous product
diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs
index b112bb369a..ad3dd69ebe 100644
--- a/naga/src/back/msl/writer.rs
+++ b/naga/src/back/msl/writer.rs
@@ -1235,7 +1235,7 @@ impl<W: Write> Writer<W> {
         // with different precedences from applying earlier.
         write!(self.out, "(")?;
 
-        // Cycle trough all the components of the vector
+        // Cycle through all the components of the vector
         for index in 0..size {
             let component = back::COMPONENTS[index];
             // Write the addition to the previous product
diff --git a/naga/src/front/mod.rs b/naga/src/front/mod.rs
index 3f602f3dd0..11c8aa047e 100644
--- a/naga/src/front/mod.rs
+++ b/naga/src/front/mod.rs
@@ -275,7 +275,7 @@ where
         Name: std::borrow::Borrow<Q>,
         Q: std::hash::Hash + Eq + ?Sized,
     {
-        // Iterate backwards trough the scopes and try to find the variable
+        // Iterate backwards through the scopes and try to find the variable
         for scope in self.scopes[..self.cursor].iter().rev() {
             if let Some(var) = scope.get(name) {
                 return Some(var);
diff --git a/naga/src/front/spv/mod.rs b/naga/src/front/spv/mod.rs
index 809aff7674..7dfb4ae293 100644
--- a/naga/src/front/spv/mod.rs
+++ b/naga/src/front/spv/mod.rs
@@ -3460,7 +3460,7 @@ impl<I: Iterator<Item = u32>> Frontend<I> {
                             .insert(target, (case_body_idx, vec![literal as i32]));
                     }
 
-                    // Loop trough the collected target blocks creating a new case for each
+                    // Loop through the collected target blocks creating a new case for each
                     // literal pointing to it, only one case will have the true body and all the
                     // others will be empty fallthrough so that they all execute the same body
                     // without duplicating code.
diff --git a/naga/src/lib.rs b/naga/src/lib.rs
index 94edec9159..c356a2cf03 100644
--- a/naga/src/lib.rs
+++ b/naga/src/lib.rs
@@ -1337,7 +1337,7 @@ bitflags::bitflags! {
         const STORAGE = 1 << 0;
         /// Barrier affects all [`AddressSpace::WorkGroup`] accesses.
         const WORK_GROUP = 1 << 1;
-        /// Barrier synchronizes execution across all invocations within a subgroup that exectue this instruction.
+        /// Barrier synchronizes execution across all invocations within a subgroup that execute this instruction.
         const SUB_GROUP = 1 << 2;
     }
 }
diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
index dbe3a010b1..c6b91f1e12 100644
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@@ -4848,7 +4848,7 @@ pub enum StencilOperation {
 pub struct StencilFaceState {
     /// Comparison function that determines if the fail_op or pass_op is used on the stencil buffer.
     pub compare: CompareFunction,
-    /// Operation that is preformed when stencil test fails.
+    /// Operation that is performed when stencil test fails.
     pub fail_op: StencilOperation,
     /// Operation that is performed when depth test fails but stencil test succeeds.
     pub depth_fail_op: StencilOperation,

From ea81a24414df6569a4bb4ddf4f7ead6eac90dd26 Mon Sep 17 00:00:00 2001
From: Bruce Mitchener <bruce.mitchener@gmail.com>
Date: Tue, 23 Jul 2024 13:26:56 +0700
Subject: [PATCH 089/226] naga: Remove feature `std` for `indexmap`

This was added in https://github.com/gfx-rs/naga/pull/2062

This was needed before version 2, but not in version 2, so it
should be safe to remove now as it is enabled by default.
---
 naga/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/naga/Cargo.toml b/naga/Cargo.toml
index 2d54de8c65..cf9f14373c 100644
--- a/naga/Cargo.toml
+++ b/naga/Cargo.toml
@@ -67,7 +67,7 @@ termcolor = { version = "1.4.1" }
 # https://github.com/brendanzab/codespan/commit/e99c867339a877731437e7ee6a903a3d03b5439e
 codespan-reporting = { version = "0.11.0" }
 rustc-hash = "1.1.0"
-indexmap = { version = "2", features = ["std"] }
+indexmap = "2"
 log = "0.4"
 spirv = { version = "0.3", optional = true }
 thiserror = "1.0.63"

From 4f020573594b20e486600b79fcc2075b0e8da76d Mon Sep 17 00:00:00 2001
From: Bruce Mitchener <bruce.mitchener@gmail.com>
Date: Sat, 20 Jul 2024 10:06:41 +0700
Subject: [PATCH 090/226] Convert some module doc comments

Some module doc comments were using `/*! ... */` syntax and had
leading ` *` prefixes on each line. This interferes with the
tracking of `clippy::doc_lazy_continuation`, so switch those over
to `//!` style comment blocks.

This leaves `/*! ... */` blocks alone which didn't prefix each
line.
---
 player/src/bin/play.rs           |   3 +-
 player/src/lib.rs                |   5 +-
 player/tests/test.rs             |  20 +-
 wgpu-core/src/track/buffer.rs    |  11 +-
 wgpu-core/src/track/stateless.rs |   9 +-
 wgpu-core/src/track/texture.rs   |  39 ++-
 wgpu-hal/src/lib.rs              | 407 +++++++++++++++----------------
 wgpu-types/src/lib.rs            |   5 +-
 8 files changed, 246 insertions(+), 253 deletions(-)

diff --git a/player/src/bin/play.rs b/player/src/bin/play.rs
index 5c438dd20d..6510ab23cd 100644
--- a/player/src/bin/play.rs
+++ b/player/src/bin/play.rs
@@ -1,5 +1,4 @@
-/*! This is a player for WebGPU traces.
-!*/
+//! This is a player for WebGPU traces.
 
 #[cfg(not(target_arch = "wasm32"))]
 fn main() {
diff --git a/player/src/lib.rs b/player/src/lib.rs
index de56b16888..cf89b2469d 100644
--- a/player/src/lib.rs
+++ b/player/src/lib.rs
@@ -1,6 +1,5 @@
-/*! This is a player library for WebGPU traces.
- *
-!*/
+//! This is a player library for WebGPU traces.
+
 #![cfg(not(target_arch = "wasm32"))]
 #![warn(unsafe_op_in_unsafe_fn)]
 
diff --git a/player/tests/test.rs b/player/tests/test.rs
index 864f9429a9..a0df6f638c 100644
--- a/player/tests/test.rs
+++ b/player/tests/test.rs
@@ -1,13 +1,13 @@
-/*! Tester for WebGPU
- *  It enumerates the available backends on the system,
- *  and run the tests through them.
- *
- *  Test requirements:
- *    - all IDs have the backend `Empty`
- *    - all expected buffers have `MAP_READ` usage
- *    - last action is `Submit`
- *    - no swapchain use
-!*/
+//! Tester for WebGPU
+//!  It enumerates the available backends on the system,
+//!  and run the tests through them.
+//!
+//!  Test requirements:
+//!    - all IDs have the backend `Empty`
+//!    - all expected buffers have `MAP_READ` usage
+//!    - last action is `Submit`
+//!    - no swapchain use
+
 #![cfg(not(target_arch = "wasm32"))]
 
 use player::GlobalPlay;
diff --git a/wgpu-core/src/track/buffer.rs b/wgpu-core/src/track/buffer.rs
index afb20e149d..ed95f9ce8a 100644
--- a/wgpu-core/src/track/buffer.rs
+++ b/wgpu-core/src/track/buffer.rs
@@ -1,9 +1,8 @@
-/*! Buffer Trackers
- *
- * Buffers are represented by a single state for the whole resource,
- * a 16 bit bitflag of buffer usages. Because there is only ever
- * one subresource, they have no selector.
-!*/
+//! Buffer Trackers
+//!
+//! Buffers are represented by a single state for the whole resource,
+//! a 16 bit bitflag of buffer usages. Because there is only ever
+//! one subresource, they have no selector.
 
 use std::sync::{Arc, Weak};
 
diff --git a/wgpu-core/src/track/stateless.rs b/wgpu-core/src/track/stateless.rs
index 7d8d904d2a..91cdc0fa36 100644
--- a/wgpu-core/src/track/stateless.rs
+++ b/wgpu-core/src/track/stateless.rs
@@ -1,8 +1,7 @@
-/*! Stateless Trackers
- *
- * Stateless trackers don't have any state, so make no
- * distinction between a usage scope and a full tracker.
-!*/
+//! Stateless Trackers
+//!
+//! Stateless trackers don't have any state, so make no
+//! distinction between a usage scope and a full tracker.
 
 use std::sync::Arc;
 
diff --git a/wgpu-core/src/track/texture.rs b/wgpu-core/src/track/texture.rs
index bad216db19..73321687cb 100644
--- a/wgpu-core/src/track/texture.rs
+++ b/wgpu-core/src/track/texture.rs
@@ -1,23 +1,22 @@
-/*! Texture Trackers
- *
- * Texture trackers are significantly more complicated than
- * the buffer trackers because textures can be in a "complex"
- * state where each individual subresource can potentially be
- * in a different state from every other subtresource. These
- * complex states are stored separately from the simple states
- * because they are signifignatly more difficult to track and
- * most resources spend the vast majority of their lives in
- * simple states.
- *
- * There are two special texture usages: `UNKNOWN` and `UNINITIALIZED`.
- * - `UNKNOWN` is only used in complex states and is used to signify
- *   that the complex state does not know anything about those subresources.
- *   It cannot leak into transitions, it is invalid to transition into UNKNOWN
- *   state.
- * - `UNINITIALIZED` is used in both simple and complex states to mean the texture
- *   is known to be in some undefined state. Any transition away from UNINITIALIZED
- *   will treat the contents as junk.
-!*/
+//! Texture Trackers
+//!
+//! Texture trackers are significantly more complicated than
+//! the buffer trackers because textures can be in a "complex"
+//! state where each individual subresource can potentially be
+//! in a different state from every other subresource. These
+//! complex states are stored separately from the simple states
+//! because they are significantly more difficult to track and
+//! most resources spend the vast majority of their lives in
+//! simple states.
+//!
+//! There are two special texture usages: `UNKNOWN` and `UNINITIALIZED`.
+//! - `UNKNOWN` is only used in complex states and is used to signify
+//!   that the complex state does not know anything about those subresources.
+//!   It cannot leak into transitions, it is invalid to transition into UNKNOWN
+//!   state.
+//! - `UNINITIALIZED` is used in both simple and complex states to mean the texture
+//!   is known to be in some undefined state. Any transition away from UNINITIALIZED
+//!   will treat the contents as junk.
 
 use super::{range::RangedStates, PendingTransition, PendingTransitionList, TrackerIndex};
 use crate::{
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 706c369eb5..51eec1a82b 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -1,207 +1,206 @@
-/*! A cross-platform unsafe graphics abstraction.
- *
- * This crate defines a set of traits abstracting over modern graphics APIs,
- * with implementations ("backends") for Vulkan, Metal, Direct3D, and GL.
- *
- * `wgpu-hal` is a spiritual successor to
- * [gfx-hal](https://github.com/gfx-rs/gfx), but with reduced scope, and
- * oriented towards WebGPU implementation goals. It has no overhead for
- * validation or tracking, and the API translation overhead is kept to the bare
- * minimum by the design of WebGPU. This API can be used for resource-demanding
- * applications and engines.
- *
- * The `wgpu-hal` crate's main design choices:
- *
- * - Our traits are meant to be *portable*: proper use
- *   should get equivalent results regardless of the backend.
- *
- * - Our traits' contracts are *unsafe*: implementations perform minimal
- *   validation, if any, and incorrect use will often cause undefined behavior.
- *   This allows us to minimize the overhead we impose over the underlying
- *   graphics system. If you need safety, the [`wgpu-core`] crate provides a
- *   safe API for driving `wgpu-hal`, implementing all necessary validation,
- *   resource state tracking, and so on. (Note that `wgpu-core` is designed for
- *   use via FFI; the [`wgpu`] crate provides more idiomatic Rust bindings for
- *   `wgpu-core`.) Or, you can do your own validation.
- *
- * - In the same vein, returned errors *only cover cases the user can't
- *   anticipate*, like running out of memory or losing the device. Any errors
- *   that the user could reasonably anticipate are their responsibility to
- *   avoid. For example, `wgpu-hal` returns no error for mapping a buffer that's
- *   not mappable: as the buffer creator, the user should already know if they
- *   can map it.
- *
- * - We use *static dispatch*. The traits are not
- *   generally object-safe. You must select a specific backend type
- *   like [`vulkan::Api`] or [`metal::Api`], and then use that
- *   according to the main traits, or call backend-specific methods.
- *
- * - We use *idiomatic Rust parameter passing*,
- *   taking objects by reference, returning them by value, and so on,
- *   unlike `wgpu-core`, which refers to objects by ID.
- *
- * - We map buffer contents *persistently*. This means that the buffer can
- *   remain mapped on the CPU while the GPU reads or writes to it. You must
- *   explicitly indicate when data might need to be transferred between CPU and
- *   GPU, if [`Device::map_buffer`] indicates that this is necessary.
- *
- * - You must record *explicit barriers* between different usages of a
- *   resource. For example, if a buffer is written to by a compute
- *   shader, and then used as and index buffer to a draw call, you
- *   must use [`CommandEncoder::transition_buffers`] between those two
- *   operations.
- *
- * - Pipeline layouts are *explicitly specified* when setting bind
- *   group. Incompatible layouts disturb groups bound at higher indices.
- *
- * - The API *accepts collections as iterators*, to avoid forcing the user to
- *   store data in particular containers. The implementation doesn't guarantee
- *   that any of the iterators are drained, unless stated otherwise by the
- *   function documentation. For this reason, we recommend that iterators don't
- *   do any mutating work.
- *
- * Unfortunately, `wgpu-hal`'s safety requirements are not fully documented.
- * Ideally, all trait methods would have doc comments setting out the
- * requirements users must meet to ensure correct and portable behavior. If you
- * are aware of a specific requirement that a backend imposes that is not
- * ensured by the traits' documented rules, please file an issue. Or, if you are
- * a capable technical writer, please file a pull request!
- *
- * [`wgpu-core`]: https://crates.io/crates/wgpu-core
- * [`wgpu`]: https://crates.io/crates/wgpu
- * [`vulkan::Api`]: vulkan/struct.Api.html
- * [`metal::Api`]: metal/struct.Api.html
- *
- * ## Primary backends
- *
- * The `wgpu-hal` crate has full-featured backends implemented on the following
- * platform graphics APIs:
- *
- * - Vulkan, available on Linux, Android, and Windows, using the [`ash`] crate's
- *   Vulkan bindings. It's also available on macOS, if you install [MoltenVK].
- *
- * - Metal on macOS, using the [`metal`] crate's bindings.
- *
- * - Direct3D 12 on Windows, using the [`d3d12`] crate's bindings.
- *
- * [`ash`]: https://crates.io/crates/ash
- * [MoltenVK]: https://github.com/KhronosGroup/MoltenVK
- * [`metal`]: https://crates.io/crates/metal
- * [`d3d12`]: ahttps://crates.io/crates/d3d12
- *
- * ## Secondary backends
- *
- * The `wgpu-hal` crate has a partial implementation based on the following
- * platform graphics API:
- *
- * - The GL backend is available anywhere OpenGL, OpenGL ES, or WebGL are
- *   available. See the [`gles`] module documentation for details.
- *
- * [`gles`]: gles/index.html
- *
- * You can see what capabilities an adapter is missing by checking the
- * [`DownlevelCapabilities`][tdc] in [`ExposedAdapter::capabilities`], available
- * from [`Instance::enumerate_adapters`].
- *
- * The API is generally designed to fit the primary backends better than the
- * secondary backends, so the latter may impose more overhead.
- *
- * [tdc]: wgt::DownlevelCapabilities
- *
- * ## Traits
- *
- * The `wgpu-hal` crate defines a handful of traits that together
- * represent a cross-platform abstraction for modern GPU APIs.
- *
- * - The [`Api`] trait represents a `wgpu-hal` backend. It has no methods of its
- *   own, only a collection of associated types.
- *
- * - [`Api::Instance`] implements the [`Instance`] trait. [`Instance::init`]
- *   creates an instance value, which you can use to enumerate the adapters
- *   available on the system. For example, [`vulkan::Api::Instance::init`][Ii]
- *   returns an instance that can enumerate the Vulkan physical devices on your
- *   system.
- *
- * - [`Api::Adapter`] implements the [`Adapter`] trait, representing a
- *   particular device from a particular backend. For example, a Vulkan instance
- *   might have a Lavapipe software adapter and a GPU-based adapter.
- *
- * - [`Api::Device`] implements the [`Device`] trait, representing an active
- *   link to a device. You get a device value by calling [`Adapter::open`], and
- *   then use it to create buffers, textures, shader modules, and so on.
- *
- * - [`Api::Queue`] implements the [`Queue`] trait, which you use to submit
- *   command buffers to a given device.
- *
- * - [`Api::CommandEncoder`] implements the [`CommandEncoder`] trait, which you
- *   use to build buffers of commands to submit to a queue. This has all the
- *   methods for drawing and running compute shaders, which is presumably what
- *   you're here for.
- *
- * - [`Api::Surface`] implements the [`Surface`] trait, which represents a
- *   swapchain for presenting images on the screen, via interaction with the
- *   system's window manager.
- *
- * The [`Api`] trait has various other associated types like [`Api::Buffer`] and
- * [`Api::Texture`] that represent resources the rest of the interface can
- * operate on, but these generally do not have their own traits.
- *
- * [Ii]: Instance::init
- *
- * ## Validation is the calling code's responsibility, not `wgpu-hal`'s
- *
- * As much as possible, `wgpu-hal` traits place the burden of validation,
- * resource tracking, and state tracking on the caller, not on the trait
- * implementations themselves. Anything which can reasonably be handled in
- * backend-independent code should be. A `wgpu_hal` backend's sole obligation is
- * to provide portable behavior, and report conditions that the calling code
- * can't reasonably anticipate, like device loss or running out of memory.
- *
- * The `wgpu` crate collection is intended for use in security-sensitive
- * applications, like web browsers, where the API is available to untrusted
- * code. This means that `wgpu-core`'s validation is not simply a service to
- * developers, to be provided opportunistically when the performance costs are
- * acceptable and the necessary data is ready at hand. Rather, `wgpu-core`'s
- * validation must be exhaustive, to ensure that even malicious content cannot
- * provoke and exploit undefined behavior in the platform's graphics API.
- *
- * Because graphics APIs' requirements are complex, the only practical way for
- * `wgpu` to provide exhaustive validation is to comprehensively track the
- * lifetime and state of all the resources in the system. Implementing this
- * separately for each backend is infeasible; effort would be better spent
- * making the cross-platform validation in `wgpu-core` legible and trustworthy.
- * Fortunately, the requirements are largely similar across the various
- * platforms, so cross-platform validation is practical.
- *
- * Some backends have specific requirements that aren't practical to foist off
- * on the `wgpu-hal` user. For example, properly managing macOS Objective-C or
- * Microsoft COM reference counts is best handled by using appropriate pointer
- * types within the backend.
- *
- * A desire for "defense in depth" may suggest performing additional validation
- * in `wgpu-hal` when the opportunity arises, but this must be done with
- * caution. Even experienced contributors infer the expectations their changes
- * must meet by considering not just requirements made explicit in types, tests,
- * assertions, and comments, but also those implicit in the surrounding code.
- * When one sees validation or state-tracking code in `wgpu-hal`, it is tempting
- * to conclude, "Oh, `wgpu-hal` checks for this, so `wgpu-core` needn't worry
- * about it - that would be redundant!" The responsibility for exhaustive
- * validation always rests with `wgpu-core`, regardless of what may or may not
- * be checked in `wgpu-hal`.
- *
- * To this end, any "defense in depth" validation that does appear in `wgpu-hal`
- * for requirements that `wgpu-core` should have enforced should report failure
- * via the `unreachable!` macro, because problems detected at this stage always
- * indicate a bug in `wgpu-core`.
- *
- * ## Debugging
- *
- * Most of the information on the wiki [Debugging wgpu Applications][wiki-debug]
- * page still applies to this API, with the exception of API tracing/replay
- * functionality, which is only available in `wgpu-core`.
- *
- * [wiki-debug]: https://github.com/gfx-rs/wgpu/wiki/Debugging-wgpu-Applications
- */
+//! A cross-platform unsafe graphics abstraction.
+//!
+//! This crate defines a set of traits abstracting over modern graphics APIs,
+//! with implementations ("backends") for Vulkan, Metal, Direct3D, and GL.
+//!
+//! `wgpu-hal` is a spiritual successor to
+//! [gfx-hal](https://github.com/gfx-rs/gfx), but with reduced scope, and
+//! oriented towards WebGPU implementation goals. It has no overhead for
+//! validation or tracking, and the API translation overhead is kept to the bare
+//! minimum by the design of WebGPU. This API can be used for resource-demanding
+//! applications and engines.
+//!
+//! The `wgpu-hal` crate's main design choices:
+//!
+//! - Our traits are meant to be *portable*: proper use
+//!   should get equivalent results regardless of the backend.
+//!
+//! - Our traits' contracts are *unsafe*: implementations perform minimal
+//!   validation, if any, and incorrect use will often cause undefined behavior.
+//!   This allows us to minimize the overhead we impose over the underlying
+//!   graphics system. If you need safety, the [`wgpu-core`] crate provides a
+//!   safe API for driving `wgpu-hal`, implementing all necessary validation,
+//!   resource state tracking, and so on. (Note that `wgpu-core` is designed for
+//!   use via FFI; the [`wgpu`] crate provides more idiomatic Rust bindings for
+//!   `wgpu-core`.) Or, you can do your own validation.
+//!
+//! - In the same vein, returned errors *only cover cases the user can't
+//!   anticipate*, like running out of memory or losing the device. Any errors
+//!   that the user could reasonably anticipate are their responsibility to
+//!   avoid. For example, `wgpu-hal` returns no error for mapping a buffer that's
+//!   not mappable: as the buffer creator, the user should already know if they
+//!   can map it.
+//!
+//! - We use *static dispatch*. The traits are not
+//!   generally object-safe. You must select a specific backend type
+//!   like [`vulkan::Api`] or [`metal::Api`], and then use that
+//!   according to the main traits, or call backend-specific methods.
+//!
+//! - We use *idiomatic Rust parameter passing*,
+//!   taking objects by reference, returning them by value, and so on,
+//!   unlike `wgpu-core`, which refers to objects by ID.
+//!
+//! - We map buffer contents *persistently*. This means that the buffer can
+//!   remain mapped on the CPU while the GPU reads or writes to it. You must
+//!   explicitly indicate when data might need to be transferred between CPU and
+//!   GPU, if [`Device::map_buffer`] indicates that this is necessary.
+//!
+//! - You must record *explicit barriers* between different usages of a
+//!   resource. For example, if a buffer is written to by a compute
+//!   shader, and then used as an index buffer to a draw call, you
+//!   must use [`CommandEncoder::transition_buffers`] between those two
+//!   operations.
+//!
+//! - Pipeline layouts are *explicitly specified* when setting bind
+//!   group. Incompatible layouts disturb groups bound at higher indices.
+//!
+//! - The API *accepts collections as iterators*, to avoid forcing the user to
+//!   store data in particular containers. The implementation doesn't guarantee
+//!   that any of the iterators are drained, unless stated otherwise by the
+//!   function documentation. For this reason, we recommend that iterators don't
+//!   do any mutating work.
+//!
+//! Unfortunately, `wgpu-hal`'s safety requirements are not fully documented.
+//! Ideally, all trait methods would have doc comments setting out the
+//! requirements users must meet to ensure correct and portable behavior. If you
+//! are aware of a specific requirement that a backend imposes that is not
+//! ensured by the traits' documented rules, please file an issue. Or, if you are
+//! a capable technical writer, please file a pull request!
+//!
+//! [`wgpu-core`]: https://crates.io/crates/wgpu-core
+//! [`wgpu`]: https://crates.io/crates/wgpu
+//! [`vulkan::Api`]: vulkan/struct.Api.html
+//! [`metal::Api`]: metal/struct.Api.html
+//!
+//! ## Primary backends
+//!
+//! The `wgpu-hal` crate has full-featured backends implemented on the following
+//! platform graphics APIs:
+//!
+//! - Vulkan, available on Linux, Android, and Windows, using the [`ash`] crate's
+//!   Vulkan bindings. It's also available on macOS, if you install [MoltenVK].
+//!
+//! - Metal on macOS, using the [`metal`] crate's bindings.
+//!
+//! - Direct3D 12 on Windows, using the [`d3d12`] crate's bindings.
+//!
+//! [`ash`]: https://crates.io/crates/ash
+//! [MoltenVK]: https://github.com/KhronosGroup/MoltenVK
+//! [`metal`]: https://crates.io/crates/metal
+//! [`d3d12`]: https://crates.io/crates/d3d12
+//!
+//! ## Secondary backends
+//!
+//! The `wgpu-hal` crate has a partial implementation based on the following
+//! platform graphics API:
+//!
+//! - The GL backend is available anywhere OpenGL, OpenGL ES, or WebGL are
+//!   available. See the [`gles`] module documentation for details.
+//!
+//! [`gles`]: gles/index.html
+//!
+//! You can see what capabilities an adapter is missing by checking the
+//! [`DownlevelCapabilities`][tdc] in [`ExposedAdapter::capabilities`], available
+//! from [`Instance::enumerate_adapters`].
+//!
+//! The API is generally designed to fit the primary backends better than the
+//! secondary backends, so the latter may impose more overhead.
+//!
+//! [tdc]: wgt::DownlevelCapabilities
+//!
+//! ## Traits
+//!
+//! The `wgpu-hal` crate defines a handful of traits that together
+//! represent a cross-platform abstraction for modern GPU APIs.
+//!
+//! - The [`Api`] trait represents a `wgpu-hal` backend. It has no methods of its
+//!   own, only a collection of associated types.
+//!
+//! - [`Api::Instance`] implements the [`Instance`] trait. [`Instance::init`]
+//!   creates an instance value, which you can use to enumerate the adapters
+//!   available on the system. For example, [`vulkan::Api::Instance::init`][Ii]
+//!   returns an instance that can enumerate the Vulkan physical devices on your
+//!   system.
+//!
+//! - [`Api::Adapter`] implements the [`Adapter`] trait, representing a
+//!   particular device from a particular backend. For example, a Vulkan instance
+//!   might have a Lavapipe software adapter and a GPU-based adapter.
+//!
+//! - [`Api::Device`] implements the [`Device`] trait, representing an active
+//!   link to a device. You get a device value by calling [`Adapter::open`], and
+//!   then use it to create buffers, textures, shader modules, and so on.
+//!
+//! - [`Api::Queue`] implements the [`Queue`] trait, which you use to submit
+//!   command buffers to a given device.
+//!
+//! - [`Api::CommandEncoder`] implements the [`CommandEncoder`] trait, which you
+//!   use to build buffers of commands to submit to a queue. This has all the
+//!   methods for drawing and running compute shaders, which is presumably what
+//!   you're here for.
+//!
+//! - [`Api::Surface`] implements the [`Surface`] trait, which represents a
+//!   swapchain for presenting images on the screen, via interaction with the
+//!   system's window manager.
+//!
+//! The [`Api`] trait has various other associated types like [`Api::Buffer`] and
+//! [`Api::Texture`] that represent resources the rest of the interface can
+//! operate on, but these generally do not have their own traits.
+//!
+//! [Ii]: Instance::init
+//!
+//! ## Validation is the calling code's responsibility, not `wgpu-hal`'s
+//!
+//! As much as possible, `wgpu-hal` traits place the burden of validation,
+//! resource tracking, and state tracking on the caller, not on the trait
+//! implementations themselves. Anything which can reasonably be handled in
+//! backend-independent code should be. A `wgpu_hal` backend's sole obligation is
+//! to provide portable behavior, and report conditions that the calling code
+//! can't reasonably anticipate, like device loss or running out of memory.
+//!
+//! The `wgpu` crate collection is intended for use in security-sensitive
+//! applications, like web browsers, where the API is available to untrusted
+//! code. This means that `wgpu-core`'s validation is not simply a service to
+//! developers, to be provided opportunistically when the performance costs are
+//! acceptable and the necessary data is ready at hand. Rather, `wgpu-core`'s
+//! validation must be exhaustive, to ensure that even malicious content cannot
+//! provoke and exploit undefined behavior in the platform's graphics API.
+//!
+//! Because graphics APIs' requirements are complex, the only practical way for
+//! `wgpu` to provide exhaustive validation is to comprehensively track the
+//! lifetime and state of all the resources in the system. Implementing this
+//! separately for each backend is infeasible; effort would be better spent
+//! making the cross-platform validation in `wgpu-core` legible and trustworthy.
+//! Fortunately, the requirements are largely similar across the various
+//! platforms, so cross-platform validation is practical.
+//!
+//! Some backends have specific requirements that aren't practical to foist off
+//! on the `wgpu-hal` user. For example, properly managing macOS Objective-C or
+//! Microsoft COM reference counts is best handled by using appropriate pointer
+//! types within the backend.
+//!
+//! A desire for "defense in depth" may suggest performing additional validation
+//! in `wgpu-hal` when the opportunity arises, but this must be done with
+//! caution. Even experienced contributors infer the expectations their changes
+//! must meet by considering not just requirements made explicit in types, tests,
+//! assertions, and comments, but also those implicit in the surrounding code.
+//! When one sees validation or state-tracking code in `wgpu-hal`, it is tempting
+//! to conclude, "Oh, `wgpu-hal` checks for this, so `wgpu-core` needn't worry
+//! about it - that would be redundant!" The responsibility for exhaustive
+//! validation always rests with `wgpu-core`, regardless of what may or may not
+//! be checked in `wgpu-hal`.
+//!
+//! To this end, any "defense in depth" validation that does appear in `wgpu-hal`
+//! for requirements that `wgpu-core` should have enforced should report failure
+//! via the `unreachable!` macro, because problems detected at this stage always
+//! indicate a bug in `wgpu-core`.
+//!
+//! ## Debugging
+//!
+//! Most of the information on the wiki [Debugging wgpu Applications][wiki-debug]
+//! page still applies to this API, with the exception of API tracing/replay
+//! functionality, which is only available in `wgpu-core`.
+//!
+//! [wiki-debug]: https://github.com/gfx-rs/wgpu/wiki/Debugging-wgpu-Applications
 
 #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
 #![allow(
diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
index c6b91f1e12..456551b1c0 100644
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@@ -1,6 +1,5 @@
-/*! This library describes the API surface of WebGPU that is agnostic of the backend.
- *  This API is used for targeting both Web and Native.
- */
+//! This library describes the API surface of WebGPU that is agnostic of the backend.
+//! This API is used for targeting both Web and Native.
 
 #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
 #![allow(

From 2897fb58db2d9d1ef0d74b374d5f6630f2f29376 Mon Sep 17 00:00:00 2001
From: Samson <16504129+sagudev@users.noreply.github.com>
Date: Wed, 24 Jul 2024 17:50:18 +0200
Subject: [PATCH 091/226] Error instead of panic in check bind (#6012)

Removed the zipping of binding entries introduced in 4a19ac279c4f81aacedb1d215c884c10fe115275 (to make sure binding numbers actually match) and added an `Unknown` error as a fallback.
---
 CHANGELOG.md                  |  1 +
 wgpu-core/src/command/bind.rs | 69 ++++++++++++++++++-----------------
 wgpu-core/src/lib.rs          |  1 -
 wgpu-core/src/utils.rs        | 54 ---------------------------
 4 files changed, 36 insertions(+), 89 deletions(-)
 delete mode 100644 wgpu-core/src/utils.rs

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9567c74f0f..d619a2e475 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -53,6 +53,7 @@ Bottom level categories:
 - As a workaround for [issue #4905](https://github.com/gfx-rs/wgpu/issues/4905), `wgpu-core` is undocumented unless `--cfg wgpu_core_doc` feature is enabled. By @kpreid in [#5987](https://github.com/gfx-rs/wgpu/pull/5987)
 - Bump MSRV for `d3d12`/`naga`/`wgpu-core`/`wgpu-hal`/`wgpu-types`' to 1.76. By @wumpf in [#6003](https://github.com/gfx-rs/wgpu/pull/6003)
 - Print requested and supported usages on `UnsupportedUsage` error. By @VladasZ in [#6007](https://github.com/gfx-rs/wgpu/pull/6007)
+- Fix function for checking bind compatibility to error instead of panic. By @sagudev in [#6012](https://github.com/gfx-rs/wgpu/pull/6012)
 
 ## 22.0.0 (2024-07-17)
 
diff --git a/wgpu-core/src/command/bind.rs b/wgpu-core/src/command/bind.rs
index 64d534b558..73f1d9fe17 100644
--- a/wgpu-core/src/command/bind.rs
+++ b/wgpu-core/src/command/bind.rs
@@ -142,49 +142,50 @@ mod compat {
 
                         let mut errors = Vec::new();
 
-                        let mut expected_bgl_entries = expected_bgl.entries.iter();
-                        let mut assigned_bgl_entries = assigned_bgl.entries.iter();
-                        let zipped = crate::utils::ZipWithProperAdvance::new(
-                            &mut expected_bgl_entries,
-                            &mut assigned_bgl_entries,
-                        );
-
-                        for ((&binding, expected_entry), (_, assigned_entry)) in zipped {
-                            if assigned_entry.visibility != expected_entry.visibility {
-                                errors.push(EntryError::Visibility {
-                                    binding,
-                                    expected: expected_entry.visibility,
-                                    assigned: assigned_entry.visibility,
-                                });
-                            }
-                            if assigned_entry.ty != expected_entry.ty {
-                                errors.push(EntryError::Type {
-                                    binding,
-                                    expected: expected_entry.ty,
-                                    assigned: assigned_entry.ty,
-                                });
-                            }
-                            if assigned_entry.count != expected_entry.count {
-                                errors.push(EntryError::Count {
-                                    binding,
-                                    expected: expected_entry.count,
-                                    assigned: assigned_entry.count,
-                                });
+                        for (&binding, expected_entry) in expected_bgl.entries.iter() {
+                            if let Some(assigned_entry) = assigned_bgl.entries.get(binding) {
+                                if assigned_entry.visibility != expected_entry.visibility {
+                                    errors.push(EntryError::Visibility {
+                                        binding,
+                                        expected: expected_entry.visibility,
+                                        assigned: assigned_entry.visibility,
+                                    });
+                                }
+                                if assigned_entry.ty != expected_entry.ty {
+                                    errors.push(EntryError::Type {
+                                        binding,
+                                        expected: expected_entry.ty,
+                                        assigned: assigned_entry.ty,
+                                    });
+                                }
+                                if assigned_entry.count != expected_entry.count {
+                                    errors.push(EntryError::Count {
+                                        binding,
+                                        expected: expected_entry.count,
+                                        assigned: assigned_entry.count,
+                                    });
+                                }
+                            } else {
+                                errors.push(EntryError::ExtraExpected { binding });
                             }
                         }
 
-                        for (&binding, _) in expected_bgl_entries {
-                            errors.push(EntryError::ExtraExpected { binding });
+                        for (&binding, _) in assigned_bgl.entries.iter() {
+                            if !expected_bgl.entries.contains_key(binding) {
+                                errors.push(EntryError::ExtraAssigned { binding });
+                            }
                         }
 
-                        for (&binding, _) in assigned_bgl_entries {
-                            errors.push(EntryError::ExtraAssigned { binding });
-                        }
+                        #[derive(Clone, Debug, Error)]
+                        #[error("Unknown reason")]
+                        struct Unknown();
 
                         Err(Error::Incompatible {
                             expected_bgl: expected_bgl.error_ident(),
                             assigned_bgl: assigned_bgl.error_ident(),
-                            inner: MultiError::new(errors.drain(..)).unwrap(),
+                            inner: MultiError::new(errors.drain(..)).unwrap_or_else(|| {
+                                MultiError::new(core::iter::once(Unknown())).unwrap()
+                            }),
                         })
                     }
                 } else {
diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs
index 7bc6cfcefe..b192bc8670 100644
--- a/wgpu-core/src/lib.rs
+++ b/wgpu-core/src/lib.rs
@@ -85,7 +85,6 @@ pub mod resource;
 mod snatch;
 pub mod storage;
 mod track;
-mod utils;
 // This is public for users who pre-compile shaders while still wanting to
 // preserve all run-time checks that `wgpu-core` does.
 // See <https://github.com/gfx-rs/wgpu/issues/3103>, after which this can be
diff --git a/wgpu-core/src/utils.rs b/wgpu-core/src/utils.rs
deleted file mode 100644
index cf61e797e2..0000000000
--- a/wgpu-core/src/utils.rs
+++ /dev/null
@@ -1,54 +0,0 @@
-/// If the first iterator is longer than the second, the zip implementation
-/// in the standard library will still advance the the first iterator before
-/// realizing that the second iterator has finished.
-///
-/// This implementation will advance the shorter iterator first avoiding
-/// the issue above.
-///
-/// If you can guarantee that the first iterator is always shorter than the
-/// second, you should use the zip impl in stdlib.
-pub(crate) struct ZipWithProperAdvance<
-    A: ExactSizeIterator<Item = IA>,
-    B: ExactSizeIterator<Item = IB>,
-    IA,
-    IB,
-> {
-    a: A,
-    b: B,
-    iter_a_first: bool,
-}
-
-impl<A: ExactSizeIterator<Item = IA>, B: ExactSizeIterator<Item = IB>, IA, IB>
-    ZipWithProperAdvance<A, B, IA, IB>
-{
-    pub(crate) fn new(a: A, b: B) -> Self {
-        let iter_a_first = a.len() <= b.len();
-        Self { a, b, iter_a_first }
-    }
-}
-
-impl<A: ExactSizeIterator<Item = IA>, B: ExactSizeIterator<Item = IB>, IA, IB> Iterator
-    for ZipWithProperAdvance<A, B, IA, IB>
-{
-    type Item = (IA, IB);
-
-    fn next(&mut self) -> Option<Self::Item> {
-        if self.iter_a_first {
-            let a = self.a.next()?;
-            let b = self.b.next()?;
-            Some((a, b))
-        } else {
-            let b = self.b.next()?;
-            let a = self.a.next()?;
-            Some((a, b))
-        }
-    }
-}
-
-impl<A: ExactSizeIterator<Item = IA>, B: ExactSizeIterator<Item = IB>, IA, IB> ExactSizeIterator
-    for ZipWithProperAdvance<A, B, IA, IB>
-{
-    fn len(&self) -> usize {
-        self.a.len().min(self.b.len())
-    }
-}

From 06649a39f3fa736fbb9a39a3dd2348ec218056c3 Mon Sep 17 00:00:00 2001
From: Bruce Mitchener <bruce.mitchener@gmail.com>
Date: Wed, 24 Jul 2024 23:08:21 +0700
Subject: [PATCH 092/226] Fix a `clippy::doc_lazy_continuation` lint (#6036)

---
 wgpu-hal/src/lib.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 51eec1a82b..6cbee172c8 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -713,7 +713,7 @@ pub trait Device: WasmNotSendSync {
     /// - Zero-sized mappings are not allowed.
     ///
     /// - The returned [`BufferMapping::ptr`] must not be used after a call to
-    /// [`Device::unmap_buffer`].
+    ///   [`Device::unmap_buffer`].
     ///
     /// [`MAP_READ`]: BufferUses::MAP_READ
     /// [`MAP_WRITE`]: BufferUses::MAP_WRITE

From 723995d9a98171da306f93669754c33419f19d52 Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Wed, 24 Jul 2024 10:10:29 -0400
Subject: [PATCH 093/226] refactor: warn on and satisfy
 `clippy::{ptr_as_ptr,ref_as_ptr}` in `wgpu-{core,hal,types}`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

…using `cargo +1.79.0 clippy --workspace --all-features --all-targets
--fix`, plus some manual changes to (1) catch some missed cases (not run
on all platforms?) and (2) make things compile again after `--fix`. 😀
---
 wgpu-core/src/device/global.rs  |  2 +-
 wgpu-core/src/instance.rs       |  4 ++--
 wgpu-core/src/lib.rs            |  1 +
 wgpu-hal/src/auxil/renderdoc.rs |  2 +-
 wgpu-hal/src/dx12/adapter.rs    | 20 ++++++++++----------
 wgpu-hal/src/dx12/command.rs    |  6 +++---
 wgpu-hal/src/dx12/device.rs     |  6 +++---
 wgpu-hal/src/dx12/instance.rs   |  2 +-
 wgpu-hal/src/dx12/mod.rs        |  6 +++---
 wgpu-hal/src/gles/command.rs    |  5 ++---
 wgpu-hal/src/gles/egl.rs        | 15 +++++++++------
 wgpu-hal/src/gles/emscripten.rs |  2 +-
 wgpu-hal/src/gles/queue.rs      |  5 ++---
 wgpu-hal/src/gles/wgl.rs        | 10 +++++-----
 wgpu-hal/src/lib.rs             |  1 +
 wgpu-hal/src/metal/command.rs   | 20 ++++++++++----------
 wgpu-hal/src/metal/device.rs    |  2 +-
 wgpu-hal/src/vulkan/command.rs  |  2 +-
 wgpu-hal/src/vulkan/device.rs   |  4 ++--
 wgpu-hal/src/vulkan/instance.rs |  6 +++---
 wgpu-types/src/lib.rs           |  8 ++++----
 21 files changed, 66 insertions(+), 63 deletions(-)

diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 69a9ebf32c..96727b04f5 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -1170,7 +1170,7 @@ impl Global {
             #[cfg(feature = "trace")]
             if let Some(ref mut trace) = *device.trace.lock() {
                 let data = trace.make_binary("spv", unsafe {
-                    std::slice::from_raw_parts(source.as_ptr() as *const u8, source.len() * 4)
+                    std::slice::from_raw_parts(source.as_ptr().cast::<u8>(), source.len() * 4)
                 });
                 trace.add(trace::Action::CreateShaderModule {
                     id: fid.id(),
diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index cd38942187..65bed375f1 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -642,7 +642,7 @@ impl Global {
     ) -> Result<SurfaceId, CreateSurfaceError> {
         profiling::scope!("Instance::instance_create_surface_from_visual");
         self.instance_create_surface_dx12(id_in, |inst| unsafe {
-            inst.create_surface_from_visual(visual as _)
+            inst.create_surface_from_visual(visual.cast())
         })
     }
 
@@ -672,7 +672,7 @@ impl Global {
     ) -> Result<SurfaceId, CreateSurfaceError> {
         profiling::scope!("Instance::instance_create_surface_from_swap_chain_panel");
         self.instance_create_surface_dx12(id_in, |inst| unsafe {
-            inst.create_surface_from_swap_chain_panel(swap_chain_panel as _)
+            inst.create_surface_from_swap_chain_panel(swap_chain_panel.cast())
         })
     }
 
diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs
index b192bc8670..c46a8f103a 100644
--- a/wgpu-core/src/lib.rs
+++ b/wgpu-core/src/lib.rs
@@ -55,6 +55,7 @@ the documentation for `wgpu-core` is empty unless built with
     rustdoc::private_intra_doc_links
 )]
 #![warn(
+    clippy::ptr_as_ptr,
     trivial_casts,
     trivial_numeric_casts,
     unsafe_op_in_unsafe_fn,
diff --git a/wgpu-hal/src/auxil/renderdoc.rs b/wgpu-hal/src/auxil/renderdoc.rs
index 15b2c1039a..240d9dda29 100644
--- a/wgpu-hal/src/auxil/renderdoc.rs
+++ b/wgpu-hal/src/auxil/renderdoc.rs
@@ -83,7 +83,7 @@ impl RenderDoc {
         match unsafe { get_api(10401, &mut obj) } {
             1 => RenderDoc::Available {
                 api: RenderDocApi {
-                    api: unsafe { *(obj as *mut renderdoc_sys::RENDERDOC_API_1_4_1) },
+                    api: unsafe { *obj.cast::<renderdoc_sys::RENDERDOC_API_1_4_1>() },
                     lib: renderdoc_lib,
                 },
             },
diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs
index c05d9a8b3f..cb2636611b 100644
--- a/wgpu-hal/src/dx12/adapter.rs
+++ b/wgpu-hal/src/dx12/adapter.rs
@@ -88,7 +88,7 @@ impl super::Adapter {
         unsafe {
             device.CheckFeatureSupport(
                 d3d12_ty::D3D12_FEATURE_FEATURE_LEVELS,
-                &mut device_levels as *mut _ as *mut _,
+                ptr::from_mut(&mut device_levels).cast(),
                 mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_FEATURE_LEVELS>() as _,
             )
         };
@@ -111,7 +111,7 @@ impl super::Adapter {
         assert_eq!(0, unsafe {
             device.CheckFeatureSupport(
                 d3d12_ty::D3D12_FEATURE_ARCHITECTURE,
-                &mut features_architecture as *mut _ as *mut _,
+                ptr::from_mut(&mut features_architecture).cast(),
                 mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_ARCHITECTURE>() as _,
             )
         });
@@ -156,7 +156,7 @@ impl super::Adapter {
         assert_eq!(0, unsafe {
             device.CheckFeatureSupport(
                 d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS,
-                &mut options as *mut _ as *mut _,
+                ptr::from_mut(&mut options).cast(),
                 mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS>() as _,
             )
         });
@@ -167,7 +167,7 @@ impl super::Adapter {
             let hr = unsafe {
                 device.CheckFeatureSupport(
                     d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS2,
-                    &mut features2 as *mut _ as *mut _,
+                    ptr::from_mut(&mut features2).cast(),
                     mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS2>() as _,
                 )
             };
@@ -180,7 +180,7 @@ impl super::Adapter {
             let hr = unsafe {
                 device.CheckFeatureSupport(
                     21, // D3D12_FEATURE_D3D12_OPTIONS3
-                    &mut features3 as *mut _ as *mut _,
+                    ptr::from_mut(&mut features3).cast(),
                     mem::size_of::<crate::dx12::types::D3D12_FEATURE_DATA_D3D12_OPTIONS3>() as _,
                 )
             };
@@ -210,7 +210,7 @@ impl super::Adapter {
                     if 0 == unsafe {
                         device.CheckFeatureSupport(
                             7, // D3D12_FEATURE_SHADER_MODEL
-                            &mut sm as *mut _ as *mut _,
+                            ptr::from_mut(&mut sm).cast(),
                             mem::size_of::<crate::dx12::types::D3D12_FEATURE_DATA_SHADER_MODEL>()
                                 as _,
                         )
@@ -337,7 +337,7 @@ impl super::Adapter {
             let hr = unsafe {
                 device.CheckFeatureSupport(
                     d3d12_ty::D3D12_FEATURE_FORMAT_SUPPORT,
-                    &mut bgra8unorm_info as *mut _ as *mut _,
+                    ptr::from_mut(&mut bgra8unorm_info).cast(),
                     mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT>() as _,
                 )
             };
@@ -353,7 +353,7 @@ impl super::Adapter {
         let hr = unsafe {
             device.CheckFeatureSupport(
                 d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS1,
-                &mut features1 as *mut _ as *mut _,
+                ptr::from_mut(&mut features1).cast(),
                 mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1>() as _,
             )
         };
@@ -378,7 +378,7 @@ impl super::Adapter {
             let hr = unsafe {
                 device.CheckFeatureSupport(
                     37, // D3D12_FEATURE_D3D12_OPTIONS9
-                    &mut features9 as *mut _ as *mut _,
+                    ptr::from_mut(&mut features9).cast(),
                     mem::size_of::<crate::dx12::types::D3D12_FEATURE_DATA_D3D12_OPTIONS9>() as _,
                 )
             };
@@ -586,7 +586,7 @@ impl crate::Adapter for super::Adapter {
         assert_eq!(winerror::S_OK, unsafe {
             self.device.CheckFeatureSupport(
                 d3d12_ty::D3D12_FEATURE_FORMAT_SUPPORT,
-                &mut data as *mut _ as *mut _,
+                ptr::from_mut(&mut data).cast(),
                 mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT>() as _,
             )
         });
diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs
index 3c535b2234..fbaa956dfb 100644
--- a/wgpu-hal/src/dx12/command.rs
+++ b/wgpu-hal/src/dx12/command.rs
@@ -69,7 +69,7 @@ impl super::CommandEncoder {
         self.pass.kind = kind;
         if let Some(label) = label {
             let (wide_label, size) = self.temp.prepare_marker(label);
-            unsafe { list.BeginEvent(0, wide_label.as_ptr() as *const _, size) };
+            unsafe { list.BeginEvent(0, wide_label.as_ptr().cast(), size) };
             self.pass.has_label = true;
         }
         self.pass.dirty_root_elements = 0;
@@ -950,7 +950,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
             self.list
                 .as_ref()
                 .unwrap()
-                .SetMarker(0, wide_label.as_ptr() as *const _, size)
+                .SetMarker(0, wide_label.as_ptr().cast(), size)
         };
     }
     unsafe fn begin_debug_marker(&mut self, group_label: &str) {
@@ -959,7 +959,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
             self.list
                 .as_ref()
                 .unwrap()
-                .BeginEvent(0, wide_label.as_ptr() as *const _, size)
+                .BeginEvent(0, wide_label.as_ptr().cast(), size)
         };
     }
     unsafe fn end_debug_marker(&mut self) {
diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index e886e2fd04..e08388b20b 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -1386,7 +1386,7 @@ impl crate::Device for super::Device {
             };
             for attribute in vbuf.attributes {
                 input_element_descs.push(d3d12_ty::D3D12_INPUT_ELEMENT_DESC {
-                    SemanticName: NAGA_LOCATION_SEMANTIC.as_ptr() as *const _,
+                    SemanticName: NAGA_LOCATION_SEMANTIC.as_ptr().cast(),
                     SemanticIndex: attribute.shader_location,
                     Format: auxil::dxgi::conv::map_vertex_format(attribute.format),
                     InputSlot: i as u32,
@@ -1749,7 +1749,7 @@ impl crate::Device for super::Device {
         {
             unsafe {
                 self.render_doc
-                    .start_frame_capture(self.raw.as_mut_ptr() as *mut _, ptr::null_mut())
+                    .start_frame_capture(self.raw.as_mut_ptr().cast(), ptr::null_mut())
             }
         }
         #[cfg(not(feature = "renderdoc"))]
@@ -1760,7 +1760,7 @@ impl crate::Device for super::Device {
         #[cfg(feature = "renderdoc")]
         unsafe {
             self.render_doc
-                .end_frame_capture(self.raw.as_mut_ptr() as *mut _, ptr::null_mut())
+                .end_frame_capture(self.raw.as_mut_ptr().cast(), ptr::null_mut())
         }
     }
 
diff --git a/wgpu-hal/src/dx12/instance.rs b/wgpu-hal/src/dx12/instance.rs
index 4a4c6c6ff9..a629018404 100644
--- a/wgpu-hal/src/dx12/instance.rs
+++ b/wgpu-hal/src/dx12/instance.rs
@@ -81,7 +81,7 @@ impl crate::Instance for super::Instance {
             let hr = unsafe {
                 factory5.CheckFeatureSupport(
                     dxgi1_5::DXGI_FEATURE_PRESENT_ALLOW_TEARING,
-                    &mut allow_tearing as *mut _ as *mut _,
+                    std::ptr::from_mut(&mut allow_tearing).cast(),
                     mem::size_of::<minwindef::BOOL>() as _,
                 )
             };
diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index 0bb7adc75e..8d08b8f72d 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -720,7 +720,7 @@ impl crate::Surface for Surface {
                         self.factory
                             .unwrap_factory2()
                             .create_swapchain_for_composition(
-                                device.present_queue.as_mut_ptr() as *mut _,
+                                device.present_queue.as_mut_ptr().cast(),
                                 &desc,
                             )
                             .into_result()
@@ -733,7 +733,7 @@ impl crate::Surface for Surface {
                             .clone()
                             .ok_or(crate::SurfaceError::Other("IDXGIFactoryMedia not found"))?
                             .create_swapchain_for_composition_surface_handle(
-                                device.present_queue.as_mut_ptr() as *mut _,
+                                device.present_queue.as_mut_ptr().cast(),
                                 handle,
                                 &desc,
                             )
@@ -745,7 +745,7 @@ impl crate::Surface for Surface {
                             .as_factory2()
                             .unwrap()
                             .create_swapchain_for_hwnd(
-                                device.present_queue.as_mut_ptr() as *mut _,
+                                device.present_queue.as_mut_ptr().cast(),
                                 hwnd,
                                 &desc,
                             )
diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs
index 63a9b5496e..2fcbc7cffe 100644
--- a/wgpu-hal/src/gles/command.rs
+++ b/wgpu-hal/src/gles/command.rs
@@ -81,9 +81,8 @@ impl super::CommandBuffer {
     }
 
     fn add_push_constant_data(&mut self, data: &[u32]) -> Range<u32> {
-        let data_raw = unsafe {
-            std::slice::from_raw_parts(data.as_ptr() as *const _, mem::size_of_val(data))
-        };
+        let data_raw =
+            unsafe { std::slice::from_raw_parts(data.as_ptr().cast(), mem::size_of_val(data)) };
         let start = self.data_bytes.len();
         assert!(start < u32::MAX as usize);
         self.data_bytes.extend_from_slice(data_raw);
diff --git a/wgpu-hal/src/gles/egl.rs b/wgpu-hal/src/gles/egl.rs
index f35d697d5e..8cf69cc076 100644
--- a/wgpu-hal/src/gles/egl.rs
+++ b/wgpu-hal/src/gles/egl.rs
@@ -919,7 +919,10 @@ impl crate::Instance for Instance {
 
                 let ret = unsafe {
                     ndk_sys::ANativeWindow_setBuffersGeometry(
-                        handle.a_native_window.as_ptr() as *mut ndk_sys::ANativeWindow,
+                        handle
+                            .a_native_window
+                            .as_ptr()
+                            .cast::<ndk_sys::ANativeWindow>(),
                         0,
                         0,
                         format,
@@ -1229,12 +1232,12 @@ impl crate::Surface for Surface {
                 let native_window_ptr = match (self.wsi.kind, self.raw_window_handle) {
                     (WindowKind::Unknown | WindowKind::X11, Rwh::Xlib(handle)) => {
                         temp_xlib_handle = handle.window;
-                        &mut temp_xlib_handle as *mut _ as *mut ffi::c_void
+                        ptr::from_mut(&mut temp_xlib_handle).cast::<ffi::c_void>()
                     }
                     (WindowKind::AngleX11, Rwh::Xlib(handle)) => handle.window as *mut ffi::c_void,
                     (WindowKind::Unknown | WindowKind::X11, Rwh::Xcb(handle)) => {
                         temp_xcb_handle = handle.window;
-                        &mut temp_xcb_handle as *mut _ as *mut ffi::c_void
+                        ptr::from_mut(&mut temp_xcb_handle).cast::<ffi::c_void>()
                     }
                     (WindowKind::AngleX11, Rwh::Xcb(handle)) => {
                         handle.window.get() as *mut ffi::c_void
@@ -1248,7 +1251,7 @@ impl crate::Surface for Surface {
                             unsafe { library.get(b"wl_egl_window_create") }.unwrap();
                         let window =
                             unsafe { wl_egl_window_create(handle.surface.as_ptr(), 640, 480) }
-                                as *mut _;
+                                .cast();
                         wl_window = Some(window);
                         window
                     }
@@ -1265,8 +1268,8 @@ impl crate::Surface for Surface {
                             use objc::{msg_send, runtime::Object, sel, sel_impl};
                             // ns_view always have a layer and don't need to verify that it exists.
                             let layer: *mut Object =
-                                msg_send![handle.ns_view.as_ptr() as *mut Object, layer];
-                            layer as *mut ffi::c_void
+                                msg_send![handle.ns_view.as_ptr().cast::<Object>(), layer];
+                            layer.cast::<ffi::c_void>()
                         };
                         window_ptr
                     }
diff --git a/wgpu-hal/src/gles/emscripten.rs b/wgpu-hal/src/gles/emscripten.rs
index 8174614f02..8a341d54d4 100644
--- a/wgpu-hal/src/gles/emscripten.rs
+++ b/wgpu-hal/src/gles/emscripten.rs
@@ -20,7 +20,7 @@ pub unsafe fn enable_extension(extension_name_null_terminated: &str) -> bool {
     unsafe {
         emscripten_webgl_enable_extension(
             emscripten_webgl_get_current_context(),
-            extension_name_null_terminated.as_ptr() as _,
+            extension_name_null_terminated.as_ptr().cast(),
         ) == 1
     }
 }
diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs
index 95eff36d57..398e37ffe6 100644
--- a/wgpu-hal/src/gles/queue.rs
+++ b/wgpu-hal/src/gles/queue.rs
@@ -955,7 +955,7 @@ impl super::Queue {
                     }
                     let query_data = unsafe {
                         slice::from_raw_parts(
-                            temp_query_results.as_ptr() as *const u8,
+                            temp_query_results.as_ptr().cast::<u8>(),
                             temp_query_results.len() * mem::size_of::<u64>(),
                         )
                     };
@@ -1526,8 +1526,7 @@ impl super::Queue {
 
                     debug_assert_eq!(data_required, raw.len());
 
-                    let slice: &[T] =
-                        unsafe { slice::from_raw_parts(raw.as_ptr() as *const _, COUNT) };
+                    let slice: &[T] = unsafe { slice::from_raw_parts(raw.as_ptr().cast(), COUNT) };
 
                     slice.try_into().unwrap()
                 }
diff --git a/wgpu-hal/src/gles/wgl.rs b/wgpu-hal/src/gles/wgl.rs
index c221b3e59d..1e92d488ae 100644
--- a/wgpu-hal/src/gles/wgl.rs
+++ b/wgpu-hal/src/gles/wgl.rs
@@ -59,7 +59,7 @@ impl AdapterContext {
     }
 
     pub fn raw_context(&self) -> *mut c_void {
-        self.inner.lock().context.context as *mut _
+        self.inner.lock().context.context.cast()
     }
 
     /// Obtain a lock to the WGL context and get handle to the [`glow::Context`] that can be used to
@@ -184,7 +184,7 @@ fn load_gl_func(name: &str, module: Option<HMODULE>) -> *const c_void {
 
 fn get_extensions(extra: &Wgl, dc: HDC) -> HashSet<String> {
     if extra.GetExtensionsStringARB.is_loaded() {
-        unsafe { CStr::from_ptr(extra.GetExtensionsStringARB(dc as *const _)) }
+        unsafe { CStr::from_ptr(extra.GetExtensionsStringARB(dc.cast())) }
             .to_str()
             .unwrap_or("")
     } else {
@@ -427,7 +427,7 @@ impl crate::Instance for Instance {
 
     unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> {
         profiling::scope!("Init OpenGL (WGL) Backend");
-        let opengl_module = unsafe { LoadLibraryA("opengl32.dll\0".as_ptr() as *const _) };
+        let opengl_module = unsafe { LoadLibraryA("opengl32.dll\0".as_ptr().cast()) };
         if opengl_module.is_null() {
             return Err(crate::InstanceError::with_source(
                 String::from("unable to load the OpenGL library"),
@@ -472,7 +472,7 @@ impl crate::Instance for Instance {
                 0, // End of list
             ];
             let context = unsafe {
-                extra.CreateContextAttribsARB(dc as *const _, ptr::null(), attributes.as_ptr())
+                extra.CreateContextAttribsARB(dc.cast(), ptr::null(), attributes.as_ptr())
             };
             if context.is_null() {
                 return Err(crate::InstanceError::with_source(
@@ -481,7 +481,7 @@ impl crate::Instance for Instance {
                 ));
             }
             WglContext {
-                context: context as *mut _,
+                context: context.cast_mut().cast(),
             }
         } else {
             context
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 6cbee172c8..812bb7299c 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -226,6 +226,7 @@
     clippy::pattern_type_mismatch,
 )]
 #![warn(
+    clippy::ptr_as_ptr,
     trivial_casts,
     trivial_numeric_casts,
     unsafe_op_in_unsafe_fn,
diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs
index fb9c7e9c0e..fafe3478fd 100644
--- a/wgpu-hal/src/metal/command.rs
+++ b/wgpu-hal/src/metal/command.rs
@@ -679,7 +679,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
                     encoder.set_vertex_bytes(
                         index as _,
                         (sizes.len() * WORD_SIZE) as u64,
-                        sizes.as_ptr() as _,
+                        sizes.as_ptr().cast(),
                     );
                 }
             }
@@ -713,7 +713,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
                     encoder.set_fragment_bytes(
                         index as _,
                         (sizes.len() * WORD_SIZE) as u64,
-                        sizes.as_ptr() as _,
+                        sizes.as_ptr().cast(),
                     );
                 }
             }
@@ -785,7 +785,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
                     encoder.set_bytes(
                         index as _,
                         (sizes.len() * WORD_SIZE) as u64,
-                        sizes.as_ptr() as _,
+                        sizes.as_ptr().cast(),
                     );
                 }
             }
@@ -827,21 +827,21 @@ impl crate::CommandEncoder for super::CommandEncoder {
             self.state.compute.as_ref().unwrap().set_bytes(
                 layout.push_constants_infos.cs.unwrap().buffer_index as _,
                 (layout.total_push_constants as usize * WORD_SIZE) as _,
-                state_pc.as_ptr() as _,
+                state_pc.as_ptr().cast(),
             )
         }
         if stages.contains(wgt::ShaderStages::VERTEX) {
             self.state.render.as_ref().unwrap().set_vertex_bytes(
                 layout.push_constants_infos.vs.unwrap().buffer_index as _,
                 (layout.total_push_constants as usize * WORD_SIZE) as _,
-                state_pc.as_ptr() as _,
+                state_pc.as_ptr().cast(),
             )
         }
         if stages.contains(wgt::ShaderStages::FRAGMENT) {
             self.state.render.as_ref().unwrap().set_fragment_bytes(
                 layout.push_constants_infos.fs.unwrap().buffer_index as _,
                 (layout.total_push_constants as usize * WORD_SIZE) as _,
-                state_pc.as_ptr() as _,
+                state_pc.as_ptr().cast(),
             )
         }
     }
@@ -895,7 +895,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
                 encoder.set_vertex_bytes(
                     index as _,
                     (sizes.len() * WORD_SIZE) as u64,
-                    sizes.as_ptr() as _,
+                    sizes.as_ptr().cast(),
                 );
             }
         }
@@ -907,7 +907,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
                 encoder.set_fragment_bytes(
                     index as _,
                     (sizes.len() * WORD_SIZE) as u64,
-                    sizes.as_ptr() as _,
+                    sizes.as_ptr().cast(),
                 );
             }
         }
@@ -956,7 +956,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
             encoder.set_vertex_bytes(
                 index as _,
                 (sizes.len() * WORD_SIZE) as u64,
-                sizes.as_ptr() as _,
+                sizes.as_ptr().cast(),
             );
         }
     }
@@ -1212,7 +1212,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
             encoder.set_bytes(
                 index as _,
                 (sizes.len() * WORD_SIZE) as u64,
-                sizes.as_ptr() as _,
+                sizes.as_ptr().cast(),
             );
         }
 
diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs
index d9525999d8..e108d38202 100644
--- a/wgpu-hal/src/metal/device.rs
+++ b/wgpu-hal/src/metal/device.rs
@@ -362,7 +362,7 @@ impl crate::Device for super::Device {
         buffer: &super::Buffer,
         range: crate::MemoryRange,
     ) -> DeviceResult<crate::BufferMapping> {
-        let ptr = buffer.raw.contents() as *mut u8;
+        let ptr = buffer.raw.contents().cast::<u8>();
         assert!(!ptr.is_null());
         Ok(crate::BufferMapping {
             ptr: ptr::NonNull::new(unsafe { ptr.offset(range.start as isize) }).unwrap(),
diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs
index 5f3fdc5959..4f36e6f86c 100644
--- a/wgpu-hal/src/vulkan/command.rs
+++ b/wgpu-hal/src/vulkan/command.rs
@@ -833,7 +833,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
                 layout.raw,
                 conv::map_shader_stage(stages),
                 offset_bytes,
-                slice::from_raw_parts(data.as_ptr() as _, data.len() * 4),
+                slice::from_raw_parts(data.as_ptr().cast(), data.len() * 4),
             )
         };
     }
diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs
index 86bfa56442..00f6c7a41c 100644
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@@ -343,7 +343,7 @@ impl gpu_alloc::MemoryDevice<vk::DeviceMemory> for super::DeviceShared {
             self.raw
                 .map_memory(*memory, offset, size, vk::MemoryMapFlags::empty())
         } {
-            Ok(ptr) => Ok(ptr::NonNull::new(ptr as *mut u8)
+            Ok(ptr) => Ok(ptr::NonNull::new(ptr.cast::<u8>())
                 .expect("Pointer to memory mapping must not be null")),
             Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => {
                 Err(gpu_alloc::DeviceMapError::OutOfDeviceMemory)
@@ -1513,7 +1513,7 @@ impl crate::Device for super::Device {
                     // SAFETY: similar to safety notes for `slice_get_ref`, but we have a
                     // mutable reference which is also guaranteed to be valid for writes.
                     unsafe {
-                        &mut *(to_init as *mut [MaybeUninit<T>] as *mut [T])
+                        &mut *(ptr::from_mut::<[MaybeUninit<T>]>(to_init) as *mut [T])
                     }
                 };
                 (Self { remainder }, init)
diff --git a/wgpu-hal/src/vulkan/instance.rs b/wgpu-hal/src/vulkan/instance.rs
index f27cef55fa..b3ced3275e 100644
--- a/wgpu-hal/src/vulkan/instance.rs
+++ b/wgpu-hal/src/vulkan/instance.rs
@@ -23,7 +23,7 @@ unsafe extern "system" fn debug_utils_messenger_callback(
     }
 
     let cd = unsafe { &*callback_data_ptr };
-    let user_data = unsafe { &*(user_data as *mut super::DebugUtilsMessengerUserData) };
+    let user_data = unsafe { &*user_data.cast::<super::DebugUtilsMessengerUserData>() };
 
     const VUID_VKCMDENDDEBUGUTILSLABELEXT_COMMANDBUFFER_01912: i32 = 0x56146426;
     if cd.message_id_number == VUID_VKCMDENDDEBUGUTILSLABELEXT_COMMANDBUFFER_01912 {
@@ -515,7 +515,7 @@ impl super::Instance {
         }
 
         let layer = unsafe {
-            crate::metal::Surface::get_metal_layer(view as *mut objc::runtime::Object, None)
+            crate::metal::Surface::get_metal_layer(view.cast::<objc::runtime::Object>(), None)
         };
 
         let surface = {
@@ -523,7 +523,7 @@ impl super::Instance {
                 ext::metal_surface::Instance::new(&self.shared.entry, &self.shared.raw);
             let vk_info = vk::MetalSurfaceCreateInfoEXT::default()
                 .flags(vk::MetalSurfaceCreateFlagsEXT::empty())
-                .layer(layer as *mut _);
+                .layer(layer.cast());
 
             unsafe { metal_loader.create_metal_surface(&vk_info, None).unwrap() }
         };
diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
index 456551b1c0..abe66d4910 100644
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@@ -6,7 +6,7 @@
     // We don't use syntax sugar where it's not necessary.
     clippy::match_like_matches_macro,
 )]
-#![warn(missing_docs, unsafe_op_in_unsafe_fn)]
+#![warn(clippy::ptr_as_ptr, missing_docs, unsafe_op_in_unsafe_fn)]
 
 #[cfg(any(feature = "serde", test))]
 use serde::Deserialize;
@@ -7068,7 +7068,7 @@ impl DrawIndirectArgs {
     pub fn as_bytes(&self) -> &[u8] {
         unsafe {
             std::mem::transmute(std::slice::from_raw_parts(
-                self as *const _ as *const u8,
+                std::ptr::from_ref(self).cast::<u8>(),
                 std::mem::size_of::<Self>(),
             ))
         }
@@ -7098,7 +7098,7 @@ impl DrawIndexedIndirectArgs {
     pub fn as_bytes(&self) -> &[u8] {
         unsafe {
             std::mem::transmute(std::slice::from_raw_parts(
-                self as *const _ as *const u8,
+                std::ptr::from_ref(self).cast::<u8>(),
                 std::mem::size_of::<Self>(),
             ))
         }
@@ -7122,7 +7122,7 @@ impl DispatchIndirectArgs {
     pub fn as_bytes(&self) -> &[u8] {
         unsafe {
             std::mem::transmute(std::slice::from_raw_parts(
-                self as *const _ as *const u8,
+                std::ptr::from_ref(self).cast::<u8>(),
                 std::mem::size_of::<Self>(),
             ))
         }

From 65b6e15f0fdf46b109df66ec57b75aa6be9b067d Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Wed, 24 Jul 2024 10:34:25 -0400
Subject: [PATCH 094/226] chore: satisfy `clippy::unused_qualifications`

---
 wgpu-core/src/device/queue.rs | 2 +-
 wgpu-hal/src/dx12/device.rs   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index f5bc296534..625395fdc1 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -70,7 +70,7 @@ impl<A: HalApi> Drop for Queue<A> {
     fn drop(&mut self) {
         resource_log!("Drop {}", self.error_ident());
         // SAFETY: we never access `self.raw` beyond this point.
-        let queue = unsafe { std::mem::ManuallyDrop::take(&mut self.raw) };
+        let queue = unsafe { ManuallyDrop::take(&mut self.raw) };
         self.device.release_queue(queue);
     }
 }
diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index e08388b20b..fa3e828fba 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -1816,7 +1816,7 @@ impl crate::Device for super::Device {
             .allocations
             .iter_mut()
             .map(|alloc| wgt::AllocationReport {
-                name: std::mem::take(&mut alloc.name),
+                name: mem::take(&mut alloc.name),
                 offset: alloc.offset,
                 size: alloc.size,
             })

From 54fb4ccf7d52921bfbcac3737d6f493a41aeefaa Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Thu, 4 Jul 2024 19:43:28 -0600
Subject: [PATCH 095/226] refactor(naga)!: remove `Function::locals`, migrate
 docs to `ExpressionContext::locals`

---
 naga/src/front/wgsl/parse/ast.rs | 27 ---------------------------
 naga/src/front/wgsl/parse/mod.rs | 26 +++++++++++++++++++++++---
 2 files changed, 23 insertions(+), 30 deletions(-)

diff --git a/naga/src/front/wgsl/parse/ast.rs b/naga/src/front/wgsl/parse/ast.rs
index ea8013ee7c..7df5c8a1c9 100644
--- a/naga/src/front/wgsl/parse/ast.rs
+++ b/naga/src/front/wgsl/parse/ast.rs
@@ -117,33 +117,6 @@ pub struct Function<'a> {
     pub name: Ident<'a>,
     pub arguments: Vec<FunctionArgument<'a>>,
     pub result: Option<FunctionResult<'a>>,
-
-    /// Local variable and function argument arena.
-    ///
-    /// Note that the `Local` here is actually a zero-sized type. The AST keeps
-    /// all the detailed information about locals - names, types, etc. - in
-    /// [`LocalDecl`] statements. For arguments, that information is kept in
-    /// [`arguments`]. This `Arena`'s only role is to assign a unique `Handle`
-    /// to each of them, and track their definitions' spans for use in
-    /// diagnostics.
-    ///
-    /// In the AST, when an [`Ident`] expression refers to a local variable or
-    /// argument, its [`IdentExpr`] holds the referent's `Handle<Local>` in this
-    /// arena.
-    ///
-    /// During lowering, [`LocalDecl`] statements add entries to a per-function
-    /// table that maps `Handle<Local>` values to their Naga representations,
-    /// accessed via [`StatementContext::local_table`] and
-    /// [`RuntimeExpressionContext::local_table`]. This table is then consulted when
-    /// lowering subsequent [`Ident`] expressions.
-    ///
-    /// [`LocalDecl`]: StatementKind::LocalDecl
-    /// [`arguments`]: Function::arguments
-    /// [`Ident`]: Expression::Ident
-    /// [`StatementContext::local_table`]: StatementContext::local_table
-    /// [`RuntimeExpressionContext::local_table`]: RuntimeExpressionContext::local_table
-    pub locals: Arena<Local>,
-
     pub body: Block<'a>,
 }
 
diff --git a/naga/src/front/wgsl/parse/mod.rs b/naga/src/front/wgsl/parse/mod.rs
index ee3a1846b9..c9114d685d 100644
--- a/naga/src/front/wgsl/parse/mod.rs
+++ b/naga/src/front/wgsl/parse/mod.rs
@@ -37,9 +37,30 @@ struct ExpressionContext<'input, 'temp, 'out> {
     /// [`Function::locals`]: ast::Function::locals
     local_table: &'temp mut SymbolTable<&'input str, Handle<ast::Local>>,
 
-    /// The [`Function::locals`] arena for the function we're building.
+    /// Local variable and function argument arena for the function we're building.
     ///
-    /// [`Function::locals`]: ast::Function::locals
+    /// Note that the `Local` here is actually a zero-sized type. The AST keeps
+    /// all the detailed information about locals - names, types, etc. - in
+    /// [`LocalDecl`] statements. For arguments, that information is kept in
+    /// [`arguments`]. This `Arena`'s only role is to assign a unique `Handle`
+    /// to each of them, and track their definitions' spans for use in
+    /// diagnostics.
+    ///
+    /// In the AST, when an [`Ident`] expression refers to a local variable or
+    /// argument, its [`IdentExpr`] holds the referent's `Handle<Local>` in this
+    /// arena.
+    ///
+    /// During lowering, [`LocalDecl`] statements add entries to a per-function
+    /// table that maps `Handle<Local>` values to their Naga representations,
+    /// accessed via [`StatementContext::local_table`] and
+    /// [`RuntimeExpressionContext::local_table`]. This table is then consulted when
+    /// lowering subsequent [`Ident`] expressions.
+    ///
+    /// [`LocalDecl`]: ast::StatementKind::LocalDecl
+    /// [`arguments`]: ast::Function::arguments
+    /// [`Ident`]: ast::Expression::Ident
+    /// [`StatementContext::local_table`]: StatementContext::local_table
+    /// [`RuntimeExpressionContext::local_table`]: RuntimeExpressionContext::local_table
     locals: &'out mut Arena<ast::Local>,
 
     /// Identifiers used by the current global declaration that have no local definition.
@@ -2158,7 +2179,6 @@ impl Parser {
             arguments,
             result,
             body,
-            locals,
         };
 
         // done

From 6b3e039250a98f6da022f1f82fca2894889aa629 Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Thu, 4 Jul 2024 19:55:11 -0600
Subject: [PATCH 096/226] refactor(naga)!: remove `ExpressionTracer::overrides`

---
 naga/src/compact/expressions.rs | 1 -
 naga/src/compact/functions.rs   | 1 -
 naga/src/compact/mod.rs         | 1 -
 3 files changed, 3 deletions(-)

diff --git a/naga/src/compact/expressions.rs b/naga/src/compact/expressions.rs
index 8072d46d33..0677ab694a 100644
--- a/naga/src/compact/expressions.rs
+++ b/naga/src/compact/expressions.rs
@@ -3,7 +3,6 @@ use crate::arena::{Arena, Handle};
 
 pub struct ExpressionTracer<'tracer> {
     pub constants: &'tracer Arena<crate::Constant>,
-    pub overrides: &'tracer Arena<crate::Override>,
 
     /// The arena in which we are currently tracing expressions.
     pub expressions: &'tracer Arena<crate::Expression>,
diff --git a/naga/src/compact/functions.rs b/naga/src/compact/functions.rs
index 372d472da3..69387ad01d 100644
--- a/naga/src/compact/functions.rs
+++ b/naga/src/compact/functions.rs
@@ -48,7 +48,6 @@ impl<'a> FunctionTracer<'a> {
     fn as_expression(&mut self) -> super::expressions::ExpressionTracer {
         super::expressions::ExpressionTracer {
             constants: self.constants,
-            overrides: self.overrides,
             expressions: &self.function.expressions,
 
             types_used: self.types_used,
diff --git a/naga/src/compact/mod.rs b/naga/src/compact/mod.rs
index c40a1880e1..6c9ac8b6e6 100644
--- a/naga/src/compact/mod.rs
+++ b/naga/src/compact/mod.rs
@@ -253,7 +253,6 @@ impl<'module> ModuleTracer<'module> {
         expressions::ExpressionTracer {
             expressions: &self.module.global_expressions,
             constants: &self.module.constants,
-            overrides: &self.module.overrides,
             types_used: &mut self.types_used,
             constants_used: &mut self.constants_used,
             expressions_used: &mut self.global_expressions_used,

From 591e1d2a08f4bf039d49d79a97782dcff47fb1ca Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Thu, 4 Jul 2024 19:57:45 -0600
Subject: [PATCH 097/226] refactor(naga)!: remove `FunctionTracer::overrides`

---
 naga/src/compact/functions.rs | 1 -
 naga/src/compact/mod.rs       | 1 -
 2 files changed, 2 deletions(-)

diff --git a/naga/src/compact/functions.rs b/naga/src/compact/functions.rs
index 69387ad01d..bc13e4b229 100644
--- a/naga/src/compact/functions.rs
+++ b/naga/src/compact/functions.rs
@@ -4,7 +4,6 @@ use super::{FunctionMap, ModuleMap};
 pub struct FunctionTracer<'a> {
     pub function: &'a crate::Function,
     pub constants: &'a crate::Arena<crate::Constant>,
-    pub overrides: &'a crate::Arena<crate::Override>,
 
     pub types_used: &'a mut HandleSet<crate::Type>,
     pub constants_used: &'a mut HandleSet<crate::Constant>,
diff --git a/naga/src/compact/mod.rs b/naga/src/compact/mod.rs
index 6c9ac8b6e6..a9fc7bc945 100644
--- a/naga/src/compact/mod.rs
+++ b/naga/src/compact/mod.rs
@@ -267,7 +267,6 @@ impl<'module> ModuleTracer<'module> {
         FunctionTracer {
             function,
             constants: &self.module.constants,
-            overrides: &self.module.overrides,
             types_used: &mut self.types_used,
             constants_used: &mut self.constants_used,
             global_expressions_used: &mut self.global_expressions_used,

From 2611d18b154d0aad055ea72802e678d956efaea7 Mon Sep 17 00:00:00 2001
From: Marijn Suijten <marijns95@gmail.com>
Date: Sun, 21 Jul 2024 21:35:50 +0200
Subject: [PATCH 098/226] gles/wgl: Migrate from ancient/unmaintained `winapi`
 to `windows-rs`

---
 CHANGELOG.md             |  10 +-
 Cargo.lock               | 129 ++++++++++----
 Cargo.toml               |   3 +
 wgpu-hal/Cargo.toml      |   7 +-
 wgpu-hal/src/gles/wgl.rs | 353 ++++++++++++++++++---------------------
 5 files changed, 280 insertions(+), 222 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d619a2e475..6f502ebc9e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,7 @@
 Please add your PR to the changelog! Choose from a top level and bottom
 level category, then write your changes like follows:
 
-- Describe your change in a user friendly format by @yourslug in [#99999](https://github.com/gfx-rs/wgpu/pull/99999)
+- Describe your change in a user friendly format. By @yourslug in [#99999](https://github.com/gfx-rs/wgpu/pull/99999)
 
 You can add additional user facing information if it's a major breaking change. You can use the following to help:
 
@@ -55,6 +55,12 @@ Bottom level categories:
 - Print requested and supported usages on `UnsupportedUsage` error. By @VladasZ in [#6007](https://github.com/gfx-rs/wgpu/pull/6007)
 - Fix function for checking bind compatibility to error instead of panic. By @sagudev [#6012](https://github.com/gfx-rs/wgpu/pull/6012)
 
+### Dependency Updates
+
+#### GLES
+
+- Migrate the WGL wrapper from `winapi` to the `windows` crate. By @MarijnS95 in [#6006](https://github.com/gfx-rs/wgpu/pull/6006)
+
 ## 22.0.0 (2024-07-17)
 
 ### Overview
@@ -64,7 +70,7 @@ Bottom level categories:
 For the first time ever, WGPU is being released with a major version (i.e., 22.* instead of 0.22.*)! Maintainership has decided to fully adhere to [Semantic Versioning](https://semver.org/)'s recommendations for versioning production software. According to [SemVer 2.0.0's Q&A about when to use 1.0.0 versions (and beyond)](https://semver.org/spec/v2.0.0.html#how-do-i-know-when-to-release-100):
 
 > ### How do I know when to release 1.0.0?
-> 
+>
 > If your software is being used in production, it should probably already be 1.0.0. If you have a stable API on which users have come to depend, you should be 1.0.0. If you’re worrying a lot about backward compatibility, you should probably already be 1.0.0.
 
 It is a well-known fact that WGPU has been used for applications and platforms already in production for years, at this point. We are often concerned with tracking breaking changes, and affecting these consumers' ability to ship. By releasing our first major version, we publicly acknowledge that this is the case. We encourage other projects in the Rust ecosystem to follow suit.
diff --git a/Cargo.lock b/Cargo.lock
index 8b4e604f4e..1603038a2c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1522,7 +1522,7 @@ dependencies = [
  "libc",
  "log",
  "rustversion",
- "windows",
+ "windows 0.54.0",
 ]
 
 [[package]]
@@ -1686,7 +1686,7 @@ dependencies = [
  "presser",
  "thiserror",
  "winapi",
- "windows",
+ "windows 0.58.0",
 ]
 
 [[package]]
@@ -2608,7 +2608,7 @@ dependencies = [
  "redox_syscall 0.5.1",
  "smallvec",
  "thread-id",
- "windows-targets 0.52.5",
+ "windows-targets 0.52.6",
 ]
 
 [[package]]
@@ -4346,6 +4346,7 @@ dependencies = [
  "web-sys",
  "wgpu-types",
  "winapi",
+ "windows 0.58.0",
  "winit 0.29.15",
 ]
 
@@ -4473,8 +4474,18 @@ version = "0.54.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9252e5725dbed82865af151df558e754e4a3c2c30818359eb17465f1346a1b49"
 dependencies = [
- "windows-core",
- "windows-targets 0.52.5",
+ "windows-core 0.54.0",
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows"
+version = "0.58.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6"
+dependencies = [
+ "windows-core 0.58.0",
+ "windows-targets 0.52.6",
 ]
 
 [[package]]
@@ -4483,8 +4494,43 @@ version = "0.54.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "12661b9c89351d684a50a8a643ce5f608e20243b9fb84687800163429f161d65"
 dependencies = [
- "windows-result",
- "windows-targets 0.52.5",
+ "windows-result 0.1.2",
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows-core"
+version = "0.58.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99"
+dependencies = [
+ "windows-implement",
+ "windows-interface",
+ "windows-result 0.2.0",
+ "windows-strings",
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows-implement"
+version = "0.58.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.72",
+]
+
+[[package]]
+name = "windows-interface"
+version = "0.58.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.72",
 ]
 
 [[package]]
@@ -4493,7 +4539,26 @@ version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8"
 dependencies = [
- "windows-targets 0.52.5",
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows-result"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e"
+dependencies = [
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows-strings"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10"
+dependencies = [
+ "windows-result 0.2.0",
+ "windows-targets 0.52.6",
 ]
 
 [[package]]
@@ -4533,7 +4598,7 @@ version = "0.52.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
 dependencies = [
- "windows-targets 0.52.5",
+ "windows-targets 0.52.6",
 ]
 
 [[package]]
@@ -4568,18 +4633,18 @@ dependencies = [
 
 [[package]]
 name = "windows-targets"
-version = "0.52.5"
+version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
 dependencies = [
- "windows_aarch64_gnullvm 0.52.5",
- "windows_aarch64_msvc 0.52.5",
- "windows_i686_gnu 0.52.5",
+ "windows_aarch64_gnullvm 0.52.6",
+ "windows_aarch64_msvc 0.52.6",
+ "windows_i686_gnu 0.52.6",
  "windows_i686_gnullvm",
- "windows_i686_msvc 0.52.5",
- "windows_x86_64_gnu 0.52.5",
- "windows_x86_64_gnullvm 0.52.5",
- "windows_x86_64_msvc 0.52.5",
+ "windows_i686_msvc 0.52.6",
+ "windows_x86_64_gnu 0.52.6",
+ "windows_x86_64_gnullvm 0.52.6",
+ "windows_x86_64_msvc 0.52.6",
 ]
 
 [[package]]
@@ -4596,9 +4661,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
 
 [[package]]
 name = "windows_aarch64_gnullvm"
-version = "0.52.5"
+version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
 
 [[package]]
 name = "windows_aarch64_msvc"
@@ -4620,9 +4685,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
 
 [[package]]
 name = "windows_aarch64_msvc"
-version = "0.52.5"
+version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
 
 [[package]]
 name = "windows_i686_gnu"
@@ -4644,9 +4709,9 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
 
 [[package]]
 name = "windows_i686_gnu"
-version = "0.52.5"
+version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
 
 [[package]]
 name = "windows_i686_gnullvm"
@@ -4674,9 +4739,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
 
 [[package]]
 name = "windows_i686_msvc"
-version = "0.52.5"
+version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
 
 [[package]]
 name = "windows_x86_64_gnu"
@@ -4698,9 +4763,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
 
 [[package]]
 name = "windows_x86_64_gnu"
-version = "0.52.5"
+version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
 
 [[package]]
 name = "windows_x86_64_gnullvm"
@@ -4716,9 +4781,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
 
 [[package]]
 name = "windows_x86_64_gnullvm"
-version = "0.52.5"
+version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
 
 [[package]]
 name = "windows_x86_64_msvc"
@@ -4740,9 +4805,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
 
 [[package]]
 name = "windows_x86_64_msvc"
-version = "0.52.5"
+version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 
 [[package]]
 name = "winit"
diff --git a/Cargo.toml b/Cargo.toml
index 51fe42197e..76b3de9dc2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -160,6 +160,9 @@ khronos-egl = "6"
 glow = "0.14.0"
 glutin = "0.29.1"
 
+# DX and GLES dependencies
+windows = { version = "0.58", default-features = false }
+
 # wasm32 dependencies
 console_error_panic_hook = "0.1.7"
 console_log = "1"
diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml
index a54332fef6..19effa8837 100644
--- a/wgpu-hal/Cargo.toml
+++ b/wgpu-hal/Cargo.toml
@@ -61,7 +61,10 @@ gles = [
     "dep:khronos-egl",
     "dep:libloading",
     "dep:ndk-sys",
-    "winapi/libloaderapi",
+    "windows/Win32_Graphics_OpenGL",
+    "windows/Win32_Graphics_Gdi",
+    "windows/Win32_System_LibraryLoader",
+    "windows/Win32_UI_WindowsAndMessaging",
 ]
 ## Enables the DX12 backend when targeting Windows.
 ##
@@ -144,6 +147,8 @@ khronos-egl = { version = "6", features = ["static", "no-pkg-config"] }
 libloading = { version = ">=0.7, <0.9", optional = true }
 
 [target.'cfg(windows)'.dependencies]
+# backend: Dx12 and Gles
+windows = { workspace = true, optional = true }
 # backend: Dx12
 bit-set = { version = "0.8", optional = true }
 range-alloc = { version = "0.1", optional = true }
diff --git a/wgpu-hal/src/gles/wgl.rs b/wgpu-hal/src/gles/wgl.rs
index 1e92d488ae..64ed063254 100644
--- a/wgpu-hal/src/gles/wgl.rs
+++ b/wgpu-hal/src/gles/wgl.rs
@@ -9,7 +9,6 @@ use raw_window_handle::{RawDisplayHandle, RawWindowHandle};
 use std::{
     collections::HashSet,
     ffi::{c_void, CStr, CString},
-    io::Error,
     mem,
     os::raw::c_int,
     ptr,
@@ -21,23 +20,13 @@ use std::{
     time::Duration,
 };
 use wgt::InstanceFlags;
-use winapi::{
-    shared::{
-        minwindef::{FALSE, HMODULE, LPARAM, LRESULT, UINT, WPARAM},
-        windef::{HDC, HGLRC, HWND},
-    },
-    um::{
-        libloaderapi::{GetModuleHandleA, GetProcAddress, LoadLibraryA},
-        wingdi::{
-            wglCreateContext, wglDeleteContext, wglGetCurrentContext, wglGetProcAddress,
-            wglMakeCurrent, ChoosePixelFormat, DescribePixelFormat, GetPixelFormat, SetPixelFormat,
-            SwapBuffers, PFD_DOUBLEBUFFER, PFD_DRAW_TO_WINDOW, PFD_SUPPORT_OPENGL, PFD_TYPE_RGBA,
-            PIXELFORMATDESCRIPTOR,
-        },
-        winuser::{
-            CreateWindowExA, DefWindowProcA, DestroyWindow, GetDC, RegisterClassExA, ReleaseDC,
-            CS_OWNDC, WNDCLASSEXA,
-        },
+use windows::{
+    core::{Error, PCSTR},
+    Win32::{
+        Foundation,
+        Graphics::{Gdi, OpenGL},
+        System::LibraryLoader,
+        UI::WindowsAndMessaging,
     },
 };
 
@@ -59,7 +48,7 @@ impl AdapterContext {
     }
 
     pub fn raw_context(&self) -> *mut c_void {
-        self.inner.lock().context.context.cast()
+        self.inner.lock().context.context.0
     }
 
     /// Obtain a lock to the WGL context and get handle to the [`glow::Context`] that can be used to
@@ -84,7 +73,7 @@ impl AdapterContext {
     /// Unlike [`lock`](Self::lock), this accepts a device to pass to `make_current` and exposes the error
     /// when `make_current` fails.
     #[track_caller]
-    fn lock_with_dc(&self, device: HDC) -> Result<AdapterContextLock<'_>, Error> {
+    fn lock_with_dc(&self, device: Gdi::HDC) -> windows::core::Result<AdapterContextLock<'_>> {
         let inner = self
             .inner
             .try_lock_for(Duration::from_secs(CONTEXT_LOCK_TIMEOUT_SECS))
@@ -117,37 +106,27 @@ impl<'a> Drop for AdapterContextLock<'a> {
 }
 
 struct WglContext {
-    context: HGLRC,
+    context: OpenGL::HGLRC,
 }
 
 impl WglContext {
-    fn make_current(&self, device: HDC) -> Result<(), Error> {
-        if unsafe { wglMakeCurrent(device, self.context) } == FALSE {
-            Err(Error::last_os_error())
-        } else {
-            Ok(())
-        }
+    fn make_current(&self, device: Gdi::HDC) -> windows::core::Result<()> {
+        unsafe { OpenGL::wglMakeCurrent(device, self.context) }
     }
 
-    fn unmake_current(&self) -> Result<(), Error> {
-        if unsafe { wglGetCurrentContext().is_null() } {
+    fn unmake_current(&self) -> windows::core::Result<()> {
+        if unsafe { OpenGL::wglGetCurrentContext() }.is_invalid() {
             return Ok(());
         }
-        if unsafe { wglMakeCurrent(ptr::null_mut(), ptr::null_mut()) } == FALSE {
-            Err(Error::last_os_error())
-        } else {
-            Ok(())
-        }
+        unsafe { OpenGL::wglMakeCurrent(None, None) }
     }
 }
 
 impl Drop for WglContext {
     fn drop(&mut self) {
-        unsafe {
-            if wglDeleteContext(self.context) == FALSE {
-                log::error!("failed to delete WGL context {}", Error::last_os_error());
-            }
-        };
+        if let Err(e) = unsafe { OpenGL::wglDeleteContext(self.context) } {
+            log::error!("failed to delete WGL context: {e}");
+        }
     }
 }
 
@@ -171,20 +150,20 @@ pub struct Instance {
 unsafe impl Send for Instance {}
 unsafe impl Sync for Instance {}
 
-fn load_gl_func(name: &str, module: Option<HMODULE>) -> *const c_void {
+fn load_gl_func(name: &str, module: Option<Foundation::HMODULE>) -> *const c_void {
     let addr = CString::new(name.as_bytes()).unwrap();
-    let mut ptr = unsafe { wglGetProcAddress(addr.as_ptr()) };
-    if ptr.is_null() {
+    let mut ptr = unsafe { OpenGL::wglGetProcAddress(PCSTR(addr.as_ptr().cast())) };
+    if ptr.is_none() {
         if let Some(module) = module {
-            ptr = unsafe { GetProcAddress(module, addr.as_ptr()) };
+            ptr = unsafe { LibraryLoader::GetProcAddress(module, PCSTR(addr.as_ptr().cast())) };
         }
     }
-    ptr.cast()
+    ptr.map_or_else(ptr::null_mut, |p| p as *mut c_void)
 }
 
-fn get_extensions(extra: &Wgl, dc: HDC) -> HashSet<String> {
+fn get_extensions(extra: &Wgl, dc: Gdi::HDC) -> HashSet<String> {
     if extra.GetExtensionsStringARB.is_loaded() {
-        unsafe { CStr::from_ptr(extra.GetExtensionsStringARB(dc.cast())) }
+        unsafe { CStr::from_ptr(extra.GetExtensionsStringARB(dc.0)) }
             .to_str()
             .unwrap_or("")
     } else {
@@ -195,63 +174,75 @@ fn get_extensions(extra: &Wgl, dc: HDC) -> HashSet<String> {
     .collect()
 }
 
-unsafe fn setup_pixel_format(dc: HDC) -> Result<(), crate::InstanceError> {
-    let mut format: PIXELFORMATDESCRIPTOR = unsafe { mem::zeroed() };
-    format.nVersion = 1;
-    format.nSize = mem::size_of_val(&format) as u16;
-    format.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER;
-    format.iPixelType = PFD_TYPE_RGBA;
-    format.cColorBits = 8;
+unsafe fn setup_pixel_format(dc: Gdi::HDC) -> Result<(), crate::InstanceError> {
+    {
+        let format = OpenGL::PIXELFORMATDESCRIPTOR {
+            nVersion: 1,
+            nSize: mem::size_of::<OpenGL::PIXELFORMATDESCRIPTOR>() as u16,
+            dwFlags: OpenGL::PFD_DRAW_TO_WINDOW
+                | OpenGL::PFD_SUPPORT_OPENGL
+                | OpenGL::PFD_DOUBLEBUFFER,
+            iPixelType: OpenGL::PFD_TYPE_RGBA,
+            cColorBits: 8,
+            ..unsafe { mem::zeroed() }
+        };
 
-    let index = unsafe { ChoosePixelFormat(dc, &format) };
-    if index == 0 {
-        return Err(crate::InstanceError::with_source(
-            String::from("unable to choose pixel format"),
-            Error::last_os_error(),
-        ));
-    }
+        let index = unsafe { OpenGL::ChoosePixelFormat(dc, &format) };
+        if index == 0 {
+            return Err(crate::InstanceError::with_source(
+                String::from("unable to choose pixel format"),
+                Error::from_win32(),
+            ));
+        }
 
-    let current = unsafe { GetPixelFormat(dc) };
+        let current = unsafe { OpenGL::GetPixelFormat(dc) };
 
-    if index != current && unsafe { SetPixelFormat(dc, index, &format) } == FALSE {
-        return Err(crate::InstanceError::with_source(
-            String::from("unable to set pixel format"),
-            Error::last_os_error(),
-        ));
+        if index != current {
+            unsafe { OpenGL::SetPixelFormat(dc, index, &format) }.map_err(|e| {
+                crate::InstanceError::with_source(String::from("unable to set pixel format"), e)
+            })?;
+        }
     }
 
-    let index = unsafe { GetPixelFormat(dc) };
-    if index == 0 {
-        return Err(crate::InstanceError::with_source(
-            String::from("unable to get pixel format index"),
-            Error::last_os_error(),
-        ));
-    }
-    if unsafe { DescribePixelFormat(dc, index, mem::size_of_val(&format) as UINT, &mut format) }
-        == 0
     {
-        return Err(crate::InstanceError::with_source(
-            String::from("unable to read pixel format"),
-            Error::last_os_error(),
-        ));
-    }
+        let index = unsafe { OpenGL::GetPixelFormat(dc) };
+        if index == 0 {
+            return Err(crate::InstanceError::with_source(
+                String::from("unable to get pixel format index"),
+                Error::from_win32(),
+            ));
+        }
+        let mut format = Default::default();
+        if unsafe {
+            OpenGL::DescribePixelFormat(
+                dc,
+                index,
+                mem::size_of_val(&format) as u32,
+                Some(&mut format),
+            )
+        } == 0
+        {
+            return Err(crate::InstanceError::with_source(
+                String::from("unable to read pixel format"),
+                Error::from_win32(),
+            ));
+        }
 
-    if format.dwFlags & PFD_SUPPORT_OPENGL == 0 || format.iPixelType != PFD_TYPE_RGBA {
-        return Err(crate::InstanceError::new(String::from(
-            "unsuitable pixel format",
-        )));
+        if !format.dwFlags.contains(OpenGL::PFD_SUPPORT_OPENGL)
+            || format.iPixelType != OpenGL::PFD_TYPE_RGBA
+        {
+            return Err(crate::InstanceError::new(String::from(
+                "unsuitable pixel format",
+            )));
+        }
     }
     Ok(())
 }
 
 fn create_global_window_class() -> Result<CString, crate::InstanceError> {
-    let instance = unsafe { GetModuleHandleA(ptr::null()) };
-    if instance.is_null() {
-        return Err(crate::InstanceError::with_source(
-            String::from("unable to get executable instance"),
-            Error::last_os_error(),
-        ));
-    }
+    let instance = unsafe { LibraryLoader::GetModuleHandleA(None) }.map_err(|e| {
+        crate::InstanceError::with_source(String::from("unable to get executable instance"), e)
+    })?;
 
     // Use the address of `UNIQUE` as part of the window class name to ensure different
     // `wgpu` versions use different names.
@@ -262,35 +253,35 @@ fn create_global_window_class() -> Result<CString, crate::InstanceError> {
 
     // Use a wrapper function for compatibility with `windows-rs`.
     unsafe extern "system" fn wnd_proc(
-        window: HWND,
-        msg: UINT,
-        wparam: WPARAM,
-        lparam: LPARAM,
-    ) -> LRESULT {
-        unsafe { DefWindowProcA(window, msg, wparam, lparam) }
+        window: Foundation::HWND,
+        msg: u32,
+        wparam: Foundation::WPARAM,
+        lparam: Foundation::LPARAM,
+    ) -> Foundation::LRESULT {
+        unsafe { WindowsAndMessaging::DefWindowProcA(window, msg, wparam, lparam) }
     }
 
-    let window_class = WNDCLASSEXA {
-        cbSize: mem::size_of::<WNDCLASSEXA>() as u32,
-        style: CS_OWNDC,
+    let window_class = WindowsAndMessaging::WNDCLASSEXA {
+        cbSize: mem::size_of::<WindowsAndMessaging::WNDCLASSEXA>() as u32,
+        style: WindowsAndMessaging::CS_OWNDC,
         lpfnWndProc: Some(wnd_proc),
         cbClsExtra: 0,
         cbWndExtra: 0,
-        hInstance: instance,
-        hIcon: ptr::null_mut(),
-        hCursor: ptr::null_mut(),
-        hbrBackground: ptr::null_mut(),
-        lpszMenuName: ptr::null_mut(),
-        lpszClassName: name.as_ptr(),
-        hIconSm: ptr::null_mut(),
+        hInstance: instance.into(),
+        hIcon: WindowsAndMessaging::HICON::default(),
+        hCursor: WindowsAndMessaging::HCURSOR::default(),
+        hbrBackground: Gdi::HBRUSH::default(),
+        lpszMenuName: PCSTR::null(),
+        lpszClassName: PCSTR(name.as_ptr().cast()),
+        hIconSm: WindowsAndMessaging::HICON::default(),
     };
 
-    let atom = unsafe { RegisterClassExA(&window_class) };
+    let atom = unsafe { WindowsAndMessaging::RegisterClassExA(&window_class) };
 
     if atom == 0 {
         return Err(crate::InstanceError::with_source(
             String::from("unable to register window class"),
-            Error::last_os_error(),
+            Error::from_win32(),
         ));
     }
 
@@ -306,7 +297,7 @@ fn get_global_window_class() -> Result<CString, crate::InstanceError> {
 }
 
 struct InstanceDevice {
-    dc: HDC,
+    dc: Gdi::HDC,
 
     /// This is used to keep the thread owning `dc` alive until this struct is dropped.
     _tx: SyncSender<()>,
@@ -314,31 +305,19 @@ struct InstanceDevice {
 
 fn create_instance_device() -> Result<InstanceDevice, crate::InstanceError> {
     #[derive(Clone, Copy)]
-    struct SendDc(HDC);
+    // TODO: We can get these SendSync definitions in the upstream metadata if this is the case
+    struct SendDc(Gdi::HDC);
     unsafe impl Sync for SendDc {}
     unsafe impl Send for SendDc {}
 
     struct Window {
-        window: HWND,
+        window: Foundation::HWND,
     }
     impl Drop for Window {
         fn drop(&mut self) {
-            unsafe {
-                if DestroyWindow(self.window) == FALSE {
-                    log::error!("failed to destroy window {}", Error::last_os_error());
-                }
-            };
-        }
-    }
-    struct DeviceContextHandle {
-        dc: HDC,
-        window: HWND,
-    }
-    impl Drop for DeviceContextHandle {
-        fn drop(&mut self) {
-            unsafe {
-                ReleaseDC(self.window, self.dc);
-            };
+            if let Err(e) = unsafe { WindowsAndMessaging::DestroyWindow(self.window) } {
+                log::error!("failed to destroy window: {e}");
+            }
         }
     }
 
@@ -353,58 +332,57 @@ fn create_instance_device() -> Result<InstanceDevice, crate::InstanceError> {
         .name("wgpu-hal WGL Instance Thread".to_owned())
         .spawn(move || {
             let setup = (|| {
-                let instance = unsafe { GetModuleHandleA(ptr::null()) };
-                if instance.is_null() {
-                    return Err(crate::InstanceError::with_source(
+                let instance = unsafe { LibraryLoader::GetModuleHandleA(None) }.map_err(|e| {
+                    crate::InstanceError::with_source(
                         String::from("unable to get executable instance"),
-                        Error::last_os_error(),
-                    ));
-                }
+                        e,
+                    )
+                })?;
 
                 // Create a hidden window since we don't pass `WS_VISIBLE`.
                 let window = unsafe {
-                    CreateWindowExA(
-                        0,
-                        window_class.as_ptr(),
-                        window_class.as_ptr(),
-                        0,
+                    WindowsAndMessaging::CreateWindowExA(
+                        WindowsAndMessaging::WINDOW_EX_STYLE::default(),
+                        PCSTR(window_class.as_ptr().cast()),
+                        PCSTR(window_class.as_ptr().cast()),
+                        WindowsAndMessaging::WINDOW_STYLE::default(),
                         0,
                         0,
                         1,
                         1,
-                        ptr::null_mut(),
-                        ptr::null_mut(),
+                        None,
+                        None,
                         instance,
-                        ptr::null_mut(),
+                        None,
                     )
-                };
-                if window.is_null() {
-                    return Err(crate::InstanceError::with_source(
-                        String::from("unable to create hidden instance window"),
-                        Error::last_os_error(),
-                    ));
                 }
+                .map_err(|e| {
+                    crate::InstanceError::with_source(
+                        String::from("unable to create hidden instance window"),
+                        e,
+                    )
+                })?;
                 let window = Window { window };
 
-                let dc = unsafe { GetDC(window.window) };
-                if dc.is_null() {
+                let dc = unsafe { Gdi::GetDC(window.window) };
+                if dc.is_invalid() {
                     return Err(crate::InstanceError::with_source(
                         String::from("unable to create memory device"),
-                        Error::last_os_error(),
+                        Error::from_win32(),
                     ));
                 }
                 let dc = DeviceContextHandle {
-                    dc,
+                    device: dc,
                     window: window.window,
                 };
-                unsafe { setup_pixel_format(dc.dc)? };
+                unsafe { setup_pixel_format(dc.device)? };
 
                 Ok((window, dc))
             })();
 
             match setup {
                 Ok((_window, dc)) => {
-                    setup_tx.send(Ok(SendDc(dc.dc))).unwrap();
+                    setup_tx.send(Ok(SendDc(dc.device))).unwrap();
                     // Wait for the shutdown event to free the window and device context handle.
                     drop_rx.recv().ok();
                 }
@@ -427,24 +405,25 @@ impl crate::Instance for Instance {
 
     unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> {
         profiling::scope!("Init OpenGL (WGL) Backend");
-        let opengl_module = unsafe { LoadLibraryA("opengl32.dll\0".as_ptr().cast()) };
-        if opengl_module.is_null() {
-            return Err(crate::InstanceError::with_source(
-                String::from("unable to load the OpenGL library"),
-                Error::last_os_error(),
-            ));
-        }
+        let opengl_module =
+            unsafe { LibraryLoader::LoadLibraryA(PCSTR("opengl32.dll\0".as_ptr())) }.map_err(
+                |e| {
+                    crate::InstanceError::with_source(
+                        String::from("unable to load the OpenGL library"),
+                        e,
+                    )
+                },
+            )?;
 
         let device = create_instance_device()?;
         let dc = device.dc;
 
-        let context = unsafe { wglCreateContext(dc) };
-        if context.is_null() {
-            return Err(crate::InstanceError::with_source(
+        let context = unsafe { OpenGL::wglCreateContext(dc) }.map_err(|e| {
+            crate::InstanceError::with_source(
                 String::from("unable to create initial OpenGL context"),
-                Error::last_os_error(),
-            ));
-        }
+                e,
+            )
+        })?;
         let context = WglContext { context };
         context.make_current(dc).map_err(|e| {
             crate::InstanceError::with_source(
@@ -471,17 +450,16 @@ impl crate::Instance for Instance {
                 },
                 0, // End of list
             ];
-            let context = unsafe {
-                extra.CreateContextAttribsARB(dc.cast(), ptr::null(), attributes.as_ptr())
-            };
+            let context =
+                unsafe { extra.CreateContextAttribsARB(dc.0, ptr::null(), attributes.as_ptr()) };
             if context.is_null() {
                 return Err(crate::InstanceError::with_source(
                     String::from("unable to create OpenGL context"),
-                    Error::last_os_error(),
+                    Error::from_win32(),
                 ));
             }
             WglContext {
-                context: context.cast_mut().cast(),
+                context: OpenGL::HGLRC(context.cast_mut()),
             }
         } else {
             context
@@ -550,7 +528,8 @@ impl crate::Instance for Instance {
             )));
         };
         Ok(Surface {
-            window: window.hwnd.get() as *mut _,
+            // This cast exists because of https://github.com/rust-windowing/raw-window-handle/issues/171
+            window: Foundation::HWND(window.hwnd.get() as *mut _),
             presentable: true,
             swapchain: RwLock::new(None),
             srgb_capable: self.srgb_capable,
@@ -573,14 +552,14 @@ impl crate::Instance for Instance {
 }
 
 struct DeviceContextHandle {
-    device: HDC,
-    window: HWND,
+    device: Gdi::HDC,
+    window: Foundation::HWND,
 }
 
 impl Drop for DeviceContextHandle {
     fn drop(&mut self) {
         unsafe {
-            ReleaseDC(self.window, self.device);
+            Gdi::ReleaseDC(self.window, self.device);
         };
     }
 }
@@ -599,7 +578,7 @@ pub struct Swapchain {
 }
 
 pub struct Surface {
-    window: HWND,
+    window: Foundation::HWND,
     pub(super) presentable: bool,
     swapchain: RwLock<Option<Swapchain>>,
     srgb_capable: bool,
@@ -616,11 +595,11 @@ impl Surface {
     ) -> Result<(), crate::SurfaceError> {
         let swapchain = self.swapchain.read();
         let sc = swapchain.as_ref().unwrap();
-        let dc = unsafe { GetDC(self.window) };
-        if dc.is_null() {
+        let dc = unsafe { Gdi::GetDC(self.window) };
+        if dc.is_invalid() {
             log::error!(
                 "unable to get the device context from window: {}",
-                Error::last_os_error()
+                Error::from_win32()
             );
             return Err(crate::SurfaceError::Other(
                 "unable to get the device context from window",
@@ -670,8 +649,8 @@ impl Surface {
         unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, None) };
         unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, None) };
 
-        if unsafe { SwapBuffers(dc.device) } == FALSE {
-            log::error!("unable to swap buffers: {}", Error::last_os_error());
+        if let Err(e) = unsafe { OpenGL::SwapBuffers(dc.device) } {
+            log::error!("unable to swap buffers: {e}");
             return Err(crate::SurfaceError::Other("unable to swap buffers"));
         }
 
@@ -694,11 +673,11 @@ impl crate::Surface for Surface {
         // Remove the old configuration.
         unsafe { self.unconfigure(device) };
 
-        let dc = unsafe { GetDC(self.window) };
-        if dc.is_null() {
+        let dc = unsafe { Gdi::GetDC(self.window) };
+        if dc.is_invalid() {
             log::error!(
                 "unable to get the device context from window: {}",
-                Error::last_os_error()
+                Error::from_win32()
             );
             return Err(crate::SurfaceError::Other(
                 "unable to get the device context from window",
@@ -771,8 +750,8 @@ impl crate::Surface for Surface {
             }
         };
 
-        if unsafe { extra.SwapIntervalEXT(if vsync { 1 } else { 0 }) } == FALSE {
-            log::error!("unable to set swap interval: {}", Error::last_os_error());
+        if unsafe { extra.SwapIntervalEXT(if vsync { 1 } else { 0 }) } == Foundation::FALSE.0 {
+            log::error!("unable to set swap interval: {}", Error::from_win32());
             return Err(crate::SurfaceError::Other("unable to set swap interval"));
         }
 

From 03f6d24ea6996c8dce2364531ed6ca6db719ddcf Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Thu, 25 Jul 2024 13:43:25 +0200
Subject: [PATCH 099/226] deduplicate derived BGLs

---
 CHANGELOG.md                           |   1 +
 tests/tests/bind_group_layout_dedup.rs | 100 +++++++++++++++++++++++--
 wgpu-core/src/command/bind.rs          |   8 +-
 wgpu-core/src/device/bgl.rs            |   5 ++
 wgpu-core/src/device/resource.rs       |  40 +++++++---
 5 files changed, 131 insertions(+), 23 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6f502ebc9e..c63d7c35c9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -54,6 +54,7 @@ Bottom level categories:
 - Bump MSRV for `d3d12`/`naga`/`wgpu-core`/`wgpu-hal`/`wgpu-types`' to 1.76. By @wumpf in [#6003](https://github.com/gfx-rs/wgpu/pull/6003)
 - Print requested and supported usages on `UnsupportedUsage` error. By @VladasZ in [#6007](https://github.com/gfx-rs/wgpu/pull/6007)
 - Fix function for checking bind compatibility to error instead of panic. By @sagudev [#6012](https://github.com/gfx-rs/wgpu/pull/6012)
+- Deduplicate bind group layouts that are created from pipelines with "auto" layouts. By @teoxoy [#6049](https://github.com/gfx-rs/wgpu/pull/6049)
 
 ### Dependency Updates
 
diff --git a/tests/tests/bind_group_layout_dedup.rs b/tests/tests/bind_group_layout_dedup.rs
index 5c38779f13..591f4f9054 100644
--- a/tests/tests/bind_group_layout_dedup.rs
+++ b/tests/tests/bind_group_layout_dedup.rs
@@ -241,11 +241,11 @@ fn bgl_dedupe_with_dropped_user_handle(ctx: TestingContext) {
 }
 
 #[gpu_test]
-static BIND_GROUP_LAYOUT_DEDUPLICATION_DERIVED: GpuTestConfiguration = GpuTestConfiguration::new()
+static GET_DERIVED_BGL: GpuTestConfiguration = GpuTestConfiguration::new()
     .parameters(TestParameters::default().test_features_limits())
-    .run_sync(bgl_dedupe_derived);
+    .run_sync(get_derived_bgl);
 
-fn bgl_dedupe_derived(ctx: TestingContext) {
+fn get_derived_bgl(ctx: TestingContext) {
     let buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
         label: None,
         size: 4,
@@ -314,12 +314,12 @@ fn bgl_dedupe_derived(ctx: TestingContext) {
 }
 
 #[gpu_test]
-static SEPARATE_PROGRAMS_HAVE_INCOMPATIBLE_DERIVED_BGLS: GpuTestConfiguration =
+static SEPARATE_PIPELINES_HAVE_INCOMPATIBLE_DERIVED_BGLS: GpuTestConfiguration =
     GpuTestConfiguration::new()
         .parameters(TestParameters::default().test_features_limits())
-        .run_sync(separate_programs_have_incompatible_derived_bgls);
+        .run_sync(separate_pipelines_have_incompatible_derived_bgls);
 
-fn separate_programs_have_incompatible_derived_bgls(ctx: TestingContext) {
+fn separate_pipelines_have_incompatible_derived_bgls(ctx: TestingContext) {
     let buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
         label: None,
         size: 4,
@@ -448,3 +448,91 @@ fn derived_bgls_incompatible_with_regular_bgls(ctx: TestingContext) {
         None,
     )
 }
+
+#[gpu_test]
+static BIND_GROUP_LAYOUT_DEDUPLICATION_DERIVED: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(TestParameters::default().test_features_limits())
+    .run_sync(bgl_dedupe_derived);
+
+fn bgl_dedupe_derived(ctx: TestingContext) {
+    let src = "
+        @group(0) @binding(0) var<uniform> u1: vec4f;
+        @group(1) @binding(0) var<uniform> u2: vec4f;
+
+        @compute @workgroup_size(1, 1, 1)
+        fn main() {
+            // Just need a static use.
+            let _u1 = u1;
+            let _u2 = u2;
+        }
+    ";
+    let module = ctx
+        .device
+        .create_shader_module(wgpu::ShaderModuleDescriptor {
+            label: None,
+            source: wgpu::ShaderSource::Wgsl(src.into()),
+        });
+
+    let pipeline = ctx
+        .device
+        .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
+            label: None,
+            layout: None,
+            module: &module,
+            entry_point: None,
+            compilation_options: Default::default(),
+            cache: None,
+        });
+
+    let bind_group_layout_0 = pipeline.get_bind_group_layout(0);
+    let bind_group_layout_1 = pipeline.get_bind_group_layout(1);
+
+    let buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
+        label: None,
+        size: 16,
+        usage: wgpu::BufferUsages::UNIFORM,
+        mapped_at_creation: false,
+    });
+
+    let bind_group_0 = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
+        label: None,
+        layout: &bind_group_layout_1,
+        entries: &[wgpu::BindGroupEntry {
+            binding: 0,
+            resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding {
+                buffer: &buffer,
+                offset: 0,
+                size: None,
+            }),
+        }],
+    });
+    let bind_group_1 = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
+        label: None,
+        layout: &bind_group_layout_0,
+        entries: &[wgpu::BindGroupEntry {
+            binding: 0,
+            resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding {
+                buffer: &buffer,
+                offset: 0,
+                size: None,
+            }),
+        }],
+    });
+
+    let mut encoder = ctx
+        .device
+        .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
+
+    let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+        label: None,
+        timestamp_writes: None,
+    });
+    pass.set_pipeline(&pipeline);
+    pass.set_bind_group(0, &bind_group_0, &[]);
+    pass.set_bind_group(1, &bind_group_1, &[]);
+    pass.dispatch_workgroups(1, 1, 1);
+
+    drop(pass);
+
+    ctx.queue.submit(Some(encoder.finish()));
+}
diff --git a/wgpu-core/src/command/bind.rs b/wgpu-core/src/command/bind.rs
index 73f1d9fe17..04a992928c 100644
--- a/wgpu-core/src/command/bind.rs
+++ b/wgpu-core/src/command/bind.rs
@@ -176,16 +176,10 @@ mod compat {
                             }
                         }
 
-                        #[derive(Clone, Debug, Error)]
-                        #[error("Unknown reason")]
-                        struct Unknown();
-
                         Err(Error::Incompatible {
                             expected_bgl: expected_bgl.error_ident(),
                             assigned_bgl: assigned_bgl.error_ident(),
-                            inner: MultiError::new(errors.drain(..)).unwrap_or_else(|| {
-                                MultiError::new(core::iter::once(Unknown())).unwrap()
-                            }),
+                            inner: MultiError::new(errors.drain(..)).unwrap(),
                         })
                     }
                 } else {
diff --git a/wgpu-core/src/device/bgl.rs b/wgpu-core/src/device/bgl.rs
index 911ac8a435..9b7bdc0fee 100644
--- a/wgpu-core/src/device/bgl.rs
+++ b/wgpu-core/src/device/bgl.rs
@@ -126,4 +126,9 @@ impl EntryMap {
         self.sorted = false;
         self.inner.entry(key)
     }
+
+    pub fn sort(&mut self) {
+        self.inner.sort_unstable_keys();
+        self.sorted = true;
+    }
 }
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 4a063fbf2f..9f8f48e566 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -32,7 +32,7 @@ use crate::{
         UsageScopePool,
     },
     validation::{self, validate_color_attachment_bytes_per_sample},
-    FastHashMap, LabelHelpers as _, SubmissionIndex,
+    FastHashMap, LabelHelpers as _, PreHashedKey, PreHashedMap, SubmissionIndex,
 };
 
 use arrayvec::ArrayVec;
@@ -2592,11 +2592,29 @@ impl<A: HalApi> Device<A> {
             derived_group_layouts.pop();
         }
 
+        let mut unique_bind_group_layouts = PreHashedMap::default();
+
         let bind_group_layouts = derived_group_layouts
             .into_iter()
-            .map(|bgl_entry_map| {
-                self.create_bind_group_layout(&None, bgl_entry_map, bgl::Origin::Derived)
-                    .map(Arc::new)
+            .map(|mut bgl_entry_map| {
+                bgl_entry_map.sort();
+                match unique_bind_group_layouts.entry(PreHashedKey::from_key(&bgl_entry_map)) {
+                    std::collections::hash_map::Entry::Occupied(v) => Ok(Arc::clone(v.get())),
+                    std::collections::hash_map::Entry::Vacant(e) => {
+                        match self.create_bind_group_layout(
+                            &None,
+                            bgl_entry_map,
+                            bgl::Origin::Derived,
+                        ) {
+                            Ok(bgl) => {
+                                let bgl = Arc::new(bgl);
+                                e.insert(bgl.clone());
+                                Ok(bgl)
+                            }
+                            Err(e) => Err(e),
+                        }
+                    }
+                }
             })
             .collect::<Result<Vec<_>, _>>()?;
 
@@ -2730,11 +2748,12 @@ impl<A: HalApi> Device<A> {
 
         if is_auto_layout {
             for bgl in pipeline.layout.bind_group_layouts.iter() {
-                bgl.exclusive_pipeline
+                // `bind_group_layouts` might contain duplicate entries, so we need to ignore the result.
+                let _ = bgl
+                    .exclusive_pipeline
                     .set(binding_model::ExclusivePipeline::Compute(Arc::downgrade(
                         &pipeline,
-                    )))
-                    .unwrap();
+                    )));
             }
         }
 
@@ -3355,11 +3374,12 @@ impl<A: HalApi> Device<A> {
 
         if is_auto_layout {
             for bgl in pipeline.layout.bind_group_layouts.iter() {
-                bgl.exclusive_pipeline
+                // `bind_group_layouts` might contain duplicate entries, so we need to ignore the result.
+                let _ = bgl
+                    .exclusive_pipeline
                     .set(binding_model::ExclusivePipeline::Render(Arc::downgrade(
                         &pipeline,
-                    )))
-                    .unwrap();
+                    )));
             }
         }
 

From d3c38a4fd03f55124b800581338fbd31aa617f49 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Fri, 26 Jul 2024 10:31:36 +0200
Subject: [PATCH 100/226] Fix AnyDevice drop implementation dropping the wrong
 thing (#6052)

---
 CHANGELOG.md                       | 1 +
 wgpu-core/src/device/any_device.rs | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c63d7c35c9..bf6f23104c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -55,6 +55,7 @@ Bottom level categories:
 - Print requested and supported usages on `UnsupportedUsage` error. By @VladasZ in [#6007](https://github.com/gfx-rs/wgpu/pull/6007)
 - Fix function for checking bind compatibility to error instead of panic. By @sagudev [#6012](https://github.com/gfx-rs/wgpu/pull/6012)
 - Deduplicate bind group layouts that are created from pipelines with "auto" layouts. By @teoxoy [#6049](https://github.com/gfx-rs/wgpu/pull/6049)
+- Fix crash when dropping the surface after the device. By @wumpf in [#6052](https://github.com/gfx-rs/wgpu/pull/6052)
 
 ### Dependency Updates
 
diff --git a/wgpu-core/src/device/any_device.rs b/wgpu-core/src/device/any_device.rs
index 9e459c1a94..e796bf0574 100644
--- a/wgpu-core/src/device/any_device.rs
+++ b/wgpu-core/src/device/any_device.rs
@@ -34,7 +34,7 @@ impl AnyDevice {
         unsafe fn drop_glue<A: HalApi>(ptr: *mut ()) {
             // Drop the arc this instance is holding.
             unsafe {
-                _ = Arc::from_raw(ptr.cast::<A::Device>());
+                _ = Arc::from_raw(ptr.cast::<Device<A>>());
             }
         }
 

From 1f4f675b1bc252b2783594c72eade60f1a7358aa Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Thu, 25 Jul 2024 10:54:08 +0200
Subject: [PATCH 101/226] [naga] add back `PartialEq` derives for some types

---
 naga/src/lib.rs | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/naga/src/lib.rs b/naga/src/lib.rs
index c356a2cf03..4f80345bba 100644
--- a/naga/src/lib.rs
+++ b/naga/src/lib.rs
@@ -873,7 +873,7 @@ pub enum Literal {
 }
 
 /// Pipeline-overridable constant.
-#[derive(Debug, Clone)]
+#[derive(Clone, Debug, PartialEq)]
 #[cfg_attr(feature = "serialize", derive(Serialize))]
 #[cfg_attr(feature = "deserialize", derive(Deserialize))]
 #[cfg_attr(feature = "arbitrary", derive(Arbitrary))]
@@ -891,8 +891,7 @@ pub struct Override {
 }
 
 /// Constant value.
-#[derive(Debug, Clone)]
-#[cfg_attr(test, derive(PartialEq))]
+#[derive(Clone, Debug, PartialEq)]
 #[cfg_attr(feature = "serialize", derive(Serialize))]
 #[cfg_attr(feature = "deserialize", derive(Deserialize))]
 #[cfg_attr(feature = "arbitrary", derive(Arbitrary))]
@@ -954,7 +953,7 @@ pub struct ResourceBinding {
 }
 
 /// Variable defined at module level.
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq)]
 #[cfg_attr(feature = "serialize", derive(Serialize))]
 #[cfg_attr(feature = "deserialize", derive(Deserialize))]
 #[cfg_attr(feature = "arbitrary", derive(Arbitrary))]
@@ -1354,8 +1353,7 @@ bitflags::bitflags! {
 ///
 /// [`Constant`]: Expression::Constant
 /// [`Override`]: Expression::Override
-#[derive(Clone, Debug)]
-#[cfg_attr(test, derive(PartialEq))]
+#[derive(Clone, Debug, PartialEq)]
 #[cfg_attr(feature = "serialize", derive(Serialize))]
 #[cfg_attr(feature = "deserialize", derive(Deserialize))]
 #[cfg_attr(feature = "arbitrary", derive(Arbitrary))]

From 3166d377544c189135196039d516153176663847 Mon Sep 17 00:00:00 2001
From: Bruce Mitchener <bruce.mitchener@gmail.com>
Date: Fri, 26 Jul 2024 23:48:01 +0700
Subject: [PATCH 102/226] Use workspace dependencies more. (#6020)

---
 Cargo.toml            | 14 +++++---
 naga-cli/Cargo.toml   |  8 ++---
 naga/Cargo.toml       | 16 ++++-----
 wgpu-core/Cargo.toml  | 31 ++++++++--------
 wgpu-hal/Cargo.toml   | 82 ++++++++++++++++++++-----------------------
 wgpu-types/Cargo.toml | 12 +++----
 6 files changed, 82 insertions(+), 81 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 76b3de9dc2..944402cd73 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -71,10 +71,11 @@ version = "22.0.0"
 
 [workspace.dependencies]
 anyhow = "1.0.86"
+argh = "0.1.5"
 arrayvec = "0.7"
 bincode = "1"
 bit-vec = "0.8"
-bitflags = "2"
+bitflags = "2.6"
 bytemuck = { version = "1.16", features = ["derive"] }
 cfg_aliases = "0.1"
 cfg-if = "1"
@@ -91,6 +92,7 @@ getrandom = "0.2"
 glam = "0.28"
 heck = "0.5.0"
 image = { version = "0.24", default-features = false, features = ["png"] }
+indexmap = "2"
 itertools = { version = "0.10.5" }
 ktx2 = "0.3"
 libc = "0.2"
@@ -103,7 +105,7 @@ nanorand = { version = "0.7", default-features = false, features = ["wyrand"] }
 noise = { version = "0.8", git = "https://github.com/Razaekel/noise-rs.git", rev = "c6942d4fb70af26db4441edcf41f90fa115333f2" }
 nv-flip = "0.1"
 obj = "0.10"
-once_cell = "1"
+once_cell = "1.19.0"
 parking_lot = ">=0.11, <0.13" # parking_lot 0.12 switches from `winapi` to `windows`; permit either
 pico-args = { version = "0.5.0", features = [
     "eq-separator",
@@ -124,7 +126,7 @@ smallvec = "1"
 static_assertions = "1.1.0"
 strum = { version = "0.25.0", features = ["derive"] }
 tracy-client = "0.17"
-thiserror = "1"
+thiserror = "1.0.63"
 wgpu = { version = "22.0.0", path = "./wgpu", default-features = false }
 wgpu-core = { version = "22.0.0", path = "./wgpu-core" }
 wgpu-macros = { version = "22.0.0", path = "./wgpu-macros" }
@@ -146,7 +148,7 @@ gpu-descriptor = "0.3"
 
 # DX dependencies
 bit-set = "0.8"
-gpu-allocator = { version = "0.26", default-features = false, features = [
+gpu-allocator = { version = "0.27", default-features = false, features = [
     "d3d12",
     "public-winapi",
 ] }
@@ -159,6 +161,7 @@ hassle-rs = "0.11.0"
 khronos-egl = "6"
 glow = "0.14.0"
 glutin = "0.29.1"
+glutin_wgl_sys = "0.6"
 
 # DX and GLES dependencies
 windows = { version = "0.58", default-features = false }
@@ -183,6 +186,9 @@ deno_webgpu = { version = "0.118.0", path = "./deno_webgpu" }
 tokio = "1.38.1"
 termcolor = "1.4.1"
 
+# android dependencies
+ndk-sys = "0.5.0"
+
 [patch."https://github.com/gfx-rs/naga"]
 
 [patch."https://github.com/zakarumych/gpu-descriptor"]
diff --git a/naga-cli/Cargo.toml b/naga-cli/Cargo.toml
index fb999c495a..e9abb82d26 100644
--- a/naga-cli/Cargo.toml
+++ b/naga-cli/Cargo.toml
@@ -18,10 +18,10 @@ doc = false
 test = false
 
 [dependencies]
-bincode = "1"
-codespan-reporting = "0.11"
-env_logger = "0.11"
-argh = "0.1.5"
+bincode.workspace = true
+codespan-reporting.workspace = true
+env_logger.workspace = true
+argh.workspace = true
 anyhow.workspace = true
 
 [dependencies.naga]
diff --git a/naga/Cargo.toml b/naga/Cargo.toml
index cf9f14373c..9a4182bc7e 100644
--- a/naga/Cargo.toml
+++ b/naga/Cargo.toml
@@ -59,31 +59,31 @@ compact = []
 
 [dependencies]
 arbitrary = { version = "1.3", features = ["derive"], optional = true }
-bitflags = "2.6"
-bit-set = "0.8"
+arrayvec.workspace = true
+bitflags.workspace = true
+bit-set.workspace = true
 termcolor = { version = "1.4.1" }
 # remove termcolor dep when updating to the next version of codespan-reporting
 # termcolor minimum version was wrong and was fixed in
 # https://github.com/brendanzab/codespan/commit/e99c867339a877731437e7ee6a903a3d03b5439e
 codespan-reporting = { version = "0.11.0" }
-rustc-hash = "1.1.0"
-indexmap = "2"
+rustc-hash.workspace = true
+indexmap.workspace = true
 log = "0.4"
 spirv = { version = "0.3", optional = true }
-thiserror = "1.0.63"
+thiserror.workspace = true
 serde = { version = "1.0.204", features = ["derive"], optional = true }
 petgraph = { version = "0.6", optional = true }
 pp-rs = { version = "0.2.1", optional = true }
 hexf-parse = { version = "0.2.1", optional = true }
 unicode-xid = { version = "0.2.3", optional = true }
-arrayvec.workspace = true
 
 [build-dependencies]
 cfg_aliases.workspace = true
 
 [dev-dependencies]
 diff = "0.1"
-env_logger = "0.11"
+env_logger.workspace = true
 # This _cannot_ have a version specified. If it does, crates.io will look
 # for a version of the package on crates when we publish naga. Path dependencies
 # are allowed through though.
@@ -93,5 +93,5 @@ hlsl-snapshots = { path = "./hlsl-snapshots" }
 # incompatible with our tests because we do a syntactic diff and not a semantic one.
 ron = "0.8.0"
 rspirv = { version = "0.11", git = "https://github.com/gfx-rs/rspirv", rev = "b969f175d5663258b4891e44b76c1544da9661ab" }
-serde = { version = "1.0", features = ["derive"] }
+serde = { workspace = true, features = ["derive"] }
 spirv = { version = "0.3", features = ["deserialize"] }
diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml
index d6fe534629..22d813c4cb 100644
--- a/wgpu-core/Cargo.toml
+++ b/wgpu-core/Cargo.toml
@@ -103,23 +103,22 @@ gles = ["hal/gles"]
 dx12 = ["hal/dx12"]
 
 [dependencies]
-arrayvec = "0.7"
-bit-vec = "0.8"
-bitflags = "2"
-bytemuck = { version = "1.16", optional = true }
+arrayvec.workspace = true
+bit-vec.workspace = true
+bitflags.workspace = true
+bytemuck = { workspace = true, optional = true }
 document-features.workspace = true
-indexmap = "2"
-log = "0.4"
-once_cell = "1"
-# parking_lot 0.12 switches from `winapi` to `windows`; permit either
-parking_lot = ">=0.11, <0.13"
-profiling = { version = "1", default-features = false }
-raw-window-handle = { version = "0.6", optional = true }
-ron = { version = "0.8", optional = true }
-rustc-hash = "1.1"
-serde = { version = "1", features = ["derive"], optional = true }
-smallvec = "1"
-thiserror = "1"
+indexmap.workspace = true
+log.workspace = true
+once_cell.workspace = true
+parking_lot.workspace = true
+profiling = { workspace = true, default-features = false }
+raw-window-handle = { workspace = true, optional = true }
+ron = { workspace = true, optional = true }
+rustc-hash.workspace = true
+serde = { workspace = true, features = ["derive"], optional = true }
+smallvec.workspace = true
+thiserror.workspace = true
 
 [dependencies.naga]
 path = "../naga"
diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml
index 19effa8837..eedd027bfe 100644
--- a/wgpu-hal/Cargo.toml
+++ b/wgpu-hal/Cargo.toml
@@ -110,20 +110,20 @@ name = "raw-gles"
 required-features = ["gles"]
 
 [dependencies]
-bitflags = "2"
-parking_lot = ">=0.11, <0.13"
-profiling = { version = "1", default-features = false }
-raw-window-handle = "0.6"
-thiserror = "1"
-once_cell = "1.19.0"
+bitflags.workspace = true
+parking_lot.workspace = true
+profiling = { workspace = true, default-features = false }
+raw-window-handle.workspace = true
+thiserror.workspace = true
+once_cell.workspace = true
 
 # backends common
-arrayvec = "0.7"
-rustc-hash = "1.1"
-log = "0.4"
+arrayvec.workspace = true
+rustc-hash.workspace = true
+log.workspace = true
 
 # backend: Gles
-glow = { version = "0.14.0", optional = true }
+glow = { workspace = true, optional = true }
 
 [dependencies.wgt]
 package = "wgpu-types"
@@ -132,33 +132,31 @@ version = "22.0.0"
 
 [target.'cfg(not(target_arch = "wasm32"))'.dependencies]
 # backend: Vulkan
-ash = { version = "0.38.0", optional = true }
-gpu-alloc = { version = "0.6", optional = true }
-gpu-descriptor = { version = "0.3", optional = true }
-smallvec = { version = "1", optional = true, features = ["union"] }
+ash = { workspace = true, optional = true }
+gpu-alloc = { workspace = true, optional = true }
+gpu-descriptor = { workspace = true, optional = true }
+smallvec = { workspace = true, optional = true, features = ["union"] }
 
-khronos-egl = { version = "6", features = ["dynamic"], optional = true }
-libloading = { version = ">=0.7, <0.9", optional = true }
-renderdoc-sys = { version = "1.1.0", optional = true }
+khronos-egl = { workspace = true, features = ["dynamic"], optional = true }
+libloading = { workspace = true, optional = true }
+renderdoc-sys = { workspace = true, optional = true }
 
 [target.'cfg(target_os = "emscripten")'.dependencies]
-khronos-egl = { version = "6", features = ["static", "no-pkg-config"] }
+khronos-egl = { workspace = true, features = ["static", "no-pkg-config"] }
 #Note: it's unused by emscripten, but we keep it to have single code base in egl.rs
-libloading = { version = ">=0.7, <0.9", optional = true }
+libloading = { workspace = true, optional = true }
 
 [target.'cfg(windows)'.dependencies]
 # backend: Dx12 and Gles
 windows = { workspace = true, optional = true }
 # backend: Dx12
-bit-set = { version = "0.8", optional = true }
-range-alloc = { version = "0.1", optional = true }
-gpu-allocator = { version = "0.27", default-features = false, features = [
-    "d3d12",
-    "public-winapi",
-], optional = true }
-hassle-rs = { version = "0.11", optional = true }
+bit-set = { workspace = true, optional = true }
+range-alloc = { workspace = true, optional = true }
+gpu-allocator = { workspace = true, optional = true }
+hassle-rs = { workspace = true, optional = true }
+
 # backend: Gles
-glutin_wgl_sys = { version = "0.6", optional = true }
+glutin_wgl_sys = { workspace = true, optional = true }
 
 winapi = { version = "0.3", features = [
     "profileapi",
@@ -172,28 +170,28 @@ d3d12 = { path = "../d3d12/", version = "22.0.0", optional = true, features = [
 
 [target.'cfg(any(target_os="macos", target_os="ios"))'.dependencies]
 # backend: Metal
-block = { version = "0.1", optional = true }
+block = { workspace = true, optional = true }
 
-metal = { version = "0.29.0" }
-objc = "0.2.5"
-core-graphics-types = "0.1"
+metal.workspace = true
+objc.workspace = true
+core-graphics-types.workspace = true
 
 [target.'cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))'.dependencies]
-wasm-bindgen = "0.2.87"
-web-sys = { version = "0.3.69", features = [
+wasm-bindgen.workspace = true
+web-sys = { workspace = true, features = [
     "Window",
     "HtmlCanvasElement",
     "WebGl2RenderingContext",
     "OffscreenCanvas",
 ] }
-js-sys = "0.3.69"
+js-sys.workspace = true
 
 [target.'cfg(unix)'.dependencies]
-libc = "0.2"
+libc.workspace = true
 
 [target.'cfg(target_os = "android")'.dependencies]
-android_system_properties = { version = "0.1.1", optional = true }
-ndk-sys = { version = "0.5.0", optional = true }
+android_system_properties = { workspace = true, optional = true }
+ndk-sys = { workspace = true, optional = true }
 
 [dependencies.naga]
 path = "../naga"
@@ -209,12 +207,10 @@ version = "22.0.0"
 features = ["wgsl-in"]
 
 [dev-dependencies]
-cfg-if = "1"
-env_logger = "0.11"
+cfg-if.workspace = true
+env_logger.workspace = true
 glam.workspace = true # for ray-traced-triangle example
-winit = { version = "0.29", features = [
-    "android-native-activity",
-] } # for "halmark" example
+winit.workspace = true # for "halmark" example
 
 [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies]
-glutin = "0.29.1" # for "gles" example
+glutin.workspace = true # for "gles" example
diff --git a/wgpu-types/Cargo.toml b/wgpu-types/Cargo.toml
index 6c8f284896..387e41a475 100644
--- a/wgpu-types/Cargo.toml
+++ b/wgpu-types/Cargo.toml
@@ -35,12 +35,12 @@ serde = ["dep:serde"]
 counters = []
 
 [dependencies]
-bitflags = "2"
-serde = { version = "1", features = ["derive"], optional = true }
+bitflags.workspace = true
+serde = { workspace = true, features = ["derive"], optional = true }
 
 [target.'cfg(target_arch = "wasm32")'.dependencies]
-js-sys = "0.3.69"
-web-sys = { version = "0.3.69", features = [
+js-sys.workspace = true
+web-sys = { workspace = true, features = [
     "ImageBitmap",
     "HtmlVideoElement",
     "HtmlCanvasElement",
@@ -48,5 +48,5 @@ web-sys = { version = "0.3.69", features = [
 ] }
 
 [dev-dependencies]
-serde = { version = "1", features = ["derive"] }
-serde_json = "1.0.120"
+serde = { workspace = true, features = ["derive"] }
+serde_json.workspace = true

From 339ecf37da7546e3bfd234e75f58aeabee425663 Mon Sep 17 00:00:00 2001
From: Matthew Wong <110081332+matthew-wong1@users.noreply.github.com>
Date: Fri, 26 Jul 2024 17:59:41 +0100
Subject: [PATCH 103/226] Fix error message in create_render_pass (#6041)

---
 CHANGELOG.md                  | 1 +
 wgpu/src/backend/wgpu_core.rs | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bf6f23104c..81beb96854 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -56,6 +56,7 @@ Bottom level categories:
 - Fix function for checking bind compatibility to error instead of panic. By @sagudev [#6012](https://github.com/gfx-rs/wgpu/pull/6012)
 - Deduplicate bind group layouts that are created from pipelines with "auto" layouts. By @teoxoy [#6049](https://github.com/gfx-rs/wgpu/pull/6049)
 - Fix crash when dropping the surface after the device. By @wumpf in [#6052](https://github.com/gfx-rs/wgpu/pull/6052)
+- Fix error message that is thrown in create_render_pass to no longer say `compute_pass`. By @matthew-wong1 [#6041](https://github.com/gfx-rs/wgpu/pull/6041)
 
 ### Dependency Updates
 
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index b7560268e9..cc4ad9b997 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -1975,7 +1975,7 @@ impl crate::Context for ContextWgpuCore {
                 &encoder_data.error_sink,
                 cause,
                 desc.label,
-                "CommandEncoder::begin_compute_pass",
+                "CommandEncoder::begin_render_pass",
             );
         }
 

From ccd6d2ca484fff182bc69e35af3de4fd878e732f Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 26 Jun 2024 10:00:28 +0200
Subject: [PATCH 104/226] remove `BoundsCheckPolicies.image_store`

---
 naga-cli/src/bin/naga.rs                      |  11 -
 naga/src/back/msl/writer.rs                   |  37 ---
 naga/src/back/spv/image.rs                    |  33 +--
 naga/src/proc/index.rs                        |  23 +-
 naga/tests/in/binding-arrays.param.ron        |   1 -
 .../in/bounds-check-image-restrict.param.ron  |   1 -
 .../in/bounds-check-image-rzsw.param.ron      |   1 -
 naga/tests/in/pointers.param.ron              |   1 -
 naga/tests/in/policy-mix.param.ron            |   1 -
 naga/tests/in/resource-binding-map.param.ron  |   1 -
 naga/tests/out/msl/binding-arrays.msl         |  12 +-
 .../out/msl/bounds-check-image-restrict.msl   |  10 +-
 .../tests/out/msl/bounds-check-image-rzsw.msl |  20 +-
 naga/tests/out/spv/binding-arrays.spvasm      |  78 +++---
 .../spv/bounds-check-image-restrict.spvasm    | 209 ++++++++--------
 .../out/spv/bounds-check-image-rzsw.spvasm    | 227 ++++++++----------
 wgpu-hal/src/gles/device.rs                   |   1 -
 wgpu-hal/src/metal/device.rs                  |   1 -
 wgpu-hal/src/vulkan/adapter.rs                |   1 -
 wgpu-hal/src/vulkan/device.rs                 |   2 -
 20 files changed, 243 insertions(+), 428 deletions(-)

diff --git a/naga-cli/src/bin/naga.rs b/naga-cli/src/bin/naga.rs
index 97d947973e..002c6dd664 100644
--- a/naga-cli/src/bin/naga.rs
+++ b/naga-cli/src/bin/naga.rs
@@ -38,13 +38,6 @@ struct Args {
     #[argh(option)]
     image_load_bounds_check_policy: Option<BoundsCheckPolicyArg>,
 
-    /// what policy to use for texture stores bounds checking.
-    ///
-    /// Possible values are the same as for `index-bounds-check-policy`. If
-    /// omitted, defaults to the index bounds check policy.
-    #[argh(option)]
-    image_store_bounds_check_policy: Option<BoundsCheckPolicyArg>,
-
     /// directory to dump the SPIR-V block context dump to
     #[argh(option)]
     block_ctx_dir: Option<String>,
@@ -409,10 +402,6 @@ fn run() -> anyhow::Result<()> {
         Some(arg) => arg.0,
         None => params.bounds_check_policies.index,
     };
-    params.bounds_check_policies.image_store = match args.image_store_bounds_check_policy {
-        Some(arg) => arg.0,
-        None => params.bounds_check_policies.index,
-    };
     params.overrides = args
         .overrides
         .iter()
diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs
index ad3dd69ebe..48f862f8ba 100644
--- a/naga/src/back/msl/writer.rs
+++ b/naga/src/back/msl/writer.rs
@@ -1063,43 +1063,6 @@ impl<W: Write> Writer<W> {
         address: &TexelAddress,
         value: Handle<crate::Expression>,
         context: &StatementContext,
-    ) -> BackendResult {
-        match context.expression.policies.image_store {
-            proc::BoundsCheckPolicy::Restrict => {
-                // We don't have a restricted level value, because we don't
-                // support writes to mipmapped textures.
-                debug_assert!(address.level.is_none());
-
-                write!(self.out, "{level}")?;
-                self.put_expression(image, &context.expression, false)?;
-                write!(self.out, ".write(")?;
-                self.put_expression(value, &context.expression, true)?;
-                write!(self.out, ", ")?;
-                self.put_restricted_texel_address(image, address, &context.expression)?;
-                writeln!(self.out, ");")?;
-            }
-            proc::BoundsCheckPolicy::ReadZeroSkipWrite => {
-                write!(self.out, "{level}if (")?;
-                self.put_image_access_bounds_check(image, address, &context.expression)?;
-                writeln!(self.out, ") {{")?;
-                self.put_unchecked_image_store(level.next(), image, address, value, context)?;
-                writeln!(self.out, "{level}}}")?;
-            }
-            proc::BoundsCheckPolicy::Unchecked => {
-                self.put_unchecked_image_store(level, image, address, value, context)?;
-            }
-        }
-
-        Ok(())
-    }
-
-    fn put_unchecked_image_store(
-        &mut self,
-        level: back::Level,
-        image: Handle<crate::Expression>,
-        address: &TexelAddress,
-        value: Handle<crate::Expression>,
-        context: &StatementContext,
     ) -> BackendResult {
         write!(self.out, "{level}")?;
         self.put_expression(image, &context.expression, false)?;
diff --git a/naga/src/back/spv/image.rs b/naga/src/back/spv/image.rs
index 3011ee4d13..769971d136 100644
--- a/naga/src/back/spv/image.rs
+++ b/naga/src/back/spv/image.rs
@@ -1178,32 +1178,13 @@ impl<'w> BlockContext<'w> {
             _ => {}
         }
 
-        match self.writer.bounds_check_policies.image_store {
-            crate::proc::BoundsCheckPolicy::Restrict => {
-                let (coords, _, _) =
-                    self.write_restricted_coordinates(image_id, coordinates, None, None, block)?;
-                write.generate(&mut self.writer.id_gen, coords, None, None, block);
-            }
-            crate::proc::BoundsCheckPolicy::ReadZeroSkipWrite => {
-                self.write_conditional_image_access(
-                    image_id,
-                    coordinates,
-                    None,
-                    None,
-                    block,
-                    &write,
-                )?;
-            }
-            crate::proc::BoundsCheckPolicy::Unchecked => {
-                write.generate(
-                    &mut self.writer.id_gen,
-                    coordinates.value_id,
-                    None,
-                    None,
-                    block,
-                );
-            }
-        }
+        write.generate(
+            &mut self.writer.id_gen,
+            coordinates.value_id,
+            None,
+            None,
+            block,
+        );
 
         Ok(())
     }
diff --git a/naga/src/proc/index.rs b/naga/src/proc/index.rs
index 48b987ce85..555b08d2c3 100644
--- a/naga/src/proc/index.rs
+++ b/naga/src/proc/index.rs
@@ -112,21 +112,15 @@ pub struct BoundsCheckPolicies {
     /// This controls the behavior of [`ImageLoad`] expressions when a coordinate,
     /// texture array index, level of detail, or multisampled sample number is out of range.
     ///
-    /// [`ImageLoad`]: crate::Expression::ImageLoad
-    #[cfg_attr(feature = "deserialize", serde(default))]
-    pub image_load: BoundsCheckPolicy,
-
-    /// How should the generated code handle image texel stores that are out
-    /// of range?
-    ///
-    /// This controls the behavior of [`ImageStore`] statements when a coordinate,
-    /// texture array index, level of detail, or multisampled sample number is out of range.
-    ///
-    /// This policy should't be needed since all backends should ignore OOB writes.
+    /// There is no corresponding policy for [`ImageStore`] statements. All the
+    /// platforms we support already discard out-of-bounds image stores,
+    /// effectively implementing the "skip write" part of [`ReadZeroSkipWrite`].
     ///
+    /// [`ImageLoad`]: crate::Expression::ImageLoad
     /// [`ImageStore`]: crate::Statement::ImageStore
+    /// [`ReadZeroSkipWrite`]: BoundsCheckPolicy::ReadZeroSkipWrite
     #[cfg_attr(feature = "deserialize", serde(default))]
-    pub image_store: BoundsCheckPolicy,
+    pub image_load: BoundsCheckPolicy,
 
     /// How should the generated code handle binding array indexes that are out of bounds.
     #[cfg_attr(feature = "deserialize", serde(default))]
@@ -173,10 +167,7 @@ impl BoundsCheckPolicies {
 
     /// Return `true` if any of `self`'s policies are `policy`.
     pub fn contains(&self, policy: BoundsCheckPolicy) -> bool {
-        self.index == policy
-            || self.buffer == policy
-            || self.image_load == policy
-            || self.image_store == policy
+        self.index == policy || self.buffer == policy || self.image_load == policy
     }
 }
 
diff --git a/naga/tests/in/binding-arrays.param.ron b/naga/tests/in/binding-arrays.param.ron
index 39d6c03664..56a4983709 100644
--- a/naga/tests/in/binding-arrays.param.ron
+++ b/naga/tests/in/binding-arrays.param.ron
@@ -42,6 +42,5 @@
 		index: ReadZeroSkipWrite,
 		buffer: ReadZeroSkipWrite,
 		image_load: ReadZeroSkipWrite,
-		image_store: ReadZeroSkipWrite,
 	)
 )
diff --git a/naga/tests/in/bounds-check-image-restrict.param.ron b/naga/tests/in/bounds-check-image-restrict.param.ron
index d7ff0f006b..19f7399068 100644
--- a/naga/tests/in/bounds-check-image-restrict.param.ron
+++ b/naga/tests/in/bounds-check-image-restrict.param.ron
@@ -1,7 +1,6 @@
 (
 	bounds_check_policies: (
 		image_load: Restrict,
-		image_store: Restrict,
 	),
 	spv: (
 		version: (1, 1),
diff --git a/naga/tests/in/bounds-check-image-rzsw.param.ron b/naga/tests/in/bounds-check-image-rzsw.param.ron
index b256790e15..e818d7a3ba 100644
--- a/naga/tests/in/bounds-check-image-rzsw.param.ron
+++ b/naga/tests/in/bounds-check-image-rzsw.param.ron
@@ -1,7 +1,6 @@
 (
 	bounds_check_policies: (
 		image_load: ReadZeroSkipWrite,
-		image_store: ReadZeroSkipWrite,
 	),
 	spv: (
 		version: (1, 1),
diff --git a/naga/tests/in/pointers.param.ron b/naga/tests/in/pointers.param.ron
index fc40272838..c3b4d8880b 100644
--- a/naga/tests/in/pointers.param.ron
+++ b/naga/tests/in/pointers.param.ron
@@ -1,7 +1,6 @@
 (
 	bounds_check_policies: (
 		image_load: ReadZeroSkipWrite,
-		image_store: ReadZeroSkipWrite,
 	),
 	spv: (
 		version: (1, 2),
diff --git a/naga/tests/in/policy-mix.param.ron b/naga/tests/in/policy-mix.param.ron
index e5469157ed..31e80e4c52 100644
--- a/naga/tests/in/policy-mix.param.ron
+++ b/naga/tests/in/policy-mix.param.ron
@@ -3,7 +3,6 @@
 		index: Restrict,
 		buffer: Unchecked,
 		image_load: ReadZeroSkipWrite,
-		image_store: ReadZeroSkipWrite,
 	),
 	spv: (
 		version: (1, 1),
diff --git a/naga/tests/in/resource-binding-map.param.ron b/naga/tests/in/resource-binding-map.param.ron
index 25e7b054b0..a700a33f2a 100644
--- a/naga/tests/in/resource-binding-map.param.ron
+++ b/naga/tests/in/resource-binding-map.param.ron
@@ -49,6 +49,5 @@
 		index: ReadZeroSkipWrite,
 		buffer: ReadZeroSkipWrite,
 		image_load: ReadZeroSkipWrite,
-		image_store: ReadZeroSkipWrite,
 	)
 )
diff --git a/naga/tests/out/msl/binding-arrays.msl b/naga/tests/out/msl/binding-arrays.msl
index f3548c9e79..75f787a9f2 100644
--- a/naga/tests/out/msl/binding-arrays.msl
+++ b/naga/tests/out/msl/binding-arrays.msl
@@ -150,17 +150,11 @@ fragment main_Output main_(
     metal::float4 _e278 = v4_;
     v4_ = _e278 + _e277;
     metal::float4 _e282 = v4_;
-    if (metal::all(metal::uint2(pix) < metal::uint2(texture_array_storage[0].get_width(), texture_array_storage[0].get_height()))) {
-        texture_array_storage[0].write(_e282, metal::uint2(pix));
-    }
+    texture_array_storage[0].write(_e282, metal::uint2(pix));
     metal::float4 _e285 = v4_;
-    if (metal::all(metal::uint2(pix) < metal::uint2(texture_array_storage[uniform_index].get_width(), texture_array_storage[uniform_index].get_height()))) {
-        texture_array_storage[uniform_index].write(_e285, metal::uint2(pix));
-    }
+    texture_array_storage[uniform_index].write(_e285, metal::uint2(pix));
     metal::float4 _e288 = v4_;
-    if (metal::all(metal::uint2(pix) < metal::uint2(texture_array_storage[non_uniform_index].get_width(), texture_array_storage[non_uniform_index].get_height()))) {
-        texture_array_storage[non_uniform_index].write(_e288, metal::uint2(pix));
-    }
+    texture_array_storage[non_uniform_index].write(_e288, metal::uint2(pix));
     metal::uint2 _e289 = u2_;
     uint _e290 = u1_;
     metal::float2 v2_ = static_cast<metal::float2>(_e289 + metal::uint2(_e290));
diff --git a/naga/tests/out/msl/bounds-check-image-restrict.msl b/naga/tests/out/msl/bounds-check-image-restrict.msl
index 6a3c43f0ce..138c0f6455 100644
--- a/naga/tests/out/msl/bounds-check-image-restrict.msl
+++ b/naga/tests/out/msl/bounds-check-image-restrict.msl
@@ -111,7 +111,7 @@ void test_textureStore_1d(
     metal::float4 value,
     metal::texture1d<float, metal::access::write> image_storage_1d
 ) {
-    image_storage_1d.write(value, metal::min(uint(coords_10), image_storage_1d.get_width() - 1));
+    image_storage_1d.write(value, uint(coords_10));
     return;
 }
 
@@ -120,7 +120,7 @@ void test_textureStore_2d(
     metal::float4 value_1,
     metal::texture2d<float, metal::access::write> image_storage_2d
 ) {
-    image_storage_2d.write(value_1, metal::min(metal::uint2(coords_11), metal::uint2(image_storage_2d.get_width(), image_storage_2d.get_height()) - 1));
+    image_storage_2d.write(value_1, metal::uint2(coords_11));
     return;
 }
 
@@ -130,7 +130,7 @@ void test_textureStore_2d_array_u(
     metal::float4 value_2,
     metal::texture2d_array<float, metal::access::write> image_storage_2d_array
 ) {
-    image_storage_2d_array.write(value_2, metal::min(metal::uint2(coords_12), metal::uint2(image_storage_2d_array.get_width(), image_storage_2d_array.get_height()) - 1), metal::min(uint(array_index), image_storage_2d_array.get_array_size() - 1));
+    image_storage_2d_array.write(value_2, metal::uint2(coords_12), array_index);
     return;
 }
 
@@ -140,7 +140,7 @@ void test_textureStore_2d_array_s(
     metal::float4 value_3,
     metal::texture2d_array<float, metal::access::write> image_storage_2d_array
 ) {
-    image_storage_2d_array.write(value_3, metal::min(metal::uint2(coords_13), metal::uint2(image_storage_2d_array.get_width(), image_storage_2d_array.get_height()) - 1), metal::min(uint(array_index_1), image_storage_2d_array.get_array_size() - 1));
+    image_storage_2d_array.write(value_3, metal::uint2(coords_13), array_index_1);
     return;
 }
 
@@ -149,7 +149,7 @@ void test_textureStore_3d(
     metal::float4 value_4,
     metal::texture3d<float, metal::access::write> image_storage_3d
 ) {
-    image_storage_3d.write(value_4, metal::min(metal::uint3(coords_14), metal::uint3(image_storage_3d.get_width(), image_storage_3d.get_height(), image_storage_3d.get_depth()) - 1));
+    image_storage_3d.write(value_4, metal::uint3(coords_14));
     return;
 }
 
diff --git a/naga/tests/out/msl/bounds-check-image-rzsw.msl b/naga/tests/out/msl/bounds-check-image-rzsw.msl
index 5db0c9df94..f73b8e3e32 100644
--- a/naga/tests/out/msl/bounds-check-image-rzsw.msl
+++ b/naga/tests/out/msl/bounds-check-image-rzsw.msl
@@ -110,9 +110,7 @@ void test_textureStore_1d(
     metal::float4 value,
     metal::texture1d<float, metal::access::write> image_storage_1d
 ) {
-    if (uint(coords_10) < image_storage_1d.get_width()) {
-        image_storage_1d.write(value, uint(coords_10));
-    }
+    image_storage_1d.write(value, uint(coords_10));
     return;
 }
 
@@ -121,9 +119,7 @@ void test_textureStore_2d(
     metal::float4 value_1,
     metal::texture2d<float, metal::access::write> image_storage_2d
 ) {
-    if (metal::all(metal::uint2(coords_11) < metal::uint2(image_storage_2d.get_width(), image_storage_2d.get_height()))) {
-        image_storage_2d.write(value_1, metal::uint2(coords_11));
-    }
+    image_storage_2d.write(value_1, metal::uint2(coords_11));
     return;
 }
 
@@ -133,9 +129,7 @@ void test_textureStore_2d_array_u(
     metal::float4 value_2,
     metal::texture2d_array<float, metal::access::write> image_storage_2d_array
 ) {
-    if (uint(array_index) < image_storage_2d_array.get_array_size() && metal::all(metal::uint2(coords_12) < metal::uint2(image_storage_2d_array.get_width(), image_storage_2d_array.get_height()))) {
-        image_storage_2d_array.write(value_2, metal::uint2(coords_12), array_index);
-    }
+    image_storage_2d_array.write(value_2, metal::uint2(coords_12), array_index);
     return;
 }
 
@@ -145,9 +139,7 @@ void test_textureStore_2d_array_s(
     metal::float4 value_3,
     metal::texture2d_array<float, metal::access::write> image_storage_2d_array
 ) {
-    if (uint(array_index_1) < image_storage_2d_array.get_array_size() && metal::all(metal::uint2(coords_13) < metal::uint2(image_storage_2d_array.get_width(), image_storage_2d_array.get_height()))) {
-        image_storage_2d_array.write(value_3, metal::uint2(coords_13), array_index_1);
-    }
+    image_storage_2d_array.write(value_3, metal::uint2(coords_13), array_index_1);
     return;
 }
 
@@ -156,9 +148,7 @@ void test_textureStore_3d(
     metal::float4 value_4,
     metal::texture3d<float, metal::access::write> image_storage_3d
 ) {
-    if (metal::all(metal::uint3(coords_14) < metal::uint3(image_storage_3d.get_width(), image_storage_3d.get_height(), image_storage_3d.get_depth()))) {
-        image_storage_3d.write(value_4, metal::uint3(coords_14));
-    }
+    image_storage_3d.write(value_4, metal::uint3(coords_14));
     return;
 }
 
diff --git a/naga/tests/out/spv/binding-arrays.spvasm b/naga/tests/out/spv/binding-arrays.spvasm
index 143ee269af..af75dca492 100644
--- a/naga/tests/out/spv/binding-arrays.spvasm
+++ b/naga/tests/out/spv/binding-arrays.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.1
 ; Generator: rspirv
-; Bound: 428
+; Bound: 413
 OpCapability Shader
 OpCapability ImageQuery
 OpCapability ShaderNonUniform
@@ -77,8 +77,8 @@ OpDecorate %380 NonUniform
 OpDecorate %381 NonUniform
 OpDecorate %382 NonUniform
 OpDecorate %383 NonUniform
-OpDecorate %405 NonUniform
-OpDecorate %406 NonUniform
+OpDecorate %395 NonUniform
+OpDecorate %396 NonUniform
 %2 = OpTypeVoid
 %3 = OpTypeInt 32 0
 %4 = OpTypeStruct %3
@@ -521,54 +521,30 @@ OpStore %72 %387
 %389 = OpAccessChain  %388  %36 %55
 %390 = OpLoad  %16  %389
 %391 = OpLoad  %22  %72
-%392 = OpImageQuerySize  %64  %390
-%393 = OpULessThan  %157  %65 %392
-%394 = OpAll  %150  %393
-OpSelectionMerge %395 None
-OpBranchConditional %394 %396 %395
-%396 = OpLabel
 OpImageWrite %390 %65 %391
-OpBranch %395
-%395 = OpLabel
-%397 = OpAccessChain  %388  %36 %77
-%398 = OpLoad  %16  %397
-%399 = OpLoad  %22  %72
-%400 = OpImageQuerySize  %64  %398
-%401 = OpULessThan  %157  %65 %400
-%402 = OpAll  %150  %401
-OpSelectionMerge %403 None
-OpBranchConditional %402 %404 %403
-%404 = OpLabel
-OpImageWrite %398 %65 %399
-OpBranch %403
-%403 = OpLabel
-%405 = OpAccessChain  %388  %36 %78
-%406 = OpLoad  %16  %405
-%407 = OpLoad  %22  %72
-%408 = OpImageQuerySize  %64  %406
-%409 = OpULessThan  %157  %65 %408
-%410 = OpAll  %150  %409
-OpSelectionMerge %411 None
-OpBranchConditional %410 %412 %411
-%412 = OpLabel
-OpImageWrite %406 %65 %407
-OpBranch %411
-%411 = OpLabel
-%413 = OpLoad  %23  %68
-%414 = OpLoad  %3  %66
-%415 = OpCompositeConstruct  %23  %414 %414
-%416 = OpIAdd  %23  %413 %415
-%417 = OpConvertUToF  %60  %416
-%418 = OpLoad  %22  %72
-%419 = OpCompositeExtract  %6  %417 0
-%420 = OpCompositeExtract  %6  %417 1
-%421 = OpCompositeExtract  %6  %417 0
-%422 = OpCompositeExtract  %6  %417 1
-%423 = OpCompositeConstruct  %22  %419 %420 %421 %422
-%424 = OpFAdd  %22  %418 %423
-%425 = OpLoad  %6  %70
-%426 = OpCompositeConstruct  %22  %425 %425 %425 %425
-%427 = OpFAdd  %22  %424 %426
-OpStore %50 %427
+%392 = OpAccessChain  %388  %36 %77
+%393 = OpLoad  %16  %392
+%394 = OpLoad  %22  %72
+OpImageWrite %393 %65 %394
+%395 = OpAccessChain  %388  %36 %78
+%396 = OpLoad  %16  %395
+%397 = OpLoad  %22  %72
+OpImageWrite %396 %65 %397
+%398 = OpLoad  %23  %68
+%399 = OpLoad  %3  %66
+%400 = OpCompositeConstruct  %23  %399 %399
+%401 = OpIAdd  %23  %398 %400
+%402 = OpConvertUToF  %60  %401
+%403 = OpLoad  %22  %72
+%404 = OpCompositeExtract  %6  %402 0
+%405 = OpCompositeExtract  %6  %402 1
+%406 = OpCompositeExtract  %6  %402 0
+%407 = OpCompositeExtract  %6  %402 1
+%408 = OpCompositeConstruct  %22  %404 %405 %406 %407
+%409 = OpFAdd  %22  %403 %408
+%410 = OpLoad  %6  %70
+%411 = OpCompositeConstruct  %22  %410 %410 %410 %410
+%412 = OpFAdd  %22  %409 %411
+OpStore %50 %412
 OpReturn
 OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/spv/bounds-check-image-restrict.spvasm b/naga/tests/out/spv/bounds-check-image-restrict.spvasm
index 038685a559..7837602e08 100644
--- a/naga/tests/out/spv/bounds-check-image-restrict.spvasm
+++ b/naga/tests/out/spv/bounds-check-image-restrict.spvasm
@@ -1,15 +1,15 @@
 ; SPIR-V
 ; Version: 1.1
 ; Generator: rspirv
-; Bound: 299
+; Bound: 280
 OpCapability Shader
 OpCapability Sampled1D
 OpCapability Image1D
 OpCapability ImageQuery
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
-OpEntryPoint Fragment %269 "fragment_shader" %267
-OpExecutionMode %269 OriginUpperLeft
+OpEntryPoint Fragment %250 "fragment_shader" %248
+OpExecutionMode %250 OriginUpperLeft
 OpName %21 "image_1d"
 OpName %23 "image_2d"
 OpName %25 "image_2d_array"
@@ -59,21 +59,21 @@ OpName %195 "test_textureLoad_depth_multisampled_2d"
 OpName %208 "coords"
 OpName %209 "value"
 OpName %210 "test_textureStore_1d"
-OpName %218 "coords"
-OpName %219 "value"
-OpName %220 "test_textureStore_2d"
-OpName %229 "coords"
-OpName %230 "array_index"
-OpName %231 "value"
-OpName %232 "test_textureStore_2d_array_u"
-OpName %243 "coords"
-OpName %244 "array_index"
-OpName %245 "value"
-OpName %246 "test_textureStore_2d_array_s"
-OpName %256 "coords"
-OpName %257 "value"
-OpName %258 "test_textureStore_3d"
-OpName %269 "fragment_shader"
+OpName %215 "coords"
+OpName %216 "value"
+OpName %217 "test_textureStore_2d"
+OpName %222 "coords"
+OpName %223 "array_index"
+OpName %224 "value"
+OpName %225 "test_textureStore_2d_array_u"
+OpName %232 "coords"
+OpName %233 "array_index"
+OpName %234 "value"
+OpName %235 "test_textureStore_2d_array_s"
+OpName %241 "coords"
+OpName %242 "value"
+OpName %243 "test_textureStore_3d"
+OpName %250 "fragment_shader"
 OpDecorate %21 DescriptorSet 0
 OpDecorate %21 Binding 0
 OpDecorate %23 DescriptorSet 0
@@ -102,7 +102,7 @@ OpDecorate %41 Binding 10
 OpDecorate %43 NonReadable
 OpDecorate %43 DescriptorSet 0
 OpDecorate %43 Binding 11
-OpDecorate %267 Location 0
+OpDecorate %248 Location 0
 %2 = OpTypeVoid
 %4 = OpTypeFloat 32
 %3 = OpTypeImage %4 1D 0 0 0 1 Unknown
@@ -165,24 +165,20 @@ OpDecorate %267 Location 0
 %187 = OpConstantComposite  %12  %53 %53 %53
 %202 = OpConstantComposite  %8  %53 %53
 %211 = OpTypeFunction %2 %5 %6
-%221 = OpTypeFunction %2 %8 %6
-%225 = OpConstantComposite  %8  %53 %53
-%233 = OpTypeFunction %2 %8 %10 %6
-%239 = OpConstantComposite  %12  %53 %53 %53
-%247 = OpTypeFunction %2 %8 %5 %6
-%252 = OpConstantComposite  %12  %53 %53 %53
-%259 = OpTypeFunction %2 %12 %6
-%263 = OpConstantComposite  %12  %53 %53 %53
-%268 = OpTypePointer Output %6
-%267 = OpVariable  %268  Output
-%270 = OpTypeFunction %2
-%280 = OpConstant  %5  0
-%281 = OpConstantNull  %8
-%282 = OpConstant  %10  0
-%283 = OpConstantNull  %12
-%284 = OpConstantNull  %6
-%285 = OpConstant  %4  0.0
-%286 = OpConstantComposite  %6  %285 %285 %285 %285
+%218 = OpTypeFunction %2 %8 %6
+%226 = OpTypeFunction %2 %8 %10 %6
+%236 = OpTypeFunction %2 %8 %5 %6
+%244 = OpTypeFunction %2 %12 %6
+%249 = OpTypePointer Output %6
+%248 = OpVariable  %249  Output
+%251 = OpTypeFunction %2
+%261 = OpConstant  %5  0
+%262 = OpConstantNull  %8
+%263 = OpConstant  %10  0
+%264 = OpConstantNull  %12
+%265 = OpConstantNull  %6
+%266 = OpConstant  %4  0.0
+%267 = OpConstantComposite  %6  %266 %266 %266 %266
 %48 = OpFunction  %6  None %49
 %46 = OpFunctionParameter  %5
 %47 = OpFunctionParameter  %5
@@ -364,93 +360,78 @@ OpFunctionEnd
 %212 = OpLoad  %17  %37
 OpBranch %213
 %213 = OpLabel
-%214 = OpImageQuerySize  %5  %212
-%215 = OpISub  %5  %214 %53
-%216 = OpExtInst  %5  %1 UMin %208 %215
-OpImageWrite %212 %216 %209
+OpImageWrite %212 %208 %209
 OpReturn
 OpFunctionEnd
-%220 = OpFunction  %2  None %221
-%218 = OpFunctionParameter  %8
-%219 = OpFunctionParameter  %6
-%217 = OpLabel
-%222 = OpLoad  %18  %39
-OpBranch %223
-%223 = OpLabel
-%224 = OpImageQuerySize  %8  %222
-%226 = OpISub  %8  %224 %225
-%227 = OpExtInst  %8  %1 UMin %218 %226
-OpImageWrite %222 %227 %219
+%217 = OpFunction  %2  None %218
+%215 = OpFunctionParameter  %8
+%216 = OpFunctionParameter  %6
+%214 = OpLabel
+%219 = OpLoad  %18  %39
+OpBranch %220
+%220 = OpLabel
+OpImageWrite %219 %215 %216
 OpReturn
 OpFunctionEnd
-%232 = OpFunction  %2  None %233
-%229 = OpFunctionParameter  %8
-%230 = OpFunctionParameter  %10
-%231 = OpFunctionParameter  %6
+%225 = OpFunction  %2  None %226
+%222 = OpFunctionParameter  %8
+%223 = OpFunctionParameter  %10
+%224 = OpFunctionParameter  %6
+%221 = OpLabel
+%227 = OpLoad  %19  %41
+OpBranch %228
 %228 = OpLabel
-%234 = OpLoad  %19  %41
-OpBranch %235
-%235 = OpLabel
-%236 = OpBitcast  %5  %230
-%237 = OpCompositeConstruct  %12  %229 %236
-%238 = OpImageQuerySize  %12  %234
-%240 = OpISub  %12  %238 %239
-%241 = OpExtInst  %12  %1 UMin %237 %240
-OpImageWrite %234 %241 %231
+%229 = OpBitcast  %5  %223
+%230 = OpCompositeConstruct  %12  %222 %229
+OpImageWrite %227 %230 %224
 OpReturn
 OpFunctionEnd
-%246 = OpFunction  %2  None %247
-%243 = OpFunctionParameter  %8
-%244 = OpFunctionParameter  %5
-%245 = OpFunctionParameter  %6
-%242 = OpLabel
-%248 = OpLoad  %19  %41
-OpBranch %249
-%249 = OpLabel
-%250 = OpCompositeConstruct  %12  %243 %244
-%251 = OpImageQuerySize  %12  %248
-%253 = OpISub  %12  %251 %252
-%254 = OpExtInst  %12  %1 UMin %250 %253
-OpImageWrite %248 %254 %245
+%235 = OpFunction  %2  None %236
+%232 = OpFunctionParameter  %8
+%233 = OpFunctionParameter  %5
+%234 = OpFunctionParameter  %6
+%231 = OpLabel
+%237 = OpLoad  %19  %41
+OpBranch %238
+%238 = OpLabel
+%239 = OpCompositeConstruct  %12  %232 %233
+OpImageWrite %237 %239 %234
 OpReturn
 OpFunctionEnd
-%258 = OpFunction  %2  None %259
-%256 = OpFunctionParameter  %12
-%257 = OpFunctionParameter  %6
-%255 = OpLabel
-%260 = OpLoad  %20  %43
-OpBranch %261
-%261 = OpLabel
-%262 = OpImageQuerySize  %12  %260
-%264 = OpISub  %12  %262 %263
-%265 = OpExtInst  %12  %1 UMin %256 %264
-OpImageWrite %260 %265 %257
+%243 = OpFunction  %2  None %244
+%241 = OpFunctionParameter  %12
+%242 = OpFunctionParameter  %6
+%240 = OpLabel
+%245 = OpLoad  %20  %43
+OpBranch %246
+%246 = OpLabel
+OpImageWrite %245 %241 %242
 OpReturn
 OpFunctionEnd
-%269 = OpFunction  %2  None %270
-%266 = OpLabel
-%271 = OpLoad  %3  %21
-%272 = OpLoad  %7  %23
-%273 = OpLoad  %9  %25
-%274 = OpLoad  %11  %27
-%275 = OpLoad  %13  %29
-%276 = OpLoad  %17  %37
-%277 = OpLoad  %18  %39
-%278 = OpLoad  %19  %41
-%279 = OpLoad  %20  %43
-OpBranch %287
-%287 = OpLabel
-%288 = OpFunctionCall  %6  %48 %280 %280
-%289 = OpFunctionCall  %6  %63 %281 %280
-%290 = OpFunctionCall  %6  %79 %281 %282 %280
-%291 = OpFunctionCall  %6  %97 %281 %280 %280
-%292 = OpFunctionCall  %6  %113 %283 %280
-%293 = OpFunctionCall  %6  %128 %281 %280
-%294 = OpFunctionCall  %2  %210 %280 %284
-%295 = OpFunctionCall  %2  %220 %281 %284
-%296 = OpFunctionCall  %2  %232 %281 %282 %284
-%297 = OpFunctionCall  %2  %246 %281 %280 %284
-%298 = OpFunctionCall  %2  %258 %283 %284
-OpStore %267 %286
+%250 = OpFunction  %2  None %251
+%247 = OpLabel
+%252 = OpLoad  %3  %21
+%253 = OpLoad  %7  %23
+%254 = OpLoad  %9  %25
+%255 = OpLoad  %11  %27
+%256 = OpLoad  %13  %29
+%257 = OpLoad  %17  %37
+%258 = OpLoad  %18  %39
+%259 = OpLoad  %19  %41
+%260 = OpLoad  %20  %43
+OpBranch %268
+%268 = OpLabel
+%269 = OpFunctionCall  %6  %48 %261 %261
+%270 = OpFunctionCall  %6  %63 %262 %261
+%271 = OpFunctionCall  %6  %79 %262 %263 %261
+%272 = OpFunctionCall  %6  %97 %262 %261 %261
+%273 = OpFunctionCall  %6  %113 %264 %261
+%274 = OpFunctionCall  %6  %128 %262 %261
+%275 = OpFunctionCall  %2  %210 %261 %265
+%276 = OpFunctionCall  %2  %217 %262 %265
+%277 = OpFunctionCall  %2  %225 %262 %263 %265
+%278 = OpFunctionCall  %2  %235 %262 %261 %265
+%279 = OpFunctionCall  %2  %243 %264 %265
+OpStore %248 %267
 OpReturn
 OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/spv/bounds-check-image-rzsw.spvasm b/naga/tests/out/spv/bounds-check-image-rzsw.spvasm
index a9eeb42047..9b8c091bba 100644
--- a/naga/tests/out/spv/bounds-check-image-rzsw.spvasm
+++ b/naga/tests/out/spv/bounds-check-image-rzsw.spvasm
@@ -1,15 +1,15 @@
 ; SPIR-V
 ; Version: 1.1
 ; Generator: rspirv
-; Bound: 326
+; Bound: 302
 OpCapability Shader
 OpCapability Sampled1D
 OpCapability Image1D
 OpCapability ImageQuery
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
-OpEntryPoint Fragment %297 "fragment_shader" %295
-OpExecutionMode %297 OriginUpperLeft
+OpEntryPoint Fragment %273 "fragment_shader" %271
+OpExecutionMode %273 OriginUpperLeft
 OpName %21 "image_1d"
 OpName %23 "image_2d"
 OpName %25 "image_2d_array"
@@ -59,21 +59,21 @@ OpName %216 "test_textureLoad_depth_multisampled_2d"
 OpName %231 "coords"
 OpName %232 "value"
 OpName %233 "test_textureStore_1d"
-OpName %242 "coords"
-OpName %243 "value"
-OpName %244 "test_textureStore_2d"
-OpName %254 "coords"
-OpName %255 "array_index"
-OpName %256 "value"
-OpName %257 "test_textureStore_2d_array_u"
-OpName %269 "coords"
-OpName %270 "array_index"
-OpName %271 "value"
-OpName %272 "test_textureStore_2d_array_s"
-OpName %283 "coords"
-OpName %284 "value"
-OpName %285 "test_textureStore_3d"
-OpName %297 "fragment_shader"
+OpName %238 "coords"
+OpName %239 "value"
+OpName %240 "test_textureStore_2d"
+OpName %245 "coords"
+OpName %246 "array_index"
+OpName %247 "value"
+OpName %248 "test_textureStore_2d_array_u"
+OpName %255 "coords"
+OpName %256 "array_index"
+OpName %257 "value"
+OpName %258 "test_textureStore_2d_array_s"
+OpName %264 "coords"
+OpName %265 "value"
+OpName %266 "test_textureStore_3d"
+OpName %273 "fragment_shader"
 OpDecorate %21 DescriptorSet 0
 OpDecorate %21 Binding 0
 OpDecorate %23 DescriptorSet 0
@@ -102,7 +102,7 @@ OpDecorate %41 Binding 10
 OpDecorate %43 NonReadable
 OpDecorate %43 DescriptorSet 0
 OpDecorate %43 Binding 11
-OpDecorate %295 Location 0
+OpDecorate %271 Location 0
 %2 = OpTypeVoid
 %4 = OpTypeFloat 32
 %3 = OpTypeImage %4 1D 0 0 0 1 Unknown
@@ -159,19 +159,19 @@ OpDecorate %295 Location 0
 %177 = OpTypeFunction %4 %8 %10 %5
 %198 = OpTypeFunction %4 %8 %5 %5
 %234 = OpTypeFunction %2 %5 %6
-%245 = OpTypeFunction %2 %8 %6
-%258 = OpTypeFunction %2 %8 %10 %6
-%273 = OpTypeFunction %2 %8 %5 %6
-%286 = OpTypeFunction %2 %12 %6
-%296 = OpTypePointer Output %6
-%295 = OpVariable  %296  Output
-%298 = OpTypeFunction %2
-%308 = OpConstant  %5  0
-%309 = OpConstantNull  %8
-%310 = OpConstant  %10  0
-%311 = OpConstantNull  %12
-%312 = OpConstant  %4  0.0
-%313 = OpConstantComposite  %6  %312 %312 %312 %312
+%241 = OpTypeFunction %2 %8 %6
+%249 = OpTypeFunction %2 %8 %10 %6
+%259 = OpTypeFunction %2 %8 %5 %6
+%267 = OpTypeFunction %2 %12 %6
+%272 = OpTypePointer Output %6
+%271 = OpVariable  %272  Output
+%274 = OpTypeFunction %2
+%284 = OpConstant  %5  0
+%285 = OpConstantNull  %8
+%286 = OpConstant  %10  0
+%287 = OpConstantNull  %12
+%288 = OpConstant  %4  0.0
+%289 = OpConstantComposite  %6  %288 %288 %288 %288
 %48 = OpFunction  %6  None %49
 %46 = OpFunctionParameter  %5
 %47 = OpFunctionParameter  %5
@@ -422,117 +422,78 @@ OpFunctionEnd
 %235 = OpLoad  %17  %37
 OpBranch %236
 %236 = OpLabel
-%237 = OpImageQuerySize  %5  %235
-%238 = OpULessThan  %52  %231 %237
-OpSelectionMerge %239 None
-OpBranchConditional %238 %240 %239
-%240 = OpLabel
 OpImageWrite %235 %231 %232
-OpBranch %239
-%239 = OpLabel
 OpReturn
 OpFunctionEnd
-%244 = OpFunction  %2  None %245
-%242 = OpFunctionParameter  %8
-%243 = OpFunctionParameter  %6
-%241 = OpLabel
-%246 = OpLoad  %18  %39
-OpBranch %247
-%247 = OpLabel
-%248 = OpImageQuerySize  %8  %246
-%249 = OpULessThan  %75  %242 %248
-%250 = OpAll  %52  %249
-OpSelectionMerge %251 None
-OpBranchConditional %250 %252 %251
-%252 = OpLabel
-OpImageWrite %246 %242 %243
-OpBranch %251
-%251 = OpLabel
+%240 = OpFunction  %2  None %241
+%238 = OpFunctionParameter  %8
+%239 = OpFunctionParameter  %6
+%237 = OpLabel
+%242 = OpLoad  %18  %39
+OpBranch %243
+%243 = OpLabel
+OpImageWrite %242 %238 %239
 OpReturn
 OpFunctionEnd
-%257 = OpFunction  %2  None %258
-%254 = OpFunctionParameter  %8
-%255 = OpFunctionParameter  %10
-%256 = OpFunctionParameter  %6
-%253 = OpLabel
-%259 = OpLoad  %19  %41
-OpBranch %260
-%260 = OpLabel
-%261 = OpBitcast  %5  %255
-%262 = OpCompositeConstruct  %12  %254 %261
-%263 = OpImageQuerySize  %12  %259
-%264 = OpULessThan  %96  %262 %263
-%265 = OpAll  %52  %264
-OpSelectionMerge %266 None
-OpBranchConditional %265 %267 %266
-%267 = OpLabel
-OpImageWrite %259 %262 %256
-OpBranch %266
-%266 = OpLabel
+%248 = OpFunction  %2  None %249
+%245 = OpFunctionParameter  %8
+%246 = OpFunctionParameter  %10
+%247 = OpFunctionParameter  %6
+%244 = OpLabel
+%250 = OpLoad  %19  %41
+OpBranch %251
+%251 = OpLabel
+%252 = OpBitcast  %5  %246
+%253 = OpCompositeConstruct  %12  %245 %252
+OpImageWrite %250 %253 %247
 OpReturn
 OpFunctionEnd
-%272 = OpFunction  %2  None %273
-%269 = OpFunctionParameter  %8
-%270 = OpFunctionParameter  %5
-%271 = OpFunctionParameter  %6
-%268 = OpLabel
-%274 = OpLoad  %19  %41
-OpBranch %275
-%275 = OpLabel
-%276 = OpCompositeConstruct  %12  %269 %270
-%277 = OpImageQuerySize  %12  %274
-%278 = OpULessThan  %96  %276 %277
-%279 = OpAll  %52  %278
-OpSelectionMerge %280 None
-OpBranchConditional %279 %281 %280
-%281 = OpLabel
-OpImageWrite %274 %276 %271
-OpBranch %280
-%280 = OpLabel
+%258 = OpFunction  %2  None %259
+%255 = OpFunctionParameter  %8
+%256 = OpFunctionParameter  %5
+%257 = OpFunctionParameter  %6
+%254 = OpLabel
+%260 = OpLoad  %19  %41
+OpBranch %261
+%261 = OpLabel
+%262 = OpCompositeConstruct  %12  %255 %256
+OpImageWrite %260 %262 %257
 OpReturn
 OpFunctionEnd
-%285 = OpFunction  %2  None %286
-%283 = OpFunctionParameter  %12
-%284 = OpFunctionParameter  %6
-%282 = OpLabel
-%287 = OpLoad  %20  %43
-OpBranch %288
-%288 = OpLabel
-%289 = OpImageQuerySize  %12  %287
-%290 = OpULessThan  %96  %283 %289
-%291 = OpAll  %52  %290
-OpSelectionMerge %292 None
-OpBranchConditional %291 %293 %292
-%293 = OpLabel
-OpImageWrite %287 %283 %284
-OpBranch %292
-%292 = OpLabel
+%266 = OpFunction  %2  None %267
+%264 = OpFunctionParameter  %12
+%265 = OpFunctionParameter  %6
+%263 = OpLabel
+%268 = OpLoad  %20  %43
+OpBranch %269
+%269 = OpLabel
+OpImageWrite %268 %264 %265
 OpReturn
 OpFunctionEnd
-%297 = OpFunction  %2  None %298
-%294 = OpLabel
-%299 = OpLoad  %3  %21
-%300 = OpLoad  %7  %23
-%301 = OpLoad  %9  %25
-%302 = OpLoad  %11  %27
-%303 = OpLoad  %13  %29
-%304 = OpLoad  %17  %37
-%305 = OpLoad  %18  %39
-%306 = OpLoad  %19  %41
-%307 = OpLoad  %20  %43
-OpBranch %314
-%314 = OpLabel
-%315 = OpFunctionCall  %6  %48 %308 %308
-%316 = OpFunctionCall  %6  %66 %309 %308
-%317 = OpFunctionCall  %6  %85 %309 %310 %308
-%318 = OpFunctionCall  %6  %106 %309 %308 %308
-%319 = OpFunctionCall  %6  %124 %311 %308
-%320 = OpFunctionCall  %6  %141 %309 %308
-%321 = OpFunctionCall  %2  %233 %308 %53
-%322 = OpFunctionCall  %2  %244 %309 %53
-%323 = OpFunctionCall  %2  %257 %309 %310 %53
-%324 = OpFunctionCall  %2  %272 %309 %308 %53
-%325 = OpFunctionCall  %2  %285 %311 %53
-OpStore %295 %313
+%273 = OpFunction  %2  None %274
+%270 = OpLabel
+%275 = OpLoad  %3  %21
+%276 = OpLoad  %7  %23
+%277 = OpLoad  %9  %25
+%278 = OpLoad  %11  %27
+%279 = OpLoad  %13  %29
+%280 = OpLoad  %17  %37
+%281 = OpLoad  %18  %39
+%282 = OpLoad  %19  %41
+%283 = OpLoad  %20  %43
+OpBranch %290
+%290 = OpLabel
+%291 = OpFunctionCall  %6  %48 %284 %284
+%292 = OpFunctionCall  %6  %66 %285 %284
+%293 = OpFunctionCall  %6  %85 %285 %286 %284
+%294 = OpFunctionCall  %6  %106 %285 %284 %284
+%295 = OpFunctionCall  %6  %124 %287 %284
+%296 = OpFunctionCall  %6  %141 %285 %284
+%297 = OpFunctionCall  %2  %233 %284 %53
+%298 = OpFunctionCall  %2  %240 %285 %53
+%299 = OpFunctionCall  %2  %248 %285 %286 %53
+%300 = OpFunctionCall  %2  %258 %285 %284 %53
+%301 = OpFunctionCall  %2  %266 %287 %53
+OpStore %271 %289
 OpReturn
 OpFunctionEnd
\ No newline at end of file
diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs
index 4f187709a7..0f8c381b5a 100644
--- a/wgpu-hal/src/gles/device.rs
+++ b/wgpu-hal/src/gles/device.rs
@@ -246,7 +246,6 @@ impl super::Device {
             index: BoundsCheckPolicy::Unchecked,
             buffer: BoundsCheckPolicy::Unchecked,
             image_load: image_check,
-            image_store: BoundsCheckPolicy::Unchecked,
             binding_array: BoundsCheckPolicy::Unchecked,
         };
 
diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs
index e108d38202..18b9c2dba5 100644
--- a/wgpu-hal/src/metal/device.rs
+++ b/wgpu-hal/src/metal/device.rs
@@ -146,7 +146,6 @@ impl super::Device {
                 index: bounds_check_policy,
                 buffer: bounds_check_policy,
                 image_load: bounds_check_policy,
-                image_store: naga::proc::BoundsCheckPolicy::Unchecked,
                 // TODO: support bounds checks on binding arrays
                 binding_array: naga::proc::BoundsCheckPolicy::Unchecked,
             },
diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index 1a89aa807a..215c0dd958 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -1773,7 +1773,6 @@ impl super::Adapter {
                     } else {
                         naga::proc::BoundsCheckPolicy::Restrict
                     },
-                    image_store: naga::proc::BoundsCheckPolicy::Unchecked,
                     // TODO: support bounds checks on binding arrays
                     binding_array: naga::proc::BoundsCheckPolicy::Unchecked,
                 },
diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs
index 00f6c7a41c..2f2e045fda 100644
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@@ -736,7 +736,6 @@ impl super::Device {
                             index: naga::proc::BoundsCheckPolicy::Unchecked,
                             buffer: naga::proc::BoundsCheckPolicy::Unchecked,
                             image_load: naga::proc::BoundsCheckPolicy::Unchecked,
-                            image_store: naga::proc::BoundsCheckPolicy::Unchecked,
                             binding_array: naga::proc::BoundsCheckPolicy::Unchecked,
                         };
                     }
@@ -1678,7 +1677,6 @@ impl crate::Device for super::Device {
                         index: naga::proc::BoundsCheckPolicy::Unchecked,
                         buffer: naga::proc::BoundsCheckPolicy::Unchecked,
                         image_load: naga::proc::BoundsCheckPolicy::Unchecked,
-                        image_store: naga::proc::BoundsCheckPolicy::Unchecked,
                         binding_array: naga::proc::BoundsCheckPolicy::Unchecked,
                     };
                 }

From 55ae943086a1224a0bbf74e807ea5e12af72e6b2 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sat, 27 Jul 2024 18:05:15 +0200
Subject: [PATCH 105/226] build(deps): bump the patch-updates group across 1
 directory with 22 updates (#6055)

Bumps the patch-updates group with 17 updates in the / directory:

| Package | From | To |
| --- | --- | --- |
| [env_logger](https://github.com/rust-cli/env_logger) | `0.11.3` | `0.11.5` |
| [tokio](https://github.com/tokio-rs/tokio) | `1.38.1` | `1.39.1` |
| [anstream](https://github.com/rust-cli/anstyle) | `0.6.14` | `0.6.15` |
| [anstyle](https://github.com/rust-cli/anstyle) | `1.0.7` | `1.0.8` |
| [anstyle-parse](https://github.com/rust-cli/anstyle) | `0.2.4` | `0.2.5` |
| [anstyle-query](https://github.com/rust-cli/anstyle) | `1.1.0` | `1.1.1` |
| [anstyle-wincon](https://github.com/rust-cli/anstyle) | `3.0.3` | `3.0.4` |
| [clap](https://github.com/clap-rs/clap) | `4.5.9` | `4.5.11` |
| [clap_lex](https://github.com/clap-rs/clap) | `0.7.1` | `0.7.2` |
| [colorchoice](https://github.com/rust-cli/anstyle) | `1.0.1` | `1.0.2` |
| [env_filter](https://github.com/rust-cli/env_logger) | `0.1.0` | `0.1.2` |
| [generator](https://github.com/Xudong-Huang/generator-rs) | `0.8.1` | `0.8.2` |
| [is_terminal_polyfill](https://github.com/polyfill-rs/is_terminal_polyfill) | `1.70.0` | `1.70.1` |
| [jobserver](https://github.com/rust-lang/jobserver-rs) | `0.1.31` | `0.1.32` |
| [object](https://github.com/gimli-rs/object) | `0.36.1` | `0.36.2` |
| [toml_datetime](https://github.com/toml-rs/toml) | `0.6.6` | `0.6.7` |
| [version_check](https://github.com/SergioBenitez/version_check) | `0.9.4` | `0.9.5` |



Updates `env_logger` from 0.11.3 to 0.11.5
- [Release notes](https://github.com/rust-cli/env_logger/releases)
- [Changelog](https://github.com/rust-cli/env_logger/blob/main/CHANGELOG.md)
- [Commits](https://github.com/rust-cli/env_logger/compare/v0.11.3...v0.11.5)

Updates `tokio` from 1.38.1 to 1.39.1
- [Release notes](https://github.com/tokio-rs/tokio/releases)
- [Commits](https://github.com/tokio-rs/tokio/compare/tokio-1.38.1...tokio-1.39.1)

Updates `anstream` from 0.6.14 to 0.6.15
- [Commits](https://github.com/rust-cli/anstyle/compare/anstream-v0.6.14...anstream-v0.6.15)

Updates `anstyle` from 1.0.7 to 1.0.8
- [Commits](https://github.com/rust-cli/anstyle/compare/v1.0.7...v1.0.8)

Updates `anstyle-parse` from 0.2.4 to 0.2.5
- [Commits](https://github.com/rust-cli/anstyle/compare/anstyle-parse-v0.2.4...anstyle-parse-v0.2.5)

Updates `anstyle-query` from 1.1.0 to 1.1.1
- [Commits](https://github.com/rust-cli/anstyle/compare/anstyle-query-v1.1.0...anstyle-query-v1.1.1)

Updates `anstyle-wincon` from 3.0.3 to 3.0.4
- [Commits](https://github.com/rust-cli/anstyle/compare/anstyle-wincon-v3.0.3...anstyle-wincon-v3.0.4)

Updates `clap` from 4.5.9 to 4.5.11
- [Release notes](https://github.com/clap-rs/clap/releases)
- [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md)
- [Commits](https://github.com/clap-rs/clap/compare/clap_complete-v4.5.9...clap_complete-v4.5.11)

Updates `clap_builder` from 4.5.9 to 4.5.11
- [Release notes](https://github.com/clap-rs/clap/releases)
- [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md)
- [Commits](https://github.com/clap-rs/clap/compare/v4.5.9...v4.5.11)

Updates `clap_derive` from 4.5.8 to 4.5.11
- [Release notes](https://github.com/clap-rs/clap/releases)
- [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md)
- [Commits](https://github.com/clap-rs/clap/compare/v4.5.8...v4.5.11)

Updates `clap_lex` from 0.7.1 to 0.7.2
- [Release notes](https://github.com/clap-rs/clap/releases)
- [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md)
- [Commits](https://github.com/clap-rs/clap/compare/clap_lex-v0.7.1...clap_lex-v0.7.2)

Updates `colorchoice` from 1.0.1 to 1.0.2
- [Commits](https://github.com/rust-cli/anstyle/compare/colorchoice-v1.0.1...colorchoice-v1.0.2)

Updates `env_filter` from 0.1.0 to 0.1.2
- [Release notes](https://github.com/rust-cli/env_logger/releases)
- [Changelog](https://github.com/rust-cli/env_logger/blob/main/CHANGELOG.md)
- [Commits](https://github.com/rust-cli/env_logger/compare/env_filter-v0.1.0...env_filter-v0.1.2)

Updates `generator` from 0.8.1 to 0.8.2
- [Release notes](https://github.com/Xudong-Huang/generator-rs/releases)
- [Commits](https://github.com/Xudong-Huang/generator-rs/compare/0.8.1...0.8.2)

Updates `is_terminal_polyfill` from 1.70.0 to 1.70.1
- [Changelog](https://github.com/polyfill-rs/is_terminal_polyfill/blob/main-v1.70/CHANGELOG.md)
- [Commits](https://github.com/polyfill-rs/is_terminal_polyfill/compare/v1.70.0...v1.70.1)

Updates `jobserver` from 0.1.31 to 0.1.32
- [Commits](https://github.com/rust-lang/jobserver-rs/compare/0.1.31...0.1.32)

Updates `object` from 0.36.1 to 0.36.2
- [Changelog](https://github.com/gimli-rs/object/blob/master/CHANGELOG.md)
- [Commits](https://github.com/gimli-rs/object/compare/0.36.1...0.36.2)

Updates `tokio-macros` from 2.3.0 to 2.4.0
- [Release notes](https://github.com/tokio-rs/tokio/releases)
- [Commits](https://github.com/tokio-rs/tokio/compare/tokio-macros-2.3.0...tokio-macros-2.4.0)

Updates `toml_datetime` from 0.6.6 to 0.6.7
- [Commits](https://github.com/toml-rs/toml/compare/toml_datetime-v0.6.6...toml_datetime-v0.6.7)

Updates `version_check` from 0.9.4 to 0.9.5
- [Commits](https://github.com/SergioBenitez/version_check/compare/v0.9.4...v0.9.5)

Updates `windows-core` from 0.54.0 to 0.58.0
- [Release notes](https://github.com/microsoft/windows-rs/releases)
- [Commits](https://github.com/microsoft/windows-rs/compare/0.54.0...0.58.0)

Updates `windows-result` from 0.1.2 to 0.2.0
- [Release notes](https://github.com/microsoft/windows-rs/releases)
- [Commits](https://github.com/microsoft/windows-rs/commits)

---
updated-dependencies:
- dependency-name: env_logger
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: tokio
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: patch-updates
- dependency-name: anstream
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: anstyle
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: anstyle-parse
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: anstyle-query
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: anstyle-wincon
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: clap
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: clap_builder
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: clap_derive
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: clap_lex
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: colorchoice
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: env_filter
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: generator
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: is_terminal_polyfill
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: jobserver
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: object
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: tokio-macros
  dependency-type: indirect
  update-type: version-update:semver-minor
  dependency-group: patch-updates
- dependency-name: toml_datetime
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: version_check
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: windows-core
  dependency-type: indirect
  update-type: version-update:semver-minor
  dependency-group: patch-updates
- dependency-name: windows-result
  dependency-type: indirect
  update-type: version-update:semver-minor
  dependency-group: patch-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 Cargo.lock | 149 +++++++++++++++++++++++------------------------------
 Cargo.toml |   2 +-
 2 files changed, 66 insertions(+), 85 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 1603038a2c..397504b87c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -105,9 +105,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
 
 [[package]]
 name = "anstream"
-version = "0.6.14"
+version = "0.6.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b"
+checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526"
 dependencies = [
  "anstyle",
  "anstyle-parse",
@@ -120,33 +120,33 @@ dependencies = [
 
 [[package]]
 name = "anstyle"
-version = "1.0.7"
+version = "1.0.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b"
+checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1"
 
 [[package]]
 name = "anstyle-parse"
-version = "0.2.4"
+version = "0.2.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4"
+checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb"
 dependencies = [
  "utf8parse",
 ]
 
 [[package]]
 name = "anstyle-query"
-version = "1.1.0"
+version = "1.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391"
+checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a"
 dependencies = [
  "windows-sys 0.52.0",
 ]
 
 [[package]]
 name = "anstyle-wincon"
-version = "3.0.3"
+version = "3.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19"
+checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8"
 dependencies = [
  "anstyle",
  "windows-sys 0.52.0",
@@ -512,9 +512,9 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.5.9"
+version = "4.5.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "64acc1846d54c1fe936a78dc189c34e28d3f5afc348403f28ecf53660b9b8462"
+checksum = "35723e6a11662c2afb578bcf0b88bf6ea8e21282a953428f240574fcc3a2b5b3"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -522,9 +522,9 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.5.9"
+version = "4.5.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6fb8393d67ba2e7bfaf28a23458e4e2b543cc73a99595511eb207fdb8aede942"
+checksum = "49eb96cbfa7cfa35017b7cd548c75b14c3118c98b423041d70562665e07fb0fa"
 dependencies = [
  "anstream",
  "anstyle",
@@ -534,9 +534,9 @@ dependencies = [
 
 [[package]]
 name = "clap_derive"
-version = "4.5.8"
+version = "4.5.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085"
+checksum = "5d029b67f89d30bbb547c89fd5161293c0aec155fc691d7924b64550662db93e"
 dependencies = [
  "heck 0.5.0",
  "proc-macro2",
@@ -546,9 +546,9 @@ dependencies = [
 
 [[package]]
 name = "clap_lex"
-version = "0.7.1"
+version = "0.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70"
+checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
 
 [[package]]
 name = "cmake"
@@ -623,9 +623,9 @@ checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b"
 
 [[package]]
 name = "colorchoice"
-version = "1.0.1"
+version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422"
+checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0"
 
 [[package]]
 name = "com"
@@ -920,7 +920,7 @@ name = "d3d12"
 version = "22.0.0"
 dependencies = [
  "bitflags 2.6.0",
- "libloading 0.7.4",
+ "libloading 0.8.5",
  "winapi",
 ]
 
@@ -1140,7 +1140,7 @@ version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "330c60081dcc4c72131f8eb70510f1ac07223e5d4163db481a04a0befcffa412"
 dependencies = [
- "libloading 0.7.4",
+ "libloading 0.8.5",
 ]
 
 [[package]]
@@ -1221,9 +1221,9 @@ dependencies = [
 
 [[package]]
 name = "env_filter"
-version = "0.1.0"
+version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea"
+checksum = "4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab"
 dependencies = [
  "log",
  "regex",
@@ -1231,9 +1231,9 @@ dependencies = [
 
 [[package]]
 name = "env_logger"
-version = "0.11.3"
+version = "0.11.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9"
+checksum = "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d"
 dependencies = [
  "anstream",
  "anstyle",
@@ -1513,16 +1513,15 @@ dependencies = [
 
 [[package]]
 name = "generator"
-version = "0.8.1"
+version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "186014d53bc231d0090ef8d6f03e0920c54d85a5ed22f4f2f74315ec56cf83fb"
+checksum = "979f00864edc7516466d6b3157706e06c032f22715700ddd878228a91d02bc56"
 dependencies = [
- "cc",
  "cfg-if",
  "libc",
  "log",
  "rustversion",
- "windows 0.54.0",
+ "windows",
 ]
 
 [[package]]
@@ -1686,7 +1685,7 @@ dependencies = [
  "presser",
  "thiserror",
  "winapi",
- "windows 0.58.0",
+ "windows",
 ]
 
 [[package]]
@@ -1747,7 +1746,7 @@ dependencies = [
  "bitflags 2.6.0",
  "com",
  "libc",
- "libloading 0.7.4",
+ "libloading 0.8.5",
  "thiserror",
  "widestring",
  "winapi",
@@ -1889,9 +1888,9 @@ dependencies = [
 
 [[package]]
 name = "is_terminal_polyfill"
-version = "1.70.0"
+version = "1.70.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
+checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
 
 [[package]]
 name = "itertools"
@@ -1932,9 +1931,9 @@ checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
 
 [[package]]
 name = "jobserver"
-version = "0.1.31"
+version = "0.1.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e"
+checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0"
 dependencies = [
  "libc",
 ]
@@ -2177,6 +2176,18 @@ dependencies = [
  "windows-sys 0.48.0",
 ]
 
+[[package]]
+name = "mio"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4569e456d394deccd22ce1c1913e6ea0e54519f577285001215d33557431afe4"
+dependencies = [
+ "hermit-abi 0.3.9",
+ "libc",
+ "wasi",
+ "windows-sys 0.52.0",
+]
+
 [[package]]
 name = "naga"
 version = "22.0.0"
@@ -2515,9 +2526,9 @@ checksum = "d079845b37af429bfe5dfa76e6d087d788031045b25cfc6fd898486fd9847666"
 
 [[package]]
 name = "object"
-version = "0.36.1"
+version = "0.36.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "081b846d1d56ddfc18fdf1a922e4f6e07a11768ea1b92dec44e42b72712ccfce"
+checksum = "3f203fa8daa7bb185f760ae12bd8e097f63d17041dcdcaf675ac54cdf863170e"
 dependencies = [
  "memchr",
 ]
@@ -3587,28 +3598,27 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
 [[package]]
 name = "tokio"
-version = "1.38.1"
+version = "1.39.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eb2caba9f80616f438e09748d5acda951967e1ea58508ef53d9c6402485a46df"
+checksum = "d040ac2b29ab03b09d4129c2f5bbd012a3ac2f79d38ff506a4bf8dd34b0eac8a"
 dependencies = [
  "backtrace",
  "bytes",
  "libc",
- "mio",
- "num_cpus",
+ "mio 1.0.1",
  "parking_lot",
  "pin-project-lite",
  "signal-hook-registry",
  "socket2",
  "tokio-macros",
- "windows-sys 0.48.0",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
 name = "tokio-macros"
-version = "2.3.0"
+version = "2.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a"
+checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -3617,9 +3627,9 @@ dependencies = [
 
 [[package]]
 name = "toml_datetime"
-version = "0.6.6"
+version = "0.6.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf"
+checksum = "f8fb9f64314842840f1d940ac544da178732128f1c78c21772e876579e0da1db"
 
 [[package]]
 name = "toml_edit"
@@ -3874,9 +3884,9 @@ checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
 
 [[package]]
 name = "version_check"
-version = "0.9.4"
+version = "0.9.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
+checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
 
 [[package]]
 name = "vsimd"
@@ -4327,7 +4337,7 @@ dependencies = [
  "js-sys",
  "khronos-egl",
  "libc",
- "libloading 0.7.4",
+ "libloading 0.8.5",
  "log",
  "metal",
  "naga",
@@ -4346,7 +4356,7 @@ dependencies = [
  "web-sys",
  "wgpu-types",
  "winapi",
- "windows 0.58.0",
+ "windows",
  "winit 0.29.15",
 ]
 
@@ -4468,33 +4478,13 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
 
-[[package]]
-name = "windows"
-version = "0.54.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9252e5725dbed82865af151df558e754e4a3c2c30818359eb17465f1346a1b49"
-dependencies = [
- "windows-core 0.54.0",
- "windows-targets 0.52.6",
-]
-
 [[package]]
 name = "windows"
 version = "0.58.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6"
 dependencies = [
- "windows-core 0.58.0",
- "windows-targets 0.52.6",
-]
-
-[[package]]
-name = "windows-core"
-version = "0.54.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "12661b9c89351d684a50a8a643ce5f608e20243b9fb84687800163429f161d65"
-dependencies = [
- "windows-result 0.1.2",
+ "windows-core",
  "windows-targets 0.52.6",
 ]
 
@@ -4506,7 +4496,7 @@ checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99"
 dependencies = [
  "windows-implement",
  "windows-interface",
- "windows-result 0.2.0",
+ "windows-result",
  "windows-strings",
  "windows-targets 0.52.6",
 ]
@@ -4533,15 +4523,6 @@ dependencies = [
  "syn 2.0.72",
 ]
 
-[[package]]
-name = "windows-result"
-version = "0.1.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8"
-dependencies = [
- "windows-targets 0.52.6",
-]
-
 [[package]]
 name = "windows-result"
 version = "0.2.0"
@@ -4557,7 +4538,7 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10"
 dependencies = [
- "windows-result 0.2.0",
+ "windows-result",
  "windows-targets 0.52.6",
 ]
 
@@ -4823,7 +4804,7 @@ dependencies = [
  "instant",
  "libc",
  "log",
- "mio",
+ "mio 0.8.11",
  "ndk 0.7.0",
  "ndk-glue",
  "objc",
diff --git a/Cargo.toml b/Cargo.toml
index 944402cd73..9d06a676aa 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -183,7 +183,7 @@ deno_url = "0.143.0"
 deno_web = "0.174.0"
 deno_webidl = "0.143.0"
 deno_webgpu = { version = "0.118.0", path = "./deno_webgpu" }
-tokio = "1.38.1"
+tokio = "1.39.1"
 termcolor = "1.4.1"
 
 # android dependencies

From 7462754bdeca514d2f17a94e7fe07b5eb33f3d83 Mon Sep 17 00:00:00 2001
From: Samson <16504129+sagudev@users.noreply.github.com>
Date: Mon, 29 Jul 2024 09:07:26 +0200
Subject: [PATCH 106/226] Remove `'de: 'static` serde bound and replace
 `&'static str` with `Cow` in some errors (#6048)

* Remove `serde(bound(deserialize = "'de: 'static"))` and replace `&'static str` with `Cow` in deser errors

Signed-off-by: sagudev <16504129+sagudev@users.noreply.github.com>

* Allow `clippy::result_large_err`

Signed-off-by: sagudev <16504129+sagudev@users.noreply.github.com>

---------

Signed-off-by: sagudev <16504129+sagudev@users.noreply.github.com>
---
 wgpu-core/src/command/bind.rs | 1 +
 wgpu-core/src/device/mod.rs   | 2 --
 wgpu-core/src/instance.rs     | 7 +++----
 wgpu-core/src/resource.rs     | 9 +++------
 4 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/wgpu-core/src/command/bind.rs b/wgpu-core/src/command/bind.rs
index 04a992928c..5e3f249301 100644
--- a/wgpu-core/src/command/bind.rs
+++ b/wgpu-core/src/command/bind.rs
@@ -248,6 +248,7 @@ mod compat {
                 .filter_map(|(i, e)| if e.is_active() { Some(i) } else { None })
         }
 
+        #[allow(clippy::result_large_err)]
         pub fn get_invalid(&self) -> Result<(), (usize, Error)> {
             for (index, entry) in self.entries.iter().enumerate() {
                 entry.check().map_err(|e| (index, e))?;
diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index 222c50248a..d33de22dac 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -365,7 +365,6 @@ fn map_buffer<A: HalApi>(
 
 #[derive(Clone, Debug)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-#[cfg_attr(feature = "serde", serde(bound(deserialize = "'de: 'static")))]
 pub struct DeviceMismatch {
     pub(super) res: ResourceErrorIdent,
     pub(super) res_device: ResourceErrorIdent,
@@ -391,7 +390,6 @@ impl std::error::Error for DeviceMismatch {}
 
 #[derive(Clone, Debug, Error)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-#[cfg_attr(feature = "serde", serde(bound(deserialize = "'de: 'static")))]
 #[non_exhaustive]
 pub enum DeviceError {
     #[error("{0} is invalid.")]
diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index 65bed375f1..c4433ed148 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -1,5 +1,5 @@
-use std::collections::HashMap;
 use std::sync::Arc;
+use std::{borrow::Cow, collections::HashMap};
 
 use crate::{
     api_log,
@@ -26,7 +26,7 @@ type HalSurface<A> = <A as hal::Api>::Surface;
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[error("Limit '{name}' value {requested} is better than allowed {allowed}")]
 pub struct FailedLimit {
-    name: &'static str,
+    name: Cow<'static, str>,
     requested: u64,
     allowed: u64,
 }
@@ -36,7 +36,7 @@ fn check_limits(requested: &wgt::Limits, allowed: &wgt::Limits) -> Vec<FailedLim
 
     requested.check_limits_with_fail_fn(allowed, false, |name, requested, allowed| {
         failed.push(FailedLimit {
-            name,
+            name: Cow::Borrowed(name),
             requested,
             allowed,
         })
@@ -389,7 +389,6 @@ pub enum GetSurfaceSupportError {
 
 #[derive(Clone, Debug, Error)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-#[cfg_attr(feature = "serde", serde(bound(deserialize = "'de: 'static")))]
 /// Error when requesting a device from the adaptor
 #[non_exhaustive]
 pub enum RequestDeviceError {
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 5b11525126..c3d5c478f5 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -22,7 +22,7 @@ use smallvec::SmallVec;
 use thiserror::Error;
 
 use std::{
-    borrow::Borrow,
+    borrow::{Borrow, Cow},
     fmt::Debug,
     iter,
     mem::{self, ManuallyDrop},
@@ -78,7 +78,7 @@ impl TrackingData {
 #[derive(Clone, Debug)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub struct ResourceErrorIdent {
-    r#type: &'static str,
+    r#type: Cow<'static, str>,
     label: String,
 }
 
@@ -156,7 +156,7 @@ pub(crate) trait Labeled: ResourceType {
 
     fn error_ident(&self) -> ResourceErrorIdent {
         ResourceErrorIdent {
-            r#type: Self::TYPE,
+            r#type: Cow::Borrowed(Self::TYPE),
             label: self.label().to_owned(),
         }
     }
@@ -343,7 +343,6 @@ pub struct BufferMapOperation {
 
 #[derive(Clone, Debug, Error)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-#[cfg_attr(feature = "serde", serde(bound(deserialize = "'de: 'static")))]
 #[non_exhaustive]
 pub enum BufferAccessError {
     #[error(transparent)]
@@ -393,7 +392,6 @@ pub enum BufferAccessError {
 
 #[derive(Clone, Debug, Error)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-#[cfg_attr(feature = "serde", serde(bound(deserialize = "'de: 'static")))]
 #[error("Usage flags {actual:?} of {res} do not contain required usage flags {expected:?}")]
 pub struct MissingBufferUsageError {
     pub(crate) res: ResourceErrorIdent,
@@ -411,7 +409,6 @@ pub struct MissingTextureUsageError {
 
 #[derive(Clone, Debug, Error)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-#[cfg_attr(feature = "serde", serde(bound(deserialize = "'de: 'static")))]
 #[error("{0} has been destroyed")]
 pub struct DestroyedResourceError(pub ResourceErrorIdent);
 

From 3eb3595d0237cf5055e22be2e9a46f977a81ec4a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 29 Jul 2024 09:20:57 +0200
Subject: [PATCH 107/226] build(deps): bump crate-ci/typos from 1.23.3 to
 1.23.5 (#6058)

Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.23.3 to 1.23.5.
- [Release notes](https://github.com/crate-ci/typos/releases)
- [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md)
- [Commits](https://github.com/crate-ci/typos/compare/v1.23.3...v1.23.5)

---
updated-dependencies:
- dependency-name: crate-ci/typos
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 70a83b51d7..203e990b3d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -632,7 +632,7 @@ jobs:
           cargo fmt --manifest-path xtask/Cargo.toml -- --check
 
       - name: Check for typos
-        uses: crate-ci/typos@v1.23.3
+        uses: crate-ci/typos@v1.23.5
 
   check-cts-runner:
     # runtime is normally 2 minutes

From 69eea63757f097bc0953e5ed607eefe1977f9efa Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 29 Jul 2024 09:37:03 +0200
Subject: [PATCH 108/226] build(deps): bump the patch-updates group with 2
 updates (#6059)

Bumps the patch-updates group with 2 updates: [serde_json](https://github.com/serde-rs/json) and [tokio](https://github.com/tokio-rs/tokio).


Updates `serde_json` from 1.0.120 to 1.0.121
- [Release notes](https://github.com/serde-rs/json/releases)
- [Commits](https://github.com/serde-rs/json/compare/v1.0.120...v1.0.121)

Updates `tokio` from 1.39.1 to 1.39.2
- [Release notes](https://github.com/tokio-rs/tokio/releases)
- [Commits](https://github.com/tokio-rs/tokio/compare/tokio-1.39.1...tokio-1.39.2)

---
updated-dependencies:
- dependency-name: serde_json
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: tokio
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 Cargo.lock | 11 ++++++-----
 Cargo.toml |  4 ++--
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 397504b87c..2dbf69ee76 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2013,7 +2013,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
 dependencies = [
  "cfg-if",
- "windows-targets 0.48.5",
+ "windows-targets 0.52.6",
 ]
 
 [[package]]
@@ -3176,12 +3176,13 @@ dependencies = [
 
 [[package]]
 name = "serde_json"
-version = "1.0.120"
+version = "1.0.121"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5"
+checksum = "4ab380d7d9f22ef3f21ad3e6c1ebe8e4fc7a2000ccba2e4d71fc96f15b2cb609"
 dependencies = [
  "indexmap",
  "itoa",
+ "memchr",
  "ryu",
  "serde",
 ]
@@ -3598,9 +3599,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
 [[package]]
 name = "tokio"
-version = "1.39.1"
+version = "1.39.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d040ac2b29ab03b09d4129c2f5bbd012a3ac2f79d38ff506a4bf8dd34b0eac8a"
+checksum = "daa4fb1bc778bd6f04cbfc4bb2d06a7396a8f299dc33ea1900cedaa316f467b1"
 dependencies = [
  "backtrace",
  "bytes",
diff --git a/Cargo.toml b/Cargo.toml
index 9d06a676aa..23d5b5cd7d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -121,7 +121,7 @@ renderdoc-sys = "1.1.0"
 ron = "0.8"
 rustc-hash = "1.1.0"
 serde = "1"
-serde_json = "1.0.120"
+serde_json = "1.0.121"
 smallvec = "1"
 static_assertions = "1.1.0"
 strum = { version = "0.25.0", features = ["derive"] }
@@ -183,7 +183,7 @@ deno_url = "0.143.0"
 deno_web = "0.174.0"
 deno_webidl = "0.143.0"
 deno_webgpu = { version = "0.118.0", path = "./deno_webgpu" }
-tokio = "1.39.1"
+tokio = "1.39.2"
 termcolor = "1.4.1"
 
 # android dependencies

From b145250ebcbcf6381d9f15806dfdf893fcc8e9f9 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Thu, 25 Jul 2024 17:48:13 +0200
Subject: [PATCH 109/226] [test] remove the workaround that keeps resources
 alive from the poll test

The workaround is no longer needed with aade481bdf7f8f9ae18423bf9f0dc1279844f37e.
---
 tests/tests/poll.rs | 117 +++++++++++++++++++-------------------------
 1 file changed, 51 insertions(+), 66 deletions(-)

diff --git a/tests/tests/poll.rs b/tests/tests/poll.rs
index 6b86436f7a..7e99cbcd7d 100644
--- a/tests/tests/poll.rs
+++ b/tests/tests/poll.rs
@@ -1,86 +1,71 @@
 use std::num::NonZeroU64;
 
 use wgpu::{
-    BindGroup, BindGroupDescriptor, BindGroupEntry, BindGroupLayout, BindGroupLayoutDescriptor,
-    BindGroupLayoutEntry, BindingResource, BindingType, Buffer, BufferBindingType,
-    BufferDescriptor, BufferUsages, CommandBuffer, CommandEncoderDescriptor, ComputePassDescriptor,
-    Maintain, ShaderStages,
+    BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, BindGroupLayoutEntry,
+    BindingResource, BindingType, BufferBindingType, BufferDescriptor, BufferUsages, CommandBuffer,
+    CommandEncoderDescriptor, ComputePassDescriptor, Maintain, ShaderStages,
 };
 
 use wgpu_test::{gpu_test, GpuTestConfiguration, TestingContext};
 
-struct DummyWorkData {
-    _buffer: Buffer,
-    _bgl: BindGroupLayout,
-    _bg: BindGroup,
-    cmd_buf: CommandBuffer,
-}
-
-impl DummyWorkData {
-    fn new(ctx: &TestingContext) -> Self {
-        let buffer = ctx.device.create_buffer(&BufferDescriptor {
-            label: None,
-            size: 16,
-            usage: BufferUsages::UNIFORM,
-            mapped_at_creation: false,
-        });
+fn generate_dummy_work(ctx: &TestingContext) -> CommandBuffer {
+    let buffer = ctx.device.create_buffer(&BufferDescriptor {
+        label: None,
+        size: 16,
+        usage: BufferUsages::UNIFORM,
+        mapped_at_creation: false,
+    });
 
-        let bind_group_layout = ctx
-            .device
-            .create_bind_group_layout(&BindGroupLayoutDescriptor {
-                label: None,
-                entries: &[BindGroupLayoutEntry {
-                    binding: 0,
-                    visibility: ShaderStages::COMPUTE,
-                    ty: BindingType::Buffer {
-                        ty: BufferBindingType::Uniform,
-                        has_dynamic_offset: false,
-                        min_binding_size: Some(NonZeroU64::new(16).unwrap()),
-                    },
-                    count: None,
-                }],
-            });
-
-        let bind_group = ctx.device.create_bind_group(&BindGroupDescriptor {
+    let bind_group_layout = ctx
+        .device
+        .create_bind_group_layout(&BindGroupLayoutDescriptor {
             label: None,
-            layout: &bind_group_layout,
-            entries: &[BindGroupEntry {
+            entries: &[BindGroupLayoutEntry {
                 binding: 0,
-                resource: BindingResource::Buffer(buffer.as_entire_buffer_binding()),
+                visibility: ShaderStages::COMPUTE,
+                ty: BindingType::Buffer {
+                    ty: BufferBindingType::Uniform,
+                    has_dynamic_offset: false,
+                    min_binding_size: Some(NonZeroU64::new(16).unwrap()),
+                },
+                count: None,
             }],
         });
 
-        let mut cmd_buf = ctx
-            .device
-            .create_command_encoder(&CommandEncoderDescriptor::default());
-
-        let mut cpass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::default());
-        cpass.set_bind_group(0, &bind_group, &[]);
-        drop(cpass);
-
-        Self {
-            _buffer: buffer,
-            _bgl: bind_group_layout,
-            _bg: bind_group,
-            cmd_buf: cmd_buf.finish(),
-        }
-    }
+    let bind_group = ctx.device.create_bind_group(&BindGroupDescriptor {
+        label: None,
+        layout: &bind_group_layout,
+        entries: &[BindGroupEntry {
+            binding: 0,
+            resource: BindingResource::Buffer(buffer.as_entire_buffer_binding()),
+        }],
+    });
+
+    let mut cmd_buf = ctx
+        .device
+        .create_command_encoder(&CommandEncoderDescriptor::default());
+
+    let mut cpass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::default());
+    cpass.set_bind_group(0, &bind_group, &[]);
+    drop(cpass);
+
+    cmd_buf.finish()
 }
 
 #[gpu_test]
 static WAIT: GpuTestConfiguration = GpuTestConfiguration::new().run_async(|ctx| async move {
-    let data = DummyWorkData::new(&ctx);
+    let cmd_buf = generate_dummy_work(&ctx);
 
-    ctx.queue.submit(Some(data.cmd_buf));
+    ctx.queue.submit(Some(cmd_buf));
     ctx.async_poll(Maintain::wait()).await.panic_on_timeout();
 });
 
 #[gpu_test]
 static DOUBLE_WAIT: GpuTestConfiguration =
     GpuTestConfiguration::new().run_async(|ctx| async move {
-        let data = DummyWorkData::new(&ctx);
+        let cmd_buf = generate_dummy_work(&ctx);
 
-        ctx.queue.submit(Some(data.cmd_buf));
+        ctx.queue.submit(Some(cmd_buf));
         ctx.async_poll(Maintain::wait()).await.panic_on_timeout();
         ctx.async_poll(Maintain::wait()).await.panic_on_timeout();
     });
@@ -88,9 +73,9 @@ static DOUBLE_WAIT: GpuTestConfiguration =
 #[gpu_test]
 static WAIT_ON_SUBMISSION: GpuTestConfiguration =
     GpuTestConfiguration::new().run_async(|ctx| async move {
-        let data = DummyWorkData::new(&ctx);
+        let cmd_buf = generate_dummy_work(&ctx);
 
-        let index = ctx.queue.submit(Some(data.cmd_buf));
+        let index = ctx.queue.submit(Some(cmd_buf));
         ctx.async_poll(Maintain::wait_for(index))
             .await
             .panic_on_timeout();
@@ -99,9 +84,9 @@ static WAIT_ON_SUBMISSION: GpuTestConfiguration =
 #[gpu_test]
 static DOUBLE_WAIT_ON_SUBMISSION: GpuTestConfiguration =
     GpuTestConfiguration::new().run_async(|ctx| async move {
-        let data = DummyWorkData::new(&ctx);
+        let cmd_buf = generate_dummy_work(&ctx);
 
-        let index = ctx.queue.submit(Some(data.cmd_buf));
+        let index = ctx.queue.submit(Some(cmd_buf));
         ctx.async_poll(Maintain::wait_for(index.clone()))
             .await
             .panic_on_timeout();
@@ -113,11 +98,11 @@ static DOUBLE_WAIT_ON_SUBMISSION: GpuTestConfiguration =
 #[gpu_test]
 static WAIT_OUT_OF_ORDER: GpuTestConfiguration =
     GpuTestConfiguration::new().run_async(|ctx| async move {
-        let data1 = DummyWorkData::new(&ctx);
-        let data2 = DummyWorkData::new(&ctx);
+        let cmd_buf1 = generate_dummy_work(&ctx);
+        let cmd_buf2 = generate_dummy_work(&ctx);
 
-        let index1 = ctx.queue.submit(Some(data1.cmd_buf));
-        let index2 = ctx.queue.submit(Some(data2.cmd_buf));
+        let index1 = ctx.queue.submit(Some(cmd_buf1));
+        let index2 = ctx.queue.submit(Some(cmd_buf2));
         ctx.async_poll(Maintain::wait_for(index2))
             .await
             .panic_on_timeout();

From 1cb7ebab99850ac35d9e26093a59da7fa2fbf2af Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 3 Jul 2024 15:25:39 +0200
Subject: [PATCH 110/226] [wgpu-hal] replace `Instance.destroy_surface()` with
 `Drop` impls on `Surface`s

Only the metal and vulkan backends require
destruction code and it can go in a `Drop` impl since
the `Instance` is unused in those implementations.
---
 wgpu-core/src/global.rs                       | 12 +----------
 wgpu-core/src/hub.rs                          |  1 -
 wgpu-core/src/instance.rs                     | 20 +------------------
 wgpu-hal/examples/halmark/main.rs             |  2 +-
 wgpu-hal/examples/ray-traced-triangle/main.rs |  2 +-
 wgpu-hal/src/dx12/instance.rs                 |  3 ---
 wgpu-hal/src/empty.rs                         |  1 -
 wgpu-hal/src/gles/egl.rs                      |  2 --
 wgpu-hal/src/gles/web.rs                      |  2 --
 wgpu-hal/src/gles/wgl.rs                      |  1 -
 wgpu-hal/src/lib.rs                           |  1 -
 wgpu-hal/src/metal/mod.rs                     |  4 ----
 wgpu-hal/src/metal/surface.rs                 | 16 +++++++++------
 wgpu-hal/src/vulkan/instance.rs               | 10 ++++++----
 14 files changed, 20 insertions(+), 57 deletions(-)

diff --git a/wgpu-core/src/global.rs b/wgpu-core/src/global.rs
index 6f6756a88c..7116f357b2 100644
--- a/wgpu-core/src/global.rs
+++ b/wgpu-core/src/global.rs
@@ -1,5 +1,3 @@
-use std::sync::Arc;
-
 use wgt::Backend;
 
 use crate::{
@@ -8,7 +6,6 @@ use crate::{
     instance::{Instance, Surface},
     registry::{Registry, RegistryReport},
     resource_log,
-    storage::Element,
 };
 
 #[derive(Debug, PartialEq, Eq)]
@@ -152,14 +149,7 @@ impl Drop for Global {
             self.hubs.gl.clear(&surfaces_locked, true);
         }
 
-        // destroy surfaces
-        for element in surfaces_locked.map.drain(..) {
-            if let Element::Occupied(arc_surface, _) = element {
-                let surface = Arc::into_inner(arc_surface)
-                    .expect("Surface cannot be destroyed because is still in use");
-                self.instance.destroy_surface(surface);
-            }
-        }
+        surfaces_locked.map.clear();
     }
 }
 
diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs
index a318f91fc0..559f3c47c2 100644
--- a/wgpu-core/src/hub.rs
+++ b/wgpu-core/src/hub.rs
@@ -248,7 +248,6 @@ impl<A: HalApi> Hub<A> {
                         let suf = A::surface_as_hal(surface);
                         unsafe {
                             suf.unwrap().unconfigure(device.raw());
-                            //TODO: we could destroy the surface here
                         }
                     }
                 }
diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index c4433ed148..1a74de83e1 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -112,24 +112,6 @@ impl Instance {
             flags: instance_desc.flags,
         }
     }
-
-    pub(crate) fn destroy_surface(&self, surface: Surface) {
-        fn destroy<A: HalApi>(instance: &Option<A::Instance>, mut surface: Option<HalSurface<A>>) {
-            if let Some(surface) = surface.take() {
-                unsafe {
-                    instance.as_ref().unwrap().destroy_surface(surface);
-                }
-            }
-        }
-        #[cfg(vulkan)]
-        destroy::<hal::api::Vulkan>(&self.vulkan, surface.vulkan);
-        #[cfg(metal)]
-        destroy::<hal::api::Metal>(&self.metal, surface.metal);
-        #[cfg(dx12)]
-        destroy::<hal::api::Dx12>(&self.dx12, surface.dx12);
-        #[cfg(gles)]
-        destroy::<hal::api::Gles>(&self.gl, surface.gl);
-    }
 }
 
 pub struct Surface {
@@ -707,7 +689,7 @@ impl Global {
             #[cfg(gles)]
             unconfigure::<hal::api::Gles>(self, &surface.gl, &present);
         }
-        self.instance.destroy_surface(surface);
+        drop(surface)
     }
 
     fn enumerate<A: HalApi>(
diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs
index 30ff45ff5b..dabcea418a 100644
--- a/wgpu-hal/examples/halmark/main.rs
+++ b/wgpu-hal/examples/halmark/main.rs
@@ -578,7 +578,7 @@ impl<A: hal::Api> Example<A> {
 
             self.surface.unconfigure(&self.device);
             self.device.exit(self.queue);
-            self.instance.destroy_surface(self.surface);
+            drop(self.surface);
             drop(self.adapter);
         }
     }
diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs
index 7cd6547f2c..b1aceeb101 100644
--- a/wgpu-hal/examples/ray-traced-triangle/main.rs
+++ b/wgpu-hal/examples/ray-traced-triangle/main.rs
@@ -1039,7 +1039,7 @@ impl<A: hal::Api> Example<A> {
 
             self.surface.unconfigure(&self.device);
             self.device.exit(self.queue);
-            self.instance.destroy_surface(self.surface);
+            drop(self.surface);
             drop(self.adapter);
         }
     }
diff --git a/wgpu-hal/src/dx12/instance.rs b/wgpu-hal/src/dx12/instance.rs
index a629018404..c9557355fb 100644
--- a/wgpu-hal/src/dx12/instance.rs
+++ b/wgpu-hal/src/dx12/instance.rs
@@ -143,9 +143,6 @@ impl crate::Instance for super::Instance {
             ))),
         }
     }
-    unsafe fn destroy_surface(&self, _surface: super::Surface) {
-        // just drop
-    }
 
     unsafe fn enumerate_adapters(
         &self,
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 89a04ce48b..956b7b08a5 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -53,7 +53,6 @@ impl crate::Instance for Context {
     ) -> Result<Context, crate::InstanceError> {
         Ok(Context)
     }
-    unsafe fn destroy_surface(&self, surface: Context) {}
     unsafe fn enumerate_adapters(
         &self,
         _surface_hint: Option<&Context>,
diff --git a/wgpu-hal/src/gles/egl.rs b/wgpu-hal/src/gles/egl.rs
index 8cf69cc076..e0340d8290 100644
--- a/wgpu-hal/src/gles/egl.rs
+++ b/wgpu-hal/src/gles/egl.rs
@@ -1002,8 +1002,6 @@ impl crate::Instance for Instance {
         })
     }
 
-    unsafe fn destroy_surface(&self, _surface: Surface) {}
-
     unsafe fn enumerate_adapters(
         &self,
         _surface_hint: Option<&Surface>,
diff --git a/wgpu-hal/src/gles/web.rs b/wgpu-hal/src/gles/web.rs
index a6c79721b4..99d4ff59b5 100644
--- a/wgpu-hal/src/gles/web.rs
+++ b/wgpu-hal/src/gles/web.rs
@@ -171,8 +171,6 @@ impl crate::Instance for Instance {
 
         self.create_surface_from_canvas(canvas)
     }
-
-    unsafe fn destroy_surface(&self, _surface: Surface) {}
 }
 
 #[derive(Debug)]
diff --git a/wgpu-hal/src/gles/wgl.rs b/wgpu-hal/src/gles/wgl.rs
index 64ed063254..68bedb11d2 100644
--- a/wgpu-hal/src/gles/wgl.rs
+++ b/wgpu-hal/src/gles/wgl.rs
@@ -535,7 +535,6 @@ impl crate::Instance for Instance {
             srgb_capable: self.srgb_capable,
         })
     }
-    unsafe fn destroy_surface(&self, _surface: Surface) {}
 
     unsafe fn enumerate_adapters(
         &self,
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 812bb7299c..9b6d49135e 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -443,7 +443,6 @@ pub trait Instance: Sized + WasmNotSendSync {
         display_handle: raw_window_handle::RawDisplayHandle,
         window_handle: raw_window_handle::RawWindowHandle,
     ) -> Result<<Self::A as Api>::Surface, InstanceError>;
-    unsafe fn destroy_surface(&self, surface: <Self::A as Api>::Surface);
     /// `surface_hint` is only used by the GLES backend targeting WebGL2
     unsafe fn enumerate_adapters(
         &self,
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index 177b02569a..0003983706 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -117,10 +117,6 @@ impl crate::Instance for Instance {
         }
     }
 
-    unsafe fn destroy_surface(&self, surface: Surface) {
-        unsafe { surface.dispose() };
-    }
-
     unsafe fn enumerate_adapters(
         &self,
         _surface_hint: Option<&Surface>,
diff --git a/wgpu-hal/src/metal/surface.rs b/wgpu-hal/src/metal/surface.rs
index b0ea55e9fe..8bbdb63786 100644
--- a/wgpu-hal/src/metal/surface.rs
+++ b/wgpu-hal/src/metal/surface.rs
@@ -70,12 +70,6 @@ impl super::Surface {
         }
     }
 
-    pub unsafe fn dispose(self) {
-        if let Some(view) = self.view {
-            let () = msg_send![view.as_ptr(), release];
-        }
-    }
-
     /// If not called on the main thread, this will panic.
     #[allow(clippy::transmute_ptr_to_ref)]
     pub unsafe fn from_view(
@@ -178,6 +172,16 @@ impl super::Surface {
     }
 }
 
+impl Drop for super::Surface {
+    fn drop(&mut self) {
+        if let Some(view) = self.view {
+            unsafe {
+                let () = msg_send![view.as_ptr(), release];
+            }
+        }
+    }
+}
+
 impl crate::Surface for super::Surface {
     type A = super::Api;
 
diff --git a/wgpu-hal/src/vulkan/instance.rs b/wgpu-hal/src/vulkan/instance.rs
index b3ced3275e..1d7386e623 100644
--- a/wgpu-hal/src/vulkan/instance.rs
+++ b/wgpu-hal/src/vulkan/instance.rs
@@ -880,10 +880,6 @@ impl crate::Instance for super::Instance {
         }
     }
 
-    unsafe fn destroy_surface(&self, surface: super::Surface) {
-        unsafe { surface.functor.destroy_surface(surface.raw, None) };
-    }
-
     unsafe fn enumerate_adapters(
         &self,
         _surface_hint: Option<&super::Surface>,
@@ -942,6 +938,12 @@ impl crate::Instance for super::Instance {
     }
 }
 
+impl Drop for super::Surface {
+    fn drop(&mut self) {
+        unsafe { self.functor.destroy_surface(self.raw, None) };
+    }
+}
+
 impl crate::Surface for super::Surface {
     type A = super::Api;
 

From f3e8e594ed7b482a9c208b048d5b95d95eb57841 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 3 Jul 2024 15:26:30 +0200
Subject: [PATCH 111/226] remove `Hub.surface_unconfigure()` since the `Hub`
 reference was unused.

---
 wgpu-core/src/hub.rs      |  7 -------
 wgpu-core/src/instance.rs | 18 +++++++-----------
 2 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs
index 559f3c47c2..a5b1e5982d 100644
--- a/wgpu-core/src/hub.rs
+++ b/wgpu-core/src/hub.rs
@@ -263,13 +263,6 @@ impl<A: HalApi> Hub<A> {
         }
     }
 
-    pub(crate) fn surface_unconfigure(&self, device: &Device<A>, surface: &A::Surface) {
-        unsafe {
-            use hal::Surface;
-            surface.unconfigure(device.raw());
-        }
-    }
-
     pub fn generate_report(&self) -> HubReport {
         HubReport {
             adapters: self.adapters.generate_report(),
diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index 1a74de83e1..1b65b0c9bb 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -662,15 +662,11 @@ impl Global {
 
         api_log!("Surface::drop {id:?}");
 
-        fn unconfigure<A: HalApi>(
-            global: &Global,
-            surface: &Option<HalSurface<A>>,
-            present: &Presentation,
-        ) {
+        fn unconfigure<A: HalApi>(surface: &Option<HalSurface<A>>, present: &Presentation) {
             if let Some(surface) = surface {
-                let hub = HalApi::hub(global);
                 if let Some(device) = present.device.downcast_ref::<A>() {
-                    hub.surface_unconfigure(device, surface);
+                    use hal::Surface;
+                    unsafe { surface.unconfigure(device.raw()) };
                 }
             }
         }
@@ -681,13 +677,13 @@ impl Global {
 
         if let Some(present) = surface.presentation.lock().take() {
             #[cfg(vulkan)]
-            unconfigure::<hal::api::Vulkan>(self, &surface.vulkan, &present);
+            unconfigure::<hal::api::Vulkan>(&surface.vulkan, &present);
             #[cfg(metal)]
-            unconfigure::<hal::api::Metal>(self, &surface.metal, &present);
+            unconfigure::<hal::api::Metal>(&surface.metal, &present);
             #[cfg(dx12)]
-            unconfigure::<hal::api::Dx12>(self, &surface.dx12, &present);
+            unconfigure::<hal::api::Dx12>(&surface.dx12, &present);
             #[cfg(gles)]
-            unconfigure::<hal::api::Gles>(self, &surface.gl, &present);
+            unconfigure::<hal::api::Gles>(&surface.gl, &present);
         }
         drop(surface)
     }

From d1da4456a6e3839c772ac7ef97a49b696ac1eb0b Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 3 Jul 2024 17:15:55 +0200
Subject: [PATCH 112/226] remove `Global.clear_backend()`

The method was only used by the player's tests, which
were refactored to create a new `Global` instead.

Removing it cleans up the internals of `Hub.clear()`;
we should avoid having test-only items.
---
 player/tests/test.rs    | 58 ++++++++++++++++++++---------------------
 wgpu-core/src/global.rs | 15 +++--------
 wgpu-core/src/hub.rs    | 11 +++-----
 3 files changed, 36 insertions(+), 48 deletions(-)

diff --git a/player/tests/test.rs b/player/tests/test.rs
index a0df6f638c..b3ca944921 100644
--- a/player/tests/test.rs
+++ b/player/tests/test.rs
@@ -178,8 +178,6 @@ impl Test<'_> {
                 );
             }
         }
-
-        wgc::gfx_select!(device_id => global.clear_backend(()));
     }
 }
 
@@ -202,40 +200,42 @@ impl Corpus {
         let dir = path.parent().unwrap();
         let corpus: Corpus = ron::de::from_reader(File::open(&path).unwrap()).unwrap();
 
-        let global = wgc::global::Global::new(
-            "test",
-            wgt::InstanceDescriptor {
-                backends: corpus.backends,
-                flags: wgt::InstanceFlags::debugging(),
-                dx12_shader_compiler: wgt::Dx12Compiler::Fxc,
-                gles_minor_version: wgt::Gles3MinorVersion::default(),
-            },
-        );
         for &backend in BACKENDS {
             if !corpus.backends.contains(backend.into()) {
                 continue;
             }
-            let adapter = match global.request_adapter(
-                &wgc::instance::RequestAdapterOptions {
-                    power_preference: wgt::PowerPreference::None,
-                    force_fallback_adapter: false,
-                    compatible_surface: None,
-                },
-                wgc::instance::AdapterInputs::IdSet(&[wgc::id::Id::zip(0, 0, backend)]),
-            ) {
-                Ok(adapter) => adapter,
-                Err(_) => continue,
-            };
-
-            println!("\tBackend {:?}", backend);
-            let supported_features =
-                wgc::gfx_select!(adapter => global.adapter_features(adapter)).unwrap();
-            let downlevel_caps =
-                wgc::gfx_select!(adapter => global.adapter_downlevel_capabilities(adapter))
-                    .unwrap();
             let mut test_num = 0;
             for test_path in &corpus.tests {
                 println!("\t\tTest '{:?}'", test_path);
+
+                let global = wgc::global::Global::new(
+                    "test",
+                    wgt::InstanceDescriptor {
+                        backends: backend.into(),
+                        flags: wgt::InstanceFlags::debugging(),
+                        dx12_shader_compiler: wgt::Dx12Compiler::Fxc,
+                        gles_minor_version: wgt::Gles3MinorVersion::default(),
+                    },
+                );
+                let adapter = match global.request_adapter(
+                    &wgc::instance::RequestAdapterOptions {
+                        power_preference: wgt::PowerPreference::None,
+                        force_fallback_adapter: false,
+                        compatible_surface: None,
+                    },
+                    wgc::instance::AdapterInputs::IdSet(&[wgc::id::Id::zip(0, 0, backend)]),
+                ) {
+                    Ok(adapter) => adapter,
+                    Err(_) => continue,
+                };
+
+                println!("\tBackend {:?}", backend);
+                let supported_features =
+                    wgc::gfx_select!(adapter => global.adapter_features(adapter)).unwrap();
+                let downlevel_caps =
+                    wgc::gfx_select!(adapter => global.adapter_downlevel_capabilities(adapter))
+                        .unwrap();
+
                 let test = Test::load(dir.join(test_path), adapter.backend());
                 if !supported_features.contains(test.features) {
                     println!(
diff --git a/wgpu-core/src/global.rs b/wgpu-core/src/global.rs
index 7116f357b2..54dcc8111c 100644
--- a/wgpu-core/src/global.rs
+++ b/wgpu-core/src/global.rs
@@ -87,13 +87,6 @@ impl Global {
         }
     }
 
-    pub fn clear_backend<A: HalApi>(&self, _dummy: ()) {
-        let hub = A::hub(self);
-        let surfaces_locked = self.surfaces.read();
-        // this is used for tests, which keep the adapter
-        hub.clear(&surfaces_locked, false);
-    }
-
     pub fn generate_report(&self) -> GlobalReport {
         GlobalReport {
             surfaces: self.surfaces.generate_report(),
@@ -134,19 +127,19 @@ impl Drop for Global {
         // destroy hubs before the instance gets dropped
         #[cfg(vulkan)]
         {
-            self.hubs.vulkan.clear(&surfaces_locked, true);
+            self.hubs.vulkan.clear(&surfaces_locked);
         }
         #[cfg(metal)]
         {
-            self.hubs.metal.clear(&surfaces_locked, true);
+            self.hubs.metal.clear(&surfaces_locked);
         }
         #[cfg(dx12)]
         {
-            self.hubs.dx12.clear(&surfaces_locked, true);
+            self.hubs.dx12.clear(&surfaces_locked);
         }
         #[cfg(gles)]
         {
-            self.hubs.gl.clear(&surfaces_locked, true);
+            self.hubs.gl.clear(&surfaces_locked);
         }
 
         surfaces_locked.map.clear();
diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs
index a5b1e5982d..1357a2e423 100644
--- a/wgpu-core/src/hub.rs
+++ b/wgpu-core/src/hub.rs
@@ -214,10 +214,7 @@ impl<A: HalApi> Hub<A> {
         }
     }
 
-    //TODO: instead of having a hacky `with_adapters` parameter,
-    // we should have `clear_device(device_id)` that specifically destroys
-    // everything related to a logical device.
-    pub(crate) fn clear(&self, surface_guard: &Storage<Surface>, with_adapters: bool) {
+    pub(crate) fn clear(&self, surface_guard: &Storage<Surface>) {
         use hal::Surface;
 
         let mut devices = self.devices.write();
@@ -257,10 +254,8 @@ impl<A: HalApi> Hub<A> {
         self.queues.write().map.clear();
         devices.map.clear();
 
-        if with_adapters {
-            drop(devices);
-            self.adapters.write().map.clear();
-        }
+        drop(devices);
+        self.adapters.write().map.clear();
     }
 
     pub fn generate_report(&self) -> HubReport {

From 2ea081fabf7a14320bc1a949bc140f0b21854bcf Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Tue, 9 Jul 2024 14:30:53 +0200
Subject: [PATCH 113/226] remove waiting functionality from
 `Global.{buffer,texture,texture_view}_drop()`

Those resources won't be destroyed if used by a submission anyway.
---
 deno_webgpu/buffer.rs            |  2 +-
 deno_webgpu/texture.rs           |  4 +--
 player/src/lib.rs                |  6 ++--
 wgpu-core/src/device/global.rs   | 57 ++++----------------------------
 wgpu-core/src/device/resource.rs |  5 +--
 wgpu/src/backend/wgpu_core.rs    |  6 ++--
 6 files changed, 18 insertions(+), 62 deletions(-)

diff --git a/deno_webgpu/buffer.rs b/deno_webgpu/buffer.rs
index e0b0e50d31..9a4900112a 100644
--- a/deno_webgpu/buffer.rs
+++ b/deno_webgpu/buffer.rs
@@ -27,7 +27,7 @@ impl Resource for WebGpuBuffer {
     }
 
     fn close(self: Rc<Self>) {
-        gfx_select!(self.1 => self.0.buffer_drop(self.1, true));
+        gfx_select!(self.1 => self.0.buffer_drop(self.1));
     }
 }
 
diff --git a/deno_webgpu/texture.rs b/deno_webgpu/texture.rs
index 2dc1a740a5..8acba24998 100644
--- a/deno_webgpu/texture.rs
+++ b/deno_webgpu/texture.rs
@@ -24,7 +24,7 @@ impl Resource for WebGpuTexture {
     fn close(self: Rc<Self>) {
         if self.owned {
             let instance = &self.instance;
-            gfx_select!(self.id => instance.texture_drop(self.id, true));
+            gfx_select!(self.id => instance.texture_drop(self.id));
         }
     }
 }
@@ -39,7 +39,7 @@ impl Resource for WebGpuTextureView {
     }
 
     fn close(self: Rc<Self>) {
-        gfx_select!(self.1 => self.0.texture_view_drop(self.1, true)).unwrap();
+        gfx_select!(self.1 => self.0.texture_view_drop(self.1)).unwrap();
     }
 }
 
diff --git a/player/src/lib.rs b/player/src/lib.rs
index cf89b2469d..3bbcbfdb12 100644
--- a/player/src/lib.rs
+++ b/player/src/lib.rs
@@ -157,7 +157,7 @@ impl GlobalPlay for wgc::global::Global {
                 self.buffer_destroy::<A>(id).unwrap();
             }
             Action::DestroyBuffer(id) => {
-                self.buffer_drop::<A>(id, true);
+                self.buffer_drop::<A>(id);
             }
             Action::CreateTexture(id, desc) => {
                 let (_, error) = self.device_create_texture::<A>(device, &desc, Some(id));
@@ -169,7 +169,7 @@ impl GlobalPlay for wgc::global::Global {
                 self.texture_destroy::<A>(id).unwrap();
             }
             Action::DestroyTexture(id) => {
-                self.texture_drop::<A>(id, true);
+                self.texture_drop::<A>(id);
             }
             Action::CreateTextureView {
                 id,
@@ -182,7 +182,7 @@ impl GlobalPlay for wgc::global::Global {
                 }
             }
             Action::DestroyTextureView(id) => {
-                self.texture_view_drop::<A>(id, true).unwrap();
+                self.texture_view_drop::<A>(id).unwrap();
             }
             Action::CreateSampler(id, desc) => {
                 let (_, error) = self.device_create_sampler::<A>(device, &desc, Some(id));
diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 96727b04f5..1e5db459c4 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -409,7 +409,7 @@ impl Global {
         buffer.destroy()
     }
 
-    pub fn buffer_drop<A: HalApi>(&self, buffer_id: id::BufferId, wait: bool) {
+    pub fn buffer_drop<A: HalApi>(&self, buffer_id: id::BufferId) {
         profiling::scope!("Buffer::drop");
         api_log!("Buffer::drop {buffer_id:?}");
 
@@ -431,20 +431,6 @@ impl Global {
             #[cfg(feature = "trace")]
             buffer_id,
         );
-
-        if wait {
-            let Some(last_submit_index) = buffer
-                .device
-                .lock_life()
-                .get_buffer_latest_submission_index(&buffer)
-            else {
-                return;
-            };
-            match buffer.device.wait_for_submit(last_submit_index) {
-                Ok(()) => (),
-                Err(e) => log::error!("Failed to wait for buffer {:?}: {}", buffer_id, e),
-            }
-        }
     }
 
     pub fn device_create_texture<A: HalApi>(
@@ -601,31 +587,17 @@ impl Global {
         texture.destroy()
     }
 
-    pub fn texture_drop<A: HalApi>(&self, texture_id: id::TextureId, wait: bool) {
+    pub fn texture_drop<A: HalApi>(&self, texture_id: id::TextureId) {
         profiling::scope!("Texture::drop");
         api_log!("Texture::drop {texture_id:?}");
 
         let hub = A::hub(self);
 
-        if let Some(texture) = hub.textures.unregister(texture_id) {
+        if let Some(_texture) = hub.textures.unregister(texture_id) {
             #[cfg(feature = "trace")]
-            if let Some(t) = texture.device.trace.lock().as_mut() {
+            if let Some(t) = _texture.device.trace.lock().as_mut() {
                 t.add(trace::Action::DestroyTexture(texture_id));
             }
-
-            if wait {
-                let Some(last_submit_index) = texture
-                    .device
-                    .lock_life()
-                    .get_texture_latest_submission_index(&texture)
-                else {
-                    return;
-                };
-                match texture.device.wait_for_submit(last_submit_index) {
-                    Ok(()) => (),
-                    Err(e) => log::error!("Failed to wait for texture {texture_id:?}: {e}"),
-                }
-            }
         }
     }
 
@@ -679,34 +651,17 @@ impl Global {
     pub fn texture_view_drop<A: HalApi>(
         &self,
         texture_view_id: id::TextureViewId,
-        wait: bool,
     ) -> Result<(), resource::TextureViewDestroyError> {
         profiling::scope!("TextureView::drop");
         api_log!("TextureView::drop {texture_view_id:?}");
 
         let hub = A::hub(self);
 
-        if let Some(view) = hub.texture_views.unregister(texture_view_id) {
+        if let Some(_view) = hub.texture_views.unregister(texture_view_id) {
             #[cfg(feature = "trace")]
-            if let Some(t) = view.device.trace.lock().as_mut() {
+            if let Some(t) = _view.device.trace.lock().as_mut() {
                 t.add(trace::Action::DestroyTextureView(texture_view_id));
             }
-
-            if wait {
-                let Some(last_submit_index) = view
-                    .device
-                    .lock_life()
-                    .get_texture_latest_submission_index(&view.parent)
-                else {
-                    return Ok(());
-                };
-                match view.device.wait_for_submit(last_submit_index) {
-                    Ok(()) => (),
-                    Err(e) => {
-                        log::error!("Failed to wait for texture view {texture_view_id:?}: {e}")
-                    }
-                }
-            }
         }
         Ok(())
     }
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 9f8f48e566..8f9f5022d9 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -32,7 +32,7 @@ use crate::{
         UsageScopePool,
     },
     validation::{self, validate_color_attachment_bytes_per_sample},
-    FastHashMap, LabelHelpers as _, PreHashedKey, PreHashedMap, SubmissionIndex,
+    FastHashMap, LabelHelpers as _, PreHashedKey, PreHashedMap,
 };
 
 use arrayvec::ArrayVec;
@@ -3474,9 +3474,10 @@ impl<A: HalApi> Device<A> {
         }
     }
 
+    #[cfg(feature = "replay")]
     pub(crate) fn wait_for_submit(
         &self,
-        submission_index: SubmissionIndex,
+        submission_index: crate::SubmissionIndex,
     ) -> Result<(), WaitIdleError> {
         let guard = self.fence.read();
         let fence = guard.as_ref().unwrap();
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index cc4ad9b997..7806552494 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -1651,7 +1651,7 @@ impl crate::Context for ContextWgpuCore {
     }
 
     fn buffer_drop(&self, buffer: &Self::BufferId, _buffer_data: &Self::BufferData) {
-        wgc::gfx_select!(buffer => self.0.buffer_drop(*buffer, false))
+        wgc::gfx_select!(buffer => self.0.buffer_drop(*buffer))
     }
 
     fn texture_destroy(&self, texture: &Self::TextureId, _texture_data: &Self::TextureData) {
@@ -1660,7 +1660,7 @@ impl crate::Context for ContextWgpuCore {
     }
 
     fn texture_drop(&self, texture: &Self::TextureId, _texture_data: &Self::TextureData) {
-        wgc::gfx_select!(texture => self.0.texture_drop(*texture, false))
+        wgc::gfx_select!(texture => self.0.texture_drop(*texture))
     }
 
     fn texture_view_drop(
@@ -1668,7 +1668,7 @@ impl crate::Context for ContextWgpuCore {
         texture_view: &Self::TextureViewId,
         __texture_view_data: &Self::TextureViewData,
     ) {
-        let _ = wgc::gfx_select!(*texture_view => self.0.texture_view_drop(*texture_view, false));
+        let _ = wgc::gfx_select!(*texture_view => self.0.texture_view_drop(*texture_view));
     }
 
     fn sampler_drop(&self, sampler: &Self::SamplerId, _sampler_data: &Self::SamplerData) {

From 7502e652131e3057cc5124571ac8d6d3f66c9f73 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Fri, 12 Jul 2024 13:57:41 +0200
Subject: [PATCH 114/226] remove unused `Global.device_get_buffer_sub_data`

---
 wgpu-core/src/device/global.rs | 46 ----------------------------------
 1 file changed, 46 deletions(-)

diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 1e5db459c4..9e3df6adb0 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -336,52 +336,6 @@ impl Global {
         Ok(())
     }
 
-    #[doc(hidden)]
-    pub fn device_get_buffer_sub_data<A: HalApi>(
-        &self,
-        device_id: DeviceId,
-        buffer_id: id::BufferId,
-        offset: BufferAddress,
-        data: &mut [u8],
-    ) -> BufferAccessResult {
-        profiling::scope!("Device::get_buffer_sub_data");
-
-        let hub = A::hub(self);
-
-        let device = hub
-            .devices
-            .get(device_id)
-            .map_err(|_| DeviceError::InvalidDeviceId)?;
-        device.check_is_valid()?;
-
-        let snatch_guard = device.snatchable_lock.read();
-
-        let buffer = hub
-            .buffers
-            .get(buffer_id)
-            .map_err(|_| BufferAccessError::InvalidBufferId(buffer_id))?;
-        buffer.check_usage(wgt::BufferUsages::MAP_READ)?;
-        //assert!(buffer isn't used by the GPU);
-
-        let raw_buf = buffer.try_raw(&snatch_guard)?;
-        unsafe {
-            let mapping = device
-                .raw()
-                .map_buffer(raw_buf, offset..offset + data.len() as u64)
-                .map_err(DeviceError::from)?;
-            if !mapping.is_coherent {
-                device.raw().invalidate_mapped_ranges(
-                    raw_buf,
-                    iter::once(offset..offset + data.len() as u64),
-                );
-            }
-            ptr::copy_nonoverlapping(mapping.ptr.as_ptr(), data.as_mut_ptr(), data.len());
-            device.raw().unmap_buffer(raw_buf);
-        }
-
-        Ok(())
-    }
-
     pub fn buffer_destroy<A: HalApi>(
         &self,
         buffer_id: id::BufferId,

From 650054bbcd88e1ec5e0f5029cc22f9d8d6e13263 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Fri, 12 Jul 2024 13:55:50 +0200
Subject: [PATCH 115/226] [player] simplify sync buffer writing

---
 player/src/lib.rs                |  3 +-
 wgpu-core/src/device/global.rs   | 71 +++++++-------------------------
 wgpu-core/src/device/resource.rs | 13 ++----
 3 files changed, 20 insertions(+), 67 deletions(-)

diff --git a/player/src/lib.rs b/player/src/lib.rs
index 3bbcbfdb12..4ec9116ead 100644
--- a/player/src/lib.rs
+++ b/player/src/lib.rs
@@ -330,8 +330,7 @@ impl GlobalPlay for wgc::global::Global {
                     self.queue_write_buffer::<A>(device.into_queue_id(), id, range.start, &bin)
                         .unwrap();
                 } else {
-                    self.device_wait_for_buffer::<A>(device, id).unwrap();
-                    self.device_set_buffer_sub_data::<A>(device, id, range.start, &bin[..size])
+                    self.device_set_buffer_data::<A>(id, range.start, &bin[..size])
                         .unwrap();
                 }
             }
diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 9e3df6adb0..0df0bc377a 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -30,8 +30,7 @@ use wgt::{BufferAddress, TextureFormat};
 
 use std::{
     borrow::Cow,
-    iter,
-    ptr::{self, NonNull},
+    ptr::NonNull,
     sync::{atomic::Ordering, Arc},
 };
 
@@ -252,70 +251,31 @@ impl Global {
     }
 
     #[cfg(feature = "replay")]
-    pub fn device_wait_for_buffer<A: HalApi>(
+    pub fn device_set_buffer_data<A: HalApi>(
         &self,
-        device_id: DeviceId,
-        buffer_id: id::BufferId,
-    ) -> Result<(), WaitIdleError> {
-        let hub = A::hub(self);
-
-        let device = hub
-            .devices
-            .get(device_id)
-            .map_err(|_| DeviceError::InvalidDeviceId)?;
-
-        let buffer = match hub.buffers.get(buffer_id) {
-            Ok(buffer) => buffer,
-            Err(_) => return Ok(()),
-        };
-
-        let last_submission = device
-            .lock_life()
-            .get_buffer_latest_submission_index(&buffer);
-
-        if let Some(last_submission) = last_submission {
-            device.wait_for_submit(last_submission)
-        } else {
-            Ok(())
-        }
-    }
-
-    #[doc(hidden)]
-    pub fn device_set_buffer_sub_data<A: HalApi>(
-        &self,
-        device_id: DeviceId,
         buffer_id: id::BufferId,
         offset: BufferAddress,
         data: &[u8],
     ) -> BufferAccessResult {
-        profiling::scope!("Device::set_buffer_sub_data");
-
         let hub = A::hub(self);
 
-        let device = hub
-            .devices
-            .get(device_id)
-            .map_err(|_| DeviceError::InvalidDeviceId)?;
-
         let buffer = hub
             .buffers
             .get(buffer_id)
             .map_err(|_| BufferAccessError::InvalidBufferId(buffer_id))?;
 
-        #[cfg(feature = "trace")]
-        if let Some(ref mut trace) = *device.trace.lock() {
-            let data_path = trace.make_binary("bin", data);
-            trace.add(trace::Action::WriteBuffer {
-                id: buffer_id,
-                data: data_path,
-                range: offset..offset + data.len() as BufferAddress,
-                queued: false,
-            });
-        }
+        let device = &buffer.device;
 
         device.check_is_valid()?;
         buffer.check_usage(wgt::BufferUsages::MAP_WRITE)?;
-        //assert!(buffer isn't used by the GPU);
+
+        let last_submission = device
+            .lock_life()
+            .get_buffer_latest_submission_index(&buffer);
+
+        if let Some(last_submission) = last_submission {
+            device.wait_for_submit(last_submission)?;
+        }
 
         let snatch_guard = device.snatchable_lock.read();
         let raw_buf = buffer.try_raw(&snatch_guard)?;
@@ -324,11 +284,12 @@ impl Global {
                 .raw()
                 .map_buffer(raw_buf, offset..offset + data.len() as u64)
                 .map_err(DeviceError::from)?;
-            ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), data.len());
+            std::ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), data.len());
             if !mapping.is_coherent {
-                device
-                    .raw()
-                    .flush_mapped_ranges(raw_buf, iter::once(offset..offset + data.len() as u64));
+                device.raw().flush_mapped_ranges(
+                    raw_buf,
+                    std::iter::once(offset..offset + data.len() as u64),
+                );
             }
             device.raw().unmap_buffer(raw_buf);
         }
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 8f9f5022d9..09e609e48e 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -3478,24 +3478,17 @@ impl<A: HalApi> Device<A> {
     pub(crate) fn wait_for_submit(
         &self,
         submission_index: crate::SubmissionIndex,
-    ) -> Result<(), WaitIdleError> {
+    ) -> Result<(), DeviceError> {
         let guard = self.fence.read();
         let fence = guard.as_ref().unwrap();
-        let last_done_index = unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
-                .get_fence_value(fence)
-                .map_err(DeviceError::from)?
-        };
+        let last_done_index = unsafe { self.raw.as_ref().unwrap().get_fence_value(fence)? };
         if last_done_index < submission_index {
             log::info!("Waiting for submission {:?}", submission_index);
             unsafe {
                 self.raw
                     .as_ref()
                     .unwrap()
-                    .wait(fence, submission_index, !0)
-                    .map_err(DeviceError::from)?
+                    .wait(fence, submission_index, !0)?
             };
             drop(guard);
             let closures = self

From 76f9b2f87a24762c45169d532344fb1f0a0b5bd3 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Fri, 12 Jul 2024 15:10:39 +0200
Subject: [PATCH 116/226] use `ManuallyDrop` for `Destroyed{Buffer,Texture}`

---
 wgpu-core/src/resource.rs | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index c3d5c478f5..c2138808ff 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -711,7 +711,7 @@ impl<A: HalApi> Buffer<A> {
             };
 
             queue::TempResource::DestroyedBuffer(DestroyedBuffer {
-                raw: Some(raw),
+                raw: ManuallyDrop::new(raw),
                 device: Arc::clone(&self.device),
                 label: self.label().to_owned(),
                 bind_groups,
@@ -761,7 +761,7 @@ crate::impl_trackable!(Buffer);
 /// A buffer that has been marked as destroyed and is staged for actual deletion soon.
 #[derive(Debug)]
 pub struct DestroyedBuffer<A: HalApi> {
-    raw: Option<A::Buffer>,
+    raw: ManuallyDrop<A::Buffer>,
     device: Arc<Device<A>>,
     label: String,
     bind_groups: Vec<Weak<BindGroup<A>>>,
@@ -781,13 +781,12 @@ impl<A: HalApi> Drop for DestroyedBuffer<A> {
         }
         drop(deferred);
 
-        if let Some(raw) = self.raw.take() {
-            resource_log!("Destroy raw Buffer (destroyed) {:?}", self.label());
-
-            unsafe {
-                use hal::Device;
-                self.device.raw().destroy_buffer(raw);
-            }
+        resource_log!("Destroy raw Buffer (destroyed) {:?}", self.label());
+        // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
+        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
+        unsafe {
+            use hal::Device;
+            self.device.raw().destroy_buffer(raw);
         }
     }
 }
@@ -1174,7 +1173,7 @@ impl<A: HalApi> Texture<A> {
             };
 
             queue::TempResource::DestroyedTexture(DestroyedTexture {
-                raw: Some(raw),
+                raw: ManuallyDrop::new(raw),
                 views,
                 bind_groups,
                 device: Arc::clone(&self.device),
@@ -1363,7 +1362,7 @@ impl Global {
 /// A texture that has been marked as destroyed and is staged for actual deletion soon.
 #[derive(Debug)]
 pub struct DestroyedTexture<A: HalApi> {
-    raw: Option<A::Texture>,
+    raw: ManuallyDrop<A::Texture>,
     views: Vec<Weak<TextureView<A>>>,
     bind_groups: Vec<Weak<BindGroup<A>>>,
     device: Arc<Device<A>>,
@@ -1389,13 +1388,12 @@ impl<A: HalApi> Drop for DestroyedTexture<A> {
         }
         drop(deferred);
 
-        if let Some(raw) = self.raw.take() {
-            resource_log!("Destroy raw Texture (destroyed) {:?}", self.label());
-
-            unsafe {
-                use hal::Device;
-                self.device.raw().destroy_texture(raw);
-            }
+        resource_log!("Destroy raw Texture (destroyed) {:?}", self.label());
+        // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
+        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
+        unsafe {
+            use hal::Device;
+            self.device.raw().destroy_texture(raw);
         }
     }
 }

From 6351a75b0cd9ec26948ca4934a5316fd781c567e Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Thu, 18 Jul 2024 15:34:56 +0200
Subject: [PATCH 117/226] remove implemented TODO

---
 wgpu-core/src/instance.rs | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index 1b65b0c9bb..9ddbaae2d5 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -321,10 +321,6 @@ impl<A: HalApi> Adapter<A> {
             );
         }
 
-        if let Some(_) = desc.label {
-            //TODO
-        }
-
         if let Some(failed) = check_limits(&desc.required_limits, &caps.limits).pop() {
             return Err(RequestDeviceError::LimitsExceeded(failed));
         }

From bfad205cf55fc8e28098deb95529803da314ffcf Mon Sep 17 00:00:00 2001
From: AthosOfAthos <athos@usinternet.com>
Date: Tue, 30 Jul 2024 03:58:38 -0500
Subject: [PATCH 118/226] =?UTF-8?q?Reduced=20verbosity=20for=20'Device::ma?=
 =?UTF-8?q?intain:=20waiting=20for=20submission=20index=E2=80=A6=20(#6044)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 wgpu-core/src/device/resource.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 09e609e48e..045eccfbc2 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -443,7 +443,7 @@ impl<A: HalApi> Device<A> {
                     .map_err(DeviceError::from)?
             };
         }
-        log::info!("Device::maintain: waiting for submission index {submission_index}");
+        log::trace!("Device::maintain: waiting for submission index {submission_index}");
 
         let mut life_tracker = self.lock_life();
         let submission_closures =

From 7ff80d65fcf099afa4b87a953487805bfdd058e5 Mon Sep 17 00:00:00 2001
From: Jim Blandy <jimb@red-bean.com>
Date: Tue, 30 Jul 2024 14:59:32 -0700
Subject: [PATCH 119/226] [naga] Use cfg aliases to enable
 `naga::back::continue_forward`.

Rather than `feature = "blah"`, use the new `cfg` identifiers
introduced by the `cfg_aliases` invocation in `naga/build.rs` to
decide whether to compile the `naga::back::continue_forward` module,
which is only used by the GLSL and HLSL backends.

The `hlsl_out` `cfg` identifier has a more complex condition than just
`feature = "hlsl-out"`, introduced by #5919.

Fixes #6063.
---
 naga/src/back/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/naga/src/back/mod.rs b/naga/src/back/mod.rs
index 43d88a437d..352adc37ec 100644
--- a/naga/src/back/mod.rs
+++ b/naga/src/back/mod.rs
@@ -19,7 +19,7 @@ pub mod wgsl;
 #[cfg(any(hlsl_out, msl_out, spv_out, glsl_out))]
 pub mod pipeline_constants;
 
-#[cfg(any(feature = "hlsl-out", feature = "glsl-out"))]
+#[cfg(any(hlsl_out, glsl_out))]
 mod continue_forward;
 
 /// Names of vector components.

From 9c6ae1beae2490ce44d99034f7f1faada936f3d8 Mon Sep 17 00:00:00 2001
From: Nicolas Silva <nical@fastmail.com>
Date: Wed, 31 Jul 2024 18:24:23 +0200
Subject: [PATCH 120/226] Log spring cleaning (#6065)

Remove unused logs in wgpu-core and wgpu-hal
---
 CHANGELOG.md                     |  4 ++++
 wgpu-core/src/command/bind.rs    |  2 --
 wgpu-core/src/command/compute.rs |  2 --
 wgpu-core/src/command/mod.rs     |  2 --
 wgpu-core/src/command/render.rs  |  3 ---
 wgpu-core/src/device/life.rs     | 12 +---------
 wgpu-core/src/device/queue.rs    |  3 ---
 wgpu-core/src/device/resource.rs |  8 -------
 wgpu-core/src/present.rs         | 12 ----------
 wgpu-core/src/resource.rs        |  1 -
 wgpu-core/src/storage.rs         |  3 ---
 wgpu-core/src/track/buffer.rs    |  8 +------
 wgpu-core/src/track/texture.rs   | 38 --------------------------------
 wgpu-hal/src/dx12/command.rs     | 26 ----------------------
 wgpu-hal/src/dx12/device.rs      | 35 -----------------------------
 15 files changed, 6 insertions(+), 153 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 81beb96854..a267e6565b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -58,6 +58,10 @@ Bottom level categories:
 - Fix crash when dropping the surface after the device. By @wumpf in [#6052](https://github.com/gfx-rs/wgpu/pull/6052)
 - Fix error message that is thrown in create_render_pass to no longer say `compute_pass`. By @matthew-wong1 [#6041](https://github.com/gfx-rs/wgpu/pull/6041)
 
+### Changes
+
+- Reduce the amount of debug and trace logs emitted by wgpu-core and wgpu-hal. By @nical in [#6065](https://github.com/gfx-rs/wgpu/issues/6065)
+
 ### Dependency Updates
 
 #### GLES
diff --git a/wgpu-core/src/command/bind.rs b/wgpu-core/src/command/bind.rs
index 5e3f249301..7e3d9ce9cd 100644
--- a/wgpu-core/src/command/bind.rs
+++ b/wgpu-core/src/command/bind.rs
@@ -383,8 +383,6 @@ impl<A: HalApi> Binder<A> {
         bind_group: &Arc<BindGroup<A>>,
         offsets: &[wgt::DynamicOffset],
     ) -> &'a [EntryPayload<A>] {
-        log::trace!("\tBinding [{}] = group {}", index, bind_group.error_ident());
-
         let payload = &mut self.payloads[index];
         payload.group = Some(bind_group.clone());
         payload.dynamic_offsets.clear();
diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs
index c92b08e72f..1d751531ac 100644
--- a/wgpu-core/src/command/compute.rs
+++ b/wgpu-core/src/command/compute.rs
@@ -269,8 +269,6 @@ impl<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder, A: HalApi>
                 .set_and_remove_from_usage_scope_sparse(&mut self.scope.buffers, indirect_buffer);
         }
 
-        log::trace!("Encoding dispatch barriers");
-
         CommandBuffer::drain_barriers(
             self.raw_encoder,
             &mut self.intermediate_trackers,
diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs
index e73a5bc0b0..7290330daf 100644
--- a/wgpu-core/src/command/mod.rs
+++ b/wgpu-core/src/command/mod.rs
@@ -506,7 +506,6 @@ impl<A: HalApi> CommandBuffer<A> {
     }
 
     pub(crate) fn extract_baked_commands(&mut self) -> BakedCommands<A> {
-        log::trace!("Extracting BakedCommands from {}", self.error_ident());
         let data = self.data.lock().take().unwrap();
         BakedCommands {
             encoder: data.encoder.raw,
@@ -626,7 +625,6 @@ impl Global {
                             cmd_buf_data.status = CommandEncoderStatus::Finished;
                             //Note: if we want to stop tracking the swapchain texture view,
                             // this is the place to do it.
-                            log::trace!("Command buffer {:?}", encoder_id);
                             None
                         }
                     }
diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index 130c04704c..f42bc02358 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -1600,8 +1600,6 @@ impl Global {
             *status = CommandEncoderStatus::Error;
             encoder.open_pass(hal_label).map_pass_err(pass_scope)?;
 
-            log::trace!("Encoding render pass begin in {}", cmd_buf.error_ident());
-
             let info = RenderPassInfo::start(
                 device,
                 hal_label,
@@ -1907,7 +1905,6 @@ impl Global {
                 }
             }
 
-            log::trace!("Merging renderpass into {}", cmd_buf.error_ident());
             let (trackers, pending_discard_init_fixups) = state
                 .info
                 .finish(state.raw_encoder, state.snatch_guard)
diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index 3696d8abe4..b282775ac0 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -5,7 +5,7 @@ use crate::{
     },
     hal_api::HalApi,
     id,
-    resource::{self, Buffer, Labeled, Texture, Trackable},
+    resource::{self, Buffer, Texture, Trackable},
     snatch::SnatchGuard,
     SubmissionIndex,
 };
@@ -283,7 +283,6 @@ impl<A: HalApi> LifetimeTracker<A> {
 
         let mut work_done_closures: SmallVec<_> = self.work_done_closures.drain(..).collect();
         for a in self.active.drain(..done_count) {
-            log::debug!("Active submission {} is done", a.index);
             self.ready_to_map.extend(a.mapped);
             for encoder in a.encoders {
                 let raw = unsafe { encoder.land() };
@@ -339,12 +338,6 @@ impl<A: HalApi> LifetimeTracker<A> {
                 .rev()
                 .find(|a| a.contains_buffer(&buffer));
 
-            log::trace!(
-                "Mapping of {} at submission {:?}",
-                buffer.error_ident(),
-                submission.as_deref().map(|s| s.index)
-            );
-
             submission
                 .map_or(&mut self.ready_to_map, |a| &mut a.mapped)
                 .push(buffer);
@@ -369,8 +362,6 @@ impl<A: HalApi> LifetimeTracker<A> {
             Vec::with_capacity(self.ready_to_map.len());
 
         for buffer in self.ready_to_map.drain(..) {
-            let tracker_index = buffer.tracker_index();
-
             // This _cannot_ be inlined into the match. If it is, the lock will be held
             // open through the whole match, resulting in a deadlock when we try to re-lock
             // the buffer back to active.
@@ -391,7 +382,6 @@ impl<A: HalApi> LifetimeTracker<A> {
                 _ => panic!("No pending mapping."),
             };
             let status = if pending_mapping.range.start != pending_mapping.range.end {
-                log::debug!("Buffer {tracker_index:?} map state -> Active");
                 let host = pending_mapping.op.host;
                 let size = pending_mapping.range.end - pending_mapping.range.start;
                 match super::map_buffer(
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 625395fdc1..81c9729521 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -1187,7 +1187,6 @@ impl Global {
                                 ))
                                 .map_err(DeviceError::from)?
                         };
-                        log::trace!("Stitching command buffer {:?} before submission", cmb_id);
 
                         //Note: locking the trackers has to be done after the storages
                         let mut trackers = device.trackers.lock();
@@ -1241,8 +1240,6 @@ impl Global {
                             pending_textures: FastHashMap::default(),
                         });
                     }
-
-                    log::trace!("Device after submission {}", submit_index);
                 }
             }
 
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 045eccfbc2..6bafd6844c 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -1230,12 +1230,6 @@ impl<A: HalApi> Device<A> {
             texture.hal_usage & mask_copy & mask_dimension & mask_mip_level
         };
 
-        log::debug!(
-            "Create view for {} filters usages to {:?}",
-            texture.error_ident(),
-            usage
-        );
-
         // use the combined depth-stencil format for the view
         let format = if resolved_format.is_depth_stencil_component(texture.desc.format) {
             texture.desc.format
@@ -2796,7 +2790,6 @@ impl<A: HalApi> Device<A> {
                     .iter()
                     .any(|ct| ct.write_mask != first.write_mask || ct.blend != first.blend)
             } {
-                log::debug!("Color targets: {:?}", color_targets);
                 self.require_downlevel_flags(wgt::DownlevelFlags::INDEPENDENT_BLEND)?;
             }
         }
@@ -3483,7 +3476,6 @@ impl<A: HalApi> Device<A> {
         let fence = guard.as_ref().unwrap();
         let last_done_index = unsafe { self.raw.as_ref().unwrap().get_fence_value(fence)? };
         if last_done_index < submission_index {
-            log::info!("Waiting for submission {:?}", submission_index);
             unsafe {
                 self.raw
                     .as_ref()
diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs
index 7a2200eae1..e22a772680 100644
--- a/wgpu-core/src/present.rs
+++ b/wgpu-core/src/present.rs
@@ -230,7 +230,6 @@ impl Global {
                     .insert_single(&texture, hal::TextureUses::UNINITIALIZED);
 
                 let id = fid.assign(texture);
-                log::debug!("Created CURRENT Surface Texture {:?}", id);
 
                 if present.acquired_texture.is_some() {
                     return Err(SurfaceError::AlreadyAcquired);
@@ -301,10 +300,6 @@ impl Global {
 
             // The texture ID got added to the device tracker by `submit()`,
             // and now we are moving it away.
-            log::debug!(
-                "Removing swapchain texture {:?} from the device tracker",
-                texture_id
-            );
             let texture = hub.textures.unregister(texture_id);
             if let Some(texture) = texture {
                 device
@@ -336,8 +331,6 @@ impl Global {
             }
         };
 
-        log::debug!("Presented. End of Frame");
-
         match result {
             Ok(()) => Ok(Status::Good),
             Err(err) => match err {
@@ -387,11 +380,6 @@ impl Global {
 
             // The texture ID got added to the device tracker by `submit()`,
             // and now we are moving it away.
-            log::debug!(
-                "Removing swapchain texture {:?} from the device tracker",
-                texture_id
-            );
-
             let texture = hub.textures.unregister(texture_id);
 
             if let Some(texture) = texture {
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index c2138808ff..c5871ea3ad 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -617,7 +617,6 @@ impl<A: HalApi> Buffer<A> {
         let device = &self.device;
         let snatch_guard = device.snatchable_lock.read();
         let raw_buf = self.try_raw(&snatch_guard)?;
-        log::debug!("{} map state -> Idle", self.error_ident());
         match mem::replace(&mut *self.map_state.lock(), BufferMapState::Idle) {
             BufferMapState::Init { staging_buffer } => {
                 #[cfg(feature = "trace")]
diff --git a/wgpu-core/src/storage.rs b/wgpu-core/src/storage.rs
index f2875b3542..fda9cbd036 100644
--- a/wgpu-core/src/storage.rs
+++ b/wgpu-core/src/storage.rs
@@ -119,13 +119,11 @@ where
     }
 
     pub(crate) fn insert(&mut self, id: Id<T::Marker>, value: Arc<T>) {
-        log::trace!("User is inserting {}{:?}", T::TYPE, id);
         let (index, epoch, _backend) = id.unzip();
         self.insert_impl(index as usize, epoch, Element::Occupied(value, epoch))
     }
 
     pub(crate) fn insert_error(&mut self, id: Id<T::Marker>) {
-        log::trace!("User is inserting as error {}{:?}", T::TYPE, id);
         let (index, epoch, _) = id.unzip();
         self.insert_impl(index as usize, epoch, Element::Error(epoch))
     }
@@ -143,7 +141,6 @@ where
     }
 
     pub(crate) fn remove(&mut self, id: Id<T::Marker>) -> Option<Arc<T>> {
-        log::trace!("User is removing {}{:?}", T::TYPE, id);
         let (index, epoch, _) = id.unzip();
         match std::mem::replace(&mut self.map[index as usize], Element::Vacant) {
             Element::Occupied(value, storage_epoch) => {
diff --git a/wgpu-core/src/track/buffer.rs b/wgpu-core/src/track/buffer.rs
index ed95f9ce8a..4920cc4cc5 100644
--- a/wgpu-core/src/track/buffer.rs
+++ b/wgpu-core/src/track/buffer.rs
@@ -735,8 +735,6 @@ unsafe fn insert<T: Clone>(
     strict_assert_eq!(invalid_resource_state(new_start_state), false);
     strict_assert_eq!(invalid_resource_state(new_end_state), false);
 
-    log::trace!("\tbuf {index}: insert {new_start_state:?}..{new_end_state:?}");
-
     unsafe {
         if let Some(&mut ref mut start_state) = start_states {
             *start_state.get_unchecked_mut(index) = new_start_state;
@@ -751,7 +749,7 @@ unsafe fn insert<T: Clone>(
 #[inline(always)]
 unsafe fn merge<A: HalApi>(
     current_states: &mut [BufferUses],
-    index32: u32,
+    _index32: u32,
     index: usize,
     state_provider: BufferStateProvider<'_>,
     metadata_provider: ResourceMetadataProvider<'_, Arc<Buffer<A>>>,
@@ -769,8 +767,6 @@ unsafe fn merge<A: HalApi>(
         ));
     }
 
-    log::trace!("\tbuf {index32}: merge {current_state:?} + {new_state:?}");
-
     *current_state = merged_state;
 
     Ok(())
@@ -795,8 +791,6 @@ unsafe fn barrier(
         selector: (),
         usage: current_state..new_state,
     });
-
-    log::trace!("\tbuf {index}: transition {current_state:?} -> {new_state:?}");
 }
 
 #[inline(always)]
diff --git a/wgpu-core/src/track/texture.rs b/wgpu-core/src/track/texture.rs
index 73321687cb..592c9a6c82 100644
--- a/wgpu-core/src/track/texture.rs
+++ b/wgpu-core/src/track/texture.rs
@@ -1124,8 +1124,6 @@ unsafe fn insert<T: Clone>(
             // check that resource states don't have any conflicts.
             strict_assert_eq!(invalid_resource_state(state), false);
 
-            log::trace!("\ttex {index}: insert start {state:?}");
-
             if let Some(start_state) = start_state {
                 unsafe { *start_state.simple.get_unchecked_mut(index) = state };
             }
@@ -1141,8 +1139,6 @@ unsafe fn insert<T: Clone>(
             let complex =
                 unsafe { ComplexTextureState::from_selector_state_iter(full_range, state_iter) };
 
-            log::trace!("\ttex {index}: insert start {complex:?}");
-
             if let Some(start_state) = start_state {
                 unsafe { *start_state.simple.get_unchecked_mut(index) = TextureUses::COMPLEX };
                 start_state.complex.insert(index, complex.clone());
@@ -1163,8 +1159,6 @@ unsafe fn insert<T: Clone>(
                 // check that resource states don't have any conflicts.
                 strict_assert_eq!(invalid_resource_state(state), false);
 
-                log::trace!("\ttex {index}: insert end {state:?}");
-
                 // We only need to insert into the end, as there is guaranteed to be
                 // a start state provider.
                 unsafe { *end_state.simple.get_unchecked_mut(index) = state };
@@ -1176,8 +1170,6 @@ unsafe fn insert<T: Clone>(
                     ComplexTextureState::from_selector_state_iter(full_range, state_iter)
                 };
 
-                log::trace!("\ttex {index}: insert end {complex:?}");
-
                 // We only need to insert into the end, as there is guaranteed to be
                 // a start state provider.
                 unsafe { *end_state.simple.get_unchecked_mut(index) = TextureUses::COMPLEX };
@@ -1215,8 +1207,6 @@ unsafe fn merge<A: HalApi>(
         (SingleOrManyStates::Single(current_simple), SingleOrManyStates::Single(new_simple)) => {
             let merged_state = *current_simple | new_simple;
 
-            log::trace!("\ttex {index}: merge simple {current_simple:?} + {new_simple:?}");
-
             if invalid_resource_state(merged_state) {
                 return Err(ResourceUsageCompatibilityError::from_texture(
                     unsafe { metadata_provider.get(index) },
@@ -1242,8 +1232,6 @@ unsafe fn merge<A: HalApi>(
             for (selector, new_state) in new_many {
                 let merged_state = *current_simple | new_state;
 
-                log::trace!("\ttex {index}: merge {selector:?} {current_simple:?} + {new_state:?}");
-
                 if invalid_resource_state(merged_state) {
                     return Err(ResourceUsageCompatibilityError::from_texture(
                         unsafe { metadata_provider.get(index) },
@@ -1280,11 +1268,6 @@ unsafe fn merge<A: HalApi>(
                     // simple states are never unknown.
                     let merged_state = merged_state - TextureUses::UNKNOWN;
 
-                    log::trace!(
-                        "\ttex {index}: merge mip {mip_id} layers {layers:?} \
-                         {current_layer_state:?} + {new_simple:?}"
-                    );
-
                     if invalid_resource_state(merged_state) {
                         return Err(ResourceUsageCompatibilityError::from_texture(
                             unsafe { metadata_provider.get(index) },
@@ -1321,11 +1304,6 @@ unsafe fn merge<A: HalApi>(
                             continue;
                         }
 
-                        log::trace!(
-                            "\ttex {index}: merge mip {mip_id} layers {layers:?} \
-                             {current_layer_state:?} + {new_state:?}"
-                        );
-
                         if invalid_resource_state(merged_state) {
                             return Err(ResourceUsageCompatibilityError::from_texture(
                                 unsafe { metadata_provider.get(index) },
@@ -1373,8 +1351,6 @@ unsafe fn barrier(
                 return;
             }
 
-            log::trace!("\ttex {index}: transition simple {current_simple:?} -> {new_simple:?}");
-
             barriers.push(PendingTransition {
                 id: index as _,
                 selector: texture_selector.clone(),
@@ -1391,10 +1367,6 @@ unsafe fn barrier(
                     continue;
                 }
 
-                log::trace!(
-                    "\ttex {index}: transition {selector:?} {current_simple:?} -> {new_state:?}"
-                );
-
                 barriers.push(PendingTransition {
                     id: index as _,
                     selector,
@@ -1415,11 +1387,6 @@ unsafe fn barrier(
                         continue;
                     }
 
-                    log::trace!(
-                        "\ttex {index}: transition mip {mip_id} layers {layers:?} \
-                         {current_layer_state:?} -> {new_simple:?}"
-                    );
-
                     barriers.push(PendingTransition {
                         id: index as _,
                         selector: TextureSelector {
@@ -1449,11 +1416,6 @@ unsafe fn barrier(
                             continue;
                         }
 
-                        log::trace!(
-                            "\ttex {index}: transition mip {mip_id} layers {layers:?} \
-                            {current_layer_state:?} -> {new_state:?}"
-                        );
-
                         barriers.push(PendingTransition {
                             id: index as _,
                             selector: TextureSelector {
diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs
index fbaa956dfb..5e05a3bcf5 100644
--- a/wgpu-hal/src/dx12/command.rs
+++ b/wgpu-hal/src/dx12/command.rs
@@ -220,7 +220,6 @@ impl super::CommandEncoder {
     }
 
     fn reset_signature(&mut self, layout: &super::PipelineLayoutShared) {
-        log::trace!("Reset signature {:?}", layout.signature);
         if let Some(root_index) = layout.special_constants_root_index {
             self.pass.root_elements[root_index as usize] =
                 super::RootElement::SpecialConstantBuffer {
@@ -315,17 +314,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
     {
         self.temp.barriers.clear();
 
-        log::trace!(
-            "List {:p} buffer transitions",
-            self.list.as_ref().unwrap().as_ptr()
-        );
         for barrier in barriers {
-            log::trace!(
-                "\t{:p}: usage {:?}..{:?}",
-                barrier.buffer.resource.as_ptr(),
-                barrier.usage.start,
-                barrier.usage.end
-            );
             let s0 = conv::map_buffer_usage_to_state(barrier.usage.start);
             let s1 = conv::map_buffer_usage_to_state(barrier.usage.end);
             if s0 != s1 {
@@ -374,18 +363,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
     {
         self.temp.barriers.clear();
 
-        log::trace!(
-            "List {:p} texture transitions",
-            self.list.as_ref().unwrap().as_ptr()
-        );
         for barrier in barriers {
-            log::trace!(
-                "\t{:p}: usage {:?}..{:?}, range {:?}",
-                barrier.texture.resource.as_ptr(),
-                barrier.usage.start,
-                barrier.usage.end,
-                barrier.range
-            );
             let s0 = conv::map_texture_usage_to_state(barrier.usage.start);
             let s1 = conv::map_texture_usage_to_state(barrier.usage.end);
             if s0 != s1 {
@@ -879,13 +857,11 @@ impl crate::CommandEncoder for super::CommandEncoder {
         group: &super::BindGroup,
         dynamic_offsets: &[wgt::DynamicOffset],
     ) {
-        log::trace!("Set group[{}]", index);
         let info = &layout.bind_group_infos[index as usize];
         let mut root_index = info.base_root_index as usize;
 
         // Bind CBV/SRC/UAV descriptor tables
         if info.tables.contains(super::TableTypes::SRV_CBV_UAV) {
-            log::trace!("\tBind element[{}] = view", root_index);
             self.pass.root_elements[root_index] =
                 super::RootElement::Table(group.handle_views.unwrap().gpu);
             root_index += 1;
@@ -893,7 +869,6 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
         // Bind Sampler descriptor tables.
         if info.tables.contains(super::TableTypes::SAMPLERS) {
-            log::trace!("\tBind element[{}] = sampler", root_index);
             self.pass.root_elements[root_index] =
                 super::RootElement::Table(group.handle_samplers.unwrap().gpu);
             root_index += 1;
@@ -906,7 +881,6 @@ impl crate::CommandEncoder for super::CommandEncoder {
             .zip(group.dynamic_buffers.iter())
             .zip(dynamic_offsets)
         {
-            log::trace!("\tBind element[{}] = dynamic", root_index);
             self.pass.root_elements[root_index] = super::RootElement::DynamicOffsetBuffer {
                 kind,
                 address: gpu_base + offset as d3d12::GpuAddress,
diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index fa3e828fba..8012086a90 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -196,7 +196,6 @@ impl super::Device {
         }
 
         let value = cur_value + 1;
-        log::debug!("Waiting for idle with value {}", value);
         self.present_queue.signal(&self.idler.fence, value);
         let hr = self
             .idler
@@ -817,11 +816,6 @@ impl crate::Device for super::Device {
             }
         }
 
-        log::debug!(
-            "Creating Root Signature '{}'",
-            desc.label.unwrap_or_default()
-        );
-
         let mut binding_map = hlsl::BindingMap::default();
         let (mut bind_cbv, mut bind_srv, mut bind_uav, mut bind_sampler) = (
             hlsl::BindTarget::default(),
@@ -844,11 +838,6 @@ impl crate::Device for super::Device {
         if pc_start != u32::MAX && pc_end != u32::MIN {
             let parameter_index = parameters.len();
             let size = (pc_end - pc_start) / 4;
-            log::debug!(
-                "\tParam[{}] = push constant (count = {})",
-                parameter_index,
-                size,
-            );
             parameters.push(d3d12::RootParameter::constants(
                 d3d12::ShaderVisibility::All,
                 native_binding(&bind_cbv),
@@ -942,12 +931,6 @@ impl crate::Device for super::Device {
                 bt.register += entry.count.map(NonZeroU32::get).unwrap_or(1);
             }
             if ranges.len() > range_base {
-                log::debug!(
-                    "\tParam[{}] = views (vis = {:?}, count = {})",
-                    parameters.len(),
-                    visibility_view_static,
-                    ranges.len() - range_base,
-                );
                 parameters.push(d3d12::RootParameter::descriptor_table(
                     conv::map_visibility(visibility_view_static),
                     &ranges[range_base..],
@@ -981,12 +964,6 @@ impl crate::Device for super::Device {
                 bind_sampler.register += entry.count.map(NonZeroU32::get).unwrap_or(1);
             }
             if ranges.len() > range_base {
-                log::debug!(
-                    "\tParam[{}] = samplers (vis = {:?}, count = {})",
-                    parameters.len(),
-                    visibility_sampler,
-                    ranges.len() - range_base,
-                );
                 parameters.push(d3d12::RootParameter::descriptor_table(
                     conv::map_visibility(visibility_sampler),
                     &ranges[range_base..],
@@ -1036,12 +1013,6 @@ impl crate::Device for super::Device {
                 );
                 info.dynamic_buffers.push(kind);
 
-                log::debug!(
-                    "\tParam[{}] = dynamic {:?} (vis = {:?})",
-                    parameters.len(),
-                    buffer_ty,
-                    dynamic_buffers_visibility,
-                );
                 parameters.push(d3d12::RootParameter::descriptor(
                     parameter_ty,
                     dynamic_buffers_visibility,
@@ -1062,7 +1033,6 @@ impl crate::Device for super::Device {
                 | crate::PipelineLayoutFlags::NUM_WORK_GROUPS,
         ) {
             let parameter_index = parameters.len();
-            log::debug!("\tParam[{}] = special", parameter_index);
             parameters.push(d3d12::RootParameter::constants(
                 d3d12::ShaderVisibility::All, // really needed for VS and CS only
                 native_binding(&bind_cbv),
@@ -1075,9 +1045,6 @@ impl crate::Device for super::Device {
             (None, None)
         };
 
-        log::trace!("{:#?}", parameters);
-        log::trace!("Bindings {:#?}", binding_map);
-
         let (blob, error) = self
             .library
             .serialize_root_signature(
@@ -1105,8 +1072,6 @@ impl crate::Device for super::Device {
             .create_root_signature(blob, 0)
             .into_device_result("Root signature creation")?;
 
-        log::debug!("\traw = {:?}", raw);
-
         if let Some(label) = desc.label {
             let cwstr = conv::map_label(label);
             unsafe { raw.SetName(cwstr.as_ptr()) };

From a4e7a293d7d4ddef86b485f36d0b9f90786275bc Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Fri, 2 Aug 2024 13:07:56 +0200
Subject: [PATCH 121/226] [tests] remove `Arc` around device field of
 `TestingContext`

---
 tests/src/run.rs       | 6 +++---
 tests/tests/encoder.rs | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/src/run.rs b/tests/src/run.rs
index 82c1d34e69..303c4c24af 100644
--- a/tests/src/run.rs
+++ b/tests/src/run.rs
@@ -1,4 +1,4 @@
-use std::{panic::AssertUnwindSafe, sync::Arc};
+use std::panic::AssertUnwindSafe;
 
 use futures_lite::FutureExt;
 use wgpu::{Adapter, Device, Instance, Queue};
@@ -18,7 +18,7 @@ pub struct TestingContext {
     pub adapter: Adapter,
     pub adapter_info: wgpu::AdapterInfo,
     pub adapter_downlevel_capabilities: wgpu::DownlevelCapabilities,
-    pub device: Arc<Device>,
+    pub device: Device,
     pub device_features: wgpu::Features,
     pub device_limits: wgpu::Limits,
     pub queue: Queue,
@@ -73,7 +73,7 @@ pub async fn execute_test(
         adapter,
         adapter_info,
         adapter_downlevel_capabilities,
-        device: Arc::new(device),
+        device,
         device_features: config.params.required_features,
         device_limits: config.params.required_limits.clone(),
         queue,
diff --git a/tests/tests/encoder.rs b/tests/tests/encoder.rs
index 337dffc2d0..e4755dcd74 100644
--- a/tests/tests/encoder.rs
+++ b/tests/tests/encoder.rs
@@ -19,8 +19,8 @@ static DROP_QUEUE_BEFORE_CREATING_COMMAND_ENCODER: GpuTestConfiguration =
         .run_sync(|ctx| {
             // Use the device after the queue is dropped. Currently this panics
             // but it probably shouldn't
-            let device = ctx.device.clone();
-            drop(ctx);
+            let TestingContext { device, queue, .. } = ctx;
+            drop(queue);
             let _encoder =
                 device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
         });

From f19217479dec659f7cbf429460c054b1cc0bebd4 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Fri, 2 Aug 2024 17:52:37 +0200
Subject: [PATCH 122/226] remove `Tracker.add_from_render_bundle`

The render bundle resources are already kept alive by the render bundle itself, so there is no need to add them.
---
 wgpu-core/src/command/render.rs  |  5 +----
 wgpu-core/src/track/mod.rs       | 21 ---------------------
 wgpu-core/src/track/stateless.rs | 24 ------------------------
 3 files changed, 1 insertion(+), 49 deletions(-)

diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index f42bc02358..251daa7d15 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -2716,9 +2716,7 @@ fn execute_bundle<A: HalApi>(
 ) -> Result<(), RenderPassErrorInner> {
     api_log!("RenderPass::execute_bundle {}", bundle.error_ident());
 
-    // Have to clone the bundle arc, otherwise we keep a mutable reference to the bundle
-    // while later trying to add the bundle's resources to the tracker.
-    let bundle = state.tracker.bundles.insert_single(bundle).clone();
+    let bundle = state.tracker.bundles.insert_single(bundle);
 
     bundle.same_device_as(cmd_buf.as_ref())?;
 
@@ -2769,7 +2767,6 @@ fn execute_bundle<A: HalApi>(
 
     unsafe {
         state.info.usage_scope.merge_render_bundle(&bundle.used)?;
-        state.tracker.add_from_render_bundle(&bundle.used)?;
     };
     state.reset_bundle();
     Ok(())
diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs
index be3534cdfb..5fcebb5784 100644
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -681,25 +681,4 @@ impl<A: HalApi> Tracker<A> {
                 .set_and_remove_from_usage_scope_sparse(&mut scope.textures, &bind_group.textures)
         };
     }
-
-    /// Tracks the stateless resources from the given renderbundle. It is expected
-    /// that the stateful resources will get merged into a usage scope first.
-    ///
-    /// # Safety
-    ///
-    /// The maximum ID given by each bind group resource must be less than the
-    /// value given to `set_size`
-    pub unsafe fn add_from_render_bundle(
-        &mut self,
-        render_bundle: &RenderBundleScope<A>,
-    ) -> Result<(), ResourceUsageCompatibilityError> {
-        self.bind_groups
-            .add_from_tracker(&*render_bundle.bind_groups.read());
-        self.render_pipelines
-            .add_from_tracker(&*render_bundle.render_pipelines.read());
-        self.query_sets
-            .add_from_tracker(&*render_bundle.query_sets.read());
-
-        Ok(())
-    }
 }
diff --git a/wgpu-core/src/track/stateless.rs b/wgpu-core/src/track/stateless.rs
index 91cdc0fa36..b2de45363e 100644
--- a/wgpu-core/src/track/stateless.rs
+++ b/wgpu-core/src/track/stateless.rs
@@ -90,28 +90,4 @@ impl<T: Trackable> StatelessTracker<T> {
 
         unsafe { self.metadata.insert(index, resource) }
     }
-
-    /// Adds the given resources from the given tracker.
-    ///
-    /// If the ID is higher than the length of internal vectors,
-    /// the vectors will be extended. A call to set_size is not needed.
-    pub fn add_from_tracker(&mut self, other: &Self) {
-        let incoming_size = other.metadata.size();
-        if incoming_size > self.metadata.size() {
-            self.set_size(incoming_size);
-        }
-
-        for index in other.metadata.owned_indices() {
-            self.tracker_assert_in_bounds(index);
-            other.tracker_assert_in_bounds(index);
-            unsafe {
-                let previously_owned = self.metadata.contains_unchecked(index);
-
-                if !previously_owned {
-                    let other_resource = other.metadata.get_resource_unchecked(index);
-                    self.metadata.insert(index, other_resource.clone());
-                }
-            }
-        }
-    }
 }

From 14170fd963c66f07ed136fb8374f0368cc9163a9 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Fri, 2 Aug 2024 18:26:58 +0200
Subject: [PATCH 123/226] remove unused `RenderBundleScope.query_sets`

---
 wgpu-core/src/command/bundle.rs | 5 -----
 wgpu-core/src/lock/rank.rs      | 1 -
 wgpu-core/src/track/mod.rs      | 5 -----
 3 files changed, 11 deletions(-)

diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs
index 20ff40efef..6cf6920255 100644
--- a/wgpu-core/src/command/bundle.rs
+++ b/wgpu-core/src/command/bundle.rs
@@ -390,11 +390,6 @@ impl RenderBundleEncoder {
             .render_pipelines
             .write()
             .set_size(indices.render_pipelines.size());
-        state
-            .trackers
-            .query_sets
-            .write()
-            .set_size(indices.query_sets.size());
 
         let base = &self.base;
 
diff --git a/wgpu-core/src/lock/rank.rs b/wgpu-core/src/lock/rank.rs
index f960b3c028..f109fe1a88 100644
--- a/wgpu-core/src/lock/rank.rs
+++ b/wgpu-core/src/lock/rank.rs
@@ -140,7 +140,6 @@ define_lock_ranks! {
     rank RENDER_BUNDLE_SCOPE_TEXTURES "RenderBundleScope::textures" followed by { }
     rank RENDER_BUNDLE_SCOPE_BIND_GROUPS "RenderBundleScope::bind_groups" followed by { }
     rank RENDER_BUNDLE_SCOPE_RENDER_PIPELINES "RenderBundleScope::render_pipelines" followed by { }
-    rank RENDER_BUNDLE_SCOPE_QUERY_SETS "RenderBundleScope::query_sets" followed by { }
     rank RESOURCE_POOL_INNER "ResourcePool::inner" followed by { }
     rank SHARED_TRACKER_INDEX_ALLOCATOR_INNER "SharedTrackerIndexAllocator::inner" followed by { }
     rank STATELESS_BIND_GROUP_STATE_RESOURCES "StatelessBindGroupState::resources" followed by { }
diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs
index 5fcebb5784..5f6fb1a188 100644
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -459,7 +459,6 @@ pub(crate) struct RenderBundleScope<A: HalApi> {
     // Don't need to track views and samplers, they are never used directly, only by bind groups.
     pub bind_groups: RwLock<StatelessTracker<binding_model::BindGroup<A>>>,
     pub render_pipelines: RwLock<StatelessTracker<pipeline::RenderPipeline<A>>>,
-    pub query_sets: RwLock<StatelessTracker<resource::QuerySet<A>>>,
 }
 
 impl<A: HalApi> RenderBundleScope<A> {
@@ -482,10 +481,6 @@ impl<A: HalApi> RenderBundleScope<A> {
                 rank::RENDER_BUNDLE_SCOPE_RENDER_PIPELINES,
                 StatelessTracker::new(),
             ),
-            query_sets: RwLock::new(
-                rank::RENDER_BUNDLE_SCOPE_QUERY_SETS,
-                StatelessTracker::new(),
-            ),
         }
     }
 

From 3a5ad193db8eb344cfa8e1e5cc58c0cf446c3903 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Fri, 2 Aug 2024 18:42:10 +0200
Subject: [PATCH 124/226] remove all internal `BindGroupState` `Mutex`es

---
 wgpu-core/src/device/resource.rs |  7 ++++---
 wgpu-core/src/lock/rank.rs       |  9 ---------
 wgpu-core/src/track/buffer.rs    | 26 ++++++++++----------------
 wgpu-core/src/track/stateless.rs | 21 ++++++++-------------
 wgpu-core/src/track/texture.rs   | 22 +++++++++-------------
 5 files changed, 31 insertions(+), 54 deletions(-)

diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 6bafd6844c..581c520094 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -1998,7 +1998,7 @@ impl<A: HalApi> Device<A> {
 
     fn create_sampler_binding<'a>(
         self: &Arc<Self>,
-        used: &BindGroupStates<A>,
+        used: &mut BindGroupStates<A>,
         binding: u32,
         decl: &wgt::BindGroupLayoutEntry,
         sampler: &'a Arc<Sampler<A>>,
@@ -2177,7 +2177,7 @@ impl<A: HalApi> Device<A> {
                     (res_index, num_bindings)
                 }
                 Br::Sampler(ref sampler) => {
-                    let sampler = self.create_sampler_binding(&used, binding, decl, sampler)?;
+                    let sampler = self.create_sampler_binding(&mut used, binding, decl, sampler)?;
 
                     let res_index = hal_samplers.len();
                     hal_samplers.push(sampler);
@@ -2189,7 +2189,8 @@ impl<A: HalApi> Device<A> {
 
                     let res_index = hal_samplers.len();
                     for sampler in samplers.iter() {
-                        let sampler = self.create_sampler_binding(&used, binding, decl, sampler)?;
+                        let sampler =
+                            self.create_sampler_binding(&mut used, binding, decl, sampler)?;
 
                         hal_samplers.push(sampler);
                     }
diff --git a/wgpu-core/src/lock/rank.rs b/wgpu-core/src/lock/rank.rs
index f109fe1a88..b4b5e27489 100644
--- a/wgpu-core/src/lock/rank.rs
+++ b/wgpu-core/src/lock/rank.rs
@@ -91,18 +91,12 @@ define_lock_ranks! {
         DEVICE_SNATCHABLE_LOCK,
         DEVICE_USAGE_SCOPES,
         SHARED_TRACKER_INDEX_ALLOCATOR_INNER,
-        BUFFER_BIND_GROUP_STATE_BUFFERS,
-        TEXTURE_BIND_GROUP_STATE_TEXTURES,
         BUFFER_MAP_STATE,
-        STATELESS_BIND_GROUP_STATE_RESOURCES,
     }
     rank DEVICE_SNATCHABLE_LOCK "Device::snatchable_lock" followed by {
         SHARED_TRACKER_INDEX_ALLOCATOR_INNER,
         DEVICE_TRACE,
         BUFFER_MAP_STATE,
-        BUFFER_BIND_GROUP_STATE_BUFFERS,
-        TEXTURE_BIND_GROUP_STATE_TEXTURES,
-        STATELESS_BIND_GROUP_STATE_RESOURCES,
         // Uncomment this to see an interesting cycle.
         // COMMAND_BUFFER_DATA,
     }
@@ -125,7 +119,6 @@ define_lock_ranks! {
     }
 
     rank BUFFER_BIND_GROUPS "Buffer::bind_groups" followed by { }
-    rank BUFFER_BIND_GROUP_STATE_BUFFERS "BufferBindGroupState::buffers" followed by { }
     rank BUFFER_INITIALIZATION_STATUS "Buffer::initialization_status" followed by { }
     rank BUFFER_SYNC_MAPPED_WRITES "Buffer::sync_mapped_writes" followed by { }
     rank DEVICE_DEFERRED_DESTROY "Device::deferred_destroy" followed by { }
@@ -142,10 +135,8 @@ define_lock_ranks! {
     rank RENDER_BUNDLE_SCOPE_RENDER_PIPELINES "RenderBundleScope::render_pipelines" followed by { }
     rank RESOURCE_POOL_INNER "ResourcePool::inner" followed by { }
     rank SHARED_TRACKER_INDEX_ALLOCATOR_INNER "SharedTrackerIndexAllocator::inner" followed by { }
-    rank STATELESS_BIND_GROUP_STATE_RESOURCES "StatelessBindGroupState::resources" followed by { }
     rank SURFACE_PRESENTATION "Surface::presentation" followed by { }
     rank TEXTURE_BIND_GROUPS "Texture::bind_groups" followed by { }
-    rank TEXTURE_BIND_GROUP_STATE_TEXTURES "TextureBindGroupState::textures" followed by { }
     rank TEXTURE_INITIALIZATION_STATUS "Texture::initialization_status" followed by { }
     rank TEXTURE_CLEAR_MODE "Texture::clear_mode" followed by { }
     rank TEXTURE_VIEWS "Texture::views" followed by { }
diff --git a/wgpu-core/src/track/buffer.rs b/wgpu-core/src/track/buffer.rs
index 4920cc4cc5..b0bcdca03e 100644
--- a/wgpu-core/src/track/buffer.rs
+++ b/wgpu-core/src/track/buffer.rs
@@ -9,7 +9,6 @@ use std::sync::{Arc, Weak};
 use super::{PendingTransition, TrackerIndex};
 use crate::{
     hal_api::HalApi,
-    lock::{rank, Mutex},
     resource::{Buffer, Trackable},
     snatch::SnatchGuard,
     track::{
@@ -41,12 +40,12 @@ impl ResourceUses for BufferUses {
 /// Stores all the buffers that a bind group stores.
 #[derive(Debug)]
 pub(crate) struct BufferBindGroupState<A: HalApi> {
-    buffers: Mutex<Vec<(Arc<Buffer<A>>, BufferUses)>>,
+    buffers: Vec<(Arc<Buffer<A>>, BufferUses)>,
 }
 impl<A: HalApi> BufferBindGroupState<A> {
     pub fn new() -> Self {
         Self {
-            buffers: Mutex::new(rank::BUFFER_BIND_GROUP_STATE_BUFFERS, Vec::new()),
+            buffers: Vec::new(),
         }
     }
 
@@ -54,27 +53,23 @@ impl<A: HalApi> BufferBindGroupState<A> {
     ///
     /// When this list of states is merged into a tracker, the memory
     /// accesses will be in a constant ascending order.
-    #[allow(clippy::pattern_type_mismatch)]
-    pub(crate) fn optimize(&self) {
-        let mut buffers = self.buffers.lock();
-        buffers.sort_unstable_by_key(|(b, _)| b.tracker_index());
+    pub(crate) fn optimize(&mut self) {
+        self.buffers
+            .sort_unstable_by_key(|(b, _)| b.tracker_index());
     }
 
     /// Returns a list of all buffers tracked. May contain duplicates.
-    #[allow(clippy::pattern_type_mismatch)]
     pub fn used_tracker_indices(&self) -> impl Iterator<Item = TrackerIndex> + '_ {
-        let buffers = self.buffers.lock();
-        buffers
+        self.buffers
             .iter()
-            .map(|(ref b, _)| b.tracker_index())
+            .map(|(b, _)| b.tracker_index())
             .collect::<Vec<_>>()
             .into_iter()
     }
 
     /// Adds the given resource with the given state.
-    pub fn add_single(&self, buffer: &Arc<Buffer<A>>, state: BufferUses) {
-        let mut buffers = self.buffers.lock();
-        buffers.push((buffer.clone(), state));
+    pub fn add_single(&mut self, buffer: &Arc<Buffer<A>>, state: BufferUses) {
+        self.buffers.push((buffer.clone(), state));
     }
 }
 
@@ -136,8 +131,7 @@ impl<A: HalApi> BufferUsageScope<A> {
         &mut self,
         bind_group: &BufferBindGroupState<A>,
     ) -> Result<(), ResourceUsageCompatibilityError> {
-        let buffers = bind_group.buffers.lock();
-        for &(ref resource, state) in &*buffers {
+        for &(ref resource, state) in bind_group.buffers.iter() {
             let index = resource.tracker_index().as_usize();
 
             unsafe {
diff --git a/wgpu-core/src/track/stateless.rs b/wgpu-core/src/track/stateless.rs
index b2de45363e..3899e45ac6 100644
--- a/wgpu-core/src/track/stateless.rs
+++ b/wgpu-core/src/track/stateless.rs
@@ -5,22 +5,18 @@
 
 use std::sync::Arc;
 
-use crate::{
-    lock::{rank, Mutex},
-    resource::Trackable,
-    track::ResourceMetadata,
-};
+use crate::{resource::Trackable, track::ResourceMetadata};
 
 /// Stores all the resources that a bind group stores.
 #[derive(Debug)]
 pub(crate) struct StatelessBindGroupState<T: Trackable> {
-    resources: Mutex<Vec<Arc<T>>>,
+    resources: Vec<Arc<T>>,
 }
 
 impl<T: Trackable> StatelessBindGroupState<T> {
     pub fn new() -> Self {
         Self {
-            resources: Mutex::new(rank::STATELESS_BIND_GROUP_STATE_RESOURCES, Vec::new()),
+            resources: Vec::new(),
         }
     }
 
@@ -28,15 +24,14 @@ impl<T: Trackable> StatelessBindGroupState<T> {
     ///
     /// When this list of states is merged into a tracker, the memory
     /// accesses will be in a constant ascending order.
-    pub(crate) fn optimize(&self) {
-        let mut resources = self.resources.lock();
-        resources.sort_unstable_by_key(|resource| resource.tracker_index());
+    pub(crate) fn optimize(&mut self) {
+        self.resources
+            .sort_unstable_by_key(|resource| resource.tracker_index());
     }
 
     /// Adds the given resource.
-    pub fn add_single(&self, resource: &Arc<T>) {
-        let mut resources = self.resources.lock();
-        resources.push(resource.clone());
+    pub fn add_single(&mut self, resource: &Arc<T>) {
+        self.resources.push(resource.clone());
     }
 }
 
diff --git a/wgpu-core/src/track/texture.rs b/wgpu-core/src/track/texture.rs
index 592c9a6c82..77361a10f0 100644
--- a/wgpu-core/src/track/texture.rs
+++ b/wgpu-core/src/track/texture.rs
@@ -21,7 +21,6 @@
 use super::{range::RangedStates, PendingTransition, PendingTransitionList, TrackerIndex};
 use crate::{
     hal_api::HalApi,
-    lock::{rank, Mutex},
     resource::{Texture, TextureInner, Trackable},
     snatch::SnatchGuard,
     track::{
@@ -161,12 +160,12 @@ struct TextureBindGroupStateData<A: HalApi> {
 /// Stores all the textures that a bind group stores.
 #[derive(Debug)]
 pub(crate) struct TextureBindGroupState<A: HalApi> {
-    textures: Mutex<Vec<TextureBindGroupStateData<A>>>,
+    textures: Vec<TextureBindGroupStateData<A>>,
 }
 impl<A: HalApi> TextureBindGroupState<A> {
     pub fn new() -> Self {
         Self {
-            textures: Mutex::new(rank::TEXTURE_BIND_GROUP_STATE_TEXTURES, Vec::new()),
+            textures: Vec::new(),
         }
     }
 
@@ -174,20 +173,19 @@ impl<A: HalApi> TextureBindGroupState<A> {
     ///
     /// When this list of states is merged into a tracker, the memory
     /// accesses will be in a constant ascending order.
-    pub(crate) fn optimize(&self) {
-        let mut textures = self.textures.lock();
-        textures.sort_unstable_by_key(|v| v.texture.tracker_index());
+    pub(crate) fn optimize(&mut self) {
+        self.textures
+            .sort_unstable_by_key(|v| v.texture.tracker_index());
     }
 
     /// Adds the given resource with the given state.
     pub fn add_single(
-        &self,
+        &mut self,
         texture: &Arc<Texture<A>>,
         selector: Option<TextureSelector>,
         state: TextureUses,
     ) {
-        let mut textures = self.textures.lock();
-        textures.push(TextureBindGroupStateData {
+        self.textures.push(TextureBindGroupStateData {
             selector,
             texture: texture.clone(),
             usage: state,
@@ -327,8 +325,7 @@ impl<A: HalApi> TextureUsageScope<A> {
         &mut self,
         bind_group: &TextureBindGroupState<A>,
     ) -> Result<(), ResourceUsageCompatibilityError> {
-        let textures = bind_group.textures.lock();
-        for t in &*textures {
+        for t in bind_group.textures.iter() {
             unsafe { self.merge_single(&t.texture, t.selector.clone(), t.usage)? };
         }
 
@@ -616,8 +613,7 @@ impl<A: HalApi> TextureTracker<A> {
             self.set_size(incoming_size);
         }
 
-        let textures = bind_group_state.textures.lock();
-        for t in textures.iter() {
+        for t in bind_group_state.textures.iter() {
             let index = t.texture.tracker_index().as_usize();
             scope.tracker_assert_in_bounds(index);
 

From a3142ade91a59b3600c1c204a8288b9736e7f04f Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Fri, 2 Aug 2024 18:45:52 +0200
Subject: [PATCH 125/226] don't optimize `BindGroupStates.{views,samplers}`

The resources inside `StatelessBindGroupState` are never merged with any other tracker.
---
 wgpu-core/src/track/mod.rs       | 2 --
 wgpu-core/src/track/stateless.rs | 9 ---------
 2 files changed, 11 deletions(-)

diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs
index 5f6fb1a188..cbeca973b4 100644
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -444,8 +444,6 @@ impl<A: HalApi> BindGroupStates<A> {
     pub fn optimize(&mut self) {
         self.buffers.optimize();
         self.textures.optimize();
-        self.views.optimize();
-        self.samplers.optimize();
     }
 }
 
diff --git a/wgpu-core/src/track/stateless.rs b/wgpu-core/src/track/stateless.rs
index 3899e45ac6..8dfe5333df 100644
--- a/wgpu-core/src/track/stateless.rs
+++ b/wgpu-core/src/track/stateless.rs
@@ -20,15 +20,6 @@ impl<T: Trackable> StatelessBindGroupState<T> {
         }
     }
 
-    /// Optimize the buffer bind group state by sorting it by ID.
-    ///
-    /// When this list of states is merged into a tracker, the memory
-    /// accesses will be in a constant ascending order.
-    pub(crate) fn optimize(&mut self) {
-        self.resources
-            .sort_unstable_by_key(|resource| resource.tracker_index());
-    }
-
     /// Adds the given resource.
     pub fn add_single(&mut self, resource: &Arc<T>) {
         self.resources.push(resource.clone());

From 5cb1be63aa4c650feadac6cf8b127963eb4c5146 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Fri, 2 Aug 2024 19:01:44 +0200
Subject: [PATCH 126/226] refactor the `StatelessTracker` to hold a `Vec` of
 `Arc`s

Also removes the `StatelessBindGroupState` since it does the same thing.
---
 wgpu-core/src/command/bundle.rs  | 10 -----
 wgpu-core/src/command/compute.rs |  9 -----
 wgpu-core/src/command/render.rs  |  7 ----
 wgpu-core/src/device/resource.rs |  4 +-
 wgpu-core/src/track/mod.rs       | 10 ++---
 wgpu-core/src/track/stateless.rs | 67 ++++----------------------------
 6 files changed, 14 insertions(+), 93 deletions(-)

diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs
index 6cf6920255..b0fe21a9e0 100644
--- a/wgpu-core/src/command/bundle.rs
+++ b/wgpu-core/src/command/bundle.rs
@@ -380,16 +380,6 @@ impl RenderBundleEncoder {
             .textures
             .write()
             .set_size(indices.textures.size());
-        state
-            .trackers
-            .bind_groups
-            .write()
-            .set_size(indices.bind_groups.size());
-        state
-            .trackers
-            .render_pipelines
-            .write()
-            .set_size(indices.render_pipelines.size());
 
         let base = &self.base;
 
diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs
index 1d751531ac..643e5ffa63 100644
--- a/wgpu-core/src/command/compute.rs
+++ b/wgpu-core/src/command/compute.rs
@@ -484,15 +484,6 @@ impl Global {
         let indices = &state.device.tracker_indices;
         state.tracker.buffers.set_size(indices.buffers.size());
         state.tracker.textures.set_size(indices.textures.size());
-        state
-            .tracker
-            .bind_groups
-            .set_size(indices.bind_groups.size());
-        state
-            .tracker
-            .compute_pipelines
-            .set_size(indices.compute_pipelines.size());
-        state.tracker.query_sets.set_size(indices.query_sets.size());
 
         let timestamp_writes = if let Some(tw) = timestamp_writes.take() {
             tw.query_set
diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index 251daa7d15..5227d075ee 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -1620,13 +1620,6 @@ impl Global {
             let indices = &device.tracker_indices;
             tracker.buffers.set_size(indices.buffers.size());
             tracker.textures.set_size(indices.textures.size());
-            tracker.views.set_size(indices.texture_views.size());
-            tracker.bind_groups.set_size(indices.bind_groups.size());
-            tracker
-                .render_pipelines
-                .set_size(indices.render_pipelines.size());
-            tracker.bundles.set_size(indices.bundles.size());
-            tracker.query_sets.set_size(indices.query_sets.size());
 
             let raw = &mut encoder.raw;
 
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 581c520094..0072dd9318 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -2005,7 +2005,7 @@ impl<A: HalApi> Device<A> {
     ) -> Result<&'a A::Sampler, binding_model::CreateBindGroupError> {
         use crate::binding_model::CreateBindGroupError as Error;
 
-        used.samplers.add_single(sampler);
+        used.samplers.insert_single(sampler.clone());
 
         sampler.same_device(self)?;
 
@@ -2054,7 +2054,7 @@ impl<A: HalApi> Device<A> {
         used_texture_ranges: &mut Vec<TextureInitTrackerAction<A>>,
         snatch_guard: &'a SnatchGuard<'a>,
     ) -> Result<hal::TextureBinding<'a, A>, binding_model::CreateBindGroupError> {
-        used.views.add_single(view);
+        used.views.insert_single(view.clone());
 
         view.same_device(self)?;
 
diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs
index cbeca973b4..134d4c6954 100644
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -117,7 +117,7 @@ pub(crate) use buffer::{
     BufferBindGroupState, BufferTracker, BufferUsageScope, DeviceBufferTracker,
 };
 use metadata::{ResourceMetadata, ResourceMetadataProvider};
-pub(crate) use stateless::{StatelessBindGroupState, StatelessTracker};
+pub(crate) use stateless::StatelessTracker;
 pub(crate) use texture::{
     DeviceTextureTracker, TextureBindGroupState, TextureSelector, TextureTracker,
     TextureTrackerSetSingle, TextureUsageScope,
@@ -423,8 +423,8 @@ impl<T: ResourceUses> fmt::Display for InvalidUse<T> {
 pub(crate) struct BindGroupStates<A: HalApi> {
     pub buffers: BufferBindGroupState<A>,
     pub textures: TextureBindGroupState<A>,
-    pub views: StatelessBindGroupState<resource::TextureView<A>>,
-    pub samplers: StatelessBindGroupState<resource::Sampler<A>>,
+    pub views: StatelessTracker<resource::TextureView<A>>,
+    pub samplers: StatelessTracker<resource::Sampler<A>>,
 }
 
 impl<A: HalApi> BindGroupStates<A> {
@@ -432,8 +432,8 @@ impl<A: HalApi> BindGroupStates<A> {
         Self {
             buffers: BufferBindGroupState::new(),
             textures: TextureBindGroupState::new(),
-            views: StatelessBindGroupState::new(),
-            samplers: StatelessBindGroupState::new(),
+            views: StatelessTracker::new(),
+            samplers: StatelessTracker::new(),
         }
     }
 
diff --git a/wgpu-core/src/track/stateless.rs b/wgpu-core/src/track/stateless.rs
index 8dfe5333df..d1c2c87dd5 100644
--- a/wgpu-core/src/track/stateless.rs
+++ b/wgpu-core/src/track/stateless.rs
@@ -1,79 +1,26 @@
-//! Stateless Trackers
-//!
-//! Stateless trackers don't have any state, so make no
-//! distinction between a usage scope and a full tracker.
-
 use std::sync::Arc;
 
-use crate::{resource::Trackable, track::ResourceMetadata};
-
-/// Stores all the resources that a bind group stores.
+/// A tracker that holds strong references to resources.
+///
+/// This is only used to keep resources alive.
 #[derive(Debug)]
-pub(crate) struct StatelessBindGroupState<T: Trackable> {
+pub(crate) struct StatelessTracker<T> {
     resources: Vec<Arc<T>>,
 }
 
-impl<T: Trackable> StatelessBindGroupState<T> {
+impl<T> StatelessTracker<T> {
     pub fn new() -> Self {
         Self {
             resources: Vec::new(),
         }
     }
 
-    /// Adds the given resource.
-    pub fn add_single(&mut self, resource: &Arc<T>) {
-        self.resources.push(resource.clone());
-    }
-}
-
-/// Stores all resource state within a command buffer or device.
-#[derive(Debug)]
-pub(crate) struct StatelessTracker<T: Trackable> {
-    metadata: ResourceMetadata<Arc<T>>,
-}
-
-impl<T: Trackable> StatelessTracker<T> {
-    pub fn new() -> Self {
-        Self {
-            metadata: ResourceMetadata::new(),
-        }
-    }
-
-    fn tracker_assert_in_bounds(&self, index: usize) {
-        self.metadata.tracker_assert_in_bounds(index);
-    }
-
-    /// Sets the size of all the vectors inside the tracker.
-    ///
-    /// Must be called with the highest possible Resource ID of this type
-    /// before all unsafe functions are called.
-    pub fn set_size(&mut self, size: usize) {
-        self.metadata.set_size(size);
-    }
-
-    /// Extend the vectors to let the given index be valid.
-    fn allow_index(&mut self, index: usize) {
-        if index >= self.metadata.size() {
-            self.set_size(index + 1);
-        }
-    }
-
     /// Inserts a single resource into the resource tracker.
     ///
-    /// If the resource already exists in the tracker, it will be overwritten.
-    ///
-    /// If the ID is higher than the length of internal vectors,
-    /// the vectors will be extended. A call to set_size is not needed.
-    ///
     /// Returns a reference to the newly inserted resource.
     /// (This allows avoiding a clone/reference count increase in many cases.)
     pub fn insert_single(&mut self, resource: Arc<T>) -> &Arc<T> {
-        let index = resource.tracker_index().as_usize();
-
-        self.allow_index(index);
-
-        self.tracker_assert_in_bounds(index);
-
-        unsafe { self.metadata.insert(index, resource) }
+        self.resources.push(resource);
+        unsafe { self.resources.last().unwrap_unchecked() }
     }
 }

From 826e3716e5fea28a927a1ff2b5e1312cc9e48c95 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Fri, 2 Aug 2024 19:08:52 +0200
Subject: [PATCH 127/226] remove all internal `RenderBundleScope` `RwLock`s

---
 wgpu-core/src/command/bundle.rs | 23 +++--------------
 wgpu-core/src/lock/rank.rs      |  4 ---
 wgpu-core/src/track/mod.rs      | 44 ++++++++++-----------------------
 3 files changed, 17 insertions(+), 54 deletions(-)

diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs
index b0fe21a9e0..542c52b886 100644
--- a/wgpu-core/src/command/bundle.rs
+++ b/wgpu-core/src/command/bundle.rs
@@ -370,16 +370,8 @@ impl RenderBundleEncoder {
         };
 
         let indices = &state.device.tracker_indices;
-        state
-            .trackers
-            .buffers
-            .write()
-            .set_size(indices.buffers.size());
-        state
-            .trackers
-            .textures
-            .write()
-            .set_size(indices.textures.size());
+        state.trackers.buffers.set_size(indices.buffers.size());
+        state.trackers.textures.set_size(indices.textures.size());
 
         let base = &self.base;
 
@@ -626,7 +618,7 @@ fn set_bind_group<A: HalApi>(
 
     state.set_bind_group(index, &bind_group, offsets_range);
     unsafe { state.trackers.merge_bind_group(&bind_group.used)? };
-    state.trackers.bind_groups.write().insert_single(bind_group);
+    state.trackers.bind_groups.insert_single(bind_group);
     // Note: stateless trackers are not merged: the lifetime reference
     // is held to the bind group itself.
     Ok(())
@@ -671,11 +663,7 @@ fn set_pipeline<A: HalApi>(
     state.invalidate_bind_groups(&pipeline_state, &pipeline.layout);
     state.pipeline = Some(pipeline_state);
 
-    state
-        .trackers
-        .render_pipelines
-        .write()
-        .insert_single(pipeline);
+    state.trackers.render_pipelines.insert_single(pipeline);
     Ok(())
 }
 
@@ -694,7 +682,6 @@ fn set_index_buffer<A: HalApi>(
     state
         .trackers
         .buffers
-        .write()
         .merge_single(&buffer, hal::BufferUses::INDEX)?;
 
     buffer.same_device(&state.device)?;
@@ -739,7 +726,6 @@ fn set_vertex_buffer<A: HalApi>(
     state
         .trackers
         .buffers
-        .write()
         .merge_single(&buffer, hal::BufferUses::VERTEX)?;
 
     buffer.same_device(&state.device)?;
@@ -881,7 +867,6 @@ fn multi_draw_indirect<A: HalApi>(
     state
         .trackers
         .buffers
-        .write()
         .merge_single(&buffer, hal::BufferUses::INDIRECT)?;
 
     buffer.same_device(&state.device)?;
diff --git a/wgpu-core/src/lock/rank.rs b/wgpu-core/src/lock/rank.rs
index b4b5e27489..5e9bd37193 100644
--- a/wgpu-core/src/lock/rank.rs
+++ b/wgpu-core/src/lock/rank.rs
@@ -129,10 +129,6 @@ define_lock_ranks! {
     rank DEVICE_USAGE_SCOPES "Device::usage_scopes" followed by { }
     rank IDENTITY_MANAGER_VALUES "IdentityManager::values" followed by { }
     rank REGISTRY_STORAGE "Registry::storage" followed by { }
-    rank RENDER_BUNDLE_SCOPE_BUFFERS "RenderBundleScope::buffers" followed by { }
-    rank RENDER_BUNDLE_SCOPE_TEXTURES "RenderBundleScope::textures" followed by { }
-    rank RENDER_BUNDLE_SCOPE_BIND_GROUPS "RenderBundleScope::bind_groups" followed by { }
-    rank RENDER_BUNDLE_SCOPE_RENDER_PIPELINES "RenderBundleScope::render_pipelines" followed by { }
     rank RESOURCE_POOL_INNER "ResourcePool::inner" followed by { }
     rank SHARED_TRACKER_INDEX_ALLOCATOR_INNER "SharedTrackerIndexAllocator::inner" followed by { }
     rank SURFACE_PRESENTATION "Surface::presentation" followed by { }
diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs
index 134d4c6954..c8b634ed75 100644
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -104,7 +104,7 @@ mod texture;
 use crate::{
     binding_model, command,
     hal_api::HalApi,
-    lock::{rank, Mutex, RwLock},
+    lock::{rank, Mutex},
     pipeline,
     resource::{self, Labeled, ResourceErrorIdent},
     snatch::SnatchGuard,
@@ -452,33 +452,21 @@ impl<A: HalApi> BindGroupStates<A> {
 /// and need to be owned by the render bundles.
 #[derive(Debug)]
 pub(crate) struct RenderBundleScope<A: HalApi> {
-    pub buffers: RwLock<BufferUsageScope<A>>,
-    pub textures: RwLock<TextureUsageScope<A>>,
+    pub buffers: BufferUsageScope<A>,
+    pub textures: TextureUsageScope<A>,
     // Don't need to track views and samplers, they are never used directly, only by bind groups.
-    pub bind_groups: RwLock<StatelessTracker<binding_model::BindGroup<A>>>,
-    pub render_pipelines: RwLock<StatelessTracker<pipeline::RenderPipeline<A>>>,
+    pub bind_groups: StatelessTracker<binding_model::BindGroup<A>>,
+    pub render_pipelines: StatelessTracker<pipeline::RenderPipeline<A>>,
 }
 
 impl<A: HalApi> RenderBundleScope<A> {
     /// Create the render bundle scope and pull the maximum IDs from the hubs.
     pub fn new() -> Self {
         Self {
-            buffers: RwLock::new(
-                rank::RENDER_BUNDLE_SCOPE_BUFFERS,
-                BufferUsageScope::default(),
-            ),
-            textures: RwLock::new(
-                rank::RENDER_BUNDLE_SCOPE_TEXTURES,
-                TextureUsageScope::default(),
-            ),
-            bind_groups: RwLock::new(
-                rank::RENDER_BUNDLE_SCOPE_BIND_GROUPS,
-                StatelessTracker::new(),
-            ),
-            render_pipelines: RwLock::new(
-                rank::RENDER_BUNDLE_SCOPE_RENDER_PIPELINES,
-                StatelessTracker::new(),
-            ),
+            buffers: BufferUsageScope::default(),
+            textures: TextureUsageScope::default(),
+            bind_groups: StatelessTracker::new(),
+            render_pipelines: StatelessTracker::new(),
         }
     }
 
@@ -495,12 +483,8 @@ impl<A: HalApi> RenderBundleScope<A> {
         &mut self,
         bind_group: &BindGroupStates<A>,
     ) -> Result<(), ResourceUsageCompatibilityError> {
-        unsafe { self.buffers.write().merge_bind_group(&bind_group.buffers)? };
-        unsafe {
-            self.textures
-                .write()
-                .merge_bind_group(&bind_group.textures)?
-        };
+        unsafe { self.buffers.merge_bind_group(&bind_group.buffers)? };
+        unsafe { self.textures.merge_bind_group(&bind_group.textures)? };
 
         Ok(())
     }
@@ -586,10 +570,8 @@ impl<'a, A: HalApi> UsageScope<'a, A> {
         &mut self,
         render_bundle: &RenderBundleScope<A>,
     ) -> Result<(), ResourceUsageCompatibilityError> {
-        self.buffers
-            .merge_usage_scope(&*render_bundle.buffers.read())?;
-        self.textures
-            .merge_usage_scope(&*render_bundle.textures.read())?;
+        self.buffers.merge_usage_scope(&render_bundle.buffers)?;
+        self.textures.merge_usage_scope(&render_bundle.textures)?;
 
         Ok(())
     }

From 62af9d78b5200ecdf495a6478d1e4b6dc53d2a4d Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Fri, 2 Aug 2024 19:18:30 +0200
Subject: [PATCH 128/226] rename `{Buffer,Texture}BindGroupState`'s
 `add_single` to `insert_single`

Also change its definition to take an owned `Arc`. This makes these functions consistent with the other trackers.
---
 wgpu-core/src/device/resource.rs |  4 ++--
 wgpu-core/src/track/buffer.rs    |  4 ++--
 wgpu-core/src/track/texture.rs   | 10 +++++-----
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 0072dd9318..2d354a2252 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -1917,7 +1917,7 @@ impl<A: HalApi> Device<A> {
 
         let buffer = &bb.buffer;
 
-        used.buffers.add_single(buffer, internal_use);
+        used.buffers.insert_single(buffer.clone(), internal_use);
 
         buffer.same_device(self)?;
 
@@ -2068,7 +2068,7 @@ impl<A: HalApi> Device<A> {
         // Careful here: the texture may no longer have its own ref count,
         // if it was deleted by the user.
         used.textures
-            .add_single(texture, Some(view.selector.clone()), internal_use);
+            .insert_single(texture.clone(), Some(view.selector.clone()), internal_use);
 
         texture.check_usage(pub_usage)?;
 
diff --git a/wgpu-core/src/track/buffer.rs b/wgpu-core/src/track/buffer.rs
index b0bcdca03e..a24148230b 100644
--- a/wgpu-core/src/track/buffer.rs
+++ b/wgpu-core/src/track/buffer.rs
@@ -68,8 +68,8 @@ impl<A: HalApi> BufferBindGroupState<A> {
     }
 
     /// Adds the given resource with the given state.
-    pub fn add_single(&mut self, buffer: &Arc<Buffer<A>>, state: BufferUses) {
-        self.buffers.push((buffer.clone(), state));
+    pub fn insert_single(&mut self, buffer: Arc<Buffer<A>>, state: BufferUses) {
+        self.buffers.push((buffer, state));
     }
 }
 
diff --git a/wgpu-core/src/track/texture.rs b/wgpu-core/src/track/texture.rs
index 77361a10f0..6c177829f6 100644
--- a/wgpu-core/src/track/texture.rs
+++ b/wgpu-core/src/track/texture.rs
@@ -179,16 +179,16 @@ impl<A: HalApi> TextureBindGroupState<A> {
     }
 
     /// Adds the given resource with the given state.
-    pub fn add_single(
+    pub fn insert_single(
         &mut self,
-        texture: &Arc<Texture<A>>,
+        texture: Arc<Texture<A>>,
         selector: Option<TextureSelector>,
-        state: TextureUses,
+        usage: TextureUses,
     ) {
         self.textures.push(TextureBindGroupStateData {
             selector,
-            texture: texture.clone(),
-            usage: state,
+            texture,
+            usage,
         });
     }
 }

From 4e777bd0e7991460b5af63604441b6ec2f52c983 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Fri, 2 Aug 2024 19:49:57 +0200
Subject: [PATCH 129/226] merge the texture and texture view trackers of
 `BindGroupStates`

---
 wgpu-core/src/device/resource.rs | 10 ++----
 wgpu-core/src/track/buffer.rs    |  2 +-
 wgpu-core/src/track/mod.rs       | 18 +++++------
 wgpu-core/src/track/texture.rs   | 52 +++++++++++---------------------
 4 files changed, 29 insertions(+), 53 deletions(-)

diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 2d354a2252..e66e452063 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -2054,8 +2054,6 @@ impl<A: HalApi> Device<A> {
         used_texture_ranges: &mut Vec<TextureInitTrackerAction<A>>,
         snatch_guard: &'a SnatchGuard<'a>,
     ) -> Result<hal::TextureBinding<'a, A>, binding_model::CreateBindGroupError> {
-        used.views.insert_single(view.clone());
-
         view.same_device(self)?;
 
         let (pub_usage, internal_use) = self.texture_use_parameters(
@@ -2064,12 +2062,10 @@ impl<A: HalApi> Device<A> {
             view,
             "SampledTexture, ReadonlyStorageTexture or WriteonlyStorageTexture",
         )?;
-        let texture = &view.parent;
-        // Careful here: the texture may no longer have its own ref count,
-        // if it was deleted by the user.
-        used.textures
-            .insert_single(texture.clone(), Some(view.selector.clone()), internal_use);
 
+        used.views.insert_single(view.clone(), internal_use);
+
+        let texture = &view.parent;
         texture.check_usage(pub_usage)?;
 
         used_texture_ranges.push(TextureInitTrackerAction {
diff --git a/wgpu-core/src/track/buffer.rs b/wgpu-core/src/track/buffer.rs
index a24148230b..8fdcf31674 100644
--- a/wgpu-core/src/track/buffer.rs
+++ b/wgpu-core/src/track/buffer.rs
@@ -37,7 +37,7 @@ impl ResourceUses for BufferUses {
     }
 }
 
-/// Stores all the buffers that a bind group stores.
+/// Stores a bind group's buffers + their usages (within the bind group).
 #[derive(Debug)]
 pub(crate) struct BufferBindGroupState<A: HalApi> {
     buffers: Vec<(Arc<Buffer<A>>, BufferUses)>,
diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs
index c8b634ed75..82c1406db5 100644
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -119,8 +119,8 @@ pub(crate) use buffer::{
 use metadata::{ResourceMetadata, ResourceMetadataProvider};
 pub(crate) use stateless::StatelessTracker;
 pub(crate) use texture::{
-    DeviceTextureTracker, TextureBindGroupState, TextureSelector, TextureTracker,
-    TextureTrackerSetSingle, TextureUsageScope,
+    DeviceTextureTracker, TextureSelector, TextureTracker, TextureTrackerSetSingle,
+    TextureUsageScope, TextureViewBindGroupState,
 };
 use wgt::strict_assert_ne;
 
@@ -422,8 +422,7 @@ impl<T: ResourceUses> fmt::Display for InvalidUse<T> {
 #[derive(Debug)]
 pub(crate) struct BindGroupStates<A: HalApi> {
     pub buffers: BufferBindGroupState<A>,
-    pub textures: TextureBindGroupState<A>,
-    pub views: StatelessTracker<resource::TextureView<A>>,
+    pub views: TextureViewBindGroupState<A>,
     pub samplers: StatelessTracker<resource::Sampler<A>>,
 }
 
@@ -431,8 +430,7 @@ impl<A: HalApi> BindGroupStates<A> {
     pub fn new() -> Self {
         Self {
             buffers: BufferBindGroupState::new(),
-            textures: TextureBindGroupState::new(),
-            views: StatelessTracker::new(),
+            views: TextureViewBindGroupState::new(),
             samplers: StatelessTracker::new(),
         }
     }
@@ -443,7 +441,7 @@ impl<A: HalApi> BindGroupStates<A> {
     /// accesses will be in a constant ascending order.
     pub fn optimize(&mut self) {
         self.buffers.optimize();
-        self.textures.optimize();
+        self.views.optimize();
     }
 }
 
@@ -484,7 +482,7 @@ impl<A: HalApi> RenderBundleScope<A> {
         bind_group: &BindGroupStates<A>,
     ) -> Result<(), ResourceUsageCompatibilityError> {
         unsafe { self.buffers.merge_bind_group(&bind_group.buffers)? };
-        unsafe { self.textures.merge_bind_group(&bind_group.textures)? };
+        unsafe { self.textures.merge_bind_group(&bind_group.views)? };
 
         Ok(())
     }
@@ -551,7 +549,7 @@ impl<'a, A: HalApi> UsageScope<'a, A> {
     ) -> Result<(), ResourceUsageCompatibilityError> {
         unsafe {
             self.buffers.merge_bind_group(&bind_group.buffers)?;
-            self.textures.merge_bind_group(&bind_group.textures)?;
+            self.textures.merge_bind_group(&bind_group.views)?;
         }
 
         Ok(())
@@ -653,7 +651,7 @@ impl<A: HalApi> Tracker<A> {
         };
         unsafe {
             self.textures
-                .set_and_remove_from_usage_scope_sparse(&mut scope.textures, &bind_group.textures)
+                .set_and_remove_from_usage_scope_sparse(&mut scope.textures, &bind_group.views)
         };
     }
 }
diff --git a/wgpu-core/src/track/texture.rs b/wgpu-core/src/track/texture.rs
index 6c177829f6..243bd25207 100644
--- a/wgpu-core/src/track/texture.rs
+++ b/wgpu-core/src/track/texture.rs
@@ -21,7 +21,7 @@
 use super::{range::RangedStates, PendingTransition, PendingTransitionList, TrackerIndex};
 use crate::{
     hal_api::HalApi,
-    resource::{Texture, TextureInner, Trackable},
+    resource::{Texture, TextureInner, TextureView, Trackable},
     snatch::SnatchGuard,
     track::{
         invalid_resource_state, skip_barrier, ResourceMetadata, ResourceMetadataProvider,
@@ -150,23 +150,14 @@ impl ComplexTextureState {
     }
 }
 
+/// Stores a bind group's texture views + their usages (within the bind group).
 #[derive(Debug)]
-struct TextureBindGroupStateData<A: HalApi> {
-    selector: Option<TextureSelector>,
-    texture: Arc<Texture<A>>,
-    usage: TextureUses,
+pub(crate) struct TextureViewBindGroupState<A: HalApi> {
+    views: Vec<(Arc<TextureView<A>>, TextureUses)>,
 }
-
-/// Stores all the textures that a bind group stores.
-#[derive(Debug)]
-pub(crate) struct TextureBindGroupState<A: HalApi> {
-    textures: Vec<TextureBindGroupStateData<A>>,
-}
-impl<A: HalApi> TextureBindGroupState<A> {
+impl<A: HalApi> TextureViewBindGroupState<A> {
     pub fn new() -> Self {
-        Self {
-            textures: Vec::new(),
-        }
+        Self { views: Vec::new() }
     }
 
     /// Optimize the texture bind group state by sorting it by ID.
@@ -174,22 +165,13 @@ impl<A: HalApi> TextureBindGroupState<A> {
     /// When this list of states is merged into a tracker, the memory
     /// accesses will be in a constant ascending order.
     pub(crate) fn optimize(&mut self) {
-        self.textures
-            .sort_unstable_by_key(|v| v.texture.tracker_index());
+        self.views
+            .sort_unstable_by_key(|(view, _)| view.parent.tracker_index());
     }
 
     /// Adds the given resource with the given state.
-    pub fn insert_single(
-        &mut self,
-        texture: Arc<Texture<A>>,
-        selector: Option<TextureSelector>,
-        usage: TextureUses,
-    ) {
-        self.textures.push(TextureBindGroupStateData {
-            selector,
-            texture,
-            usage,
-        });
+    pub fn insert_single(&mut self, view: Arc<TextureView<A>>, usage: TextureUses) {
+        self.views.push((view, usage));
     }
 }
 
@@ -323,10 +305,10 @@ impl<A: HalApi> TextureUsageScope<A> {
     /// method is called.
     pub unsafe fn merge_bind_group(
         &mut self,
-        bind_group: &TextureBindGroupState<A>,
+        bind_group: &TextureViewBindGroupState<A>,
     ) -> Result<(), ResourceUsageCompatibilityError> {
-        for t in bind_group.textures.iter() {
-            unsafe { self.merge_single(&t.texture, t.selector.clone(), t.usage)? };
+        for (view, usage) in bind_group.views.iter() {
+            unsafe { self.merge_single(&view.parent, Some(view.selector.clone()), *usage)? };
         }
 
         Ok(())
@@ -606,21 +588,21 @@ impl<A: HalApi> TextureTracker<A> {
     pub unsafe fn set_and_remove_from_usage_scope_sparse(
         &mut self,
         scope: &mut TextureUsageScope<A>,
-        bind_group_state: &TextureBindGroupState<A>,
+        bind_group_state: &TextureViewBindGroupState<A>,
     ) {
         let incoming_size = scope.set.simple.len();
         if incoming_size > self.start_set.simple.len() {
             self.set_size(incoming_size);
         }
 
-        for t in bind_group_state.textures.iter() {
-            let index = t.texture.tracker_index().as_usize();
+        for (view, _) in bind_group_state.views.iter() {
+            let index = view.parent.tracker_index().as_usize();
             scope.tracker_assert_in_bounds(index);
 
             if unsafe { !scope.metadata.contains_unchecked(index) } {
                 continue;
             }
-            let texture_selector = &t.texture.full_range;
+            let texture_selector = &view.parent.full_range;
             unsafe {
                 insert_or_barrier_update(
                     texture_selector,

From 7b4cbc26192d6d56a31f8e67769e656a6627b222 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Sat, 3 Aug 2024 11:42:43 +0200
Subject: [PATCH 130/226] add comments in `BindGroupStates.optimize`

---
 wgpu-core/src/track/mod.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs
index 82c1406db5..2784094ef9 100644
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -441,7 +441,11 @@ impl<A: HalApi> BindGroupStates<A> {
     /// accesses will be in a constant ascending order.
     pub fn optimize(&mut self) {
         self.buffers.optimize();
+        // Views are stateless, however, `TextureViewBindGroupState`
+        // is special as it will be merged with other texture trackers.
         self.views.optimize();
+        // Samplers are stateless and don't need to be optimized
+        // since the tracker is never merged with any other tracker.
     }
 }
 

From 8a0d1e1cbab02ecbca300dcb7bac6814cda0887f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 5 Aug 2024 09:00:10 +0200
Subject: [PATCH 131/226] build(deps): bump EmbarkStudios/cargo-deny-action
 from 1 to 2 (#6077)

Bumps [EmbarkStudios/cargo-deny-action](https://github.com/embarkstudios/cargo-deny-action) from 1 to 2.
- [Release notes](https://github.com/embarkstudios/cargo-deny-action/releases)
- [Commits](https://github.com/embarkstudios/cargo-deny-action/compare/v1...v2)

---
updated-dependencies:
- dependency-name: EmbarkStudios/cargo-deny-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 203e990b3d..baa6d1be8e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -681,7 +681,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Run `cargo deny check`
-        uses: EmbarkStudios/cargo-deny-action@v1
+        uses: EmbarkStudios/cargo-deny-action@v2
         with:
           command: check advisories
           arguments: --all-features --workspace
@@ -698,7 +698,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Run `cargo deny check`
-        uses: EmbarkStudios/cargo-deny-action@v1
+        uses: EmbarkStudios/cargo-deny-action@v2
         with:
           command: check bans licenses sources
           arguments: --all-features --workspace

From e0bc30655a2f45f2ff063cf602e8f6eadb107ae0 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 5 Aug 2024 09:01:41 +0200
Subject: [PATCH 132/226] build(deps): bump the patch-updates group with 12
 updates (#6079)

Bumps the patch-updates group with 12 updates:

| Package | From | To |
| --- | --- | --- |
| [bytemuck](https://github.com/Lokathor/bytemuck) | `1.16.1` | `1.16.3` |
| [indexmap](https://github.com/indexmap-rs/indexmap) | `2.2.6` | `2.3.0` |
| [serde_json](https://github.com/serde-rs/json) | `1.0.121` | `1.0.122` |
| [bytes](https://github.com/tokio-rs/bytes) | `1.6.1` | `1.7.1` |
| [cc](https://github.com/rust-lang/cc-rs) | `1.1.6` | `1.1.7` |
| [clap](https://github.com/clap-rs/clap) | `4.5.11` | `4.5.13` |
| [clap_builder](https://github.com/clap-rs/clap) | `4.5.11` | `4.5.13` |
| [clap_derive](https://github.com/clap-rs/clap) | `4.5.11` | `4.5.13` |
| [flate2](https://github.com/rust-lang/flate2-rs) | `1.0.30` | `1.0.31` |
| [regex](https://github.com/rust-lang/regex) | `1.10.5` | `1.10.6` |
| [toml_datetime](https://github.com/toml-rs/toml) | `0.6.7` | `0.6.8` |
| [winapi-util](https://github.com/BurntSushi/winapi-util) | `0.1.8` | `0.1.9` |


Updates `bytemuck` from 1.16.1 to 1.16.3
- [Changelog](https://github.com/Lokathor/bytemuck/blob/main/changelog.md)
- [Commits](https://github.com/Lokathor/bytemuck/compare/v1.16.1...v1.16.3)

Updates `indexmap` from 2.2.6 to 2.3.0
- [Changelog](https://github.com/indexmap-rs/indexmap/blob/master/RELEASES.md)
- [Commits](https://github.com/indexmap-rs/indexmap/compare/2.2.6...2.3.0)

Updates `serde_json` from 1.0.121 to 1.0.122
- [Release notes](https://github.com/serde-rs/json/releases)
- [Commits](https://github.com/serde-rs/json/compare/v1.0.121...v1.0.122)

Updates `bytes` from 1.6.1 to 1.7.1
- [Release notes](https://github.com/tokio-rs/bytes/releases)
- [Changelog](https://github.com/tokio-rs/bytes/blob/master/CHANGELOG.md)
- [Commits](https://github.com/tokio-rs/bytes/compare/v1.6.1...v1.7.1)

Updates `cc` from 1.1.6 to 1.1.7
- [Release notes](https://github.com/rust-lang/cc-rs/releases)
- [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md)
- [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.1.6...cc-v1.1.7)

Updates `clap` from 4.5.11 to 4.5.13
- [Release notes](https://github.com/clap-rs/clap/releases)
- [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md)
- [Commits](https://github.com/clap-rs/clap/compare/clap_complete-v4.5.11...v4.5.13)

Updates `clap_builder` from 4.5.11 to 4.5.13
- [Release notes](https://github.com/clap-rs/clap/releases)
- [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md)
- [Commits](https://github.com/clap-rs/clap/compare/v4.5.11...v4.5.13)

Updates `clap_derive` from 4.5.11 to 4.5.13
- [Release notes](https://github.com/clap-rs/clap/releases)
- [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md)
- [Commits](https://github.com/clap-rs/clap/compare/v4.5.11...v4.5.13)

Updates `flate2` from 1.0.30 to 1.0.31
- [Release notes](https://github.com/rust-lang/flate2-rs/releases)
- [Commits](https://github.com/rust-lang/flate2-rs/commits)

Updates `regex` from 1.10.5 to 1.10.6
- [Release notes](https://github.com/rust-lang/regex/releases)
- [Changelog](https://github.com/rust-lang/regex/blob/master/CHANGELOG.md)
- [Commits](https://github.com/rust-lang/regex/compare/1.10.5...1.10.6)

Updates `toml_datetime` from 0.6.7 to 0.6.8
- [Commits](https://github.com/toml-rs/toml/compare/toml_datetime-v0.6.7...toml_datetime-v0.6.8)

Updates `winapi-util` from 0.1.8 to 0.1.9
- [Commits](https://github.com/BurntSushi/winapi-util/compare/0.1.8...0.1.9)

---
updated-dependencies:
- dependency-name: bytemuck
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: indexmap
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: patch-updates
- dependency-name: serde_json
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: bytes
  dependency-type: indirect
  update-type: version-update:semver-minor
  dependency-group: patch-updates
- dependency-name: cc
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: clap
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: clap_builder
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: clap_derive
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: flate2
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: regex
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: toml_datetime
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: winapi-util
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 Cargo.lock | 52 ++++++++++++++++++++++++++--------------------------
 Cargo.toml |  2 +-
 2 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 2dbf69ee76..6040c0a442 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -370,9 +370,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
 
 [[package]]
 name = "bytemuck"
-version = "1.16.1"
+version = "1.16.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e"
+checksum = "102087e286b4677862ea56cf8fc58bb2cdfa8725c40ffb80fe3a008eb7f2fc83"
 dependencies = [
  "bytemuck_derive",
 ]
@@ -396,9 +396,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
 
 [[package]]
 name = "bytes"
-version = "1.6.1"
+version = "1.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a12916984aab3fa6e39d655a33e09c0071eb36d6ab3aea5c2d78551f1df6d952"
+checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50"
 
 [[package]]
 name = "calloop"
@@ -448,9 +448,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
 
 [[package]]
 name = "cc"
-version = "1.1.6"
+version = "1.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2aba8f4e9906c7ce3c73463f62a7f0c65183ada1a2d47e397cc8810827f9694f"
+checksum = "26a5c3fd7bfa1ce3897a3a3501d362b2d87b7f2583ebcb4a949ec25911025cbc"
 dependencies = [
  "jobserver",
  "libc",
@@ -512,9 +512,9 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.5.11"
+version = "4.5.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35723e6a11662c2afb578bcf0b88bf6ea8e21282a953428f240574fcc3a2b5b3"
+checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -522,9 +522,9 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.5.11"
+version = "4.5.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49eb96cbfa7cfa35017b7cd548c75b14c3118c98b423041d70562665e07fb0fa"
+checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99"
 dependencies = [
  "anstream",
  "anstyle",
@@ -534,9 +534,9 @@ dependencies = [
 
 [[package]]
 name = "clap_derive"
-version = "4.5.11"
+version = "4.5.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d029b67f89d30bbb547c89fd5161293c0aec155fc691d7924b64550662db93e"
+checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0"
 dependencies = [
  "heck 0.5.0",
  "proc-macro2",
@@ -1300,9 +1300,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
 
 [[package]]
 name = "flate2"
-version = "1.0.30"
+version = "1.0.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae"
+checksum = "7f211bbe8e69bbd0cfdea405084f128ae8b4aaa6b0b522fc8f2b009084797920"
 dependencies = [
  "crc32fast",
  "miniz_oxide",
@@ -1853,9 +1853,9 @@ dependencies = [
 
 [[package]]
 name = "indexmap"
-version = "2.2.6"
+version = "2.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
+checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0"
 dependencies = [
  "arbitrary",
  "equivalent",
@@ -2013,7 +2013,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
 dependencies = [
  "cfg-if",
- "windows-targets 0.52.6",
+ "windows-targets 0.48.5",
 ]
 
 [[package]]
@@ -2954,9 +2954,9 @@ dependencies = [
 
 [[package]]
 name = "regex"
-version = "1.10.5"
+version = "1.10.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f"
+checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -3176,9 +3176,9 @@ dependencies = [
 
 [[package]]
 name = "serde_json"
-version = "1.0.121"
+version = "1.0.122"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4ab380d7d9f22ef3f21ad3e6c1ebe8e4fc7a2000ccba2e4d71fc96f15b2cb609"
+checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da"
 dependencies = [
  "indexmap",
  "itoa",
@@ -3628,9 +3628,9 @@ dependencies = [
 
 [[package]]
 name = "toml_datetime"
-version = "0.6.7"
+version = "0.6.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f8fb9f64314842840f1d940ac544da178732128f1c78c21772e876579e0da1db"
+checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41"
 
 [[package]]
 name = "toml_edit"
@@ -4466,11 +4466,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
 
 [[package]]
 name = "winapi-util"
-version = "0.1.8"
+version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b"
+checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
 dependencies = [
- "windows-sys 0.52.0",
+ "windows-sys 0.48.0",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index 23d5b5cd7d..f049821350 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -121,7 +121,7 @@ renderdoc-sys = "1.1.0"
 ron = "0.8"
 rustc-hash = "1.1.0"
 serde = "1"
-serde_json = "1.0.121"
+serde_json = "1.0.122"
 smallvec = "1"
 static_assertions = "1.1.0"
 strum = { version = "0.25.0", features = ["derive"] }

From 9619a43849de2a49e596621946382c4fb4b178a0 Mon Sep 17 00:00:00 2001
From: Samson <16504129+sagudev@users.noreply.github.com>
Date: Sat, 3 Aug 2024 16:56:35 +0200
Subject: [PATCH 133/226] Reorder/add limits in `check_limit` to match
 `wgt::Limits`

---
 wgpu-types/src/lib.rs | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
index abe66d4910..da8e6ff495 100644
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@@ -1444,6 +1444,7 @@ impl Limits {
         compare!(max_texture_dimension_3d, Less);
         compare!(max_texture_array_layers, Less);
         compare!(max_bind_groups, Less);
+        compare!(max_bindings_per_bind_group, Less);
         compare!(max_dynamic_uniform_buffers_per_pipeline_layout, Less);
         compare!(max_dynamic_storage_buffers_per_pipeline_layout, Less);
         compare!(max_sampled_textures_per_shader_stage, Less);
@@ -1454,23 +1455,25 @@ impl Limits {
         compare!(max_uniform_buffer_binding_size, Less);
         compare!(max_storage_buffer_binding_size, Less);
         compare!(max_vertex_buffers, Less);
+        compare!(max_buffer_size, Less);
         compare!(max_vertex_attributes, Less);
         compare!(max_vertex_buffer_array_stride, Less);
-        if self.min_subgroup_size > 0 && self.max_subgroup_size > 0 {
-            compare!(min_subgroup_size, Greater);
-            compare!(max_subgroup_size, Less);
-        }
-        compare!(max_push_constant_size, Less);
         compare!(min_uniform_buffer_offset_alignment, Greater);
         compare!(min_storage_buffer_offset_alignment, Greater);
         compare!(max_inter_stage_shader_components, Less);
+        compare!(max_color_attachments, Less);
+        compare!(max_color_attachment_bytes_per_sample, Less);
         compare!(max_compute_workgroup_storage_size, Less);
         compare!(max_compute_invocations_per_workgroup, Less);
         compare!(max_compute_workgroup_size_x, Less);
         compare!(max_compute_workgroup_size_y, Less);
         compare!(max_compute_workgroup_size_z, Less);
         compare!(max_compute_workgroups_per_dimension, Less);
-        compare!(max_buffer_size, Less);
+        if self.min_subgroup_size > 0 && self.max_subgroup_size > 0 {
+            compare!(min_subgroup_size, Greater);
+            compare!(max_subgroup_size, Less);
+        }
+        compare!(max_push_constant_size, Less);
         compare!(max_non_sampler_bindings, Less);
     }
 }

From de960ccbba4f7f4e9c01abcb916f1973bd54df63 Mon Sep 17 00:00:00 2001
From: Samson <16504129+sagudev@users.noreply.github.com>
Date: Mon, 5 Aug 2024 15:45:02 +0200
Subject: [PATCH 134/226] Handle TooManyAttachments in wgpu-core (#6076)

---
 wgpu-core/src/command/mod.rs    |  2 ++
 wgpu-core/src/command/render.rs | 13 ++++++++++++-
 wgpu/src/backend/wgpu_core.rs   | 11 +----------
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs
index 7290330daf..ec600d2c60 100644
--- a/wgpu-core/src/command/mod.rs
+++ b/wgpu-core/src/command/mod.rs
@@ -595,6 +595,8 @@ pub enum CommandEncoderError {
     InvalidTimestampWritesQuerySetId(id::QuerySetId),
     #[error("Attachment TextureViewId {0:?} is invalid")]
     InvalidAttachmentId(id::TextureViewId),
+    #[error(transparent)]
+    InvalidColorAttachment(#[from] ColorAttachmentError),
     #[error("Resolve attachment TextureViewId {0:?} is invalid")]
     InvalidResolveTargetId(id::TextureViewId),
     #[error("Depth stencil attachment TextureViewId {0:?} is invalid")]
diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index 5227d075ee..8c00e0d302 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -1342,10 +1342,21 @@ impl Global {
             hub: &crate::hub::Hub<A>,
             desc: &RenderPassDescriptor<'_>,
             arc_desc: &mut ArcRenderPassDescriptor<A>,
+            device: &Device<A>,
         ) -> Result<(), CommandEncoderError> {
             let query_sets = hub.query_sets.read();
             let texture_views = hub.texture_views.read();
 
+            let max_color_attachments = device.limits.max_color_attachments as usize;
+            if desc.color_attachments.len() > max_color_attachments {
+                return Err(CommandEncoderError::InvalidColorAttachment(
+                    ColorAttachmentError::TooMany {
+                        given: desc.color_attachments.len(),
+                        limit: max_color_attachments,
+                    },
+                ));
+            }
+
             for color_attachment in desc.color_attachments.iter() {
                 if let Some(RenderPassColorAttachment {
                     view: view_id,
@@ -1447,7 +1458,7 @@ impl Global {
             Err(e) => return make_err(e, arc_desc),
         };
 
-        let err = fill_arc_desc(hub, desc, &mut arc_desc).err();
+        let err = fill_arc_desc(hub, desc, &mut arc_desc, &cmd_buf.device).err();
 
         (RenderPass::new(Some(cmd_buf), arc_desc), err)
     }
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 7806552494..0adf8c3e59 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -1923,15 +1923,6 @@ impl crate::Context for ContextWgpuCore {
         encoder_data: &Self::CommandEncoderData,
         desc: &crate::RenderPassDescriptor<'_>,
     ) -> (Self::RenderPassId, Self::RenderPassData) {
-        if desc.color_attachments.len() > wgc::MAX_COLOR_ATTACHMENTS {
-            self.handle_error_fatal(
-                wgc::command::ColorAttachmentError::TooMany {
-                    given: desc.color_attachments.len(),
-                    limit: wgc::MAX_COLOR_ATTACHMENTS,
-                },
-                "CommandEncoder::begin_render_pass",
-            );
-        }
         let colors = desc
             .color_attachments
             .iter()
@@ -1943,7 +1934,7 @@ impl crate::Context for ContextWgpuCore {
                         channel: map_pass_channel(Some(&at.ops)),
                     })
             })
-            .collect::<ArrayVec<_, { wgc::MAX_COLOR_ATTACHMENTS }>>();
+            .collect::<Vec<_>>();
 
         let depth_stencil = desc.depth_stencil_attachment.as_ref().map(|dsa| {
             wgc::command::RenderPassDepthStencilAttachment {

From 8c7c5c4974717414d49ffbd19e91a337f2db7861 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Fri, 2 Aug 2024 13:54:32 +0200
Subject: [PATCH 135/226] decouple device and queue IDs

Devices and queues can have different lifetimes, so we shouldn't assume that their IDs match.
---
 player/src/bin/play.rs           | 15 ++++++++-------
 player/src/lib.rs                | 11 ++++++-----
 player/tests/test.rs             |  5 +++--
 tests/tests/device.rs            | 27 ++++++++++++++++++++++++++-
 wgpu-core/src/device/global.rs   | 15 +++------------
 wgpu-core/src/device/life.rs     |  5 ++---
 wgpu-core/src/device/queue.rs    | 14 ++------------
 wgpu-core/src/device/resource.rs | 23 +++++++++++++++++------
 wgpu-core/src/id.rs              |  6 ------
 wgpu-core/src/lib.rs             |  2 +-
 wgpu/src/backend/wgpu_core.rs    |  4 ++--
 11 files changed, 70 insertions(+), 57 deletions(-)

diff --git a/player/src/bin/play.rs b/player/src/bin/play.rs
index 6510ab23cd..8b6555369f 100644
--- a/player/src/bin/play.rs
+++ b/player/src/bin/play.rs
@@ -61,7 +61,7 @@ fn main() {
     }
     .unwrap();
 
-    let device = match actions.pop() {
+    let (device, queue) = match actions.pop() {
         Some(trace::Action::Init { desc, backend }) => {
             log::info!("Initializing the device for backend: {:?}", backend);
             let adapter = global
@@ -80,18 +80,19 @@ fn main() {
 
             let info = gfx_select!(adapter => global.adapter_get_info(adapter)).unwrap();
             log::info!("Picked '{}'", info.name);
-            let id = wgc::id::Id::zip(1, 0, backend);
+            let device_id = wgc::id::Id::zip(1, 0, backend);
+            let queue_id = wgc::id::Id::zip(1, 0, backend);
             let (_, _, error) = gfx_select!(adapter => global.adapter_request_device(
                 adapter,
                 &desc,
                 None,
-                Some(id),
-                Some(id.into_queue_id())
+                Some(device_id),
+                Some(queue_id)
             ));
             if let Some(e) = error {
                 panic!("{:?}", e);
             }
-            id
+            (device_id, queue_id)
         }
         _ => panic!("Expected Action::Init"),
     };
@@ -102,7 +103,7 @@ fn main() {
         gfx_select!(device => global.device_start_capture(device));
 
         while let Some(action) = actions.pop() {
-            gfx_select!(device => global.process(device, action, &dir, &mut command_buffer_id_manager));
+            gfx_select!(device => global.process(device, queue, action, &dir, &mut command_buffer_id_manager));
         }
 
         gfx_select!(device => global.device_stop_capture(device));
@@ -156,7 +157,7 @@ fn main() {
                                 target.exit();
                         }
                         Some(action) => {
-                            gfx_select!(device => global.process(device, action, &dir, &mut command_buffer_id_manager));
+                            gfx_select!(device => global.process(device, queue, action, &dir, &mut command_buffer_id_manager));
                         }
                         None => {
                             if !done {
diff --git a/player/src/lib.rs b/player/src/lib.rs
index 4ec9116ead..5efeff1537 100644
--- a/player/src/lib.rs
+++ b/player/src/lib.rs
@@ -16,6 +16,7 @@ pub trait GlobalPlay {
     fn process<A: wgc::hal_api::HalApi>(
         &self,
         device: wgc::id::DeviceId,
+        queue: wgc::id::QueueId,
         action: trace::Action,
         dir: &Path,
         comb_manager: &mut wgc::identity::IdentityManager<wgc::id::markers::CommandBuffer>,
@@ -131,6 +132,7 @@ impl GlobalPlay for wgc::global::Global {
     fn process<A: wgc::hal_api::HalApi>(
         &self,
         device: wgc::id::DeviceId,
+        queue: wgc::id::QueueId,
         action: trace::Action,
         dir: &Path,
         comb_manager: &mut wgc::identity::IdentityManager<wgc::id::markers::CommandBuffer>,
@@ -327,7 +329,7 @@ impl GlobalPlay for wgc::global::Global {
                 let bin = std::fs::read(dir.join(data)).unwrap();
                 let size = (range.end - range.start) as usize;
                 if queued {
-                    self.queue_write_buffer::<A>(device.into_queue_id(), id, range.start, &bin)
+                    self.queue_write_buffer::<A>(queue, id, range.start, &bin)
                         .unwrap();
                 } else {
                     self.device_set_buffer_data::<A>(id, range.start, &bin[..size])
@@ -341,11 +343,11 @@ impl GlobalPlay for wgc::global::Global {
                 size,
             } => {
                 let bin = std::fs::read(dir.join(data)).unwrap();
-                self.queue_write_texture::<A>(device.into_queue_id(), &to, &bin, &layout, &size)
+                self.queue_write_texture::<A>(queue, &to, &bin, &layout, &size)
                     .unwrap();
             }
             Action::Submit(_index, ref commands) if commands.is_empty() => {
-                self.queue_submit::<A>(device.into_queue_id(), &[]).unwrap();
+                self.queue_submit::<A>(queue, &[]).unwrap();
             }
             Action::Submit(_index, commands) => {
                 let (encoder, error) = self.device_create_command_encoder::<A>(
@@ -361,8 +363,7 @@ impl GlobalPlay for wgc::global::Global {
                     panic!("{e}");
                 }
                 let cmdbuf = self.encode_commands::<A>(encoder, commands);
-                self.queue_submit::<A>(device.into_queue_id(), &[cmdbuf])
-                    .unwrap();
+                self.queue_submit::<A>(queue, &[cmdbuf]).unwrap();
             }
         }
     }
diff --git a/player/tests/test.rs b/player/tests/test.rs
index b3ca944921..f16e7fa32b 100644
--- a/player/tests/test.rs
+++ b/player/tests/test.rs
@@ -106,6 +106,7 @@ impl Test<'_> {
     ) {
         let backend = adapter.backend();
         let device_id = wgc::id::Id::zip(test_num, 0, backend);
+        let queue_id = wgc::id::Id::zip(test_num, 0, backend);
         let (_, _, error) = wgc::gfx_select!(adapter => global.adapter_request_device(
             adapter,
             &wgt::DeviceDescriptor {
@@ -116,7 +117,7 @@ impl Test<'_> {
             },
             None,
             Some(device_id),
-            Some(device_id.into_queue_id())
+            Some(queue_id)
         ));
         if let Some(e) = error {
             panic!("{:?}", e);
@@ -125,7 +126,7 @@ impl Test<'_> {
         let mut command_buffer_id_manager = wgc::identity::IdentityManager::new();
         println!("\t\t\tRunning...");
         for action in self.actions {
-            wgc::gfx_select!(device_id => global.process(device_id, action, dir, &mut command_buffer_id_manager));
+            wgc::gfx_select!(device_id => global.process(device_id, queue_id, action, dir, &mut command_buffer_id_manager));
         }
         println!("\t\t\tMapping...");
         for expect in &self.expectations {
diff --git a/tests/tests/device.rs b/tests/tests/device.rs
index a577379c20..d629f1b8e6 100644
--- a/tests/tests/device.rs
+++ b/tests/tests/device.rs
@@ -1,6 +1,8 @@
 use std::sync::atomic::AtomicBool;
 
-use wgpu_test::{fail, gpu_test, FailureCase, GpuTestConfiguration, TestParameters};
+use wgpu_test::{
+    fail, gpu_test, FailureCase, GpuTestConfiguration, TestParameters, TestingContext,
+};
 
 #[gpu_test]
 static CROSS_DEVICE_BIND_GROUP_USAGE: GpuTestConfiguration = GpuTestConfiguration::new()
@@ -908,3 +910,26 @@ static DEVICE_DESTROY_THEN_BUFFER_CLEANUP: GpuTestConfiguration = GpuTestConfigu
         // Poll the device, which should try to clean up its resources.
         ctx.instance.poll_all(true);
     });
+
+#[gpu_test]
+static DEVICE_AND_QUEUE_HAVE_DIFFERENT_IDS: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(TestParameters::default())
+    .run_async(|ctx| async move {
+        let TestingContext {
+            adapter,
+            device_features,
+            device_limits,
+            device,
+            queue,
+            ..
+        } = ctx;
+
+        drop(device);
+
+        let (device2, queue2) =
+            wgpu_test::initialize_device(&adapter, device_features, device_limits).await;
+
+        drop(queue);
+        drop(device2);
+        drop(queue2); // this would previously panic since we would try to use the Device ID to drop the Queue
+    });
diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 0df0bc377a..cd3d8e5f20 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -7,7 +7,7 @@ use crate::{
         ResolvedBindGroupEntry, ResolvedBindingResource, ResolvedBufferBinding,
     },
     command, conv,
-    device::{bgl, life::WaitIdleError, queue, DeviceError, DeviceLostClosure, DeviceLostReason},
+    device::{bgl, life::WaitIdleError, DeviceError, DeviceLostClosure, DeviceLostReason},
     global::Global,
     hal_api::HalApi,
     id::{self, AdapterId, DeviceId, QueueId, SurfaceId},
@@ -2040,7 +2040,7 @@ impl Global {
     pub fn device_poll<A: HalApi>(
         &self,
         device_id: DeviceId,
-        maintain: wgt::Maintain<queue::WrappedSubmissionIndex>,
+        maintain: wgt::Maintain<crate::SubmissionIndex>,
     ) -> Result<bool, WaitIdleError> {
         api_log!("Device::poll {maintain:?}");
 
@@ -2050,15 +2050,6 @@ impl Global {
             .get(device_id)
             .map_err(|_| DeviceError::InvalidDeviceId)?;
 
-        if let wgt::Maintain::WaitForSubmissionIndex(submission_index) = maintain {
-            if submission_index.queue_id != device_id.into_queue_id() {
-                return Err(WaitIdleError::WrongSubmissionIndex(
-                    submission_index.queue_id,
-                    device_id,
-                ));
-            }
-        }
-
         let DevicePoll {
             closures,
             queue_empty,
@@ -2071,7 +2062,7 @@ impl Global {
 
     fn poll_single_device<A: HalApi>(
         device: &crate::device::Device<A>,
-        maintain: wgt::Maintain<queue::WrappedSubmissionIndex>,
+        maintain: wgt::Maintain<crate::SubmissionIndex>,
     ) -> Result<DevicePoll, WaitIdleError> {
         let snatch_guard = device.snatchable_lock.read();
         let fence = device.fence.read();
diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index b282775ac0..1bb687d7e2 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -4,7 +4,6 @@ use crate::{
         DeviceError, DeviceLostClosure,
     },
     hal_api::HalApi,
-    id,
     resource::{self, Buffer, Texture, Trackable},
     snatch::SnatchGuard,
     SubmissionIndex,
@@ -112,8 +111,8 @@ impl<A: HalApi> ActiveSubmission<A> {
 pub enum WaitIdleError {
     #[error(transparent)]
     Device(#[from] DeviceError),
-    #[error("Tried to wait using a submission index from the wrong device. Submission index is from device {0:?}. Called poll on device {1:?}.")]
-    WrongSubmissionIndex(id::QueueId, id::DeviceId),
+    #[error("Tried to wait using a submission index ({0}) that has not been returned by a successful submission (last successful submission: {1})")]
+    WrongSubmissionIndex(SubmissionIndex, SubmissionIndex),
     #[error("GPU got stuck :(")]
     StuckGpu,
 }
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 81c9729521..1b562d560c 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -131,13 +131,6 @@ impl SubmittedWorkDoneClosure {
     }
 }
 
-#[repr(C)]
-#[derive(Debug, Copy, Clone)]
-pub struct WrappedSubmissionIndex {
-    pub queue_id: QueueId,
-    pub index: SubmissionIndex,
-}
-
 /// A texture or buffer to be freed soon.
 ///
 /// This is just a tagged raw texture or buffer, generally about to be added to
@@ -1044,7 +1037,7 @@ impl Global {
         &self,
         queue_id: QueueId,
         command_buffer_ids: &[id::CommandBufferId],
-    ) -> Result<WrappedSubmissionIndex, QueueSubmitError> {
+    ) -> Result<SubmissionIndex, QueueSubmitError> {
         profiling::scope!("Queue::submit");
         api_log!("Queue::submit {queue_id:?}");
 
@@ -1351,10 +1344,7 @@ impl Global {
 
         api_log!("Queue::submit to {queue_id:?} returned submit index {submit_index}");
 
-        Ok(WrappedSubmissionIndex {
-            queue_id,
-            index: submit_index,
-        })
+        Ok(submit_index)
     }
 
     pub fn queue_get_timestamp_period<A: HalApi>(
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index e66e452063..154430a2f8 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -55,8 +55,8 @@ use std::{
 };
 
 use super::{
-    queue::{self, Queue},
-    DeviceDescriptor, DeviceError, UserClosures, ENTRYPOINT_FAILURE_ERROR, ZERO_BUFFER_SIZE,
+    queue::Queue, DeviceDescriptor, DeviceError, UserClosures, ENTRYPOINT_FAILURE_ERROR,
+    ZERO_BUFFER_SIZE,
 };
 
 /// Structure describing a logical device. Some members are internally mutable,
@@ -407,7 +407,7 @@ impl<A: HalApi> Device<A> {
     pub(crate) fn maintain<'this>(
         &'this self,
         fence_guard: crate::lock::RwLockReadGuard<Option<A::Fence>>,
-        maintain: wgt::Maintain<queue::WrappedSubmissionIndex>,
+        maintain: wgt::Maintain<crate::SubmissionIndex>,
         snatch_guard: SnatchGuard,
     ) -> Result<(UserClosures, bool), WaitIdleError> {
         profiling::scope!("Device::maintain");
@@ -417,9 +417,20 @@ impl<A: HalApi> Device<A> {
         // Determine which submission index `maintain` represents.
         let submission_index = match maintain {
             wgt::Maintain::WaitForSubmissionIndex(submission_index) => {
-                // We don't need to check to see if the queue id matches
-                // as we already checked this from inside the poll call.
-                submission_index.index
+                let last_successful_submission_index = self
+                    .last_successful_submission_index
+                    .load(Ordering::Acquire);
+
+                if let wgt::Maintain::WaitForSubmissionIndex(submission_index) = maintain {
+                    if submission_index > last_successful_submission_index {
+                        return Err(WaitIdleError::WrongSubmissionIndex(
+                            submission_index,
+                            last_successful_submission_index,
+                        ));
+                    }
+                }
+
+                submission_index
             }
             wgt::Maintain::Wait => self
                 .last_successful_submission_index
diff --git a/wgpu-core/src/id.rs b/wgpu-core/src/id.rs
index c795063da5..83b2494391 100644
--- a/wgpu-core/src/id.rs
+++ b/wgpu-core/src/id.rs
@@ -326,12 +326,6 @@ impl CommandBufferId {
     }
 }
 
-impl DeviceId {
-    pub fn into_queue_id(self) -> QueueId {
-        Id(self.0, PhantomData)
-    }
-}
-
 #[test]
 fn test_id_backend() {
     for &b in &[
diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs
index c46a8f103a..351916002f 100644
--- a/wgpu-core/src/lib.rs
+++ b/wgpu-core/src/lib.rs
@@ -102,7 +102,7 @@ pub(crate) use hash_utils::*;
 /// The index of a queue submission.
 ///
 /// These are the values stored in `Device::fence`.
-type SubmissionIndex = hal::FenceValue;
+pub type SubmissionIndex = hal::FenceValue;
 
 type Index = u32;
 type Epoch = u32;
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 0adf8c3e59..06632d68dd 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -552,7 +552,7 @@ impl crate::Context for ContextWgpuCore {
     type SurfaceId = wgc::id::SurfaceId;
     type SurfaceData = Surface;
     type SurfaceOutputDetail = SurfaceOutputDetail;
-    type SubmissionIndexData = wgc::device::queue::WrappedSubmissionIndex;
+    type SubmissionIndexData = wgc::SubmissionIndex;
 
     type RequestAdapterFuture = Ready<Option<(Self::AdapterId, Self::AdapterData)>>;
 
@@ -666,7 +666,7 @@ impl crate::Context for ContextWgpuCore {
             id: queue_id,
             error_sink,
         };
-        ready(Ok((device_id, device, device_id.into_queue_id(), queue)))
+        ready(Ok((device_id, device, queue_id, queue)))
     }
 
     fn instance_poll_all_devices(&self, force_wait: bool) -> bool {

From 594476c991f0cd3389ecf970b59d690e7d8d5be7 Mon Sep 17 00:00:00 2001
From: James Pruitt <jamescpruitt@gmail.com>
Date: Tue, 6 Aug 2024 02:19:23 -0600
Subject: [PATCH 136/226] Check OpenGL version is 3.3+ before creating a GL
 context over a GL ES context (#5996)

* Retry with GLES if creating a GL context fails

* Cleaner GL context creation retry
---
 CHANGELOG.md             |  1 +
 wgpu-hal/src/gles/egl.rs | 73 ++++++++++++++++++++++++----------------
 2 files changed, 45 insertions(+), 29 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a267e6565b..5632defd47 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -49,6 +49,7 @@ Bottom level categories:
 
 #### General
 
+- If GL context creation fails retry with GLES. By @Rapdorian in [#5996](https://github.com/gfx-rs/wgpu/pull/5996)
 - Fix profiling with `tracy`. By @waywardmonkeys in [#5988](https://github.com/gfx-rs/wgpu/pull/5988)
 - As a workaround for [issue #4905](https://github.com/gfx-rs/wgpu/issues/4905), `wgpu-core` is undocumented unless `--cfg wgpu_core_doc` feature is enabled. By @kpreid in [#5987](https://github.com/gfx-rs/wgpu/pull/5987)
 - Bump MSRV for `d3d12`/`naga`/`wgpu-core`/`wgpu-hal`/`wgpu-types`' to 1.76. By @wumpf in [#6003](https://github.com/gfx-rs/wgpu/pull/6003)
diff --git a/wgpu-hal/src/gles/egl.rs b/wgpu-hal/src/gles/egl.rs
index e0340d8290..9a8639d5a8 100644
--- a/wgpu-hal/src/gles/egl.rs
+++ b/wgpu-hal/src/gles/egl.rs
@@ -550,26 +550,25 @@ impl Inner {
         let supports_khr_context = display_extensions.contains("EGL_KHR_create_context");
 
         let mut context_attributes = vec![];
-        if supports_opengl {
-            context_attributes.push(khronos_egl::CONTEXT_MAJOR_VERSION);
-            context_attributes.push(3);
-            context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION);
-            context_attributes.push(3);
-            if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic {
-                log::warn!("Ignoring specified GLES minor version as OpenGL is used");
-            }
-        } else {
-            context_attributes.push(khronos_egl::CONTEXT_MAJOR_VERSION);
-            context_attributes.push(3); // Request GLES 3.0 or higher
-            if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic {
-                context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION);
-                context_attributes.push(match force_gles_minor_version {
-                    wgt::Gles3MinorVersion::Automatic => unreachable!(),
-                    wgt::Gles3MinorVersion::Version0 => 0,
-                    wgt::Gles3MinorVersion::Version1 => 1,
-                    wgt::Gles3MinorVersion::Version2 => 2,
-                });
-            }
+        let mut gl_context_attributes = vec![];
+        let mut gles_context_attributes = vec![];
+        gl_context_attributes.push(khronos_egl::CONTEXT_MAJOR_VERSION);
+        gl_context_attributes.push(3);
+        gl_context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION);
+        gl_context_attributes.push(3);
+        if supports_opengl && force_gles_minor_version != wgt::Gles3MinorVersion::Automatic {
+            log::warn!("Ignoring specified GLES minor version as OpenGL is used");
+        }
+        gles_context_attributes.push(khronos_egl::CONTEXT_MAJOR_VERSION);
+        gles_context_attributes.push(3); // Request GLES 3.0 or higher
+        if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic {
+            gles_context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION);
+            gles_context_attributes.push(match force_gles_minor_version {
+                wgt::Gles3MinorVersion::Automatic => unreachable!(),
+                wgt::Gles3MinorVersion::Version0 => 0,
+                wgt::Gles3MinorVersion::Version1 => 1,
+                wgt::Gles3MinorVersion::Version2 => 2,
+            });
         }
         if flags.contains(wgt::InstanceFlags::DEBUG) {
             if version >= (1, 5) {
@@ -606,15 +605,31 @@ impl Inner {
             context_attributes.push(khr_context_flags);
         }
         context_attributes.push(khronos_egl::NONE);
-        let context = match egl.create_context(display, config, None, &context_attributes) {
-            Ok(context) => context,
-            Err(e) => {
-                return Err(crate::InstanceError::with_source(
-                    String::from("unable to create GLES 3.x context"),
-                    e,
-                ));
-            }
-        };
+
+        gl_context_attributes.extend(&context_attributes);
+        gles_context_attributes.extend(&context_attributes);
+
+        let context = if supports_opengl {
+            egl.create_context(display, config, None, &gl_context_attributes)
+                .or_else(|_| {
+                    egl.bind_api(khronos_egl::OPENGL_ES_API).unwrap();
+                    egl.create_context(display, config, None, &gles_context_attributes)
+                })
+                .map_err(|e| {
+                    crate::InstanceError::with_source(
+                        String::from("unable to create OpenGL or GLES 3.x context"),
+                        e,
+                    )
+                })
+        } else {
+            egl.create_context(display, config, None, &gles_context_attributes)
+                .map_err(|e| {
+                    crate::InstanceError::with_source(
+                        String::from("unable to create GLES 3.x context"),
+                        e,
+                    )
+                })
+        }?;
 
         // Testing if context can be binded without surface
         // and creating dummy pbuffer surface if not.

From 09cc4d211bc919c0f01a245367bc91ec0082f61c Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 7 Aug 2024 12:24:13 +0200
Subject: [PATCH 137/226] remove `Labeled` supertrait of `Trackable`

This is no longer needed since 9c6ae1beae2490ce44d99034f7f1faada936f3d8 removed the usages.
---
 wgpu-core/src/resource.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index c5871ea3ad..54f2b114ec 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -173,7 +173,7 @@ macro_rules! impl_labeled {
     };
 }
 
-pub(crate) trait Trackable: Labeled {
+pub(crate) trait Trackable {
     fn tracker_index(&self) -> TrackerIndex;
 }
 

From 781b54a8b9cee1a2cb22bda565662edec52eb70e Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 7 Aug 2024 12:28:01 +0200
Subject: [PATCH 138/226] remove `TrackingData` from resources that are not
 tracked

---
 wgpu-core/src/binding_model.rs   | 4 ----
 wgpu-core/src/device/resource.rs | 3 ---
 wgpu-core/src/pipeline.rs        | 2 --
 wgpu-core/src/track/mod.rs       | 6 ------
 4 files changed, 15 deletions(-)

diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs
index 91952a8f8a..08d9cda566 100644
--- a/wgpu-core/src/binding_model.rs
+++ b/wgpu-core/src/binding_model.rs
@@ -513,7 +513,6 @@ pub struct BindGroupLayout<A: HalApi> {
     pub(crate) binding_count_validator: BindingTypeMaxCountValidator,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
-    pub(crate) tracking_data: TrackingData,
 }
 
 impl<A: HalApi> Drop for BindGroupLayout<A> {
@@ -535,7 +534,6 @@ crate::impl_resource_type!(BindGroupLayout);
 crate::impl_labeled!(BindGroupLayout);
 crate::impl_parent_device!(BindGroupLayout);
 crate::impl_storage_item!(BindGroupLayout);
-crate::impl_trackable!(BindGroupLayout);
 
 impl<A: HalApi> BindGroupLayout<A> {
     pub(crate) fn raw(&self) -> &A::BindGroupLayout {
@@ -657,7 +655,6 @@ pub struct PipelineLayout<A: HalApi> {
     pub(crate) device: Arc<Device<A>>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
-    pub(crate) tracking_data: TrackingData,
     pub(crate) bind_group_layouts: ArrayVec<Arc<BindGroupLayout<A>>, { hal::MAX_BIND_GROUPS }>,
     pub(crate) push_constant_ranges: ArrayVec<wgt::PushConstantRange, { SHADER_STAGE_COUNT }>,
 }
@@ -769,7 +766,6 @@ crate::impl_resource_type!(PipelineLayout);
 crate::impl_labeled!(PipelineLayout);
 crate::impl_parent_device!(PipelineLayout);
 crate::impl_storage_item!(PipelineLayout);
-crate::impl_trackable!(PipelineLayout);
 
 #[repr(C)]
 #[derive(Clone, Debug, Hash, Eq, PartialEq)]
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 154430a2f8..b3e4e7526b 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -1866,7 +1866,6 @@ impl<A: HalApi> Device<A> {
             exclusive_pipeline: OnceCell::new(),
             binding_count_validator: count_validator,
             label: label.to_string(),
-            tracking_data: TrackingData::new(self.tracker_indices.bind_group_layouts.clone()),
         })
     }
 
@@ -2577,7 +2576,6 @@ impl<A: HalApi> Device<A> {
             raw: Some(raw),
             device: self.clone(),
             label: desc.label.to_string(),
-            tracking_data: TrackingData::new(self.tracker_indices.pipeline_layouts.clone()),
             bind_group_layouts,
             push_constant_ranges: desc.push_constant_ranges.iter().cloned().collect(),
         })
@@ -3428,7 +3426,6 @@ impl<A: HalApi> Device<A> {
         let cache = pipeline::PipelineCache {
             device: self.clone(),
             label: desc.label.to_string(),
-            tracking_data: TrackingData::new(self.tracker_indices.pipeline_caches.clone()),
             // This would be none in the error condition, which we don't implement yet
             raw: Some(raw),
         };
diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs
index 2ab49f83d0..da0a47eeee 100644
--- a/wgpu-core/src/pipeline.rs
+++ b/wgpu-core/src/pipeline.rs
@@ -303,7 +303,6 @@ pub struct PipelineCache<A: HalApi> {
     pub(crate) device: Arc<Device<A>>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
-    pub(crate) tracking_data: TrackingData,
 }
 
 impl<A: HalApi> Drop for PipelineCache<A> {
@@ -322,7 +321,6 @@ crate::impl_resource_type!(PipelineCache);
 crate::impl_labeled!(PipelineCache);
 crate::impl_parent_device!(PipelineCache);
 crate::impl_storage_item!(PipelineCache);
-crate::impl_trackable!(PipelineCache);
 
 /// Describes how the vertex buffer is interpreted.
 #[derive(Clone, Debug)]
diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs
index 2784094ef9..bb0d8cee78 100644
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -221,13 +221,10 @@ pub(crate) struct TrackerIndexAllocators {
     pub texture_views: Arc<SharedTrackerIndexAllocator>,
     pub samplers: Arc<SharedTrackerIndexAllocator>,
     pub bind_groups: Arc<SharedTrackerIndexAllocator>,
-    pub bind_group_layouts: Arc<SharedTrackerIndexAllocator>,
     pub compute_pipelines: Arc<SharedTrackerIndexAllocator>,
     pub render_pipelines: Arc<SharedTrackerIndexAllocator>,
-    pub pipeline_layouts: Arc<SharedTrackerIndexAllocator>,
     pub bundles: Arc<SharedTrackerIndexAllocator>,
     pub query_sets: Arc<SharedTrackerIndexAllocator>,
-    pub pipeline_caches: Arc<SharedTrackerIndexAllocator>,
 }
 
 impl TrackerIndexAllocators {
@@ -238,13 +235,10 @@ impl TrackerIndexAllocators {
             texture_views: Arc::new(SharedTrackerIndexAllocator::new()),
             samplers: Arc::new(SharedTrackerIndexAllocator::new()),
             bind_groups: Arc::new(SharedTrackerIndexAllocator::new()),
-            bind_group_layouts: Arc::new(SharedTrackerIndexAllocator::new()),
             compute_pipelines: Arc::new(SharedTrackerIndexAllocator::new()),
             render_pipelines: Arc::new(SharedTrackerIndexAllocator::new()),
-            pipeline_layouts: Arc::new(SharedTrackerIndexAllocator::new()),
             bundles: Arc::new(SharedTrackerIndexAllocator::new()),
             query_sets: Arc::new(SharedTrackerIndexAllocator::new()),
-            pipeline_caches: Arc::new(SharedTrackerIndexAllocator::new()),
         }
     }
 }

From 36c998a55879c4c15b771cdd859a05bcff90adc3 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Mon, 5 Aug 2024 17:25:25 +0200
Subject: [PATCH 139/226] change `Device.create_bind_group_layout` to return an
 `Arc<BindGroupLayout<A>>`

---
 wgpu-core/src/device/global.rs   |  3 ---
 wgpu-core/src/device/resource.rs | 11 +++++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index cd3d8e5f20..0e3c78c493 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -673,9 +673,6 @@ impl Global {
                 bgl.exclusive_pipeline
                     .set(binding_model::ExclusivePipeline::None)
                     .unwrap();
-
-                let bgl = Arc::new(bgl);
-
                 Ok(bgl)
             });
 
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index b3e4e7526b..69a8d2b44b 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -1659,7 +1659,7 @@ impl<A: HalApi> Device<A> {
         label: &crate::Label,
         entry_map: bgl::EntryMap,
         origin: bgl::Origin,
-    ) -> Result<BindGroupLayout<A>, binding_model::CreateBindGroupLayoutError> {
+    ) -> Result<Arc<BindGroupLayout<A>>, binding_model::CreateBindGroupLayoutError> {
         #[derive(PartialEq)]
         enum WritableStorage {
             Yes,
@@ -1858,7 +1858,7 @@ impl<A: HalApi> Device<A> {
             .validate(&self.limits)
             .map_err(binding_model::CreateBindGroupLayoutError::TooManyBindings)?;
 
-        Ok(BindGroupLayout {
+        let bgl = BindGroupLayout {
             raw: Some(raw),
             device: self.clone(),
             entries: entry_map,
@@ -1866,7 +1866,11 @@ impl<A: HalApi> Device<A> {
             exclusive_pipeline: OnceCell::new(),
             binding_count_validator: count_validator,
             label: label.to_string(),
-        })
+        };
+
+        let bgl = Arc::new(bgl);
+
+        Ok(bgl)
     }
 
     pub(crate) fn create_buffer_binding<'a>(
@@ -2607,7 +2611,6 @@ impl<A: HalApi> Device<A> {
                             bgl::Origin::Derived,
                         ) {
                             Ok(bgl) => {
-                                let bgl = Arc::new(bgl);
                                 e.insert(bgl.clone());
                                 Ok(bgl)
                             }

From 47465ddb1c9e0a3e937a5d1cfd81810877ab27ed Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Mon, 5 Aug 2024 17:29:39 +0200
Subject: [PATCH 140/226] change `Device.create_pipeline_layout` to return an
 `Arc<PipelineLayout<A>>`

---
 wgpu-core/src/device/global.rs   |  2 +-
 wgpu-core/src/device/resource.rs | 12 ++++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 0e3c78c493..5b31cf4c18 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -761,7 +761,7 @@ impl Global {
                 Err(e) => break 'error e,
             };
 
-            let id = fid.assign(Arc::new(layout));
+            let id = fid.assign(layout);
             api_log!("Device::create_pipeline_layout -> {id:?}");
             return (id, None);
         };
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 69a8d2b44b..c5a44d44d9 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -2484,7 +2484,8 @@ impl<A: HalApi> Device<A> {
     pub(crate) fn create_pipeline_layout(
         self: &Arc<Self>,
         desc: &binding_model::ResolvedPipelineLayoutDescriptor<A>,
-    ) -> Result<binding_model::PipelineLayout<A>, binding_model::CreatePipelineLayoutError> {
+    ) -> Result<Arc<binding_model::PipelineLayout<A>>, binding_model::CreatePipelineLayoutError>
+    {
         use crate::binding_model::CreatePipelineLayoutError as Error;
 
         self.check_is_valid()?;
@@ -2576,13 +2577,17 @@ impl<A: HalApi> Device<A> {
 
         drop(raw_bind_group_layouts);
 
-        Ok(binding_model::PipelineLayout {
+        let layout = binding_model::PipelineLayout {
             raw: Some(raw),
             device: self.clone(),
             label: desc.label.to_string(),
             bind_group_layouts,
             push_constant_ranges: desc.push_constant_ranges.iter().cloned().collect(),
-        })
+        };
+
+        let layout = Arc::new(layout);
+
+        Ok(layout)
     }
 
     pub(crate) fn derive_pipeline_layout(
@@ -2628,7 +2633,6 @@ impl<A: HalApi> Device<A> {
         };
 
         let layout = self.create_pipeline_layout(&layout_desc)?;
-        let layout = Arc::new(layout);
         Ok(layout)
     }
 

From 9ce1772f8e356058b11a5bd0123542dbffadf188 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Mon, 5 Aug 2024 17:34:53 +0200
Subject: [PATCH 141/226] change `Device.create_pipeline_cache` to return an
 `Arc<PipelineCache<A>>`

---
 wgpu-core/src/device/global.rs   | 2 +-
 wgpu-core/src/device/resource.rs | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 5b31cf4c18..9a084018b7 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -1737,7 +1737,7 @@ impl Global {
             let cache = unsafe { device.create_pipeline_cache(desc) };
             match cache {
                 Ok(cache) => {
-                    let id = fid.assign(Arc::new(cache));
+                    let id = fid.assign(cache);
                     api_log!("Device::create_pipeline_cache -> {id:?}");
                     return (id, None);
                 }
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index c5a44d44d9..d756c95d35 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -3397,7 +3397,7 @@ impl<A: HalApi> Device<A> {
     pub unsafe fn create_pipeline_cache(
         self: &Arc<Self>,
         desc: &pipeline::PipelineCacheDescriptor,
-    ) -> Result<pipeline::PipelineCache<A>, pipeline::CreatePipelineCacheError> {
+    ) -> Result<Arc<pipeline::PipelineCache<A>>, pipeline::CreatePipelineCacheError> {
         use crate::pipeline_cache;
 
         self.check_is_valid()?;
@@ -3436,6 +3436,9 @@ impl<A: HalApi> Device<A> {
             // This would be none in the error condition, which we don't implement yet
             raw: Some(raw),
         };
+
+        let cache = Arc::new(cache);
+
         Ok(cache)
     }
 

From d8b1c5788a5c3d4012c02c180e5d4651eac13586 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 7 Aug 2024 12:43:26 +0200
Subject: [PATCH 142/226] change `Device.create_command_encoder` to return an
 `Arc<CommandBuffer<A>>`

---
 wgpu-core/src/command/mod.rs     |  9 ++-------
 wgpu-core/src/device/global.rs   |  2 +-
 wgpu-core/src/device/resource.rs | 14 ++++++--------
 3 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs
index ec600d2c60..7d4d86673f 100644
--- a/wgpu-core/src/command/mod.rs
+++ b/wgpu-core/src/command/mod.rs
@@ -339,12 +339,7 @@ impl<A: HalApi> Drop for CommandBuffer<A> {
 }
 
 impl<A: HalApi> CommandBuffer<A> {
-    pub(crate) fn new(
-        encoder: A::CommandEncoder,
-        device: &Arc<Device<A>>,
-        #[cfg(feature = "trace")] enable_tracing: bool,
-        label: &Label,
-    ) -> Self {
+    pub(crate) fn new(encoder: A::CommandEncoder, device: &Arc<Device<A>>, label: &Label) -> Self {
         CommandBuffer {
             device: device.clone(),
             support_clear_texture: device.features.contains(wgt::Features::CLEAR_TEXTURE),
@@ -364,7 +359,7 @@ impl<A: HalApi> CommandBuffer<A> {
                     texture_memory_actions: Default::default(),
                     pending_query_resets: QueryResetMap::new(),
                     #[cfg(feature = "trace")]
-                    commands: if enable_tracing {
+                    commands: if device.trace.lock().is_some() {
                         Some(Vec::new())
                     } else {
                         None
diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 9a084018b7..1581e0d68f 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -1100,7 +1100,7 @@ impl Global {
                 Err(e) => break 'error e,
             };
 
-            let id = fid.assign(Arc::new(command_buffer));
+            let id = fid.assign(command_buffer);
             api_log!("Device::create_command_encoder -> {id:?}");
             return (id.into_command_encoder_id(), None);
         };
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index d756c95d35..311f262e05 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -1599,7 +1599,7 @@ impl<A: HalApi> Device<A> {
     pub(crate) fn create_command_encoder(
         self: &Arc<Self>,
         label: &crate::Label,
-    ) -> Result<command::CommandBuffer<A>, DeviceError> {
+    ) -> Result<Arc<command::CommandBuffer<A>>, DeviceError> {
         self.check_is_valid()?;
 
         let queue = self.get_queue().unwrap();
@@ -1608,13 +1608,11 @@ impl<A: HalApi> Device<A> {
             .command_allocator
             .acquire_encoder(self.raw(), queue.raw())?;
 
-        Ok(command::CommandBuffer::new(
-            encoder,
-            self,
-            #[cfg(feature = "trace")]
-            self.trace.lock().is_some(),
-            label,
-        ))
+        let command_buffer = command::CommandBuffer::new(encoder, self, label);
+
+        let command_buffer = Arc::new(command_buffer);
+
+        Ok(command_buffer)
     }
 
     /// Generate information about late-validated buffer bindings for pipelines.

From f6a3eef77e216597bcac64026f0ff7173c1d925b Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 7 Aug 2024 14:05:42 +0200
Subject: [PATCH 143/226] change `Device.create_shader_module` to return an
 `Arc<ShaderModule<A>>`

---
 wgpu-core/src/device/global.rs   | 10 +++-------
 wgpu-core/src/device/resource.rs | 20 ++++++++++++++------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 1581e0d68f..0aab087ad4 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -28,11 +28,7 @@ use hal::Device as _;
 
 use wgt::{BufferAddress, TextureFormat};
 
-use std::{
-    borrow::Cow,
-    ptr::NonNull,
-    sync::{atomic::Ordering, Arc},
-};
+use std::{borrow::Cow, ptr::NonNull, sync::atomic::Ordering};
 
 use super::{ImplicitPipelineIds, UserClosures};
 
@@ -996,7 +992,7 @@ impl Global {
                 Err(e) => break 'error e,
             };
 
-            let id = fid.assign(Arc::new(shader));
+            let id = fid.assign(shader);
             api_log!("Device::create_shader_module -> {id:?}");
             return (id, None);
         };
@@ -1050,7 +1046,7 @@ impl Global {
                 Ok(shader) => shader,
                 Err(e) => break 'error e,
             };
-            let id = fid.assign(Arc::new(shader));
+            let id = fid.assign(shader);
             api_log!("Device::create_shader_module_spirv -> {id:?}");
             return (id, None);
         };
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 311f262e05..c9105fd3a4 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -1429,7 +1429,7 @@ impl<A: HalApi> Device<A> {
         self: &Arc<Self>,
         desc: &pipeline::ShaderModuleDescriptor<'a>,
         source: pipeline::ShaderModuleSource<'a>,
-    ) -> Result<pipeline::ShaderModule<A>, pipeline::CreateShaderModuleError> {
+    ) -> Result<Arc<pipeline::ShaderModule<A>>, pipeline::CreateShaderModuleError> {
         self.check_is_valid()?;
 
         let (module, source) = match source {
@@ -1546,12 +1546,16 @@ impl<A: HalApi> Device<A> {
             }
         };
 
-        Ok(pipeline::ShaderModule {
+        let module = pipeline::ShaderModule {
             raw: Some(raw),
             device: self.clone(),
             interface: Some(interface),
             label: desc.label.to_string(),
-        })
+        };
+
+        let module = Arc::new(module);
+
+        Ok(module)
     }
 
     #[allow(unused_unsafe)]
@@ -1559,7 +1563,7 @@ impl<A: HalApi> Device<A> {
         self: &Arc<Self>,
         desc: &pipeline::ShaderModuleDescriptor<'a>,
         source: &'a [u32],
-    ) -> Result<pipeline::ShaderModule<A>, pipeline::CreateShaderModuleError> {
+    ) -> Result<Arc<pipeline::ShaderModule<A>>, pipeline::CreateShaderModuleError> {
         self.check_is_valid()?;
 
         self.require_features(wgt::Features::SPIRV_SHADER_PASSTHROUGH)?;
@@ -1588,12 +1592,16 @@ impl<A: HalApi> Device<A> {
             }
         };
 
-        Ok(pipeline::ShaderModule {
+        let module = pipeline::ShaderModule {
             raw: Some(raw),
             device: self.clone(),
             interface: None,
             label: desc.label.to_string(),
-        })
+        };
+
+        let module = Arc::new(module);
+
+        Ok(module)
     }
 
     pub(crate) fn create_command_encoder(

From 34b0df277c60c44520abb324e23ca5ddfc53478c Mon Sep 17 00:00:00 2001
From: Mehmet Oguz Derin <mehmetoguzderin@mehmetoguzderin.com>
Date: Sat, 10 Aug 2024 19:02:29 +0900
Subject: [PATCH 144/226] Support `texture-compression-bc-sliced-3d` in wgpu
 (#5751)

---
 deno_webgpu/01_webgpu.js                      |  1 +
 deno_webgpu/lib.rs                            |  9 +++
 deno_webgpu/webgpu.idl                        |  1 +
 tests/tests/clear_texture.rs                  | 12 +++-
 wgpu-core/src/device/resource.rs              | 19 ++++++-
 wgpu-hal/src/dx12/adapter.rs                  |  1 +
 wgpu-hal/src/gles/adapter.rs                  |  4 ++
 wgpu-hal/src/metal/adapter.rs                 |  1 +
 wgpu-hal/src/vulkan/adapter.rs                |  5 ++
 wgpu-types/src/lib.rs                         | 55 +++++++++++++++----
 wgpu/src/backend/webgpu.rs                    |  6 +-
 .../webgpu/webgpu_sys/gen_GpuFeatureName.rs   |  1 +
 12 files changed, 101 insertions(+), 14 deletions(-)

diff --git a/deno_webgpu/01_webgpu.js b/deno_webgpu/01_webgpu.js
index f226c8ab5f..b5bf0afc7a 100644
--- a/deno_webgpu/01_webgpu.js
+++ b/deno_webgpu/01_webgpu.js
@@ -5071,6 +5071,7 @@ webidl.converters["GPUFeatureName"] = webidl.createEnumConverter(
     // texture formats
     "depth32float-stencil8",
     "texture-compression-bc",
+    "texture-compression-bc-sliced-3d",
     "texture-compression-etc2",
     "texture-compression-astc",
     "rg11b10ufloat-renderable",
diff --git a/deno_webgpu/lib.rs b/deno_webgpu/lib.rs
index aafb225fb9..c1822ee2bc 100644
--- a/deno_webgpu/lib.rs
+++ b/deno_webgpu/lib.rs
@@ -248,6 +248,9 @@ fn deserialize_features(features: &wgpu_types::Features) -> Vec<&'static str> {
     if features.contains(wgpu_types::Features::TEXTURE_COMPRESSION_BC) {
         return_features.push("texture-compression-bc");
     }
+    if features.contains(wgpu_types::Features::TEXTURE_COMPRESSION_BC_SLICED_3D) {
+        return_features.push("texture-compression-bc-sliced-3d");
+    }
     if features.contains(wgpu_types::Features::TEXTURE_COMPRESSION_ETC2) {
         return_features.push("texture-compression-etc2");
     }
@@ -491,6 +494,12 @@ impl From<GpuRequiredFeatures> for wgpu_types::Features {
             wgpu_types::Features::TEXTURE_COMPRESSION_BC,
             required_features.0.contains("texture-compression-bc"),
         );
+        features.set(
+            wgpu_types::Features::TEXTURE_COMPRESSION_BC_SLICED_3D,
+            required_features
+                .0
+                .contains("texture-compression-bc-sliced-3d"),
+        );
         features.set(
             wgpu_types::Features::TEXTURE_COMPRESSION_ETC2,
             required_features.0.contains("texture-compression-etc2"),
diff --git a/deno_webgpu/webgpu.idl b/deno_webgpu/webgpu.idl
index 07d9d60ec7..41949feb1f 100644
--- a/deno_webgpu/webgpu.idl
+++ b/deno_webgpu/webgpu.idl
@@ -97,6 +97,7 @@ enum GPUFeatureName {
     // texture formats
     "depth32float-stencil8",
     "texture-compression-bc",
+    "texture-compression-bc-sliced-3d",
     "texture-compression-etc2",
     "texture-compression-astc",
     // api
diff --git a/tests/tests/clear_texture.rs b/tests/tests/clear_texture.rs
index 175c642b93..5e7d86ed88 100644
--- a/tests/tests/clear_texture.rs
+++ b/tests/tests/clear_texture.rs
@@ -273,7 +273,7 @@ async fn clear_texture_tests(ctx: TestingContext, formats: &'static [wgpu::Textu
         let is_compressed_or_depth_stencil_format =
             format.is_compressed() || format.is_depth_stencil_format();
         let supports_1d = !is_compressed_or_depth_stencil_format;
-        let supports_3d = !is_compressed_or_depth_stencil_format;
+        let supports_3d = format.is_bcn() || !is_compressed_or_depth_stencil_format;
 
         // 1D texture
         if supports_1d {
@@ -385,7 +385,15 @@ static CLEAR_TEXTURE_DEPTH32_STENCIL8: GpuTestConfiguration = GpuTestConfigurati
 static CLEAR_TEXTURE_COMPRESSED_BCN: GpuTestConfiguration = GpuTestConfiguration::new()
     .parameters(
         TestParameters::default()
-            .features(wgpu::Features::CLEAR_TEXTURE | wgpu::Features::TEXTURE_COMPRESSION_BC)
+            .features(
+                wgpu::Features::CLEAR_TEXTURE
+                    | wgpu::Features::TEXTURE_COMPRESSION_BC
+                    | wgpu::Features::TEXTURE_COMPRESSION_BC_SLICED_3D,
+            )
+            .limits(wgpu::Limits {
+                max_texture_dimension_3d: 1024,
+                ..wgpu::Limits::downlevel_defaults()
+            })
             // https://bugs.chromium.org/p/angleproject/issues/detail?id=7056
             .expect_fail(FailureCase::backend_adapter(wgpu::Backends::GL, "ANGLE"))
             // compressed texture copy to buffer not yet implemented
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index c9105fd3a4..463ede2cde 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -745,8 +745,12 @@ impl<A: HalApi> Device<A> {
                     desc.dimension,
                 ));
             }
+        }
 
-            // Compressed textures can only be 2D
+        if desc.dimension != wgt::TextureDimension::D2
+            && desc.dimension != wgt::TextureDimension::D3
+        {
+            // Compressed textures can only be 2D or 3D
             if desc.format.is_compressed() {
                 return Err(CreateTextureError::InvalidCompressedDimension(
                     desc.dimension,
@@ -777,6 +781,19 @@ impl<A: HalApi> Device<A> {
                     },
                 ));
             }
+
+            if desc.dimension == wgt::TextureDimension::D3 {
+                // Only BCn formats with Sliced 3D feature can be used for 3D textures
+                if desc.format.is_bcn() {
+                    self.require_features(wgt::Features::TEXTURE_COMPRESSION_BC_SLICED_3D)
+                        .map_err(|error| CreateTextureError::MissingFeatures(desc.format, error))?;
+                } else {
+                    return Err(CreateTextureError::InvalidCompressedDimension(
+                        desc.dimension,
+                        desc.format,
+                    ));
+                }
+            }
         }
 
         {
diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs
index cb2636611b..72b9d04b71 100644
--- a/wgpu-hal/src/dx12/adapter.rs
+++ b/wgpu-hal/src/dx12/adapter.rs
@@ -299,6 +299,7 @@ impl super::Adapter {
             | wgt::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS
             | wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES
             | wgt::Features::TEXTURE_COMPRESSION_BC
+            | wgt::Features::TEXTURE_COMPRESSION_BC_SLICED_3D
             | wgt::Features::CLEAR_TEXTURE
             | wgt::Features::TEXTURE_FORMAT_16BIT_NORM
             | wgt::Features::PUSH_CONSTANTS
diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs
index 1cda99b338..bd2410e273 100644
--- a/wgpu-hal/src/gles/adapter.rs
+++ b/wgpu-hal/src/gles/adapter.rs
@@ -503,6 +503,10 @@ impl super::Adapter {
             wgt::Features::TEXTURE_COMPRESSION_BC,
             bcn_exts.iter().all(|&ext| extensions.contains(ext)),
         );
+        features.set(
+            wgt::Features::TEXTURE_COMPRESSION_BC_SLICED_3D,
+            bcn_exts.iter().all(|&ext| extensions.contains(ext)), // BC guaranteed Sliced 3D
+        );
         let has_etc = if cfg!(any(webgl, Emscripten)) {
             extensions.contains("WEBGL_compressed_texture_etc")
         } else {
diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs
index 924902517f..7e0043790c 100644
--- a/wgpu-hal/src/metal/adapter.rs
+++ b/wgpu-hal/src/metal/adapter.rs
@@ -876,6 +876,7 @@ impl super::PrivateCapabilities {
         features.set(F::TEXTURE_COMPRESSION_ASTC, self.format_astc);
         features.set(F::TEXTURE_COMPRESSION_ASTC_HDR, self.format_astc_hdr);
         features.set(F::TEXTURE_COMPRESSION_BC, self.format_bc);
+        features.set(F::TEXTURE_COMPRESSION_BC_SLICED_3D, self.format_bc); // BC guarantees Sliced 3D
         features.set(F::TEXTURE_COMPRESSION_ETC2, self.format_eac_etc);
 
         features.set(F::DEPTH_CLIP_CONTROL, self.supports_depth_clip_control);
diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index 215c0dd958..22b897f09b 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -253,6 +253,7 @@ impl PhysicalDeviceFeatures {
                 )
                 .texture_compression_bc(
                     requested_features.contains(wgt::Features::TEXTURE_COMPRESSION_BC),
+                    // BC provides formats for Sliced 3D
                 )
                 //.occlusion_query_precise(requested_features.contains(wgt::Features::PRECISE_OCCLUSION_QUERY))
                 .pipeline_statistics_query(
@@ -539,6 +540,10 @@ impl PhysicalDeviceFeatures {
             F::TEXTURE_COMPRESSION_BC,
             self.core.texture_compression_bc != 0,
         );
+        features.set(
+            F::TEXTURE_COMPRESSION_BC_SLICED_3D,
+            self.core.texture_compression_bc != 0, // BC guarantees Sliced 3D
+        );
         features.set(
             F::PIPELINE_STATISTICS_QUERY,
             self.core.pipeline_statistics_query != 0,
diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
index da8e6ff495..6cf007e2f9 100644
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@@ -290,12 +290,28 @@ bitflags::bitflags! {
         /// Support for this feature guarantees availability of [`TextureUsages::COPY_SRC | TextureUsages::COPY_DST | TextureUsages::TEXTURE_BINDING`] for BCn formats.
         /// [`Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES`] may enable additional usages.
         ///
+        /// This feature guarantees availability of sliced-3d textures for BC formats when combined with [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`].
+        ///
         /// Supported Platforms:
         /// - desktops
+        /// - Mobile (All Apple9 and some Apple7 and Apple8 devices)
         ///
         /// This is a web and native feature.
         const TEXTURE_COMPRESSION_BC = 1 << 2;
 
+
+        /// Allows the 3d dimension for textures with BC compressed formats.
+        ///
+        /// This feature must be used in combination with [`Features::TEXTURE_COMPRESSION_BC`] to enable 3D textures with BC compression.
+        /// It does not enable the BC formats by itself.
+        ///
+        /// Supported Platforms:
+        /// - desktops
+        /// - Mobile (All Apple9 and some Apple7 and Apple8 devices)
+        ///
+        /// This is a web and native feature.
+        const TEXTURE_COMPRESSION_BC_SLICED_3D = 1 << 3;
+
         /// Enables ETC family of compressed textures. All ETC textures use 4x4 pixel blocks.
         /// ETC2 RGB and RGBA1 are 8 bytes per block. RTC2 RGBA8 and EAC are 16 bytes per block.
         ///
@@ -310,7 +326,7 @@ bitflags::bitflags! {
         /// - Mobile (some)
         ///
         /// This is a web and native feature.
-        const TEXTURE_COMPRESSION_ETC2 = 1 << 3;
+        const TEXTURE_COMPRESSION_ETC2 = 1 << 4;
 
         /// Enables ASTC family of compressed textures. ASTC textures use pixel blocks varying from 4x4 to 12x12.
         /// Blocks are always 16 bytes.
@@ -326,7 +342,7 @@ bitflags::bitflags! {
         /// - Mobile (some)
         ///
         /// This is a web and native feature.
-        const TEXTURE_COMPRESSION_ASTC = 1 << 4;
+        const TEXTURE_COMPRESSION_ASTC = 1 << 5;
 
         /// Enables use of Timestamp Queries. These queries tell the current gpu timestamp when
         /// all work before the query is finished.
@@ -350,7 +366,7 @@ bitflags::bitflags! {
         /// - Metal
         ///
         /// This is a web and native feature.
-        const TIMESTAMP_QUERY = 1 << 5;
+        const TIMESTAMP_QUERY = 1 << 6;
 
         /// Allows non-zero value for the `first_instance` member in indirect draw calls.
         ///
@@ -369,7 +385,7 @@ bitflags::bitflags! {
         /// - OpenGL ES / WebGL
         ///
         /// This is a web and native feature.
-        const INDIRECT_FIRST_INSTANCE = 1 << 6;
+        const INDIRECT_FIRST_INSTANCE = 1 << 7;
 
         /// Allows shaders to acquire the FP16 ability
         ///
@@ -380,7 +396,7 @@ bitflags::bitflags! {
         /// - Metal
         ///
         /// This is a web and native feature.
-        const SHADER_F16 = 1 << 7;
+        const SHADER_F16 = 1 << 8;
 
 
         /// Allows for usage of textures of format [`TextureFormat::Rg11b10Float`] as a render target
@@ -391,7 +407,7 @@ bitflags::bitflags! {
         /// - Metal
         ///
         /// This is a web and native feature.
-        const RG11B10UFLOAT_RENDERABLE = 1 << 8;
+        const RG11B10UFLOAT_RENDERABLE = 1 << 9;
 
         /// Allows the [`wgpu::TextureUsages::STORAGE_BINDING`] usage on textures with format [`TextureFormat::Bgra8unorm`]
         ///
@@ -401,7 +417,7 @@ bitflags::bitflags! {
         /// - Metal
         ///
         /// This is a web and native feature.
-        const BGRA8UNORM_STORAGE = 1 << 9;
+        const BGRA8UNORM_STORAGE = 1 << 10;
 
 
         /// Allows textures with formats "r32float", "rg32float", and "rgba32float" to be filterable.
@@ -413,9 +429,9 @@ bitflags::bitflags! {
         /// - GL with one of `GL_ARB_color_buffer_float`/`GL_EXT_color_buffer_float`/`OES_texture_float_linear`
         ///
         /// This is a web and native feature.
-        const FLOAT32_FILTERABLE = 1 << 10;
+        const FLOAT32_FILTERABLE = 1 << 11;
 
-        // Bits 11-19 available for webgpu features. Should you chose to use some of them for
+        // Bits 12-19 available for webgpu features. Should you choose to use some of them
         // for native features, don't forget to update `all_webgpu_mask` and `all_native_mask`
         // accordingly.
 
@@ -2562,13 +2578,14 @@ pub enum TextureFormat {
     /// [`Features::TEXTURE_FORMAT_NV12`] must be enabled to use this texture format.
     NV12,
 
-    // Compressed textures usable with `TEXTURE_COMPRESSION_BC` feature.
+    // Compressed textures usable with `TEXTURE_COMPRESSION_BC` feature. `TEXTURE_COMPRESSION_BC_SLICED_3D` is required to use with 3D textures.
     /// 4x4 block compressed texture. 8 bytes per block (4 bit/px). 4 color + alpha pallet. 5 bit R + 6 bit G + 5 bit B + 1 bit alpha.
     /// [0, 63] ([0, 1] for alpha) converted to/from float [0, 1] in shader.
     ///
     /// Also known as DXT1.
     ///
     /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format.
+    /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension.
     Bc1RgbaUnorm,
     /// 4x4 block compressed texture. 8 bytes per block (4 bit/px). 4 color + alpha pallet. 5 bit R + 6 bit G + 5 bit B + 1 bit alpha.
     /// Srgb-color [0, 63] ([0, 1] for alpha) converted to/from linear-color float [0, 1] in shader.
@@ -2576,6 +2593,7 @@ pub enum TextureFormat {
     /// Also known as DXT1.
     ///
     /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format.
+    /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension.
     Bc1RgbaUnormSrgb,
     /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). 4 color pallet. 5 bit R + 6 bit G + 5 bit B + 4 bit alpha.
     /// [0, 63] ([0, 15] for alpha) converted to/from float [0, 1] in shader.
@@ -2583,6 +2601,7 @@ pub enum TextureFormat {
     /// Also known as DXT3.
     ///
     /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format.
+    /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension.
     Bc2RgbaUnorm,
     /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). 4 color pallet. 5 bit R + 6 bit G + 5 bit B + 4 bit alpha.
     /// Srgb-color [0, 63] ([0, 255] for alpha) converted to/from linear-color float [0, 1] in shader.
@@ -2590,6 +2609,7 @@ pub enum TextureFormat {
     /// Also known as DXT3.
     ///
     /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format.
+    /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension.
     Bc2RgbaUnormSrgb,
     /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). 4 color pallet + 8 alpha pallet. 5 bit R + 6 bit G + 5 bit B + 8 bit alpha.
     /// [0, 63] ([0, 255] for alpha) converted to/from float [0, 1] in shader.
@@ -2597,6 +2617,7 @@ pub enum TextureFormat {
     /// Also known as DXT5.
     ///
     /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format.
+    /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension.
     Bc3RgbaUnorm,
     /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). 4 color pallet + 8 alpha pallet. 5 bit R + 6 bit G + 5 bit B + 8 bit alpha.
     /// Srgb-color [0, 63] ([0, 255] for alpha) converted to/from linear-color float [0, 1] in shader.
@@ -2604,6 +2625,7 @@ pub enum TextureFormat {
     /// Also known as DXT5.
     ///
     /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format.
+    /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension.
     Bc3RgbaUnormSrgb,
     /// 4x4 block compressed texture. 8 bytes per block (4 bit/px). 8 color pallet. 8 bit R.
     /// [0, 255] converted to/from float [0, 1] in shader.
@@ -2611,6 +2633,7 @@ pub enum TextureFormat {
     /// Also known as RGTC1.
     ///
     /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format.
+    /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension.
     Bc4RUnorm,
     /// 4x4 block compressed texture. 8 bytes per block (4 bit/px). 8 color pallet. 8 bit R.
     /// [-127, 127] converted to/from float [-1, 1] in shader.
@@ -2618,6 +2641,7 @@ pub enum TextureFormat {
     /// Also known as RGTC1.
     ///
     /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format.
+    /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension.
     Bc4RSnorm,
     /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). 8 color red pallet + 8 color green pallet. 8 bit RG.
     /// [0, 255] converted to/from float [0, 1] in shader.
@@ -2625,6 +2649,7 @@ pub enum TextureFormat {
     /// Also known as RGTC2.
     ///
     /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format.
+    /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension.
     Bc5RgUnorm,
     /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). 8 color red pallet + 8 color green pallet. 8 bit RG.
     /// [-127, 127] converted to/from float [-1, 1] in shader.
@@ -2632,18 +2657,21 @@ pub enum TextureFormat {
     /// Also known as RGTC2.
     ///
     /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format.
+    /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension.
     Bc5RgSnorm,
     /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). Variable sized pallet. 16 bit unsigned float RGB. Float in shader.
     ///
     /// Also known as BPTC (float).
     ///
     /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format.
+    /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension.
     Bc6hRgbUfloat,
     /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). Variable sized pallet. 16 bit signed float RGB. Float in shader.
     ///
     /// Also known as BPTC (float).
     ///
     /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format.
+    /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension.
     Bc6hRgbFloat,
     /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). Variable sized pallet. 8 bit integer RGBA.
     /// [0, 255] converted to/from float [0, 1] in shader.
@@ -2651,6 +2679,7 @@ pub enum TextureFormat {
     /// Also known as BPTC (unorm).
     ///
     /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format.
+    /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension.
     Bc7RgbaUnorm,
     /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). Variable sized pallet. 8 bit integer RGBA.
     /// Srgb-color [0, 255] converted to/from linear-color float [0, 1] in shader.
@@ -2658,6 +2687,7 @@ pub enum TextureFormat {
     /// Also known as BPTC (unorm).
     ///
     /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format.
+    /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension.
     Bc7RgbaUnormSrgb,
     /// 4x4 block compressed texture. 8 bytes per block (4 bit/px). Complex pallet. 8 bit integer RGB.
     /// [0, 255] converted to/from float [0, 1] in shader.
@@ -3201,6 +3231,11 @@ impl TextureFormat {
         self.block_dimensions() != (1, 1)
     }
 
+    /// Returns `true` for BCn compressed formats.
+    pub fn is_bcn(&self) -> bool {
+        self.required_features() == Features::TEXTURE_COMPRESSION_BC
+    }
+
     /// Returns the required features (if any) in order to use the texture.
     pub fn required_features(&self) -> Features {
         match *self {
diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs
index 573db58a83..d008093132 100644
--- a/wgpu/src/backend/webgpu.rs
+++ b/wgpu/src/backend/webgpu.rs
@@ -726,7 +726,7 @@ fn map_map_mode(mode: crate::MapMode) -> u32 {
     }
 }
 
-const FEATURES_MAPPING: [(wgt::Features, webgpu_sys::GpuFeatureName); 11] = [
+const FEATURES_MAPPING: [(wgt::Features, webgpu_sys::GpuFeatureName); 12] = [
     //TODO: update the name
     (
         wgt::Features::DEPTH_CLIP_CONTROL,
@@ -740,6 +740,10 @@ const FEATURES_MAPPING: [(wgt::Features, webgpu_sys::GpuFeatureName); 11] = [
         wgt::Features::TEXTURE_COMPRESSION_BC,
         webgpu_sys::GpuFeatureName::TextureCompressionBc,
     ),
+    (
+        wgt::Features::TEXTURE_COMPRESSION_BC_SLICED_3D,
+        webgpu_sys::GpuFeatureName::TextureCompressionBcSliced3d,
+    ),
     (
         wgt::Features::TEXTURE_COMPRESSION_ETC2,
         webgpu_sys::GpuFeatureName::TextureCompressionEtc2,
diff --git a/wgpu/src/backend/webgpu/webgpu_sys/gen_GpuFeatureName.rs b/wgpu/src/backend/webgpu/webgpu_sys/gen_GpuFeatureName.rs
index ed39a14c51..ef2119a88b 100644
--- a/wgpu/src/backend/webgpu/webgpu_sys/gen_GpuFeatureName.rs
+++ b/wgpu/src/backend/webgpu/webgpu_sys/gen_GpuFeatureName.rs
@@ -21,6 +21,7 @@ pub enum GpuFeatureName {
     DepthClipControl = "depth-clip-control",
     Depth32floatStencil8 = "depth32float-stencil8",
     TextureCompressionBc = "texture-compression-bc",
+    TextureCompressionBcSliced3d = "texture-compression-bc-sliced-3d",
     TextureCompressionEtc2 = "texture-compression-etc2",
     TextureCompressionAstc = "texture-compression-astc",
     TimestampQuery = "timestamp-query",

From 28be38c73b585f4f82b86fbd661c587b6726570d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sun, 11 Aug 2024 14:21:29 +0200
Subject: [PATCH 145/226] build(deps): bump crate-ci/typos from 1.23.5 to
 1.23.6 (#6078)

Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.23.5 to 1.23.6.
- [Release notes](https://github.com/crate-ci/typos/releases)
- [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md)
- [Commits](https://github.com/crate-ci/typos/compare/v1.23.5...v1.23.6)

---
updated-dependencies:
- dependency-name: crate-ci/typos
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index baa6d1be8e..8034cd47cc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -632,7 +632,7 @@ jobs:
           cargo fmt --manifest-path xtask/Cargo.toml -- --check
 
       - name: Check for typos
-        uses: crate-ci/typos@v1.23.5
+        uses: crate-ci/typos@v1.23.6
 
   check-cts-runner:
     # runtime is normally 2 minutes

From 28e15dcce184a52646b4e8fd9c8258cd0880ef3b Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 12 Aug 2024 09:18:09 +0200
Subject: [PATCH 146/226] build(deps): bump the patch-updates group with 13
 updates (#6102)

Bumps the patch-updates group with 13 updates:

| Package | From | To |
| --- | --- | --- |
| [serde](https://github.com/serde-rs/serde) | `1.0.204` | `1.0.206` |
| [serde_json](https://github.com/serde-rs/json) | `1.0.122` | `1.0.124` |
| [syn](https://github.com/dtolnay/syn) | `2.0.72` | `2.0.74` |
| [cc](https://github.com/rust-lang/cc-rs) | `1.1.7` | `1.1.10` |
| [clap](https://github.com/clap-rs/clap) | `4.5.13` | `4.5.15` |
| [clap_builder](https://github.com/clap-rs/clap) | `4.5.13` | `4.5.15` |
| [core-foundation-sys](https://github.com/servo/core-foundation-rs) | `0.8.6` | `0.8.7` |
| [object](https://github.com/gimli-rs/object) | `0.36.2` | `0.36.3` |
| [polling](https://github.com/smol-rs/polling) | `3.7.2` | `3.7.3` |
| [serde_derive](https://github.com/serde-rs/serde) | `1.0.204` | `1.0.206` |
| [ttf-parser](https://github.com/RazrFalcon/ttf-parser) | `0.24.0` | `0.24.1` |
| [xcursor](https://github.com/esposm03/xcursor-rs) | `0.3.6` | `0.3.7` |
| [xml-rs](https://github.com/kornelski/xml-rs) | `0.8.20` | `0.8.21` |


Updates `serde` from 1.0.204 to 1.0.206
- [Release notes](https://github.com/serde-rs/serde/releases)
- [Commits](https://github.com/serde-rs/serde/compare/v1.0.204...v1.0.206)

Updates `serde_json` from 1.0.122 to 1.0.124
- [Release notes](https://github.com/serde-rs/json/releases)
- [Commits](https://github.com/serde-rs/json/compare/v1.0.122...v1.0.124)

Updates `syn` from 2.0.72 to 2.0.74
- [Release notes](https://github.com/dtolnay/syn/releases)
- [Commits](https://github.com/dtolnay/syn/compare/2.0.72...2.0.74)

Updates `cc` from 1.1.7 to 1.1.10
- [Release notes](https://github.com/rust-lang/cc-rs/releases)
- [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md)
- [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.1.7...cc-v1.1.10)

Updates `clap` from 4.5.13 to 4.5.15
- [Release notes](https://github.com/clap-rs/clap/releases)
- [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md)
- [Commits](https://github.com/clap-rs/clap/compare/clap_complete-v4.5.13...v4.5.15)

Updates `clap_builder` from 4.5.13 to 4.5.15
- [Release notes](https://github.com/clap-rs/clap/releases)
- [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md)
- [Commits](https://github.com/clap-rs/clap/compare/v4.5.13...v4.5.15)

Updates `core-foundation-sys` from 0.8.6 to 0.8.7
- [Commits](https://github.com/servo/core-foundation-rs/compare/core-foundation-sys-v0.8.6...core-foundation-sys-v0.8.7)

Updates `object` from 0.36.2 to 0.36.3
- [Changelog](https://github.com/gimli-rs/object/blob/master/CHANGELOG.md)
- [Commits](https://github.com/gimli-rs/object/compare/0.36.2...0.36.3)

Updates `polling` from 3.7.2 to 3.7.3
- [Release notes](https://github.com/smol-rs/polling/releases)
- [Changelog](https://github.com/smol-rs/polling/blob/master/CHANGELOG.md)
- [Commits](https://github.com/smol-rs/polling/compare/v3.7.2...v3.7.3)

Updates `serde_derive` from 1.0.204 to 1.0.206
- [Release notes](https://github.com/serde-rs/serde/releases)
- [Commits](https://github.com/serde-rs/serde/compare/v1.0.204...v1.0.206)

Updates `ttf-parser` from 0.24.0 to 0.24.1
- [Changelog](https://github.com/RazrFalcon/ttf-parser/blob/master/CHANGELOG.md)
- [Commits](https://github.com/RazrFalcon/ttf-parser/commits)

Updates `xcursor` from 0.3.6 to 0.3.7
- [Commits](https://github.com/esposm03/xcursor-rs/commits)

Updates `xml-rs` from 0.8.20 to 0.8.21
- [Changelog](https://github.com/kornelski/xml-rs/blob/main/Changelog.md)
- [Commits](https://github.com/kornelski/xml-rs/compare/0.8.20...0.8.21)

---
updated-dependencies:
- dependency-name: serde
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: serde_json
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: syn
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: cc
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: clap
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: clap_builder
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: core-foundation-sys
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: object
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: polling
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: serde_derive
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: ttf-parser
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: xcursor
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
- dependency-name: xml-rs
  dependency-type: indirect
  update-type: version-update:semver-patch
  dependency-group: patch-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 Cargo.lock      | 115 ++++++++++++++++++++++++++----------------------
 Cargo.toml      |   2 +-
 naga/Cargo.toml |   2 +-
 3 files changed, 64 insertions(+), 55 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 6040c0a442..c010f70450 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -186,7 +186,7 @@ dependencies = [
  "argh_shared",
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -242,7 +242,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -385,7 +385,7 @@ checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -448,9 +448,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
 
 [[package]]
 name = "cc"
-version = "1.1.7"
+version = "1.1.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26a5c3fd7bfa1ce3897a3a3501d362b2d87b7f2583ebcb4a949ec25911025cbc"
+checksum = "e9e8aabfac534be767c909e0690571677d49f41bd8465ae876fe043d52ba5292"
 dependencies = [
  "jobserver",
  "libc",
@@ -512,9 +512,9 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.5.13"
+version = "4.5.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc"
+checksum = "11d8838454fda655dafd3accb2b6e2bea645b9e4078abe84a22ceb947235c5cc"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -522,9 +522,9 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.5.13"
+version = "4.5.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99"
+checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6"
 dependencies = [
  "anstream",
  "anstyle",
@@ -541,7 +541,7 @@ dependencies = [
  "heck 0.5.0",
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -727,9 +727,9 @@ dependencies = [
 
 [[package]]
 name = "core-foundation-sys"
-version = "0.8.6"
+version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
+checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 
 [[package]]
 name = "core-graphics"
@@ -886,7 +886,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f"
 dependencies = [
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -1033,7 +1033,7 @@ dependencies = [
  "quote",
  "strum",
  "strum_macros",
- "syn 2.0.72",
+ "syn 2.0.74",
  "thiserror",
 ]
 
@@ -1106,7 +1106,7 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -1119,7 +1119,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustc_version 0.4.0",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -1207,7 +1207,7 @@ checksum = "b36f2ddfca91251bed7f931f24b192e4eaf0a0e0fa70cf81cfb1416a1973620e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -1353,7 +1353,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -1478,7 +1478,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -2466,7 +2466,7 @@ dependencies = [
  "proc-macro-crate 3.1.0",
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -2526,9 +2526,9 @@ checksum = "d079845b37af429bfe5dfa76e6d087d788031045b25cfc6fd898486fd9847666"
 
 [[package]]
 name = "object"
-version = "0.36.2"
+version = "0.36.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f203fa8daa7bb185f760ae12bd8e097f63d17041dcdcaf675ac54cdf863170e"
+checksum = "27b64972346851a39438c60b341ebc01bba47464ae329e55cf343eb93964efd9"
 dependencies = [
  "memchr",
 ]
@@ -2667,7 +2667,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -2745,9 +2745,9 @@ dependencies = [
 
 [[package]]
 name = "polling"
-version = "3.7.2"
+version = "3.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a3ed00ed3fbf728b5816498ecd316d1716eecaced9c0c8d2c5a6740ca214985b"
+checksum = "cc2790cd301dec6cd3b7a025e4815cf825724a51c98dccfe6a3e55f05ffb6511"
 dependencies = [
  "cfg-if",
  "concurrent-queue",
@@ -2755,7 +2755,7 @@ dependencies = [
  "pin-project-lite",
  "rustix",
  "tracing",
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -2806,7 +2806,7 @@ checksum = "07c277e4e643ef00c1233393c673f655e3672cf7eb3ba08a00bdd0ea59139b5f"
 dependencies = [
  "proc-macro-rules-macros",
  "proc-macro2",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -2818,7 +2818,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -3156,29 +3156,29 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
 
 [[package]]
 name = "serde"
-version = "1.0.204"
+version = "1.0.206"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12"
+checksum = "5b3e4cd94123dd520a128bcd11e34d9e9e423e7e3e50425cb1b4b1e3549d0284"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.204"
+version = "1.0.206"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222"
+checksum = "fabfb6138d2383ea8208cf98ccf69cdfb1aff4088460681d84189aa259762f97"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
 name = "serde_json"
-version = "1.0.122"
+version = "1.0.124"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da"
+checksum = "66ad62847a56b3dba58cc891acd13884b9c61138d330c0d7b6181713d4fce38d"
 dependencies = [
  "indexmap",
  "itoa",
@@ -3439,7 +3439,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustversion",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -3455,9 +3455,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.72"
+version = "2.0.74"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af"
+checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -3490,7 +3490,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -3623,7 +3623,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -3725,9 +3725,9 @@ dependencies = [
 
 [[package]]
 name = "ttf-parser"
-version = "0.24.0"
+version = "0.24.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8686b91785aff82828ed725225925b33b4fde44c4bb15876e5f7c832724c420a"
+checksum = "5be21190ff5d38e8b4a2d3b6a3ae57f612cc39c96e83cedeaf7abc338a8bac4a"
 
 [[package]]
 name = "unic-char-property"
@@ -3932,7 +3932,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
  "wasm-bindgen-shared",
 ]
 
@@ -3966,7 +3966,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
@@ -3999,7 +3999,7 @@ checksum = "b7f89739351a2e03cb94beb799d47fb2cac01759b40ec441f7de39b00cbf7ef0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -4380,7 +4380,7 @@ version = "22.0.0"
 dependencies = [
  "heck 0.5.0",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -4510,7 +4510,7 @@ checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -4521,7 +4521,7 @@ checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
 
 [[package]]
@@ -4583,6 +4583,15 @@ dependencies = [
  "windows-targets 0.52.6",
 ]
 
+[[package]]
+name = "windows-sys"
+version = "0.59.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
+dependencies = [
+ "windows-targets 0.52.6",
+]
+
 [[package]]
 name = "windows-targets"
 version = "0.42.2"
@@ -4924,9 +4933,9 @@ checksum = "ec107c4503ea0b4a98ef47356329af139c0a4f7750e621cf2973cd3385ebcb3d"
 
 [[package]]
 name = "xcursor"
-version = "0.3.6"
+version = "0.3.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d491ee231a51ae64a5b762114c3ac2104b967aadba1de45c86ca42cf051513b7"
+checksum = "f513f231f0810b04d988f0df4fb16ef0b6b25d23248f2c4b56b074e6b1b0ffe4"
 
 [[package]]
 name = "xkbcommon-dl"
@@ -4949,9 +4958,9 @@ checksum = "b9cc00251562a284751c9973bace760d86c0276c471b4be569fe6b068ee97a56"
 
 [[package]]
 name = "xml-rs"
-version = "0.8.20"
+version = "0.8.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "791978798f0597cfc70478424c2b4fdc2b7a8024aaff78497ef00f24ef674193"
+checksum = "539a77ee7c0de333dcc6da69b177380a0b81e0dacfa4f7344c465a36871ee601"
 
 [[package]]
 name = "zerocopy"
@@ -4970,5 +4979,5 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.74",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index f049821350..04b26b8044 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -121,7 +121,7 @@ renderdoc-sys = "1.1.0"
 ron = "0.8"
 rustc-hash = "1.1.0"
 serde = "1"
-serde_json = "1.0.122"
+serde_json = "1.0.124"
 smallvec = "1"
 static_assertions = "1.1.0"
 strum = { version = "0.25.0", features = ["derive"] }
diff --git a/naga/Cargo.toml b/naga/Cargo.toml
index 9a4182bc7e..cd6bd5e9af 100644
--- a/naga/Cargo.toml
+++ b/naga/Cargo.toml
@@ -72,7 +72,7 @@ indexmap.workspace = true
 log = "0.4"
 spirv = { version = "0.3", optional = true }
 thiserror.workspace = true
-serde = { version = "1.0.204", features = ["derive"], optional = true }
+serde = { version = "1.0.206", features = ["derive"], optional = true }
 petgraph = { version = "0.6", optional = true }
 pp-rs = { version = "0.2.1", optional = true }
 hexf-parse = { version = "0.2.1", optional = true }

From 94f54b3dc8bf527fe3cf23753d7614bb6caf21c7 Mon Sep 17 00:00:00 2001
From: Teodor Tanasoaia <28601907+teoxoy@users.noreply.github.com>
Date: Mon, 12 Aug 2024 09:20:36 +0200
Subject: [PATCH 147/226] Add a separate pipeline constants error (#6094)

---
 wgpu-core/src/device/resource.rs | 6 ++++++
 wgpu-core/src/pipeline.rs        | 7 +++++++
 wgpu-hal/src/dx12/device.rs      | 2 +-
 wgpu-hal/src/gles/device.rs      | 2 +-
 wgpu-hal/src/lib.rs              | 2 ++
 wgpu-hal/src/metal/device.rs     | 2 +-
 wgpu-hal/src/vulkan/device.rs    | 4 +++-
 7 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 463ede2cde..621149404f 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -2762,6 +2762,9 @@ impl<A: HalApi> Device<A> {
             hal::PipelineError::EntryPoint(_stage) => {
                 pipeline::CreateComputePipelineError::Internal(ENTRYPOINT_FAILURE_ERROR.to_string())
             }
+            hal::PipelineError::PipelineConstants(_stages, msg) => {
+                pipeline::CreateComputePipelineError::PipelineConstants(msg)
+            }
         })?;
 
         let pipeline = pipeline::ComputePipeline {
@@ -3343,6 +3346,9 @@ impl<A: HalApi> Device<A> {
                     error: ENTRYPOINT_FAILURE_ERROR.to_string(),
                 }
             }
+            hal::PipelineError::PipelineConstants(stage, error) => {
+                pipeline::CreateRenderPipelineError::PipelineConstants { stage, error }
+            }
         })?;
 
         let pass_context = RenderPassContext {
diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs
index da0a47eeee..68e92ca4b6 100644
--- a/wgpu-core/src/pipeline.rs
+++ b/wgpu-core/src/pipeline.rs
@@ -234,6 +234,8 @@ pub enum CreateComputePipelineError {
     Stage(#[from] validation::StageError),
     #[error("Internal error: {0}")]
     Internal(String),
+    #[error("Pipeline constant error: {0}")]
+    PipelineConstants(String),
     #[error(transparent)]
     MissingDownlevelFlags(#[from] MissingDownlevelFlags),
 }
@@ -525,6 +527,11 @@ pub enum CreateRenderPipelineError {
         stage: wgt::ShaderStages,
         error: String,
     },
+    #[error("Pipeline constant error in {stage:?} shader: {error}")]
+    PipelineConstants {
+        stage: wgt::ShaderStages,
+        error: String,
+    },
     #[error("In the provided shader, the type given for group {group} binding {binding} has a size of {size}. As the device does not support `DownlevelFlags::BUFFER_BINDINGS_NOT_16_BYTE_ALIGNED`, the type must have a size that is a multiple of 16 bytes.")]
     UnalignedShader { group: u32, binding: u32, size: u64 },
     #[error("Using the blend factor {factor:?} for render target {target} is not possible. Only the first render target may be used when dual-source blending.")]
diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index 8012086a90..b3204a8cc0 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -234,7 +234,7 @@ impl super::Device {
             &stage.module.naga.info,
             stage.constants,
         )
-        .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {e:?}")))?;
+        .map_err(|e| crate::PipelineError::PipelineConstants(stage_bit, format!("HLSL: {e:?}")))?;
 
         let needs_temp_options = stage.zero_initialize_workgroup_memory
             != layout.naga_options.zero_initialize_workgroup_memory;
diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs
index 0f8c381b5a..77c08c8ce0 100644
--- a/wgpu-hal/src/gles/device.rs
+++ b/wgpu-hal/src/gles/device.rs
@@ -223,7 +223,7 @@ impl super::Device {
         )
         .map_err(|e| {
             let msg = format!("{e}");
-            crate::PipelineError::Linkage(map_naga_stage(naga_stage), msg)
+            crate::PipelineError::PipelineConstants(map_naga_stage(naga_stage), msg)
         })?;
 
         let entry_point_index = module
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 9b6d49135e..bd60b029e0 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -321,6 +321,8 @@ pub enum PipelineError {
     EntryPoint(naga::ShaderStage),
     #[error(transparent)]
     Device(#[from] DeviceError),
+    #[error("Pipeline constant error for stage {0:?}: {1}")]
+    PipelineConstants(wgt::ShaderStages, String),
 }
 
 #[derive(Clone, Debug, Eq, PartialEq, Error)]
diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs
index 18b9c2dba5..4ca392bc1f 100644
--- a/wgpu-hal/src/metal/device.rs
+++ b/wgpu-hal/src/metal/device.rs
@@ -112,7 +112,7 @@ impl super::Device {
             &stage.module.naga.info,
             stage.constants,
         )
-        .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("MSL: {:?}", e)))?;
+        .map_err(|e| crate::PipelineError::PipelineConstants(stage_bit, format!("MSL: {:?}", e)))?;
 
         let ep_resources = &layout.per_stage_map[naga_stage];
 
diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs
index 2f2e045fda..a71263df50 100644
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@@ -764,7 +764,9 @@ impl super::Device {
                     &naga_shader.info,
                     stage.constants,
                 )
-                .map_err(|e| crate::PipelineError::Linkage(stage_flags, format!("{e}")))?;
+                .map_err(|e| {
+                    crate::PipelineError::PipelineConstants(stage_flags, format!("{e}"))
+                })?;
 
                 let spv = {
                     profiling::scope!("naga::spv::write_vec");

From 5617f0fd1783f2302370c8d73b9cee856e760d17 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 7 Aug 2024 18:15:43 +0200
Subject: [PATCH 148/226] call `flush_mapped_ranges` when unmapping
 write-mapped buffers

I'm not sure how things worked without this.
---
 wgpu-core/src/device/global.rs   |  6 ++++--
 wgpu-core/src/device/life.rs     | 11 +++++++----
 wgpu-core/src/device/mod.rs      |  6 +++---
 wgpu-core/src/device/resource.rs |  9 ++++++---
 wgpu-core/src/resource.rs        | 15 +++++++++++----
 5 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 0aab087ad4..1f70ee09ed 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -2418,7 +2418,9 @@ impl Global {
                 Ok((ptr, range_size))
             }
             resource::BufferMapState::Active {
-                ref ptr, ref range, ..
+                ref mapping,
+                ref range,
+                ..
             } => {
                 if offset < range.start {
                     return Err(BufferAccessError::OutOfBoundsUnderrun {
@@ -2437,7 +2439,7 @@ impl Global {
                 let relative_offset = (offset - range.start) as isize;
                 unsafe {
                     Ok((
-                        NonNull::new_unchecked(ptr.as_ptr().offset(relative_offset)),
+                        NonNull::new_unchecked(mapping.ptr.as_ptr().offset(relative_offset)),
                         range_size,
                     ))
                 }
diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index 1bb687d7e2..7408c184dc 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -391,10 +391,10 @@ impl<A: HalApi> LifetimeTracker<A> {
                     host,
                     snatch_guard,
                 ) {
-                    Ok(ptr) => {
+                    Ok(mapping) => {
                         *buffer.map_state.lock() = resource::BufferMapState::Active {
-                            ptr,
-                            range: pending_mapping.range.start..pending_mapping.range.start + size,
+                            mapping,
+                            range: pending_mapping.range.clone(),
                             host,
                         };
                         Ok(())
@@ -406,7 +406,10 @@ impl<A: HalApi> LifetimeTracker<A> {
                 }
             } else {
                 *buffer.map_state.lock() = resource::BufferMapState::Active {
-                    ptr: std::ptr::NonNull::dangling(),
+                    mapping: hal::BufferMapping {
+                        ptr: std::ptr::NonNull::dangling(),
+                        is_coherent: true,
+                    },
                     range: pending_mapping.range,
                     host: pending_mapping.op.host,
                 };
diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index d33de22dac..693d8f8c8b 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -18,7 +18,7 @@ use std::os::raw::c_char;
 use thiserror::Error;
 use wgt::{BufferAddress, DeviceLostReason, TextureFormat};
 
-use std::{iter, num::NonZeroU32, ptr};
+use std::{iter, num::NonZeroU32};
 
 pub mod any_device;
 pub(crate) mod bgl;
@@ -307,7 +307,7 @@ fn map_buffer<A: HalApi>(
     size: BufferAddress,
     kind: HostMap,
     snatch_guard: &SnatchGuard,
-) -> Result<ptr::NonNull<u8>, BufferAccessError> {
+) -> Result<hal::BufferMapping, BufferAccessError> {
     let raw_buffer = buffer.try_raw(snatch_guard)?;
     let mapping = unsafe {
         raw.map_buffer(raw_buffer, offset..offset + size)
@@ -360,7 +360,7 @@ fn map_buffer<A: HalApi>(
         }
     }
 
-    Ok(mapping.ptr)
+    Ok(mapping)
 }
 
 #[derive(Clone, Debug)]
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 621149404f..96b4e9800f 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -611,8 +611,11 @@ impl<A: HalApi> Device<A> {
         } else if desc.usage.contains(wgt::BufferUsages::MAP_WRITE) {
             // buffer is mappable, so we are just doing that at start
             let map_size = buffer.size;
-            let ptr = if map_size == 0 {
-                std::ptr::NonNull::dangling()
+            let mapping = if map_size == 0 {
+                hal::BufferMapping {
+                    ptr: std::ptr::NonNull::dangling(),
+                    is_coherent: true,
+                }
             } else {
                 let snatch_guard: SnatchGuard = self.snatchable_lock.read();
                 map_buffer(
@@ -625,7 +628,7 @@ impl<A: HalApi> Device<A> {
                 )?
             };
             *buffer.map_state.lock() = resource::BufferMapState::Active {
-                ptr,
+                mapping,
                 range: 0..map_size,
                 host: HostMap::Write,
             };
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 54f2b114ec..0b9a12125a 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -229,7 +229,7 @@ pub(crate) enum BufferMapState<A: HalApi> {
     Waiting(BufferPendingMapping<A>),
     /// Mapped
     Active {
-        ptr: NonNull<u8>,
+        mapping: hal::BufferMapping,
         range: hal::MemoryRange,
         host: HostMap,
     },
@@ -669,13 +669,18 @@ impl<A: HalApi> Buffer<A> {
             BufferMapState::Waiting(pending) => {
                 return Ok(Some((pending.op, Err(BufferAccessError::MapAborted))));
             }
-            BufferMapState::Active { ptr, range, host } => {
+            BufferMapState::Active {
+                mapping,
+                range,
+                host,
+            } => {
+                #[allow(clippy::collapsible_if)]
                 if host == HostMap::Write {
                     #[cfg(feature = "trace")]
                     if let Some(ref mut trace) = *device.trace.lock() {
                         let size = range.end - range.start;
                         let data = trace.make_binary("bin", unsafe {
-                            std::slice::from_raw_parts(ptr.as_ptr(), size as usize)
+                            std::slice::from_raw_parts(mapping.ptr.as_ptr(), size as usize)
                         });
                         trace.add(trace::Action::WriteBuffer {
                             id: buffer_id,
@@ -684,7 +689,9 @@ impl<A: HalApi> Buffer<A> {
                             queued: false,
                         });
                     }
-                    let _ = (ptr, range);
+                    if !mapping.is_coherent {
+                        unsafe { device.raw().flush_mapped_ranges(raw_buf, iter::once(range)) };
+                    }
                 }
                 unsafe { device.raw().unmap_buffer(raw_buf) };
             }

From f0875e8fdadad26dc812414721f7471229052a03 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 7 Aug 2024 16:42:43 +0200
Subject: [PATCH 149/226] remove `Buffer.sync_mapped_writes`

`zero_init_needs_flush_now` was always equal to `mapping.is_coherent`,
which is not correct; this is fixed by the next commit.
---
 wgpu-core/src/device/mod.rs      | 16 +++++-----------
 wgpu-core/src/device/resource.rs |  2 --
 wgpu-core/src/lock/rank.rs       |  1 -
 wgpu-core/src/resource.rs        |  1 -
 4 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index 693d8f8c8b..539dfdb3d2 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -314,14 +314,11 @@ fn map_buffer<A: HalApi>(
             .map_err(DeviceError::from)?
     };
 
-    *buffer.sync_mapped_writes.lock() = match kind {
-        HostMap::Read if !mapping.is_coherent => unsafe {
+    if !mapping.is_coherent && kind == HostMap::Read {
+        unsafe {
             raw.invalidate_mapped_ranges(raw_buffer, iter::once(offset..offset + size));
-            None
-        },
-        HostMap::Write if !mapping.is_coherent => Some(offset..offset + size),
-        _ => None,
-    };
+        }
+    }
 
     assert_eq!(offset % wgt::COPY_BUFFER_ALIGNMENT, 0);
     assert_eq!(size % wgt::COPY_BUFFER_ALIGNMENT, 0);
@@ -339,9 +336,6 @@ fn map_buffer<A: HalApi>(
     // If this is a write mapping zeroing out the memory here is the only
     // reasonable way as all data is pushed to GPU anyways.
 
-    // No need to flush if it is flushed later anyways.
-    let zero_init_needs_flush_now =
-        mapping.is_coherent && buffer.sync_mapped_writes.lock().is_none();
     let mapped = unsafe { std::slice::from_raw_parts_mut(mapping.ptr.as_ptr(), size as usize) };
 
     for uninitialized in buffer
@@ -355,7 +349,7 @@ fn map_buffer<A: HalApi>(
             (uninitialized.start - offset) as usize..(uninitialized.end - offset) as usize;
         mapped[fill_range].fill(0);
 
-        if zero_init_needs_flush_now {
+        if mapping.is_coherent {
             unsafe { raw.flush_mapped_ranges(raw_buffer, iter::once(uninitialized)) };
         }
     }
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 96b4e9800f..e3abccd886 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -597,7 +597,6 @@ impl<A: HalApi> Device<A> {
                 rank::BUFFER_INITIALIZATION_STATUS,
                 BufferInitTracker::new(aligned_size),
             ),
-            sync_mapped_writes: Mutex::new(rank::BUFFER_SYNC_MAPPED_WRITES, None),
             map_state: Mutex::new(rank::BUFFER_MAP_STATE, resource::BufferMapState::Idle),
             label: desc.label.to_string(),
             tracking_data: TrackingData::new(self.tracker_indices.buffers.clone()),
@@ -697,7 +696,6 @@ impl<A: HalApi> Device<A> {
                 rank::BUFFER_INITIALIZATION_STATUS,
                 BufferInitTracker::new(0),
             ),
-            sync_mapped_writes: Mutex::new(rank::BUFFER_SYNC_MAPPED_WRITES, None),
             map_state: Mutex::new(rank::BUFFER_MAP_STATE, resource::BufferMapState::Idle),
             label: desc.label.to_string(),
             tracking_data: TrackingData::new(self.tracker_indices.buffers.clone()),
diff --git a/wgpu-core/src/lock/rank.rs b/wgpu-core/src/lock/rank.rs
index 5e9bd37193..2539ffe16d 100644
--- a/wgpu-core/src/lock/rank.rs
+++ b/wgpu-core/src/lock/rank.rs
@@ -120,7 +120,6 @@ define_lock_ranks! {
 
     rank BUFFER_BIND_GROUPS "Buffer::bind_groups" followed by { }
     rank BUFFER_INITIALIZATION_STATUS "Buffer::initialization_status" followed by { }
-    rank BUFFER_SYNC_MAPPED_WRITES "Buffer::sync_mapped_writes" followed by { }
     rank DEVICE_DEFERRED_DESTROY "Device::deferred_destroy" followed by { }
     rank DEVICE_FENCE "Device::fence" followed by { }
     #[allow(dead_code)]
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 0b9a12125a..b9d35a6012 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -431,7 +431,6 @@ pub struct Buffer<A: HalApi> {
     pub(crate) usage: wgt::BufferUsages,
     pub(crate) size: wgt::BufferAddress,
     pub(crate) initialization_status: RwLock<BufferInitTracker>,
-    pub(crate) sync_mapped_writes: Mutex<Option<hal::MemoryRange>>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
     pub(crate) tracking_data: TrackingData,

From a6bc2f6f533c50f7da50643aa03b907271b405df Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 7 Aug 2024 18:13:42 +0200
Subject: [PATCH 150/226] fix check for `flush_mapped_ranges` in `map_buffer`

`flush_mapped_ranges` needs to be called when mappings are not coherent.
We can also omit flushing for write-mapped buffers since we always flush them on unmap.
---
 wgpu-core/src/device/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index 539dfdb3d2..1f890de902 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -349,7 +349,7 @@ fn map_buffer<A: HalApi>(
             (uninitialized.start - offset) as usize..(uninitialized.end - offset) as usize;
         mapped[fill_range].fill(0);
 
-        if mapping.is_coherent {
+        if !mapping.is_coherent && kind == HostMap::Read {
             unsafe { raw.flush_mapped_ranges(raw_buffer, iter::once(uninitialized)) };
         }
     }

From b594497f4a2cb2d22104b5071eea3ede67fcff4f Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Thu, 8 Aug 2024 17:29:50 +0200
Subject: [PATCH 151/226] [gl] fix usage of `glFlushMappedBufferRange`

`offset` is relative to the start of the mapping, not the start of the buffer.
---
 wgpu-hal/src/gles/device.rs | 7 ++++++-
 wgpu-hal/src/gles/mod.rs    | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs
index 77c08c8ce0..3e2e308259 100644
--- a/wgpu-hal/src/gles/device.rs
+++ b/wgpu-hal/src/gles/device.rs
@@ -536,6 +536,7 @@ impl crate::Device for super::Device {
                 size: desc.size,
                 map_flags: 0,
                 data: Some(Arc::new(Mutex::new(vec![0; desc.size as usize]))),
+                offset_of_current_mapping: Arc::new(Mutex::new(0)),
             });
         }
 
@@ -635,6 +636,7 @@ impl crate::Device for super::Device {
             size: desc.size,
             map_flags,
             data,
+            offset_of_current_mapping: Arc::new(Mutex::new(0)),
         })
     }
 
@@ -668,6 +670,7 @@ impl crate::Device for super::Device {
                     unsafe { self.shared.get_buffer_sub_data(gl, buffer.target, 0, slice) };
                     slice.as_mut_ptr()
                 } else {
+                    *buffer.offset_of_current_mapping.lock().unwrap() = range.start;
                     unsafe {
                         gl.map_buffer_range(
                             buffer.target,
@@ -693,6 +696,7 @@ impl crate::Device for super::Device {
                 unsafe { gl.bind_buffer(buffer.target, Some(raw)) };
                 unsafe { gl.unmap_buffer(buffer.target) };
                 unsafe { gl.bind_buffer(buffer.target, None) };
+                *buffer.offset_of_current_mapping.lock().unwrap() = 0;
             }
         }
     }
@@ -704,10 +708,11 @@ impl crate::Device for super::Device {
             let gl = &self.shared.context.lock();
             unsafe { gl.bind_buffer(buffer.target, Some(raw)) };
             for range in ranges {
+                let offset_of_current_mapping = *buffer.offset_of_current_mapping.lock().unwrap();
                 unsafe {
                     gl.flush_mapped_buffer_range(
                         buffer.target,
-                        range.start as i32,
+                        (range.start - offset_of_current_mapping) as i32,
                         (range.end - range.start) as i32,
                     )
                 };
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index 73915d53e2..459600df7e 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -299,6 +299,7 @@ pub struct Buffer {
     size: wgt::BufferAddress,
     map_flags: u32,
     data: Option<Arc<std::sync::Mutex<Vec<u8>>>>,
+    offset_of_current_mapping: Arc<std::sync::Mutex<wgt::BufferAddress>>,
 }
 
 #[cfg(send_sync)]

From 7c917abf525b5c32c0b7345ec31788e316afa0cc Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Thu, 8 Aug 2024 18:17:04 +0200
Subject: [PATCH 152/226] [gl] gate usage of `glFlushMappedBufferRange`

This is done in the same way as in `map_buffer` & `unmap_buffer`.
---
 wgpu-hal/src/gles/device.rs | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs
index 3e2e308259..c651da6828 100644
--- a/wgpu-hal/src/gles/device.rs
+++ b/wgpu-hal/src/gles/device.rs
@@ -705,17 +705,20 @@ impl crate::Device for super::Device {
         I: Iterator<Item = crate::MemoryRange>,
     {
         if let Some(raw) = buffer.raw {
-            let gl = &self.shared.context.lock();
-            unsafe { gl.bind_buffer(buffer.target, Some(raw)) };
-            for range in ranges {
-                let offset_of_current_mapping = *buffer.offset_of_current_mapping.lock().unwrap();
-                unsafe {
-                    gl.flush_mapped_buffer_range(
-                        buffer.target,
-                        (range.start - offset_of_current_mapping) as i32,
-                        (range.end - range.start) as i32,
-                    )
-                };
+            if buffer.data.is_none() {
+                let gl = &self.shared.context.lock();
+                unsafe { gl.bind_buffer(buffer.target, Some(raw)) };
+                for range in ranges {
+                    let offset_of_current_mapping =
+                        *buffer.offset_of_current_mapping.lock().unwrap();
+                    unsafe {
+                        gl.flush_mapped_buffer_range(
+                            buffer.target,
+                            (range.start - offset_of_current_mapping) as i32,
+                            (range.end - range.start) as i32,
+                        )
+                    };
+                }
             }
         }
     }

From cf5706c24b31a0c595f6e792d7c3fda89acce6ef Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 7 Aug 2024 14:28:18 +0200
Subject: [PATCH 153/226] use `Device.raw()` instead of
 `Device.raw.as_ref().unwrap()`

---
 wgpu-core/src/device/resource.rs | 157 +++++++++++--------------------
 1 file changed, 55 insertions(+), 102 deletions(-)

diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index e3abccd886..4a7582fd60 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -436,9 +436,7 @@ impl<A: HalApi> Device<A> {
                 .last_successful_submission_index
                 .load(Ordering::Acquire),
             wgt::Maintain::Poll => unsafe {
-                self.raw
-                    .as_ref()
-                    .unwrap()
+                self.raw()
                     .get_fence_value(fence)
                     .map_err(DeviceError::from)?
             },
@@ -447,9 +445,7 @@ impl<A: HalApi> Device<A> {
         // If necessary, wait for that submission to complete.
         if maintain.is_wait() {
             unsafe {
-                self.raw
-                    .as_ref()
-                    .unwrap()
+                self.raw()
                     .wait(fence, submission_index, CLEANUP_WAIT_MS)
                     .map_err(DeviceError::from)?
             };
@@ -930,9 +926,7 @@ impl<A: HalApi> Device<A> {
         };
 
         let raw_texture = unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
+            self.raw()
                 .create_texture(&hal_desc)
                 .map_err(DeviceError::from)?
         };
@@ -1283,9 +1277,7 @@ impl<A: HalApi> Device<A> {
         };
 
         let raw = unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
+            self.raw()
                 .create_texture_view(texture_raw, &hal_desc)
                 .map_err(|_| resource::CreateTextureViewError::OutOfMemory)?
         };
@@ -1420,9 +1412,7 @@ impl<A: HalApi> Device<A> {
         };
 
         let raw = unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
+            self.raw()
                 .create_sampler(&hal_desc)
                 .map_err(DeviceError::from)?
         };
@@ -1544,12 +1534,7 @@ impl<A: HalApi> Device<A> {
             label: desc.label.to_hal(self.instance_flags),
             runtime_checks: desc.shader_bound_checks.runtime_checks(),
         };
-        let raw = match unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
-                .create_shader_module(&hal_desc, hal_shader)
-        } {
+        let raw = match unsafe { self.raw().create_shader_module(&hal_desc, hal_shader) } {
             Ok(raw) => raw,
             Err(error) => {
                 return Err(match error {
@@ -1590,12 +1575,7 @@ impl<A: HalApi> Device<A> {
             runtime_checks: desc.shader_bound_checks.runtime_checks(),
         };
         let hal_shader = hal::ShaderInput::SpirV(source);
-        let raw = match unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
-                .create_shader_module(&hal_desc, hal_shader)
-        } {
+        let raw = match unsafe { self.raw().create_shader_module(&hal_desc, hal_shader) } {
             Ok(raw) => raw,
             Err(error) => {
                 return Err(match error {
@@ -1865,9 +1845,7 @@ impl<A: HalApi> Device<A> {
             entries: &hal_bindings,
         };
         let raw = unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
+            self.raw()
                 .create_bind_group_layout(&hal_desc)
                 .map_err(DeviceError::from)?
         };
@@ -2291,9 +2269,7 @@ impl<A: HalApi> Device<A> {
             acceleration_structures: &[],
         };
         let raw = unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
+            self.raw()
                 .create_bind_group(&hal_desc)
                 .map_err(DeviceError::from)?
         };
@@ -2592,9 +2568,7 @@ impl<A: HalApi> Device<A> {
         };
 
         let raw = unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
+            self.raw()
                 .create_pipeline_layout(&hal_desc)
                 .map_err(DeviceError::from)?
         };
@@ -2747,26 +2721,25 @@ impl<A: HalApi> Device<A> {
             cache: cache.as_ref().and_then(|it| it.raw.as_ref()),
         };
 
-        let raw = unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
-                .create_compute_pipeline(&pipeline_desc)
-        }
-        .map_err(|err| match err {
-            hal::PipelineError::Device(error) => {
-                pipeline::CreateComputePipelineError::Device(error.into())
-            }
-            hal::PipelineError::Linkage(_stages, msg) => {
-                pipeline::CreateComputePipelineError::Internal(msg)
-            }
-            hal::PipelineError::EntryPoint(_stage) => {
-                pipeline::CreateComputePipelineError::Internal(ENTRYPOINT_FAILURE_ERROR.to_string())
-            }
-            hal::PipelineError::PipelineConstants(_stages, msg) => {
-                pipeline::CreateComputePipelineError::PipelineConstants(msg)
-            }
-        })?;
+        let raw =
+            unsafe { self.raw().create_compute_pipeline(&pipeline_desc) }.map_err(
+                |err| match err {
+                    hal::PipelineError::Device(error) => {
+                        pipeline::CreateComputePipelineError::Device(error.into())
+                    }
+                    hal::PipelineError::Linkage(_stages, msg) => {
+                        pipeline::CreateComputePipelineError::Internal(msg)
+                    }
+                    hal::PipelineError::EntryPoint(_stage) => {
+                        pipeline::CreateComputePipelineError::Internal(
+                            ENTRYPOINT_FAILURE_ERROR.to_string(),
+                        )
+                    }
+                    hal::PipelineError::PipelineConstants(_stages, msg) => {
+                        pipeline::CreateComputePipelineError::PipelineConstants(msg)
+                    }
+                },
+            )?;
 
         let pipeline = pipeline::ComputePipeline {
             raw: Some(raw),
@@ -3328,29 +3301,26 @@ impl<A: HalApi> Device<A> {
             multiview: desc.multiview,
             cache: cache.as_ref().and_then(|it| it.raw.as_ref()),
         };
-        let raw = unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
-                .create_render_pipeline(&pipeline_desc)
-        }
-        .map_err(|err| match err {
-            hal::PipelineError::Device(error) => {
-                pipeline::CreateRenderPipelineError::Device(error.into())
-            }
-            hal::PipelineError::Linkage(stage, msg) => {
-                pipeline::CreateRenderPipelineError::Internal { stage, error: msg }
-            }
-            hal::PipelineError::EntryPoint(stage) => {
-                pipeline::CreateRenderPipelineError::Internal {
-                    stage: hal::auxil::map_naga_stage(stage),
-                    error: ENTRYPOINT_FAILURE_ERROR.to_string(),
-                }
-            }
-            hal::PipelineError::PipelineConstants(stage, error) => {
-                pipeline::CreateRenderPipelineError::PipelineConstants { stage, error }
-            }
-        })?;
+        let raw =
+            unsafe { self.raw().create_render_pipeline(&pipeline_desc) }.map_err(
+                |err| match err {
+                    hal::PipelineError::Device(error) => {
+                        pipeline::CreateRenderPipelineError::Device(error.into())
+                    }
+                    hal::PipelineError::Linkage(stage, msg) => {
+                        pipeline::CreateRenderPipelineError::Internal { stage, error: msg }
+                    }
+                    hal::PipelineError::EntryPoint(stage) => {
+                        pipeline::CreateRenderPipelineError::Internal {
+                            stage: hal::auxil::map_naga_stage(stage),
+                            error: ENTRYPOINT_FAILURE_ERROR.to_string(),
+                        }
+                    }
+                    hal::PipelineError::PipelineConstants(stage, error) => {
+                        pipeline::CreateRenderPipelineError::PipelineConstants { stage, error }
+                    }
+                },
+            )?;
 
         let pass_context = RenderPassContext {
             attachments: AttachmentData {
@@ -3519,14 +3489,9 @@ impl<A: HalApi> Device<A> {
     ) -> Result<(), DeviceError> {
         let guard = self.fence.read();
         let fence = guard.as_ref().unwrap();
-        let last_done_index = unsafe { self.raw.as_ref().unwrap().get_fence_value(fence)? };
+        let last_done_index = unsafe { self.raw().get_fence_value(fence)? };
         if last_done_index < submission_index {
-            unsafe {
-                self.raw
-                    .as_ref()
-                    .unwrap()
-                    .wait(fence, submission_index, !0)?
-            };
+            unsafe { self.raw().wait(fence, submission_index, !0)? };
             drop(guard);
             let closures = self
                 .lock_life()
@@ -3641,17 +3606,11 @@ impl<A: HalApi> Device<A> {
     }
 
     pub fn get_hal_counters(&self) -> wgt::HalCounters {
-        self.raw
-            .as_ref()
-            .map(|raw| raw.get_internal_counters())
-            .unwrap_or_default()
+        self.raw().get_internal_counters()
     }
 
     pub fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
-        self.raw
-            .as_ref()
-            .map(|raw| raw.generate_allocator_report())
-            .unwrap_or_default()
+        self.raw().generate_allocator_report()
     }
 }
 
@@ -3662,10 +3621,7 @@ impl<A: HalApi> Device<A> {
             baked.encoder.reset_all(baked.list.into_iter());
         }
         unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
-                .destroy_command_encoder(baked.encoder);
+            self.raw().destroy_command_encoder(baked.encoder);
         }
     }
 
@@ -3678,10 +3634,7 @@ impl<A: HalApi> Device<A> {
         if let Err(error) = unsafe {
             let fence = self.fence.read();
             let fence = fence.as_ref().unwrap();
-            self.raw
-                .as_ref()
-                .unwrap()
-                .wait(fence, current_index, CLEANUP_WAIT_MS)
+            self.raw().wait(fence, current_index, CLEANUP_WAIT_MS)
         } {
             log::error!("failed to wait for the device: {error}");
         }

From b0cc0d2ebc8592db5a6029b755a99fd094bbe632 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 7 Aug 2024 14:36:15 +0200
Subject: [PATCH 154/226] use `QuerySet.raw()` instead of
 `QuerySet.raw.as_ref().unwrap()`

---
 wgpu-core/src/command/compute.rs | 6 ++----
 wgpu-core/src/command/query.rs   | 2 +-
 wgpu-core/src/command/render.rs  | 4 ++--
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs
index 643e5ffa63..39fe1d91d1 100644
--- a/wgpu-core/src/command/compute.rs
+++ b/wgpu-core/src/command/compute.rs
@@ -507,14 +507,12 @@ impl Global {
             // But no point in erroring over that nuance here!
             if let Some(range) = range {
                 unsafe {
-                    state
-                        .raw_encoder
-                        .reset_queries(query_set.raw.as_ref().unwrap(), range);
+                    state.raw_encoder.reset_queries(query_set.raw(), range);
                 }
             }
 
             Some(hal::ComputePassTimestampWrites {
-                query_set: query_set.raw.as_ref().unwrap(),
+                query_set: query_set.raw(),
                 beginning_of_pass_write_index: tw.beginning_of_pass_write_index,
                 end_of_pass_write_index: tw.end_of_pass_write_index,
             })
diff --git a/wgpu-core/src/command/query.rs b/wgpu-core/src/command/query.rs
index 35facbf260..382fa2d296 100644
--- a/wgpu-core/src/command/query.rs
+++ b/wgpu-core/src/command/query.rs
@@ -255,7 +255,7 @@ pub(super) fn end_occlusion_query<A: HalApi>(
     active_query: &mut Option<(Arc<QuerySet<A>>, u32)>,
 ) -> Result<(), QueryUseError> {
     if let Some((query_set, query_index)) = active_query.take() {
-        unsafe { raw_encoder.end_query(query_set.raw.as_ref().unwrap(), query_index) };
+        unsafe { raw_encoder.end_query(query_set.raw(), query_index) };
         Ok(())
     } else {
         Err(QueryUseError::AlreadyStopped)
diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index 8c00e0d302..86a9eef26f 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -1193,7 +1193,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
             }
 
             Some(hal::RenderPassTimestampWrites {
-                query_set: query_set.raw.as_ref().unwrap(),
+                query_set: query_set.raw(),
                 beginning_of_pass_write_index: tw.beginning_of_pass_write_index,
                 end_of_pass_write_index: tw.end_of_pass_write_index,
             })
@@ -1203,7 +1203,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
 
         let occlusion_query_set_hal = if let Some(query_set) = occlusion_query_set.as_ref() {
             query_set.same_device(device)?;
-            Some(query_set.raw.as_ref().unwrap())
+            Some(query_set.raw())
         } else {
             None
         };

From c1bc0864c58e794b68d58305fb828da9ebf0f0cd Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 7 Aug 2024 14:24:52 +0200
Subject: [PATCH 155/226] use `ManuallyDrop` for remaining resources

---
 wgpu-core/src/binding_model.rs   | 33 +++++++--------
 wgpu-core/src/device/global.rs   | 28 ++++++-------
 wgpu-core/src/device/resource.rs | 33 ++++++++-------
 wgpu-core/src/pipeline.rs        | 70 +++++++++++++++++---------------
 wgpu-core/src/resource.rs        | 32 +++++++--------
 5 files changed, 102 insertions(+), 94 deletions(-)

diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs
index 08d9cda566..0687e6e0f0 100644
--- a/wgpu-core/src/binding_model.rs
+++ b/wgpu-core/src/binding_model.rs
@@ -26,6 +26,7 @@ use serde::Serialize;
 
 use std::{
     borrow::Cow,
+    mem::ManuallyDrop,
     ops::Range,
     sync::{Arc, Weak},
 };
@@ -498,7 +499,7 @@ impl<A: HalApi> std::fmt::Display for ExclusivePipeline<A> {
 /// Bind group layout.
 #[derive(Debug)]
 pub struct BindGroupLayout<A: HalApi> {
-    pub(crate) raw: Option<A::BindGroupLayout>,
+    pub(crate) raw: ManuallyDrop<A::BindGroupLayout>,
     pub(crate) device: Arc<Device<A>>,
     pub(crate) entries: bgl::EntryMap,
     /// It is very important that we know if the bind group comes from the BGL pool.
@@ -517,15 +518,15 @@ pub struct BindGroupLayout<A: HalApi> {
 
 impl<A: HalApi> Drop for BindGroupLayout<A> {
     fn drop(&mut self) {
+        resource_log!("Destroy raw {}", self.error_ident());
         if matches!(self.origin, bgl::Origin::Pool) {
             self.device.bgl_pool.remove(&self.entries);
         }
-        if let Some(raw) = self.raw.take() {
-            resource_log!("Destroy raw {}", self.error_ident());
-            unsafe {
-                use hal::Device;
-                self.device.raw().destroy_bind_group_layout(raw);
-            }
+        // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
+        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
+        unsafe {
+            use hal::Device;
+            self.device.raw().destroy_bind_group_layout(raw);
         }
     }
 }
@@ -537,7 +538,7 @@ crate::impl_storage_item!(BindGroupLayout);
 
 impl<A: HalApi> BindGroupLayout<A> {
     pub(crate) fn raw(&self) -> &A::BindGroupLayout {
-        self.raw.as_ref().unwrap()
+        &self.raw
     }
 }
 
@@ -651,7 +652,7 @@ pub struct ResolvedPipelineLayoutDescriptor<'a, A: HalApi> {
 
 #[derive(Debug)]
 pub struct PipelineLayout<A: HalApi> {
-    pub(crate) raw: Option<A::PipelineLayout>,
+    pub(crate) raw: ManuallyDrop<A::PipelineLayout>,
     pub(crate) device: Arc<Device<A>>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
@@ -661,19 +662,19 @@ pub struct PipelineLayout<A: HalApi> {
 
 impl<A: HalApi> Drop for PipelineLayout<A> {
     fn drop(&mut self) {
-        if let Some(raw) = self.raw.take() {
-            resource_log!("Destroy raw {}", self.error_ident());
-            unsafe {
-                use hal::Device;
-                self.device.raw().destroy_pipeline_layout(raw);
-            }
+        resource_log!("Destroy raw {}", self.error_ident());
+        // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
+        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
+        unsafe {
+            use hal::Device;
+            self.device.raw().destroy_pipeline_layout(raw);
         }
     }
 }
 
 impl<A: HalApi> PipelineLayout<A> {
     pub(crate) fn raw(&self) -> &A::PipelineLayout {
-        self.raw.as_ref().unwrap()
+        &self.raw
     }
 
     pub(crate) fn get_binding_maps(&self) -> ArrayVec<&bgl::EntryMap, { hal::MAX_BIND_GROUPS }> {
diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 1f70ee09ed..7fd82e8cee 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -2191,23 +2191,21 @@ impl Global {
             if !cache.device.is_valid() {
                 return None;
             }
-            if let Some(raw_cache) = cache.raw.as_ref() {
-                let mut vec = unsafe { cache.device.raw().pipeline_cache_get_data(raw_cache) }?;
-                let validation_key = cache.device.raw().pipeline_cache_validation_key()?;
-
-                let mut header_contents = [0; pipeline_cache::HEADER_LENGTH];
-                pipeline_cache::add_cache_header(
-                    &mut header_contents,
-                    &vec,
-                    &cache.device.adapter.raw.info,
-                    validation_key,
-                );
+            let mut vec = unsafe { cache.device.raw().pipeline_cache_get_data(cache.raw()) }?;
+            let validation_key = cache.device.raw().pipeline_cache_validation_key()?;
+
+            let mut header_contents = [0; pipeline_cache::HEADER_LENGTH];
+            pipeline_cache::add_cache_header(
+                &mut header_contents,
+                &vec,
+                &cache.device.adapter.raw.info,
+                validation_key,
+            );
 
-                let deleted = vec.splice(..0, header_contents).collect::<Vec<_>>();
-                debug_assert!(deleted.is_empty());
+            let deleted = vec.splice(..0, header_contents).collect::<Vec<_>>();
+            debug_assert!(deleted.is_empty());
 
-                return Some(vec);
-            }
+            return Some(vec);
         }
         None
     }
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 4a7582fd60..63c09831da 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -80,7 +80,7 @@ use super::{
 /// When locking pending_writes please check that trackers is not locked
 /// trackers should be locked only when needed for the shortest time possible
 pub struct Device<A: HalApi> {
-    raw: Option<A::Device>,
+    raw: ManuallyDrop<A::Device>,
     pub(crate) adapter: Arc<Adapter<A>>,
     pub(crate) queue: OnceCell<Weak<Queue<A>>>,
     queue_to_drop: OnceCell<A::Queue>,
@@ -169,7 +169,8 @@ impl<A: HalApi> std::fmt::Debug for Device<A> {
 impl<A: HalApi> Drop for Device<A> {
     fn drop(&mut self) {
         resource_log!("Drop {}", self.error_ident());
-        let raw = self.raw.take().unwrap();
+        // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
+        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
         // SAFETY: We are in the Drop impl and we don't use self.pending_writes anymore after this point.
         let pending_writes = unsafe { ManuallyDrop::take(&mut self.pending_writes.lock()) };
         pending_writes.dispose(&raw);
@@ -193,7 +194,7 @@ pub enum CreateDeviceError {
 
 impl<A: HalApi> Device<A> {
     pub(crate) fn raw(&self) -> &A::Device {
-        self.raw.as_ref().unwrap()
+        &self.raw
     }
     pub(crate) fn require_features(&self, feature: wgt::Features) -> Result<(), MissingFeatures> {
         if self.features.contains(feature) {
@@ -271,7 +272,7 @@ impl<A: HalApi> Device<A> {
         let downlevel = adapter.raw.capabilities.downlevel.clone();
 
         Ok(Self {
-            raw: Some(raw_device),
+            raw: ManuallyDrop::new(raw_device),
             adapter: adapter.clone(),
             queue: OnceCell::new(),
             queue_to_drop: OnceCell::new(),
@@ -1418,7 +1419,7 @@ impl<A: HalApi> Device<A> {
         };
 
         let sampler = Sampler {
-            raw: Some(raw),
+            raw: ManuallyDrop::new(raw),
             device: self.clone(),
             label: desc.label.to_string(),
             tracking_data: TrackingData::new(self.tracker_indices.samplers.clone()),
@@ -1550,7 +1551,7 @@ impl<A: HalApi> Device<A> {
         };
 
         let module = pipeline::ShaderModule {
-            raw: Some(raw),
+            raw: ManuallyDrop::new(raw),
             device: self.clone(),
             interface: Some(interface),
             label: desc.label.to_string(),
@@ -1591,7 +1592,7 @@ impl<A: HalApi> Device<A> {
         };
 
         let module = pipeline::ShaderModule {
-            raw: Some(raw),
+            raw: ManuallyDrop::new(raw),
             device: self.clone(),
             interface: None,
             label: desc.label.to_string(),
@@ -1861,7 +1862,7 @@ impl<A: HalApi> Device<A> {
             .map_err(binding_model::CreateBindGroupLayoutError::TooManyBindings)?;
 
         let bgl = BindGroupLayout {
-            raw: Some(raw),
+            raw: ManuallyDrop::new(raw),
             device: self.clone(),
             entries: entry_map,
             origin,
@@ -2576,7 +2577,7 @@ impl<A: HalApi> Device<A> {
         drop(raw_bind_group_layouts);
 
         let layout = binding_model::PipelineLayout {
-            raw: Some(raw),
+            raw: ManuallyDrop::new(raw),
             device: self.clone(),
             label: desc.label.to_string(),
             bind_group_layouts,
@@ -2718,7 +2719,7 @@ impl<A: HalApi> Device<A> {
                 constants: desc.stage.constants.as_ref(),
                 zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory,
             },
-            cache: cache.as_ref().and_then(|it| it.raw.as_ref()),
+            cache: cache.as_ref().map(|it| it.raw()),
         };
 
         let raw =
@@ -2742,7 +2743,7 @@ impl<A: HalApi> Device<A> {
             )?;
 
         let pipeline = pipeline::ComputePipeline {
-            raw: Some(raw),
+            raw: ManuallyDrop::new(raw),
             layout: pipeline_layout,
             device: self.clone(),
             _shader_module: shader_module,
@@ -3299,7 +3300,7 @@ impl<A: HalApi> Device<A> {
             fragment_stage,
             color_targets,
             multiview: desc.multiview,
-            cache: cache.as_ref().and_then(|it| it.raw.as_ref()),
+            cache: cache.as_ref().map(|it| it.raw()),
         };
         let raw =
             unsafe { self.raw().create_render_pipeline(&pipeline_desc) }.map_err(
@@ -3363,7 +3364,7 @@ impl<A: HalApi> Device<A> {
         };
 
         let pipeline = pipeline::RenderPipeline {
-            raw: Some(raw),
+            raw: ManuallyDrop::new(raw),
             layout: pipeline_layout,
             device: self.clone(),
             pass_context,
@@ -3434,7 +3435,7 @@ impl<A: HalApi> Device<A> {
             device: self.clone(),
             label: desc.label.to_string(),
             // This would be none in the error condition, which we don't implement yet
-            raw: Some(raw),
+            raw: ManuallyDrop::new(raw),
         };
 
         let cache = Arc::new(cache);
@@ -3535,8 +3536,10 @@ impl<A: HalApi> Device<A> {
 
         let hal_desc = desc.map_label(|label| label.to_hal(self.instance_flags));
 
+        let raw = unsafe { self.raw().create_query_set(&hal_desc).unwrap() };
+
         let query_set = QuerySet {
-            raw: Some(unsafe { self.raw().create_query_set(&hal_desc).unwrap() }),
+            raw: ManuallyDrop::new(raw),
             device: self.clone(),
             label: desc.label.to_string(),
             tracking_data: TrackingData::new(self.tracker_indices.query_sets.clone()),
diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs
index 68e92ca4b6..59226051e5 100644
--- a/wgpu-core/src/pipeline.rs
+++ b/wgpu-core/src/pipeline.rs
@@ -10,7 +10,7 @@ use crate::{
 };
 use arrayvec::ArrayVec;
 use naga::error::ShaderError;
-use std::{borrow::Cow, marker::PhantomData, num::NonZeroU32, sync::Arc};
+use std::{borrow::Cow, marker::PhantomData, mem::ManuallyDrop, num::NonZeroU32, sync::Arc};
 use thiserror::Error;
 
 /// Information about buffer bindings, which
@@ -47,7 +47,7 @@ pub struct ShaderModuleDescriptor<'a> {
 
 #[derive(Debug)]
 pub struct ShaderModule<A: HalApi> {
-    pub(crate) raw: Option<A::ShaderModule>,
+    pub(crate) raw: ManuallyDrop<A::ShaderModule>,
     pub(crate) device: Arc<Device<A>>,
     pub(crate) interface: Option<validation::Interface>,
     /// The `label` from the descriptor used to create the resource.
@@ -56,12 +56,12 @@ pub struct ShaderModule<A: HalApi> {
 
 impl<A: HalApi> Drop for ShaderModule<A> {
     fn drop(&mut self) {
-        if let Some(raw) = self.raw.take() {
-            resource_log!("Destroy raw {}", self.error_ident());
-            unsafe {
-                use hal::Device;
-                self.device.raw().destroy_shader_module(raw);
-            }
+        resource_log!("Destroy raw {}", self.error_ident());
+        // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
+        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
+        unsafe {
+            use hal::Device;
+            self.device.raw().destroy_shader_module(raw);
         }
     }
 }
@@ -73,7 +73,7 @@ crate::impl_storage_item!(ShaderModule);
 
 impl<A: HalApi> ShaderModule<A> {
     pub(crate) fn raw(&self) -> &A::ShaderModule {
-        self.raw.as_ref().unwrap()
+        &self.raw
     }
 
     pub(crate) fn finalize_entry_point_name(
@@ -242,7 +242,7 @@ pub enum CreateComputePipelineError {
 
 #[derive(Debug)]
 pub struct ComputePipeline<A: HalApi> {
-    pub(crate) raw: Option<A::ComputePipeline>,
+    pub(crate) raw: ManuallyDrop<A::ComputePipeline>,
     pub(crate) layout: Arc<PipelineLayout<A>>,
     pub(crate) device: Arc<Device<A>>,
     pub(crate) _shader_module: Arc<ShaderModule<A>>,
@@ -254,12 +254,12 @@ pub struct ComputePipeline<A: HalApi> {
 
 impl<A: HalApi> Drop for ComputePipeline<A> {
     fn drop(&mut self) {
-        if let Some(raw) = self.raw.take() {
-            resource_log!("Destroy raw {}", self.error_ident());
-            unsafe {
-                use hal::Device;
-                self.device.raw().destroy_compute_pipeline(raw);
-            }
+        resource_log!("Destroy raw {}", self.error_ident());
+        // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
+        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
+        unsafe {
+            use hal::Device;
+            self.device.raw().destroy_compute_pipeline(raw);
         }
     }
 }
@@ -272,7 +272,7 @@ crate::impl_trackable!(ComputePipeline);
 
 impl<A: HalApi> ComputePipeline<A> {
     pub(crate) fn raw(&self) -> &A::ComputePipeline {
-        self.raw.as_ref().unwrap()
+        &self.raw
     }
 }
 
@@ -301,7 +301,7 @@ impl From<hal::PipelineCacheError> for CreatePipelineCacheError {
 
 #[derive(Debug)]
 pub struct PipelineCache<A: HalApi> {
-    pub(crate) raw: Option<A::PipelineCache>,
+    pub(crate) raw: ManuallyDrop<A::PipelineCache>,
     pub(crate) device: Arc<Device<A>>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
@@ -309,12 +309,12 @@ pub struct PipelineCache<A: HalApi> {
 
 impl<A: HalApi> Drop for PipelineCache<A> {
     fn drop(&mut self) {
-        if let Some(raw) = self.raw.take() {
-            resource_log!("Destroy raw {}", self.error_ident());
-            unsafe {
-                use hal::Device;
-                self.device.raw().destroy_pipeline_cache(raw);
-            }
+        resource_log!("Destroy raw {}", self.error_ident());
+        // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
+        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
+        unsafe {
+            use hal::Device;
+            self.device.raw().destroy_pipeline_cache(raw);
         }
     }
 }
@@ -324,6 +324,12 @@ crate::impl_labeled!(PipelineCache);
 crate::impl_parent_device!(PipelineCache);
 crate::impl_storage_item!(PipelineCache);
 
+impl<A: HalApi> PipelineCache<A> {
+    pub(crate) fn raw(&self) -> &A::PipelineCache {
+        &self.raw
+    }
+}
+
 /// Describes how the vertex buffer is interpreted.
 #[derive(Clone, Debug)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
@@ -586,7 +592,7 @@ impl Default for VertexStep {
 
 #[derive(Debug)]
 pub struct RenderPipeline<A: HalApi> {
-    pub(crate) raw: Option<A::RenderPipeline>,
+    pub(crate) raw: ManuallyDrop<A::RenderPipeline>,
     pub(crate) device: Arc<Device<A>>,
     pub(crate) layout: Arc<PipelineLayout<A>>,
     pub(crate) _shader_modules:
@@ -603,12 +609,12 @@ pub struct RenderPipeline<A: HalApi> {
 
 impl<A: HalApi> Drop for RenderPipeline<A> {
     fn drop(&mut self) {
-        if let Some(raw) = self.raw.take() {
-            resource_log!("Destroy raw {}", self.error_ident());
-            unsafe {
-                use hal::Device;
-                self.device.raw().destroy_render_pipeline(raw);
-            }
+        resource_log!("Destroy raw {}", self.error_ident());
+        // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
+        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
+        unsafe {
+            use hal::Device;
+            self.device.raw().destroy_render_pipeline(raw);
         }
     }
 }
@@ -621,6 +627,6 @@ crate::impl_trackable!(RenderPipeline);
 
 impl<A: HalApi> RenderPipeline<A> {
     pub(crate) fn raw(&self) -> &A::RenderPipeline {
-        self.raw.as_ref().unwrap()
+        &self.raw
     }
 }
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index b9d35a6012..f125fdfb39 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -1697,7 +1697,7 @@ pub struct SamplerDescriptor<'a> {
 
 #[derive(Debug)]
 pub struct Sampler<A: HalApi> {
-    pub(crate) raw: Option<A::Sampler>,
+    pub(crate) raw: ManuallyDrop<A::Sampler>,
     pub(crate) device: Arc<Device<A>>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
@@ -1710,19 +1710,19 @@ pub struct Sampler<A: HalApi> {
 
 impl<A: HalApi> Drop for Sampler<A> {
     fn drop(&mut self) {
-        if let Some(raw) = self.raw.take() {
-            resource_log!("Destroy raw {}", self.error_ident());
-            unsafe {
-                use hal::Device;
-                self.device.raw().destroy_sampler(raw);
-            }
+        resource_log!("Destroy raw {}", self.error_ident());
+        // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
+        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
+        unsafe {
+            use hal::Device;
+            self.device.raw().destroy_sampler(raw);
         }
     }
 }
 
 impl<A: HalApi> Sampler<A> {
     pub(crate) fn raw(&self) -> &A::Sampler {
-        self.raw.as_ref().unwrap()
+        &self.raw
     }
 }
 
@@ -1793,7 +1793,7 @@ pub type QuerySetDescriptor<'a> = wgt::QuerySetDescriptor<Label<'a>>;
 
 #[derive(Debug)]
 pub struct QuerySet<A: HalApi> {
-    pub(crate) raw: Option<A::QuerySet>,
+    pub(crate) raw: ManuallyDrop<A::QuerySet>,
     pub(crate) device: Arc<Device<A>>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
@@ -1803,12 +1803,12 @@ pub struct QuerySet<A: HalApi> {
 
 impl<A: HalApi> Drop for QuerySet<A> {
     fn drop(&mut self) {
-        if let Some(raw) = self.raw.take() {
-            resource_log!("Destroy raw {}", self.error_ident());
-            unsafe {
-                use hal::Device;
-                self.device.raw().destroy_query_set(raw);
-            }
+        resource_log!("Destroy raw {}", self.error_ident());
+        // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
+        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
+        unsafe {
+            use hal::Device;
+            self.device.raw().destroy_query_set(raw);
         }
     }
 }
@@ -1821,7 +1821,7 @@ crate::impl_trackable!(QuerySet);
 
 impl<A: HalApi> QuerySet<A> {
     pub(crate) fn raw(&self) -> &A::QuerySet {
-        self.raw.as_ref().unwrap()
+        &self.raw
     }
 }
 

From 728b288fdaadeec8a27cd2d5e6941a8cc0e7b624 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 7 Aug 2024 14:43:57 +0200
Subject: [PATCH 156/226] use `ManuallyDrop` for `Device.zero_buffer`

---
 wgpu-core/src/command/clear.rs       | 2 +-
 wgpu-core/src/command/memory_init.rs | 4 ++--
 wgpu-core/src/command/transfer.rs    | 2 +-
 wgpu-core/src/device/queue.rs        | 4 ++--
 wgpu-core/src/device/resource.rs     | 8 +++++---
 5 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/wgpu-core/src/command/clear.rs b/wgpu-core/src/command/clear.rs
index 6f51f73d57..f261e61d44 100644
--- a/wgpu-core/src/command/clear.rs
+++ b/wgpu-core/src/command/clear.rs
@@ -263,7 +263,7 @@ impl Global {
             encoder,
             &mut tracker.textures,
             &device.alignments,
-            device.zero_buffer.as_ref().unwrap(),
+            &device.zero_buffer,
             &snatch_guard,
         )
     }
diff --git a/wgpu-core/src/command/memory_init.rs b/wgpu-core/src/command/memory_init.rs
index 895901d92f..96427eacc7 100644
--- a/wgpu-core/src/command/memory_init.rs
+++ b/wgpu-core/src/command/memory_init.rs
@@ -155,7 +155,7 @@ pub(crate) fn fixup_discarded_surfaces<
             encoder,
             texture_tracker,
             &device.alignments,
-            device.zero_buffer.as_ref().unwrap(),
+            &device.zero_buffer,
             snatch_guard,
         )
         .unwrap();
@@ -310,7 +310,7 @@ impl<A: HalApi> BakedCommands<A> {
                     &mut self.encoder,
                     &mut device_tracker.textures,
                     &device.alignments,
-                    device.zero_buffer.as_ref().unwrap(),
+                    &device.zero_buffer,
                     snatch_guard,
                 );
 
diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs
index 0e4c21f999..b8208f5dd0 100644
--- a/wgpu-core/src/command/transfer.rs
+++ b/wgpu-core/src/command/transfer.rs
@@ -445,7 +445,7 @@ fn handle_texture_init<A: HalApi>(
                 cmd_buf_raw,
                 &mut trackers.textures,
                 &device.alignments,
-                device.zero_buffer.as_ref().unwrap(),
+                &device.zero_buffer,
                 snatch_guard,
             )?;
         }
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 1b562d560c..2c564d6ee7 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -723,7 +723,7 @@ impl Global {
                         encoder,
                         &mut trackers.textures,
                         &device.alignments,
-                        device.zero_buffer.as_ref().unwrap(),
+                        &device.zero_buffer,
                         &device.snatchable_lock.read(),
                     )
                     .map_err(QueueWriteError::from)?;
@@ -990,7 +990,7 @@ impl Global {
                         encoder,
                         &mut trackers.textures,
                         &device.alignments,
-                        device.zero_buffer.as_ref().unwrap(),
+                        &device.zero_buffer,
                         &device.snatchable_lock.read(),
                     )
                     .map_err(QueueWriteError::from)?;
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 63c09831da..667bb9ee5c 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -84,7 +84,7 @@ pub struct Device<A: HalApi> {
     pub(crate) adapter: Arc<Adapter<A>>,
     pub(crate) queue: OnceCell<Weak<Queue<A>>>,
     queue_to_drop: OnceCell<A::Queue>,
-    pub(crate) zero_buffer: Option<A::Buffer>,
+    pub(crate) zero_buffer: ManuallyDrop<A::Buffer>,
     /// The `label` from the descriptor used to create the resource.
     label: String,
 
@@ -171,12 +171,14 @@ impl<A: HalApi> Drop for Device<A> {
         resource_log!("Drop {}", self.error_ident());
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
         let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
+        // SAFETY: We are in the Drop impl and we don't use self.zero_buffer anymore after this point.
+        let zero_buffer = unsafe { ManuallyDrop::take(&mut self.zero_buffer) };
         // SAFETY: We are in the Drop impl and we don't use self.pending_writes anymore after this point.
         let pending_writes = unsafe { ManuallyDrop::take(&mut self.pending_writes.lock()) };
         pending_writes.dispose(&raw);
         self.command_allocator.dispose(&raw);
         unsafe {
-            raw.destroy_buffer(self.zero_buffer.take().unwrap());
+            raw.destroy_buffer(zero_buffer);
             raw.destroy_fence(self.fence.write().take().unwrap());
             let queue = self.queue_to_drop.take().unwrap();
             raw.exit(queue);
@@ -276,7 +278,7 @@ impl<A: HalApi> Device<A> {
             adapter: adapter.clone(),
             queue: OnceCell::new(),
             queue_to_drop: OnceCell::new(),
-            zero_buffer: Some(zero_buffer),
+            zero_buffer: ManuallyDrop::new(zero_buffer),
             label: desc.label.to_string(),
             command_allocator,
             active_submission_index: AtomicU64::new(0),

From 19843c9c5f9f14d6ed031e0077580b5c063cc047 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 7 Aug 2024 14:53:49 +0200
Subject: [PATCH 157/226] use `ManuallyDrop` for `Device.fence`

---
 wgpu-core/src/device/queue.rs    |  7 +++----
 wgpu-core/src/device/resource.rs | 30 ++++++++++++++----------------
 wgpu-core/src/present.rs         |  7 +++----
 wgpu-core/src/resource.rs        |  5 ++---
 4 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 2c564d6ee7..8c076644f9 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -1054,8 +1054,7 @@ impl Global {
             let snatch_guard = device.snatchable_lock.read();
 
             // Fence lock must be acquired after the snatch lock everywhere to avoid deadlocks.
-            let mut fence_guard = device.fence.write();
-            let fence = fence_guard.as_mut().unwrap();
+            let mut fence = device.fence.write();
             let submit_index = device
                 .active_submission_index
                 .fetch_add(1, Ordering::SeqCst)
@@ -1304,7 +1303,7 @@ impl Global {
                         .submit(
                             &hal_command_buffers,
                             &submit_surface_textures,
-                            (fence, submit_index),
+                            (&mut fence, submit_index),
                         )
                         .map_err(DeviceError::from)?;
                 }
@@ -1327,7 +1326,7 @@ impl Global {
 
             // This will schedule destruction of all resources that are no longer needed
             // by the user but used in the command stream, among other things.
-            let fence_guard = RwLockWriteGuard::downgrade(fence_guard);
+            let fence_guard = RwLockWriteGuard::downgrade(fence);
             let (closures, _) =
                 match device.maintain(fence_guard, wgt::Maintain::Poll, snatch_guard) {
                     Ok(closures) => closures,
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 667bb9ee5c..79000c4523 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -112,7 +112,7 @@ pub struct Device<A: HalApi> {
 
     // NOTE: if both are needed, the `snatchable_lock` must be consistently acquired before the
     // `fence` lock to avoid deadlocks.
-    pub(crate) fence: RwLock<Option<A::Fence>>,
+    pub(crate) fence: RwLock<ManuallyDrop<A::Fence>>,
     pub(crate) snatchable_lock: SnatchLock,
 
     /// Is this device valid? Valid is closely associated with "lose the device",
@@ -175,11 +175,13 @@ impl<A: HalApi> Drop for Device<A> {
         let zero_buffer = unsafe { ManuallyDrop::take(&mut self.zero_buffer) };
         // SAFETY: We are in the Drop impl and we don't use self.pending_writes anymore after this point.
         let pending_writes = unsafe { ManuallyDrop::take(&mut self.pending_writes.lock()) };
+        // SAFETY: We are in the Drop impl and we don't use self.fence anymore after this point.
+        let fence = unsafe { ManuallyDrop::take(&mut self.fence.write()) };
         pending_writes.dispose(&raw);
         self.command_allocator.dispose(&raw);
         unsafe {
             raw.destroy_buffer(zero_buffer);
-            raw.destroy_fence(self.fence.write().take().unwrap());
+            raw.destroy_fence(fence);
             let queue = self.queue_to_drop.take().unwrap();
             raw.exit(queue);
         }
@@ -283,7 +285,7 @@ impl<A: HalApi> Device<A> {
             command_allocator,
             active_submission_index: AtomicU64::new(0),
             last_successful_submission_index: AtomicU64::new(0),
-            fence: RwLock::new(rank::DEVICE_FENCE, Some(fence)),
+            fence: RwLock::new(rank::DEVICE_FENCE, ManuallyDrop::new(fence)),
             snatchable_lock: unsafe { SnatchLock::new(rank::DEVICE_SNATCHABLE_LOCK) },
             valid: AtomicBool::new(true),
             trackers: Mutex::new(rank::DEVICE_TRACKERS, DeviceTracker::new()),
@@ -409,14 +411,12 @@ impl<A: HalApi> Device<A> {
     ///   return it to our callers.)
     pub(crate) fn maintain<'this>(
         &'this self,
-        fence_guard: crate::lock::RwLockReadGuard<Option<A::Fence>>,
+        fence: crate::lock::RwLockReadGuard<ManuallyDrop<A::Fence>>,
         maintain: wgt::Maintain<crate::SubmissionIndex>,
         snatch_guard: SnatchGuard,
     ) -> Result<(UserClosures, bool), WaitIdleError> {
         profiling::scope!("Device::maintain");
 
-        let fence = fence_guard.as_ref().unwrap();
-
         // Determine which submission index `maintain` represents.
         let submission_index = match maintain {
             wgt::Maintain::WaitForSubmissionIndex(submission_index) => {
@@ -440,7 +440,7 @@ impl<A: HalApi> Device<A> {
                 .load(Ordering::Acquire),
             wgt::Maintain::Poll => unsafe {
                 self.raw()
-                    .get_fence_value(fence)
+                    .get_fence_value(&fence)
                     .map_err(DeviceError::from)?
             },
         };
@@ -449,7 +449,7 @@ impl<A: HalApi> Device<A> {
         if maintain.is_wait() {
             unsafe {
                 self.raw()
-                    .wait(fence, submission_index, CLEANUP_WAIT_MS)
+                    .wait(&fence, submission_index, CLEANUP_WAIT_MS)
                     .map_err(DeviceError::from)?
             };
         }
@@ -490,7 +490,7 @@ impl<A: HalApi> Device<A> {
 
         // Don't hold the locks while calling release_gpu_resources.
         drop(life_tracker);
-        drop(fence_guard);
+        drop(fence);
         drop(snatch_guard);
 
         if should_release_gpu_resource {
@@ -3490,12 +3490,11 @@ impl<A: HalApi> Device<A> {
         &self,
         submission_index: crate::SubmissionIndex,
     ) -> Result<(), DeviceError> {
-        let guard = self.fence.read();
-        let fence = guard.as_ref().unwrap();
-        let last_done_index = unsafe { self.raw().get_fence_value(fence)? };
+        let fence = self.fence.read();
+        let last_done_index = unsafe { self.raw().get_fence_value(&fence)? };
         if last_done_index < submission_index {
-            unsafe { self.raw().wait(fence, submission_index, !0)? };
-            drop(guard);
+            unsafe { self.raw().wait(&fence, submission_index, !0)? };
+            drop(fence);
             let closures = self
                 .lock_life()
                 .triage_submissions(submission_index, &self.command_allocator);
@@ -3638,8 +3637,7 @@ impl<A: HalApi> Device<A> {
             .load(Ordering::Acquire);
         if let Err(error) = unsafe {
             let fence = self.fence.read();
-            let fence = fence.as_ref().unwrap();
-            self.raw().wait(fence, current_index, CLEANUP_WAIT_MS)
+            self.raw().wait(&fence, current_index, CLEANUP_WAIT_MS)
         } {
             log::error!("failed to wait for the device: {error}");
         }
diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs
index e22a772680..9c4c9115fd 100644
--- a/wgpu-core/src/present.rs
+++ b/wgpu-core/src/present.rs
@@ -152,18 +152,17 @@ impl Global {
             });
         }
 
-        let fence_guard = device.fence.read();
-        let fence = fence_guard.as_ref().unwrap();
+        let fence = device.fence.read();
 
         let suf = A::surface_as_hal(surface.as_ref());
         let (texture_id, status) = match unsafe {
             suf.unwrap().acquire_texture(
                 Some(std::time::Duration::from_millis(FRAME_TIMEOUT_MS as u64)),
-                fence,
+                &fence,
             )
         } {
             Ok(Some(ast)) => {
-                drop(fence_guard);
+                drop(fence);
 
                 let texture_desc = wgt::TextureDescriptor {
                     label: Some(std::borrow::Cow::Borrowed("<Surface Texture>")),
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index f125fdfb39..25c351549e 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -1312,9 +1312,8 @@ impl Global {
         let hub = A::hub(self);
 
         if let Ok(device) = hub.devices.get(id) {
-            let hal_fence = device.fence.read();
-            let hal_fence = hal_fence.as_ref();
-            hal_fence_callback(hal_fence)
+            let fence = device.fence.read();
+            hal_fence_callback(Some(&fence))
         } else {
             hal_fence_callback(None)
         }

From ce9c9b76f63b90c94e982d1b1d6195fc2aa502da Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 7 Aug 2024 15:15:32 +0200
Subject: [PATCH 158/226] remove `Option` around `A::SurfaceTexture`

We can rely on the snatching mechanism to take the surface texture.
---
 wgpu-core/src/command/mod.rs  |  2 +-
 wgpu-core/src/device/queue.rs | 29 +++++++++++++----------------
 wgpu-core/src/present.rs      | 22 ++++++++--------------
 wgpu-core/src/resource.rs     | 21 +++++++--------------
 wgpu-core/src/snatch.rs       |  5 -----
 5 files changed, 29 insertions(+), 50 deletions(-)

diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs
index 7d4d86673f..7314e8f04c 100644
--- a/wgpu-core/src/command/mod.rs
+++ b/wgpu-core/src/command/mod.rs
@@ -409,7 +409,7 @@ impl<A: HalApi> CommandBuffer<A> {
         let texture_barriers = transitions
             .into_iter()
             .enumerate()
-            .map(|(i, p)| p.into_hal(textures[i].unwrap().raw().unwrap()));
+            .map(|(i, p)| p.into_hal(textures[i].unwrap().raw()));
 
         unsafe {
             raw.transition_buffers(buffer_barriers);
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 8c076644f9..27f13e2f46 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -1143,12 +1143,10 @@ impl Global {
                                 for texture in cmd_buf_trackers.textures.used_resources() {
                                     let should_extend = match texture.try_inner(&snatch_guard)? {
                                         TextureInner::Native { .. } => false,
-                                        TextureInner::Surface { ref raw, .. } => {
-                                            if raw.is_some() {
-                                                // Compare the Arcs by pointer as Textures don't implement Eq.
-                                                submit_surface_textures_owned
-                                                    .insert(Arc::as_ptr(&texture), texture.clone());
-                                            }
+                                        TextureInner::Surface { .. } => {
+                                            // Compare the Arcs by pointer as Textures don't implement Eq.
+                                            submit_surface_textures_owned
+                                                .insert(Arc::as_ptr(&texture), texture.clone());
 
                                             true
                                         }
@@ -1242,12 +1240,10 @@ impl Global {
                 for texture in pending_writes.dst_textures.values() {
                     match texture.try_inner(&snatch_guard)? {
                         TextureInner::Native { .. } => {}
-                        TextureInner::Surface { ref raw, .. } => {
-                            if raw.is_some() {
-                                // Compare the Arcs by pointer as Textures don't implement Eq
-                                submit_surface_textures_owned
-                                    .insert(Arc::as_ptr(texture), texture.clone());
-                            }
+                        TextureInner::Surface { .. } => {
+                            // Compare the Arcs by pointer as Textures don't implement Eq
+                            submit_surface_textures_owned
+                                .insert(Arc::as_ptr(texture), texture.clone());
 
                             unsafe {
                                 used_surface_textures
@@ -1291,10 +1287,11 @@ impl Global {
                     SmallVec::<[_; 2]>::with_capacity(submit_surface_textures_owned.len());
 
                 for texture in submit_surface_textures_owned.values() {
-                    submit_surface_textures.extend(match texture.inner.get(&snatch_guard) {
-                        Some(TextureInner::Surface { raw, .. }) => raw.as_ref(),
-                        _ => None,
-                    });
+                    let raw = match texture.inner.get(&snatch_guard) {
+                        Some(TextureInner::Surface { raw, .. }) => raw,
+                        _ => unreachable!(),
+                    };
+                    submit_surface_textures.push(raw);
                 }
 
                 unsafe {
diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs
index 9c4c9115fd..38828f7643 100644
--- a/wgpu-core/src/present.rs
+++ b/wgpu-core/src/present.rs
@@ -208,7 +208,7 @@ impl Global {
                 let texture = resource::Texture::new(
                     &device,
                     resource::TextureInner::Surface {
-                        raw: Some(ast.texture),
+                        raw: ast.texture,
                         parent_id: surface_id,
                     },
                     hal_usage,
@@ -306,21 +306,15 @@ impl Global {
                     .lock()
                     .textures
                     .remove(texture.tracker_index());
-                let mut exclusive_snatch_guard = device.snatchable_lock.write();
                 let suf = A::surface_as_hal(&surface);
-                let mut inner = texture.inner_mut(&mut exclusive_snatch_guard);
-                let inner = inner.as_mut().unwrap();
-
-                match *inner {
-                    resource::TextureInner::Surface {
-                        ref mut raw,
-                        ref parent_id,
-                    } => {
-                        if surface_id != *parent_id {
+                let exclusive_snatch_guard = device.snatchable_lock.write();
+                match texture.inner.snatch(exclusive_snatch_guard).unwrap() {
+                    resource::TextureInner::Surface { raw, parent_id } => {
+                        if surface_id != parent_id {
                             log::error!("Presented frame is from a different surface");
                             Err(hal::SurfaceError::Lost)
                         } else {
-                            unsafe { queue.raw().present(suf.unwrap(), raw.take().unwrap()) }
+                            unsafe { queue.raw().present(suf.unwrap(), raw) }
                         }
                     }
                     _ => unreachable!(),
@@ -390,9 +384,9 @@ impl Global {
                 let suf = A::surface_as_hal(&surface);
                 let exclusive_snatch_guard = device.snatchable_lock.write();
                 match texture.inner.snatch(exclusive_snatch_guard).unwrap() {
-                    resource::TextureInner::Surface { mut raw, parent_id } => {
+                    resource::TextureInner::Surface { raw, parent_id } => {
                         if surface_id == parent_id {
-                            unsafe { suf.unwrap().discard_texture(raw.take().unwrap()) };
+                            unsafe { suf.unwrap().discard_texture(raw) };
                         } else {
                             log::warn!("Surface texture is outdated");
                         }
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 25c351549e..d23290789a 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -12,7 +12,7 @@ use crate::{
     init_tracker::{BufferInitTracker, TextureInitTracker},
     lock::{rank, Mutex, RwLock},
     resource_log,
-    snatch::{ExclusiveSnatchGuard, SnatchGuard, Snatchable},
+    snatch::{SnatchGuard, Snatchable},
     track::{SharedTrackerIndexAllocator, TextureSelector, TrackerIndex},
     Label, LabelHelpers,
 };
@@ -953,17 +953,16 @@ pub(crate) enum TextureInner<A: HalApi> {
         raw: A::Texture,
     },
     Surface {
-        raw: Option<A::SurfaceTexture>,
+        raw: A::SurfaceTexture,
         parent_id: SurfaceId,
     },
 }
 
 impl<A: HalApi> TextureInner<A> {
-    pub(crate) fn raw(&self) -> Option<&A::Texture> {
+    pub(crate) fn raw(&self) -> &A::Texture {
         match self {
-            Self::Native { raw } => Some(raw),
-            Self::Surface { raw: Some(tex), .. } => Some(tex.borrow()),
-            _ => None,
+            Self::Native { raw } => raw,
+            Self::Surface { raw, .. } => raw.borrow(),
         }
     }
 }
@@ -1104,7 +1103,7 @@ impl<A: HalApi> Texture<A> {
     }
 
     pub(crate) fn raw<'a>(&'a self, snatch_guard: &'a SnatchGuard) -> Option<&'a A::Texture> {
-        self.inner.get(snatch_guard)?.raw()
+        Some(self.inner.get(snatch_guard)?.raw())
     }
 
     pub(crate) fn try_raw<'a>(
@@ -1113,16 +1112,10 @@ impl<A: HalApi> Texture<A> {
     ) -> Result<&'a A::Texture, DestroyedResourceError> {
         self.inner
             .get(guard)
-            .and_then(|t| t.raw())
+            .map(|t| t.raw())
             .ok_or_else(|| DestroyedResourceError(self.error_ident()))
     }
 
-    pub(crate) fn inner_mut<'a>(
-        &'a self,
-        guard: &'a mut ExclusiveSnatchGuard,
-    ) -> Option<&'a mut TextureInner<A>> {
-        self.inner.get_mut(guard)
-    }
     pub(crate) fn get_clear_view<'a>(
         clear_mode: &'a TextureClearMode<A>,
         desc: &'a wgt::TextureDescriptor<(), Vec<wgt::TextureFormat>>,
diff --git a/wgpu-core/src/snatch.rs b/wgpu-core/src/snatch.rs
index 6f60f45d85..9866b77723 100644
--- a/wgpu-core/src/snatch.rs
+++ b/wgpu-core/src/snatch.rs
@@ -37,11 +37,6 @@ impl<T> Snatchable<T> {
         unsafe { (*self.value.get()).as_ref() }
     }
 
-    /// Get write access to the value. Requires a the snatchable lock's write guard.
-    pub fn get_mut<'a>(&'a self, _guard: &'a mut ExclusiveSnatchGuard) -> Option<&'a mut T> {
-        unsafe { (*self.value.get()).as_mut() }
-    }
-
     /// Take the value. Requires a the snatchable lock's write guard.
     pub fn snatch(&self, _guard: ExclusiveSnatchGuard) -> Option<T> {
         unsafe { (*self.value.get()).take() }

From 1aaaec22f69301eda815c28d00882d7e9fb3c780 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 7 Aug 2024 15:26:39 +0200
Subject: [PATCH 159/226] use `ManuallyDrop` for texture views of
 `TextureClearMode`

---
 wgpu-core/src/device/resource.rs |  2 +-
 wgpu-core/src/present.rs         |  4 ++--
 wgpu-core/src/resource.rs        | 24 ++++++++++++------------
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 79000c4523..927358ea2c 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -971,7 +971,7 @@ impl<A: HalApi> Device<A> {
                                     array_layer_count: Some(1),
                                 },
                             };
-                            clear_views.push(Some(
+                            clear_views.push(ManuallyDrop::new(
                                 unsafe { self.raw().create_texture_view(&raw_texture, &desc) }
                                     .map_err(DeviceError::from)?,
                             ));
diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs
index 38828f7643..3521f04388 100644
--- a/wgpu-core/src/present.rs
+++ b/wgpu-core/src/present.rs
@@ -9,7 +9,7 @@ When this texture is presented, we remove it from the device tracker as well as
 extract it from the hub.
 !*/
 
-use std::{borrow::Borrow, sync::Arc};
+use std::{borrow::Borrow, mem::ManuallyDrop, sync::Arc};
 
 #[cfg(feature = "trace")]
 use crate::device::trace::Action;
@@ -215,7 +215,7 @@ impl Global {
                     &texture_desc,
                     format_features,
                     resource::TextureClearMode::Surface {
-                        clear_view: Some(clear_view),
+                        clear_view: ManuallyDrop::new(clear_view),
                     },
                     true,
                 );
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index d23290789a..80ed66b310 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -972,11 +972,11 @@ pub enum TextureClearMode<A: HalApi> {
     BufferCopy,
     // View for clear via RenderPass for every subsurface (mip/layer/slice)
     RenderPass {
-        clear_views: SmallVec<[Option<A::TextureView>; 1]>,
+        clear_views: SmallVec<[ManuallyDrop<A::TextureView>; 1]>,
         is_color: bool,
     },
     Surface {
-        clear_view: Option<A::TextureView>,
+        clear_view: ManuallyDrop<A::TextureView>,
     },
     // Texture can't be cleared, attempting to do so will cause panic.
     // (either because it is impossible for the type of texture or it is being destroyed)
@@ -1062,10 +1062,10 @@ impl<A: HalApi> Drop for Texture<A> {
             TextureClearMode::Surface {
                 ref mut clear_view, ..
             } => {
-                if let Some(view) = clear_view.take() {
-                    unsafe {
-                        self.device.raw().destroy_texture_view(view);
-                    }
+                // SAFETY: We are in the Drop impl and we don't use clear_view anymore after this point.
+                let raw = unsafe { ManuallyDrop::take(clear_view) };
+                unsafe {
+                    self.device.raw().destroy_texture_view(raw);
                 }
             }
             TextureClearMode::RenderPass {
@@ -1073,10 +1073,10 @@ impl<A: HalApi> Drop for Texture<A> {
                 ..
             } => {
                 clear_views.iter_mut().for_each(|clear_view| {
-                    if let Some(view) = clear_view.take() {
-                        unsafe {
-                            self.device.raw().destroy_texture_view(view);
-                        }
+                    // SAFETY: We are in the Drop impl and we don't use clear_view anymore after this point.
+                    let raw = unsafe { ManuallyDrop::take(clear_view) };
+                    unsafe {
+                        self.device.raw().destroy_texture_view(raw);
                     }
                 });
             }
@@ -1129,7 +1129,7 @@ impl<A: HalApi> Texture<A> {
             TextureClearMode::None => {
                 panic!("Given texture can't be cleared")
             }
-            TextureClearMode::Surface { ref clear_view, .. } => clear_view.as_ref().unwrap(),
+            TextureClearMode::Surface { ref clear_view, .. } => clear_view,
             TextureClearMode::RenderPass {
                 ref clear_views, ..
             } => {
@@ -1140,7 +1140,7 @@ impl<A: HalApi> Texture<A> {
                 } else {
                     mip_level * desc.size.depth_or_array_layers
                 } + depth_or_layer;
-                clear_views[index as usize].as_ref().unwrap()
+                &clear_views[index as usize]
             }
         }
     }

From c72bc7b84b094f70cc47dd8a1aa8175e3f3dc5b6 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Wed, 7 Aug 2024 15:30:19 +0200
Subject: [PATCH 160/226] remove unnecessary `RwLock` from `Texture.clear_mode`

---
 wgpu-core/src/command/clear.rs | 9 ++++-----
 wgpu-core/src/lock/rank.rs     | 1 -
 wgpu-core/src/resource.rs      | 8 +++-----
 3 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/wgpu-core/src/command/clear.rs b/wgpu-core/src/command/clear.rs
index f261e61d44..a93fe8345d 100644
--- a/wgpu-core/src/command/clear.rs
+++ b/wgpu-core/src/command/clear.rs
@@ -281,7 +281,7 @@ pub(crate) fn clear_texture<A: HalApi, T: TextureTrackerSetSingle<A>>(
     let dst_raw = dst_texture.try_raw(snatch_guard)?;
 
     // Issue the right barrier.
-    let clear_usage = match *dst_texture.clear_mode.read() {
+    let clear_usage = match dst_texture.clear_mode {
         TextureClearMode::BufferCopy => hal::TextureUses::COPY_DST,
         TextureClearMode::RenderPass {
             is_color: false, ..
@@ -322,7 +322,7 @@ pub(crate) fn clear_texture<A: HalApi, T: TextureTrackerSetSingle<A>>(
     }
 
     // Record actual clearing
-    match *dst_texture.clear_mode.read() {
+    match dst_texture.clear_mode {
         TextureClearMode::BufferCopy => clear_texture_via_buffer_copies::<A>(
             &dst_texture.desc,
             alignments,
@@ -453,7 +453,6 @@ fn clear_texture_via_render_passes<A: HalApi>(
         height: dst_texture.desc.size.height,
         depth_or_array_layers: 1, // Only one layer is cleared at a time.
     };
-    let clear_mode = &dst_texture.clear_mode.read();
 
     for mip_level in range.mip_range {
         let extent = extent_base.mip_level_size(mip_level, dst_texture.desc.dimension);
@@ -463,7 +462,7 @@ fn clear_texture_via_render_passes<A: HalApi>(
                 color_attachments_tmp = [Some(hal::ColorAttachment {
                     target: hal::Attachment {
                         view: Texture::get_clear_view(
-                            clear_mode,
+                            &dst_texture.clear_mode,
                             &dst_texture.desc,
                             mip_level,
                             depth_or_layer,
@@ -481,7 +480,7 @@ fn clear_texture_via_render_passes<A: HalApi>(
                     Some(hal::DepthStencilAttachment {
                         target: hal::Attachment {
                             view: Texture::get_clear_view(
-                                clear_mode,
+                                &dst_texture.clear_mode,
                                 &dst_texture.desc,
                                 mip_level,
                                 depth_or_layer,
diff --git a/wgpu-core/src/lock/rank.rs b/wgpu-core/src/lock/rank.rs
index 2539ffe16d..162d3d2604 100644
--- a/wgpu-core/src/lock/rank.rs
+++ b/wgpu-core/src/lock/rank.rs
@@ -133,7 +133,6 @@ define_lock_ranks! {
     rank SURFACE_PRESENTATION "Surface::presentation" followed by { }
     rank TEXTURE_BIND_GROUPS "Texture::bind_groups" followed by { }
     rank TEXTURE_INITIALIZATION_STATUS "Texture::initialization_status" followed by { }
-    rank TEXTURE_CLEAR_MODE "Texture::clear_mode" followed by { }
     rank TEXTURE_VIEWS "Texture::views" followed by { }
 
     #[cfg(test)]
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 80ed66b310..f6742ba825 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -995,7 +995,7 @@ pub struct Texture<A: HalApi> {
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
     pub(crate) tracking_data: TrackingData,
-    pub(crate) clear_mode: RwLock<TextureClearMode<A>>,
+    pub(crate) clear_mode: TextureClearMode<A>,
     pub(crate) views: Mutex<Vec<Weak<TextureView<A>>>>,
     pub(crate) bind_groups: Mutex<Vec<Weak<BindGroup<A>>>>,
 }
@@ -1030,7 +1030,7 @@ impl<A: HalApi> Texture<A> {
             },
             label: desc.label.to_string(),
             tracking_data: TrackingData::new(device.tracker_indices.textures.clone()),
-            clear_mode: RwLock::new(rank::TEXTURE_CLEAR_MODE, clear_mode),
+            clear_mode,
             views: Mutex::new(rank::TEXTURE_VIEWS, Vec::new()),
             bind_groups: Mutex::new(rank::TEXTURE_BIND_GROUPS, Vec::new()),
         }
@@ -1056,9 +1056,7 @@ impl<A: HalApi> Texture<A> {
 impl<A: HalApi> Drop for Texture<A> {
     fn drop(&mut self) {
         use hal::Device;
-        let mut clear_mode = self.clear_mode.write();
-        let clear_mode = &mut *clear_mode;
-        match *clear_mode {
+        match self.clear_mode {
             TextureClearMode::Surface {
                 ref mut clear_view, ..
             } => {

From b1eb6db8cd05772cd0dbc82a16bf8755892ce17b Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Mon, 12 Aug 2024 12:22:54 +0100
Subject: [PATCH 161/226] refactor: satisfy `clippy::manual_bits`

---
 wgpu-core/src/id.rs             | 2 +-
 wgpu-core/src/track/metadata.rs | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/wgpu-core/src/id.rs b/wgpu-core/src/id.rs
index 83b2494391..19baa2e6f0 100644
--- a/wgpu-core/src/id.rs
+++ b/wgpu-core/src/id.rs
@@ -11,7 +11,7 @@ type IdType = u64;
 type ZippedIndex = Index;
 type NonZeroId = std::num::NonZeroU64;
 
-const INDEX_BITS: usize = std::mem::size_of::<ZippedIndex>() * 8;
+const INDEX_BITS: usize = ZippedIndex::BITS as usize;
 const EPOCH_BITS: usize = INDEX_BITS - BACKEND_BITS;
 const BACKEND_BITS: usize = 3;
 const BACKEND_SHIFT: usize = INDEX_BITS * 2 - BACKEND_BITS;
diff --git a/wgpu-core/src/track/metadata.rs b/wgpu-core/src/track/metadata.rs
index 855282d72c..22576207ae 100644
--- a/wgpu-core/src/track/metadata.rs
+++ b/wgpu-core/src/track/metadata.rs
@@ -1,7 +1,6 @@
 //! The `ResourceMetadata` type.
 
 use bit_vec::BitVec;
-use std::mem;
 use wgt::strict_assert;
 
 /// A set of resources, holding a `Arc<T>` and epoch for each member.
@@ -191,7 +190,7 @@ fn resize_bitvec<B: bit_vec::BitBlock>(vec: &mut BitVec<B>, size: usize) {
 ///
 /// Will skip entire usize's worth of bits if they are all false.
 fn iterate_bitvec_indices(ownership: &BitVec<usize>) -> impl Iterator<Item = usize> + '_ {
-    const BITS_PER_BLOCK: usize = mem::size_of::<usize>() * 8;
+    const BITS_PER_BLOCK: usize = usize::BITS as usize;
 
     let size = ownership.len();
 

From ce23c02feb26e0bc6699e429994a616f703285aa Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Mon, 12 Aug 2024 11:37:35 +0100
Subject: [PATCH 162/226] chore(naga): remove dead `"validate"` feat. refs.

Missed in a26e4a00, but we're fixing it now!
---
 naga/src/valid/expression.rs | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/naga/src/valid/expression.rs b/naga/src/valid/expression.rs
index 116560bb61..ba99211f07 100644
--- a/naga/src/valid/expression.rs
+++ b/naga/src/valid/expression.rs
@@ -1696,7 +1696,7 @@ pub fn check_literal_value(literal: crate::Literal) -> Result<(), LiteralError>
     Ok(())
 }
 
-#[cfg(all(test, feature = "validate"))]
+#[cfg(test)]
 /// Validate a module containing the given expression, expecting an error.
 fn validate_with_expression(
     expr: crate::Expression,
@@ -1719,7 +1719,7 @@ fn validate_with_expression(
     validator.validate(&module)
 }
 
-#[cfg(all(test, feature = "validate"))]
+#[cfg(test)]
 /// Validate a module containing the given constant expression, expecting an error.
 fn validate_with_const_expression(
     expr: crate::Expression,
@@ -1736,7 +1736,6 @@ fn validate_with_const_expression(
 }
 
 /// Using F64 in a function's expression arena is forbidden.
-#[cfg(feature = "validate")]
 #[test]
 fn f64_runtime_literals() {
     let result = validate_with_expression(
@@ -1768,7 +1767,6 @@ fn f64_runtime_literals() {
 }
 
 /// Using F64 in a module's constant expression arena is forbidden.
-#[cfg(feature = "validate")]
 #[test]
 fn f64_const_literals() {
     let result = validate_with_const_expression(
@@ -1797,7 +1795,6 @@ fn f64_const_literals() {
 }
 
 /// Using I64 in a function's expression arena is forbidden.
-#[cfg(feature = "validate")]
 #[test]
 fn i64_runtime_literals() {
     let result = validate_with_expression(
@@ -1821,7 +1818,6 @@ fn i64_runtime_literals() {
 }
 
 /// Using I64 in a module's constant expression arena is forbidden.
-#[cfg(feature = "validate")]
 #[test]
 fn i64_const_literals() {
     let result = validate_with_const_expression(

From 7f881bd35fca297b46a4e5927d68dd1ffc92f57f Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Mon, 12 Aug 2024 11:54:30 +0100
Subject: [PATCH 163/226] chore(naga): remove broken `Unsupported64Bit` tests

Seemingly missed in 4e6f873da5e25acef9898f0f3490591c8be2e0b6.
---
 naga/src/valid/expression.rs | 43 ------------------------------------
 1 file changed, 43 deletions(-)

diff --git a/naga/src/valid/expression.rs b/naga/src/valid/expression.rs
index ba99211f07..09dd768e24 100644
--- a/naga/src/valid/expression.rs
+++ b/naga/src/valid/expression.rs
@@ -1793,46 +1793,3 @@ fn f64_const_literals() {
     );
     assert!(result.is_ok());
 }
-
-/// Using I64 in a function's expression arena is forbidden.
-#[test]
-fn i64_runtime_literals() {
-    let result = validate_with_expression(
-        crate::Expression::Literal(crate::Literal::I64(1729)),
-        // There is no capability that enables this.
-        super::Capabilities::all(),
-    );
-    let error = result.unwrap_err().into_inner();
-    assert!(matches!(
-        error,
-        crate::valid::ValidationError::Function {
-            source: super::FunctionError::Expression {
-                source: super::ExpressionError::Literal(super::LiteralError::Width(
-                    super::r#type::WidthError::Unsupported64Bit
-                ),),
-                ..
-            },
-            ..
-        }
-    ));
-}
-
-/// Using I64 in a module's constant expression arena is forbidden.
-#[test]
-fn i64_const_literals() {
-    let result = validate_with_const_expression(
-        crate::Expression::Literal(crate::Literal::I64(1729)),
-        // There is no capability that enables this.
-        super::Capabilities::all(),
-    );
-    let error = result.unwrap_err().into_inner();
-    assert!(matches!(
-        error,
-        crate::valid::ValidationError::ConstExpression {
-            source: super::ConstExpressionError::Literal(super::LiteralError::Width(
-                super::r#type::WidthError::Unsupported64Bit,
-            ),),
-            ..
-        }
-    ));
-}

From 22b8f50987b60857e809b3e69c3b13ff8aac3c77 Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Mon, 12 Aug 2024 11:43:07 +0100
Subject: [PATCH 164/226] chore: satisfy `unused_qualifications` lint

---
 naga/src/valid/expression.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/naga/src/valid/expression.rs b/naga/src/valid/expression.rs
index 09dd768e24..1d1420aef6 100644
--- a/naga/src/valid/expression.rs
+++ b/naga/src/valid/expression.rs
@@ -1747,7 +1747,7 @@ fn f64_runtime_literals() {
         error,
         crate::valid::ValidationError::Function {
             source: super::FunctionError::Expression {
-                source: super::ExpressionError::Literal(super::LiteralError::Width(
+                source: ExpressionError::Literal(LiteralError::Width(
                     super::r#type::WidthError::MissingCapability {
                         name: "f64",
                         flag: "FLOAT64",
@@ -1777,7 +1777,7 @@ fn f64_const_literals() {
     assert!(matches!(
         error,
         crate::valid::ValidationError::ConstExpression {
-            source: super::ConstExpressionError::Literal(super::LiteralError::Width(
+            source: ConstExpressionError::Literal(LiteralError::Width(
                 super::r#type::WidthError::MissingCapability {
                     name: "f64",
                     flag: "FLOAT64",

From bf051fb476b0b1227cbf2802b1baabe1c3aa5490 Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Mon, 12 Aug 2024 12:17:53 +0100
Subject: [PATCH 165/226] refactor(naga): use same Firefox commentary for
 `rust-version`

---
 naga/Cargo.toml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/naga/Cargo.toml b/naga/Cargo.toml
index cd6bd5e9af..e8415a3bc3 100644
--- a/naga/Cargo.toml
+++ b/naga/Cargo.toml
@@ -9,9 +9,14 @@ keywords = ["shader", "SPIR-V", "GLSL", "MSL"]
 license = "MIT OR Apache-2.0"
 exclude = ["bin/**/*", "tests/**/*", "Cargo.lock", "target/**/*"]
 resolver = "2"
-rust-version = "1.76"
 autotests = false
 
+# Override the workspace's `rust-version` key. Firefox uses `cargo vendor` to
+# copy the crates it actually uses out of the workspace, so it's meaningful for
+# them to have less restrictive MSRVs individually than the workspace as a
+# whole, if their code permits. See `../README.md` for details.
+rust-version = "1.76"
+
 [[test]]
 name = "naga-test"
 path = "tests/root.rs"

From 8231d31eab3f5c386698b21df14a2054c8610d2d Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Mon, 12 Aug 2024 13:08:10 +0100
Subject: [PATCH 166/226] chore: update out-of-date docs build fix expectation

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8034cd47cc..1cb6a28a19 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -37,7 +37,7 @@ env:
   #
   # This needs to be newer to work around https://github.com/gfx-rs/wgpu/issues/4905.
   #
-  # Once 1.76 coes out, we can use that instead of nightly.
+  # Once this fix hits stable Rust, we can use that instead of nightly.
   DOCS_RUST_VERSION: "nightly-2023-12-17"
   # This is the MSRV used by `wgpu` itself and all surrounding infrastructure.
   REPO_MSRV: "1.76"

From 5533c377865731100ef716358c5d74bce5e54e74 Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Mon, 12 Aug 2024 13:21:52 +0100
Subject: [PATCH 167/226] chore: satisfy `clippy::collapsible_match`

---
 naga/src/back/glsl/mod.rs     | 15 +++++++--------
 naga/src/valid/analyzer.rs    |  7 +++----
 naga/src/valid/mod.rs         |  7 +++----
 wgpu-hal/examples/raw-gles.rs | 25 +++++++++++++------------
 4 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs
index 99b0fc7150..d1c0fd75ec 100644
--- a/naga/src/back/glsl/mod.rs
+++ b/naga/src/back/glsl/mod.rs
@@ -1313,14 +1313,13 @@ impl<'a, W: Write> Writer<'a, W> {
                     crate::MathFunction::Dot => {
                         // if the expression is a Dot product with integer arguments,
                         // then the args needs baking as well
-                        if let TypeInner::Scalar(crate::Scalar { kind, .. }) = *inner {
-                            match kind {
-                                crate::ScalarKind::Sint | crate::ScalarKind::Uint => {
-                                    self.need_bake_expressions.insert(arg);
-                                    self.need_bake_expressions.insert(arg1.unwrap());
-                                }
-                                _ => {}
-                            }
+                        if let TypeInner::Scalar(crate::Scalar {
+                            kind: crate::ScalarKind::Sint | crate::ScalarKind::Uint,
+                            ..
+                        }) = *inner
+                        {
+                            self.need_bake_expressions.insert(arg);
+                            self.need_bake_expressions.insert(arg1.unwrap());
                         }
                     }
                     crate::MathFunction::Pack4xI8
diff --git a/naga/src/valid/analyzer.rs b/naga/src/valid/analyzer.rs
index 0322200493..89b3da6a4c 100644
--- a/naga/src/valid/analyzer.rs
+++ b/naga/src/valid/analyzer.rs
@@ -593,15 +593,14 @@ impl FunctionInfo {
             E::FunctionArgument(index) => {
                 let arg = &resolve_context.arguments[index as usize];
                 let uniform = match arg.binding {
-                    Some(crate::Binding::BuiltIn(built_in)) => match built_in {
+                    Some(crate::Binding::BuiltIn(
                         // per-polygon built-ins are uniform
                         crate::BuiltIn::FrontFacing
                         // per-work-group built-ins are uniform
                         | crate::BuiltIn::WorkGroupId
                         | crate::BuiltIn::WorkGroupSize
-                        | crate::BuiltIn::NumWorkGroups => true,
-                        _ => false,
-                    },
+                        | crate::BuiltIn::NumWorkGroups)
+                    ) => true,
                     // only flat inputs are uniform
                     Some(crate::Binding::Location {
                         interpolation: Some(crate::Interpolation::Flat),
diff --git a/naga/src/valid/mod.rs b/naga/src/valid/mod.rs
index d9a986df7e..c314ec2ac8 100644
--- a/naga/src/valid/mod.rs
+++ b/naga/src/valid/mod.rs
@@ -533,14 +533,13 @@ impl Validator {
 
         let decl_ty = &gctx.types[o.ty].inner;
         match decl_ty {
-            &crate::TypeInner::Scalar(scalar) => match scalar {
+            &crate::TypeInner::Scalar(
                 crate::Scalar::BOOL
                 | crate::Scalar::I32
                 | crate::Scalar::U32
                 | crate::Scalar::F32
-                | crate::Scalar::F64 => {}
-                _ => return Err(OverrideError::TypeNotScalar),
-            },
+                | crate::Scalar::F64,
+            ) => {}
             _ => return Err(OverrideError::TypeNotScalar),
         }
 
diff --git a/wgpu-hal/examples/raw-gles.rs b/wgpu-hal/examples/raw-gles.rs
index ceab5b065b..06df610658 100644
--- a/wgpu-hal/examples/raw-gles.rs
+++ b/wgpu-hal/examples/raw-gles.rs
@@ -49,18 +49,19 @@ fn main() {
 
         match event {
             Event::LoopDestroyed => (),
-            Event::WindowEvent { event, .. } => match event {
-                WindowEvent::CloseRequested
-                | WindowEvent::KeyboardInput {
-                    input:
-                        KeyboardInput {
-                            virtual_keycode: Some(VirtualKeyCode::Escape),
-                            ..
-                        },
-                    ..
-                } => *control_flow = ControlFlow::Exit,
-                _ => (),
-            },
+            Event::WindowEvent {
+                event:
+                    WindowEvent::CloseRequested
+                    | WindowEvent::KeyboardInput {
+                        input:
+                            KeyboardInput {
+                                virtual_keycode: Some(VirtualKeyCode::Escape),
+                                ..
+                            },
+                        ..
+                    },
+                ..
+            } => *control_flow = ControlFlow::Exit,
             _ => (),
         }
     });

From c6a3d927345a81eeb13e9e3720002c4cc6f25e54 Mon Sep 17 00:00:00 2001
From: Samson <16504129+sagudev@users.noreply.github.com>
Date: Tue, 13 Aug 2024 10:28:55 +0200
Subject: [PATCH 168/226] `Rg11b10Float` -> `Rg11b10UFloat` and deduplicate
 entries in `TEXTURE_FORMAT_LIST` (#6108)

* Resync `TEXTURE_FORMAT_LIST` to match `TextureFormat`

Signed-off-by: sagudev <16504129+sagudev@users.noreply.github.com>

* `Rg11b10Float` -> `Rg11b10UFloat`

Signed-off-by: sagudev <16504129+sagudev@users.noreply.github.com>

* Add changelog entry

Signed-off-by: sagudev <16504129+sagudev@users.noreply.github.com>

---------

Signed-off-by: sagudev <16504129+sagudev@users.noreply.github.com>
---
 CHANGELOG.md                        |  1 +
 naga/src/back/glsl/features.rs      |  2 +-
 naga/src/back/glsl/mod.rs           |  2 +-
 naga/src/back/hlsl/conv.rs          |  2 +-
 naga/src/back/spv/instructions.rs   |  2 +-
 naga/src/back/wgsl/writer.rs        |  2 +-
 naga/src/front/glsl/parser/types.rs |  2 +-
 naga/src/front/spv/convert.rs       |  2 +-
 naga/src/front/wgsl/parse/conv.rs   |  2 +-
 naga/src/front/wgsl/to_wgsl.rs      |  2 +-
 naga/src/lib.rs                     |  2 +-
 naga/src/proc/mod.rs                |  2 +-
 tests/tests/clear_texture.rs        |  2 +-
 wgpu-core/src/validation.rs         |  6 +++---
 wgpu-hal/src/auxil/dxgi/conv.rs     |  2 +-
 wgpu-hal/src/gles/adapter.rs        |  2 +-
 wgpu-hal/src/gles/conv.rs           |  2 +-
 wgpu-hal/src/metal/adapter.rs       |  4 ++--
 wgpu-hal/src/vulkan/conv.rs         |  2 +-
 wgpu-info/src/texture.rs            | 26 ++++++++++++++++++--------
 wgpu-types/src/lib.rs               | 28 ++++++++++++++--------------
 wgpu/src/backend/webgpu.rs          |  2 +-
 22 files changed, 55 insertions(+), 44 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5632defd47..9f0366ea2c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -62,6 +62,7 @@ Bottom level categories:
 ### Changes
 
 - Reduce the amount of debug and trace logs emitted by wgpu-core and wgpu-hal. By @nical in [#6065](https://github.com/gfx-rs/wgpu/issues/6065)
+- `Rg11b10Float` is renamed to `Rg11b10UFloat`. By @sagudev in [#6108](https://github.com/gfx-rs/wgpu/pull/6108)
 
 ### Dependency Updates
 
diff --git a/naga/src/back/glsl/features.rs b/naga/src/back/glsl/features.rs
index b6ad1738fe..b22bcbe651 100644
--- a/naga/src/back/glsl/features.rs
+++ b/naga/src/back/glsl/features.rs
@@ -399,7 +399,7 @@ impl<'a, W> Writer<'a, W> {
                             | StorageFormat::Rg16Float
                             | StorageFormat::Rgb10a2Uint
                             | StorageFormat::Rgb10a2Unorm
-                            | StorageFormat::Rg11b10Float
+                            | StorageFormat::Rg11b10UFloat
                             | StorageFormat::Rg32Uint
                             | StorageFormat::Rg32Sint
                             | StorageFormat::Rg32Float => {
diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs
index d1c0fd75ec..4c7f8b3251 100644
--- a/naga/src/back/glsl/mod.rs
+++ b/naga/src/back/glsl/mod.rs
@@ -4820,7 +4820,7 @@ fn glsl_storage_format(format: crate::StorageFormat) -> Result<&'static str, Err
         Sf::Rgba8Sint => "rgba8i",
         Sf::Rgb10a2Uint => "rgb10_a2ui",
         Sf::Rgb10a2Unorm => "rgb10_a2",
-        Sf::Rg11b10Float => "r11f_g11f_b10f",
+        Sf::Rg11b10UFloat => "r11f_g11f_b10f",
         Sf::Rg32Uint => "rg32ui",
         Sf::Rg32Sint => "rg32i",
         Sf::Rg32Float => "rg32f",
diff --git a/naga/src/back/hlsl/conv.rs b/naga/src/back/hlsl/conv.rs
index 7d15f43f6c..6c0daf4762 100644
--- a/naga/src/back/hlsl/conv.rs
+++ b/naga/src/back/hlsl/conv.rs
@@ -132,7 +132,7 @@ impl crate::StorageFormat {
             Self::Rg8Sint | Self::Rg16Sint => "int2",
             Self::Rg8Uint | Self::Rg16Uint => "uint2",
 
-            Self::Rg11b10Float => "float3",
+            Self::Rg11b10UFloat => "float3",
 
             Self::Rgba16Float | Self::R32Float | Self::Rg32Float | Self::Rgba32Float => "float4",
             Self::Rgba8Unorm | Self::Bgra8Unorm | Self::Rgba16Unorm | Self::Rgb10a2Unorm => {
diff --git a/naga/src/back/spv/instructions.rs b/naga/src/back/spv/instructions.rs
index df2774ab9c..9029c973de 100644
--- a/naga/src/back/spv/instructions.rs
+++ b/naga/src/back/spv/instructions.rs
@@ -1170,7 +1170,7 @@ impl From<crate::StorageFormat> for spirv::ImageFormat {
             Sf::Bgra8Unorm => Self::Unknown,
             Sf::Rgb10a2Uint => Self::Rgb10a2ui,
             Sf::Rgb10a2Unorm => Self::Rgb10A2,
-            Sf::Rg11b10Float => Self::R11fG11fB10f,
+            Sf::Rg11b10UFloat => Self::R11fG11fB10f,
             Sf::Rg32Uint => Self::Rg32ui,
             Sf::Rg32Sint => Self::Rg32i,
             Sf::Rg32Float => Self::Rg32f,
diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs
index 6a069113eb..e5a5e5f647 100644
--- a/naga/src/back/wgsl/writer.rs
+++ b/naga/src/back/wgsl/writer.rs
@@ -2015,7 +2015,7 @@ const fn storage_format_str(format: crate::StorageFormat) -> &'static str {
         Sf::Bgra8Unorm => "bgra8unorm",
         Sf::Rgb10a2Uint => "rgb10a2uint",
         Sf::Rgb10a2Unorm => "rgb10a2unorm",
-        Sf::Rg11b10Float => "rg11b10float",
+        Sf::Rg11b10UFloat => "rg11b10float",
         Sf::Rg32Uint => "rg32uint",
         Sf::Rg32Sint => "rg32sint",
         Sf::Rg32Float => "rg32float",
diff --git a/naga/src/front/glsl/parser/types.rs b/naga/src/front/glsl/parser/types.rs
index 1b612b298d..d22387f375 100644
--- a/naga/src/front/glsl/parser/types.rs
+++ b/naga/src/front/glsl/parser/types.rs
@@ -397,7 +397,7 @@ fn map_image_format(word: &str) -> Option<crate::StorageFormat> {
         "rgba16f" => Sf::Rgba16Float,
         "rg32f" => Sf::Rg32Float,
         "rg16f" => Sf::Rg16Float,
-        "r11f_g11f_b10f" => Sf::Rg11b10Float,
+        "r11f_g11f_b10f" => Sf::Rg11b10UFloat,
         "r32f" => Sf::R32Float,
         "r16f" => Sf::R16Float,
         "rgba16" => Sf::Rgba16Unorm,
diff --git a/naga/src/front/spv/convert.rs b/naga/src/front/spv/convert.rs
index a6bf0e0451..88d171b5b7 100644
--- a/naga/src/front/spv/convert.rs
+++ b/naga/src/front/spv/convert.rs
@@ -104,7 +104,7 @@ pub(super) fn map_image_format(word: spirv::Word) -> Result<crate::StorageFormat
         Some(spirv::ImageFormat::Rgba8i) => Ok(crate::StorageFormat::Rgba8Sint),
         Some(spirv::ImageFormat::Rgb10a2ui) => Ok(crate::StorageFormat::Rgb10a2Uint),
         Some(spirv::ImageFormat::Rgb10A2) => Ok(crate::StorageFormat::Rgb10a2Unorm),
-        Some(spirv::ImageFormat::R11fG11fB10f) => Ok(crate::StorageFormat::Rg11b10Float),
+        Some(spirv::ImageFormat::R11fG11fB10f) => Ok(crate::StorageFormat::Rg11b10UFloat),
         Some(spirv::ImageFormat::Rg32ui) => Ok(crate::StorageFormat::Rg32Uint),
         Some(spirv::ImageFormat::Rg32i) => Ok(crate::StorageFormat::Rg32Sint),
         Some(spirv::ImageFormat::Rg32f) => Ok(crate::StorageFormat::Rg32Float),
diff --git a/naga/src/front/wgsl/parse/conv.rs b/naga/src/front/wgsl/parse/conv.rs
index 80f05db59a..4718b85e5e 100644
--- a/naga/src/front/wgsl/parse/conv.rs
+++ b/naga/src/front/wgsl/parse/conv.rs
@@ -92,7 +92,7 @@ pub fn map_storage_format(word: &str, span: Span) -> Result<crate::StorageFormat
         "rgba8sint" => Sf::Rgba8Sint,
         "rgb10a2uint" => Sf::Rgb10a2Uint,
         "rgb10a2unorm" => Sf::Rgb10a2Unorm,
-        "rg11b10float" => Sf::Rg11b10Float,
+        "rg11b10float" => Sf::Rg11b10UFloat,
         "rg32uint" => Sf::Rg32Uint,
         "rg32sint" => Sf::Rg32Sint,
         "rg32float" => Sf::Rg32Float,
diff --git a/naga/src/front/wgsl/to_wgsl.rs b/naga/src/front/wgsl/to_wgsl.rs
index 63bc9f7317..ec3af8edd4 100644
--- a/naga/src/front/wgsl/to_wgsl.rs
+++ b/naga/src/front/wgsl/to_wgsl.rs
@@ -175,7 +175,7 @@ impl crate::StorageFormat {
             Sf::Bgra8Unorm => "bgra8unorm",
             Sf::Rgb10a2Uint => "rgb10a2uint",
             Sf::Rgb10a2Unorm => "rgb10a2unorm",
-            Sf::Rg11b10Float => "rg11b10float",
+            Sf::Rg11b10UFloat => "rg11b10float",
             Sf::Rg32Uint => "rg32uint",
             Sf::Rg32Sint => "rg32sint",
             Sf::Rg32Float => "rg32float",
diff --git a/naga/src/lib.rs b/naga/src/lib.rs
index 4f80345bba..60e5a1f47b 100644
--- a/naga/src/lib.rs
+++ b/naga/src/lib.rs
@@ -615,7 +615,7 @@ pub enum StorageFormat {
     // Packed 32-bit formats
     Rgb10a2Uint,
     Rgb10a2Unorm,
-    Rg11b10Float,
+    Rg11b10UFloat,
 
     // 64-bit formats
     Rg32Uint,
diff --git a/naga/src/proc/mod.rs b/naga/src/proc/mod.rs
index 41273c5c72..642c016615 100644
--- a/naga/src/proc/mod.rs
+++ b/naga/src/proc/mod.rs
@@ -48,7 +48,7 @@ impl From<super::StorageFormat> for super::ScalarKind {
             Sf::Bgra8Unorm => Sk::Float,
             Sf::Rgb10a2Uint => Sk::Uint,
             Sf::Rgb10a2Unorm => Sk::Float,
-            Sf::Rg11b10Float => Sk::Float,
+            Sf::Rg11b10UFloat => Sk::Float,
             Sf::Rg32Uint => Sk::Uint,
             Sf::Rg32Sint => Sk::Sint,
             Sf::Rg32Float => Sk::Float,
diff --git a/tests/tests/clear_texture.rs b/tests/tests/clear_texture.rs
index 5e7d86ed88..f62e2be219 100644
--- a/tests/tests/clear_texture.rs
+++ b/tests/tests/clear_texture.rs
@@ -26,7 +26,7 @@ static TEXTURE_FORMATS_UNCOMPRESSED_GLES_COMPAT: &[wgpu::TextureFormat] = &[
     wgpu::TextureFormat::Bgra8UnormSrgb,
     wgpu::TextureFormat::Rgb10a2Uint,
     wgpu::TextureFormat::Rgb10a2Unorm,
-    wgpu::TextureFormat::Rg11b10Float,
+    wgpu::TextureFormat::Rg11b10UFloat,
     wgpu::TextureFormat::Rg32Uint,
     wgpu::TextureFormat::Rg32Sint,
     wgpu::TextureFormat::Rg32Float,
diff --git a/wgpu-core/src/validation.rs b/wgpu-core/src/validation.rs
index 8fc6340eb1..ea2608d755 100644
--- a/wgpu-core/src/validation.rs
+++ b/wgpu-core/src/validation.rs
@@ -275,7 +275,7 @@ fn map_storage_format_to_naga(format: wgt::TextureFormat) -> Option<naga::Storag
 
         Tf::Rgb10a2Uint => Sf::Rgb10a2Uint,
         Tf::Rgb10a2Unorm => Sf::Rgb10a2Unorm,
-        Tf::Rg11b10Float => Sf::Rg11b10Float,
+        Tf::Rg11b10UFloat => Sf::Rg11b10UFloat,
 
         Tf::Rg32Uint => Sf::Rg32Uint,
         Tf::Rg32Sint => Sf::Rg32Sint,
@@ -331,7 +331,7 @@ fn map_storage_format_from_naga(format: naga::StorageFormat) -> wgt::TextureForm
 
         Sf::Rgb10a2Uint => Tf::Rgb10a2Uint,
         Sf::Rgb10a2Unorm => Tf::Rgb10a2Unorm,
-        Sf::Rg11b10Float => Tf::Rg11b10Float,
+        Sf::Rg11b10UFloat => Tf::Rg11b10UFloat,
 
         Sf::Rg32Uint => Tf::Rg32Uint,
         Sf::Rg32Sint => Tf::Rg32Sint,
@@ -658,7 +658,7 @@ impl NumericType {
             Tf::Rgba8Sint | Tf::Rgba16Sint | Tf::Rgba32Sint => {
                 (NumericDimension::Vector(Vs::Quad), Scalar::I32)
             }
-            Tf::Rg11b10Float => (NumericDimension::Vector(Vs::Tri), Scalar::F32),
+            Tf::Rg11b10UFloat => (NumericDimension::Vector(Vs::Tri), Scalar::F32),
             Tf::Stencil8
             | Tf::Depth16Unorm
             | Tf::Depth32Float
diff --git a/wgpu-hal/src/auxil/dxgi/conv.rs b/wgpu-hal/src/auxil/dxgi/conv.rs
index e5162362f7..d84e082df1 100644
--- a/wgpu-hal/src/auxil/dxgi/conv.rs
+++ b/wgpu-hal/src/auxil/dxgi/conv.rs
@@ -44,7 +44,7 @@ pub fn map_texture_format_failable(format: wgt::TextureFormat) -> Option<dxgifor
         Tf::Rgb9e5Ufloat => DXGI_FORMAT_R9G9B9E5_SHAREDEXP,
         Tf::Rgb10a2Uint => DXGI_FORMAT_R10G10B10A2_UINT,
         Tf::Rgb10a2Unorm => DXGI_FORMAT_R10G10B10A2_UNORM,
-        Tf::Rg11b10Float => DXGI_FORMAT_R11G11B10_FLOAT,
+        Tf::Rg11b10UFloat => DXGI_FORMAT_R11G11B10_FLOAT,
         Tf::Rg32Uint => DXGI_FORMAT_R32G32_UINT,
         Tf::Rg32Sint => DXGI_FORMAT_R32G32_SINT,
         Tf::Rg32Float => DXGI_FORMAT_R32G32_FLOAT,
diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs
index bd2410e273..e7ecacebe0 100644
--- a/wgpu-hal/src/gles/adapter.rs
+++ b/wgpu-hal/src/gles/adapter.rs
@@ -1097,7 +1097,7 @@ impl crate::Adapter for super::Adapter {
             Tf::Rgba8Sint => renderable | storage,
             Tf::Rgb10a2Uint => renderable,
             Tf::Rgb10a2Unorm => filterable_renderable,
-            Tf::Rg11b10Float => filterable | float_renderable,
+            Tf::Rg11b10UFloat => filterable | float_renderable,
             Tf::Rg32Uint => renderable,
             Tf::Rg32Sint => renderable,
             Tf::Rg32Float => unfilterable | float_renderable | texture_float_linear,
diff --git a/wgpu-hal/src/gles/conv.rs b/wgpu-hal/src/gles/conv.rs
index a6c924f162..8733d54957 100644
--- a/wgpu-hal/src/gles/conv.rs
+++ b/wgpu-hal/src/gles/conv.rs
@@ -45,7 +45,7 @@ impl super::AdapterShared {
                 glow::RGBA,
                 glow::UNSIGNED_INT_2_10_10_10_REV,
             ),
-            Tf::Rg11b10Float => (
+            Tf::Rg11b10UFloat => (
                 glow::R11F_G11F_B10F,
                 glow::RGB,
                 glow::UNSIGNED_INT_10F_11F_11F_REV,
diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs
index 7e0043790c..5ef6d358b8 100644
--- a/wgpu-hal/src/metal/adapter.rs
+++ b/wgpu-hal/src/metal/adapter.rs
@@ -178,7 +178,7 @@ impl crate::Adapter for super::Adapter {
                 flags.set(Tfc::STORAGE, pc.format_rgb10a2_unorm_all);
                 flags
             }
-            Tf::Rg11b10Float => {
+            Tf::Rg11b10UFloat => {
                 let mut flags = all_caps;
                 flags.set(Tfc::STORAGE, pc.format_rg11b10_all);
                 flags
@@ -1036,7 +1036,7 @@ impl super::PrivateCapabilities {
             Tf::Rgba8Sint => RGBA8Sint,
             Tf::Rgb10a2Uint => RGB10A2Uint,
             Tf::Rgb10a2Unorm => RGB10A2Unorm,
-            Tf::Rg11b10Float => RG11B10Float,
+            Tf::Rg11b10UFloat => RG11B10Float,
             Tf::Rg32Uint => RG32Uint,
             Tf::Rg32Sint => RG32Sint,
             Tf::Rg32Float => RG32Float,
diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs
index fe284f32a9..87757c42d5 100644
--- a/wgpu-hal/src/vulkan/conv.rs
+++ b/wgpu-hal/src/vulkan/conv.rs
@@ -36,7 +36,7 @@ impl super::PrivateCapabilities {
             Tf::Rgba8Sint => F::R8G8B8A8_SINT,
             Tf::Rgb10a2Uint => F::A2B10G10R10_UINT_PACK32,
             Tf::Rgb10a2Unorm => F::A2B10G10R10_UNORM_PACK32,
-            Tf::Rg11b10Float => F::B10G11R11_UFLOAT_PACK32,
+            Tf::Rg11b10UFloat => F::B10G11R11_UFLOAT_PACK32,
             Tf::Rg32Uint => F::R32G32_UINT,
             Tf::Rg32Sint => F::R32G32_SINT,
             Tf::Rg32Float => F::R32G32_SFLOAT,
diff --git a/wgpu-info/src/texture.rs b/wgpu-info/src/texture.rs
index b6f79c0482..40771d067d 100644
--- a/wgpu-info/src/texture.rs
+++ b/wgpu-info/src/texture.rs
@@ -1,6 +1,6 @@
 // Lets keep these on one line
 #[rustfmt::skip]
-pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 119] = [
+pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 116] = [
     wgpu::TextureFormat::R8Unorm,
     wgpu::TextureFormat::R8Snorm,
     wgpu::TextureFormat::R8Uint,
@@ -29,9 +29,10 @@ pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 119] = [
     wgpu::TextureFormat::Rgba8Sint,
     wgpu::TextureFormat::Bgra8Unorm,
     wgpu::TextureFormat::Bgra8UnormSrgb,
+    wgpu::TextureFormat::Rgb9e5Ufloat,
     wgpu::TextureFormat::Rgb10a2Uint,
     wgpu::TextureFormat::Rgb10a2Unorm,
-    wgpu::TextureFormat::Rg11b10Float,
+    wgpu::TextureFormat::Rg11b10UFloat,
     wgpu::TextureFormat::Rg32Uint,
     wgpu::TextureFormat::Rg32Sint,
     wgpu::TextureFormat::Rg32Float,
@@ -45,14 +46,10 @@ pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 119] = [
     wgpu::TextureFormat::Rgba32Float,
     wgpu::TextureFormat::Stencil8,
     wgpu::TextureFormat::Depth16Unorm,
-    wgpu::TextureFormat::Depth32Float,
-    wgpu::TextureFormat::Depth32FloatStencil8,
     wgpu::TextureFormat::Depth24Plus,
     wgpu::TextureFormat::Depth24PlusStencil8,
-    wgpu::TextureFormat::Rgb9e5Ufloat,
-    wgpu::TextureFormat::Rgb10a2Uint,
-    wgpu::TextureFormat::Rgb10a2Unorm,
-    wgpu::TextureFormat::Rg11b10Float,
+    wgpu::TextureFormat::Depth32Float,
+    wgpu::TextureFormat::Depth32FloatStencil8,
     wgpu::TextureFormat::NV12,
     wgpu::TextureFormat::Bc1RgbaUnorm,
     wgpu::TextureFormat::Bc1RgbaUnormSrgb,
@@ -122,6 +119,19 @@ pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 119] = [
     wgpu::TextureFormat::Astc { block: wgpu::AstcBlock::B12x12, channel: wgpu::AstcChannel::Hdr },
 ];
 
+#[test]
+fn test_uniqueness_in_texture_format_list() {
+    use std::collections::HashSet;
+
+    let uniq: HashSet<wgpu::TextureFormat> = TEXTURE_FORMAT_LIST.into_iter().collect();
+    let mut duplicated = TEXTURE_FORMAT_LIST.to_vec();
+    uniq.iter().for_each(|u| {
+        let first_occurrence = duplicated.iter().position(|el| u == el).unwrap();
+        duplicated.remove(first_occurrence);
+    });
+    assert_eq!(duplicated, vec![]);
+}
+
 pub fn max_texture_format_string_size() -> usize {
     TEXTURE_FORMAT_LIST
         .into_iter()
diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
index 6cf007e2f9..de64dfa120 100644
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@@ -399,7 +399,7 @@ bitflags::bitflags! {
         const SHADER_F16 = 1 << 8;
 
 
-        /// Allows for usage of textures of format [`TextureFormat::Rg11b10Float`] as a render target
+        /// Allows for usage of textures of format [`TextureFormat::Rg11b10UFloat`] as a render target
         ///
         /// Supported platforms:
         /// - Vulkan
@@ -2515,7 +2515,7 @@ pub enum TextureFormat {
     /// Red, green, blue, and alpha channels. 10 bit integer for RGB channels, 2 bit integer for alpha channel. [0, 1023] ([0, 3] for alpha) converted to/from float [0, 1] in shader.
     Rgb10a2Unorm,
     /// Red, green, and blue channels. 11 bit float with no sign bit for RG channels. 10 bit float with no sign bit for blue channel. Float in shader.
-    Rg11b10Float,
+    Rg11b10UFloat,
 
     // Normal 64 bit formats
     /// Red and green channels. 32 bit integer per channel. Unsigned in shader.
@@ -2803,7 +2803,7 @@ impl<'de> Deserialize<'de> for TextureFormat {
                     "bgra8unorm-srgb" => TextureFormat::Bgra8UnormSrgb,
                     "rgb10a2uint" => TextureFormat::Rgb10a2Uint,
                     "rgb10a2unorm" => TextureFormat::Rgb10a2Unorm,
-                    "rg11b10ufloat" => TextureFormat::Rg11b10Float,
+                    "rg11b10ufloat" => TextureFormat::Rg11b10UFloat,
                     "rg32uint" => TextureFormat::Rg32Uint,
                     "rg32sint" => TextureFormat::Rg32Sint,
                     "rg32float" => TextureFormat::Rg32Float,
@@ -2931,7 +2931,7 @@ impl Serialize for TextureFormat {
             TextureFormat::Bgra8UnormSrgb => "bgra8unorm-srgb",
             TextureFormat::Rgb10a2Uint => "rgb10a2uint",
             TextureFormat::Rgb10a2Unorm => "rgb10a2unorm",
-            TextureFormat::Rg11b10Float => "rg11b10ufloat",
+            TextureFormat::Rg11b10UFloat => "rg11b10ufloat",
             TextureFormat::Rg32Uint => "rg32uint",
             TextureFormat::Rg32Sint => "rg32sint",
             TextureFormat::Rg32Float => "rg32float",
@@ -3161,7 +3161,7 @@ impl TextureFormat {
             | Self::Rgb9e5Ufloat
             | Self::Rgb10a2Uint
             | Self::Rgb10a2Unorm
-            | Self::Rg11b10Float
+            | Self::Rg11b10UFloat
             | Self::Rg32Uint
             | Self::Rg32Sint
             | Self::Rg32Float
@@ -3266,7 +3266,7 @@ impl TextureFormat {
             | Self::Rgb9e5Ufloat
             | Self::Rgb10a2Uint
             | Self::Rgb10a2Unorm
-            | Self::Rg11b10Float
+            | Self::Rg11b10UFloat
             | Self::Rg32Uint
             | Self::Rg32Sint
             | Self::Rg32Float
@@ -3384,7 +3384,7 @@ impl TextureFormat {
             Self::Bgra8UnormSrgb =>       (msaa_resolve, attachment),
             Self::Rgb10a2Uint =>          (        msaa, attachment),
             Self::Rgb10a2Unorm =>         (msaa_resolve, attachment),
-            Self::Rg11b10Float =>         (        msaa,   rg11b10f),
+            Self::Rg11b10UFloat =>         (        msaa,   rg11b10f),
             Self::Rg32Uint =>             (        noaa,  all_flags),
             Self::Rg32Sint =>             (        noaa,  all_flags),
             Self::Rg32Float =>            (        noaa,  all_flags),
@@ -3494,7 +3494,7 @@ impl TextureFormat {
             | Self::Rg16Float
             | Self::Rgba16Float
             | Self::Rgb10a2Unorm
-            | Self::Rg11b10Float => Some(float),
+            | Self::Rg11b10UFloat => Some(float),
 
             Self::R32Float | Self::Rg32Float | Self::Rgba32Float => Some(float32_sample_type),
 
@@ -3624,7 +3624,7 @@ impl TextureFormat {
             | Self::Rg16Sint
             | Self::Rg16Float => Some(4),
             Self::R32Uint | Self::R32Sint | Self::R32Float => Some(4),
-            Self::Rgb9e5Ufloat | Self::Rgb10a2Uint | Self::Rgb10a2Unorm | Self::Rg11b10Float => {
+            Self::Rgb9e5Ufloat | Self::Rgb10a2Uint | Self::Rgb10a2Unorm | Self::Rg11b10UFloat => {
                 Some(4)
             }
 
@@ -3726,7 +3726,7 @@ impl TextureFormat {
             | Self::Rg32Float
             | Self::Rgb10a2Uint
             | Self::Rgb10a2Unorm
-            | Self::Rg11b10Float => Some(8),
+            | Self::Rg11b10UFloat => Some(8),
             Self::Rgba32Uint | Self::Rgba32Sint | Self::Rgba32Float => Some(16),
             Self::Stencil8
             | Self::Depth16Unorm
@@ -3808,7 +3808,7 @@ impl TextureFormat {
             | Self::Rgba32Float
             | Self::Rgb10a2Uint
             | Self::Rgb10a2Unorm
-            | Self::Rg11b10Float => Some(4),
+            | Self::Rg11b10UFloat => Some(4),
             Self::Stencil8
             | Self::Depth16Unorm
             | Self::Depth24Plus
@@ -3897,7 +3897,7 @@ impl TextureFormat {
             | Self::Rgba32Sint
             | Self::Rgba32Float => 4,
 
-            Self::Rgb9e5Ufloat | Self::Rg11b10Float => 3,
+            Self::Rgb9e5Ufloat | Self::Rg11b10UFloat => 3,
             Self::Rgb10a2Uint | Self::Rgb10a2Unorm => 4,
 
             Self::Stencil8 | Self::Depth16Unorm | Self::Depth24Plus | Self::Depth32Float => 1,
@@ -4112,7 +4112,7 @@ fn texture_format_serialize() {
         "\"rgb10a2unorm\"".to_string()
     );
     assert_eq!(
-        serde_json::to_string(&TextureFormat::Rg11b10Float).unwrap(),
+        serde_json::to_string(&TextureFormat::Rg11b10UFloat).unwrap(),
         "\"rg11b10ufloat\"".to_string()
     );
     assert_eq!(
@@ -4409,7 +4409,7 @@ fn texture_format_deserialize() {
     );
     assert_eq!(
         serde_json::from_str::<TextureFormat>("\"rg11b10ufloat\"").unwrap(),
-        TextureFormat::Rg11b10Float
+        TextureFormat::Rg11b10UFloat
     );
     assert_eq!(
         serde_json::from_str::<TextureFormat>("\"rg32uint\"").unwrap(),
diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs
index d008093132..6865c439a1 100644
--- a/wgpu/src/backend/webgpu.rs
+++ b/wgpu/src/backend/webgpu.rs
@@ -261,7 +261,7 @@ fn map_texture_format(texture_format: wgt::TextureFormat) -> webgpu_sys::GpuText
             unimplemented!("Current version of web_sys is missing {texture_format:?}")
         }
         TextureFormat::Rgb10a2Unorm => tf::Rgb10a2unorm,
-        TextureFormat::Rg11b10Float => tf::Rg11b10ufloat,
+        TextureFormat::Rg11b10UFloat => tf::Rg11b10ufloat,
         // 64-bit formats
         TextureFormat::Rg32Uint => tf::Rg32uint,
         TextureFormat::Rg32Sint => tf::Rg32sint,

From d2508d9ad67562545ef41d72d860aabd6b833d6d Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Thu, 11 Jul 2024 00:35:36 +0200
Subject: [PATCH 169/226] introduce DynResource & DynBuffer as first user

---
 wgpu-hal/src/dx12/mod.rs    | 21 ++++++++++
 wgpu-hal/src/dynamic/mod.rs | 83 +++++++++++++++++++++++++++++++++++++
 wgpu-hal/src/empty.rs       |  4 ++
 wgpu-hal/src/gles/mod.rs    | 20 +++++++++
 wgpu-hal/src/lib.rs         |  7 +++-
 wgpu-hal/src/metal/mod.rs   | 20 +++++++++
 wgpu-hal/src/vulkan/mod.rs  | 21 ++++++++++
 7 files changed, 175 insertions(+), 1 deletion(-)
 create mode 100644 wgpu-hal/src/dynamic/mod.rs

diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index 8d08b8f72d..165a970efd 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -87,6 +87,25 @@ impl crate::Api for Api {
     type AccelerationStructure = AccelerationStructure;
 }
 
+crate::impl_dyn_resource!(
+    BindGroup,
+    BindGroupLayout,
+    Buffer,
+    CommandBuffer,
+    CommandEncoder,
+    ComputePipeline,
+    Fence,
+    PipelineCache,
+    PipelineLayout,
+    QuerySet,
+    RenderPipeline,
+    Sampler,
+    ShaderModule,
+    Surface,
+    Texture,
+    TextureView
+);
+
 // Limited by D3D12's root signature size of 64. Each element takes 1 or 2 entries.
 const MAX_ROOT_ELEMENTS: usize = 64;
 const ZERO_BUFFER_SIZE: wgt::BufferAddress = 256 << 10;
@@ -407,6 +426,8 @@ pub struct Buffer {
 unsafe impl Send for Buffer {}
 unsafe impl Sync for Buffer {}
 
+impl crate::DynBuffer for Buffer {}
+
 impl crate::BufferBinding<'_, Api> {
     fn resolve_size(&self) -> wgt::BufferAddress {
         match self.size {
diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
new file mode 100644
index 0000000000..9b0ffe6d1c
--- /dev/null
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -0,0 +1,83 @@
+use std::any::Any;
+
+use wgt::WasmNotSendSync;
+
+/// Base trait for all resources, allows downcasting via [`Any`].
+pub trait DynResource: Any + WasmNotSendSync + 'static {
+    fn as_any(&self) -> &dyn Any;
+    fn as_any_mut(&mut self) -> &mut dyn Any;
+}
+
+/// Utility macro for implementing `DynResource` for a list of types.
+macro_rules! impl_dyn_resource {
+    ($($type:ty),*) => {
+        $(
+            impl crate::DynResource for $type {
+                fn as_any(&self) -> &dyn ::std::any::Any {
+                    self
+                }
+
+                fn as_any_mut(&mut self) -> &mut dyn ::std::any::Any {
+                    self
+                }
+            }
+        )*
+    };
+}
+pub(crate) use impl_dyn_resource;
+
+/// Extension trait for `DynResource` used by implementations of various dynamic resource traits.
+trait DynResourceExt {
+    /// # Panics
+    ///
+    /// - Panics if `self` is not downcastable to `T`.
+    fn expect_downcast_ref<T: DynResource>(&self) -> &T;
+    /// # Panics
+    ///
+    /// - Panics if `self` is not downcastable to `T`.
+    fn expect_downcast_mut<T: DynResource>(&mut self) -> &mut T;
+
+    /// Unboxes a `Box<dyn DynResource>` to a concrete type.
+    ///
+    /// # Safety
+    ///
+    /// - `self` must be the correct concrete type.
+    unsafe fn unbox<T: DynResource + 'static>(self: Box<Self>) -> T;
+}
+
+impl<R: DynResource + ?Sized> DynResourceExt for R {
+    fn expect_downcast_ref<'a, T: DynResource>(&'a self) -> &'a T {
+        self.as_any()
+            .downcast_ref()
+            .expect("Resource doesn't have the expected backend type.")
+    }
+
+    fn expect_downcast_mut<'a, T: DynResource>(&'a mut self) -> &'a mut T {
+        self.as_any_mut()
+            .downcast_mut()
+            .expect("Resource doesn't have the expected backend type.")
+    }
+
+    unsafe fn unbox<T: DynResource + 'static>(self: Box<Self>) -> T {
+        debug_assert!(
+            <Self as Any>::type_id(self.as_ref()) == std::any::TypeId::of::<T>(),
+            "Resource doesn't have the expected type, expected {:?}, got {:?}",
+            std::any::TypeId::of::<T>(),
+            <Self as Any>::type_id(self.as_ref())
+        );
+
+        let casted_ptr = Box::into_raw(self).cast::<T>();
+        // SAFETY: This adheres to the safety contract of `Box::from_raw` because:
+        //
+        // - We are casting the value of a previously `Box`ed value, which guarantees:
+        //   - `casted_ptr` is not null.
+        //   - `casted_ptr` is valid for reads and writes, though by itself this does not mean
+        //     valid reads and writes for `T` (read on for that).
+        // - We don't change the allocator.
+        // - The caller guarantees (see `# Safety`) that an initialized and aligned `T` is stored
+        //   within `casted_ptr`, as the contract of `Box::from_raw` requires.
+        *unsafe { Box::from_raw(casted_ptr) }
+    }
+}
+
+pub trait DynBuffer: DynResource + std::fmt::Debug {}
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 956b7b08a5..4cee4501ea 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -40,6 +40,10 @@ impl crate::Api for Api {
     type ComputePipeline = Resource;
 }
 
+crate::impl_dyn_resource!(Context, Encoder, Resource);
+
+impl crate::DynBuffer for Resource {}
+
 impl crate::Instance for Context {
     type A = Api;
 
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index 459600df7e..bf04c3a63c 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -164,6 +164,24 @@ impl crate::Api for Api {
     type ComputePipeline = ComputePipeline;
 }
 
+crate::impl_dyn_resource!(
+    BindGroup,
+    BindGroupLayout,
+    Buffer,
+    CommandBuffer,
+    CommandEncoder,
+    ComputePipeline,
+    Fence,
+    PipelineLayout,
+    QuerySet,
+    RenderPipeline,
+    Sampler,
+    ShaderModule,
+    Surface,
+    Texture,
+    TextureView
+);
+
 bitflags::bitflags! {
     /// Flags that affect internal code paths but do not
     /// change the exposed feature set.
@@ -307,6 +325,8 @@ unsafe impl Sync for Buffer {}
 #[cfg(send_sync)]
 unsafe impl Send for Buffer {}
 
+impl crate::DynBuffer for Buffer {}
+
 #[derive(Clone, Debug)]
 pub enum TextureInner {
     Renderbuffer {
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index bd60b029e0..89053b2a74 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -262,6 +262,11 @@ pub mod api {
     pub use super::vulkan::Api as Vulkan;
 }
 
+mod dynamic;
+
+pub use dynamic::DynBuffer;
+pub(crate) use dynamic::{impl_dyn_resource, DynResource};
+
 use std::{
     borrow::{Borrow, Cow},
     fmt,
@@ -399,7 +404,7 @@ pub trait Api: Clone + fmt::Debug + Sized {
     /// [`CommandEncoder`]: Api::CommandEncoder
     type CommandBuffer: WasmNotSendSync + fmt::Debug;
 
-    type Buffer: fmt::Debug + WasmNotSendSync + 'static;
+    type Buffer: DynBuffer;
     type Texture: fmt::Debug + WasmNotSendSync + 'static;
     type SurfaceTexture: fmt::Debug + WasmNotSendSync + Borrow<Self::Texture>;
     type TextureView: fmt::Debug + WasmNotSendSync;
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index 0003983706..c57137d73a 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -71,6 +71,24 @@ impl crate::Api for Api {
     type AccelerationStructure = AccelerationStructure;
 }
 
+crate::impl_dyn_resource!(
+    BindGroup,
+    BindGroupLayout,
+    Buffer,
+    CommandBuffer,
+    CommandEncoder,
+    ComputePipeline,
+    Fence,
+    PipelineLayout,
+    QuerySet,
+    RenderPipeline,
+    Sampler,
+    ShaderModule,
+    Surface,
+    Texture,
+    TextureView
+);
+
 pub struct Instance {
     managed_metal_layer_delegate: surface::HalManagedMetalLayerDelegate,
 }
@@ -460,6 +478,8 @@ pub struct Buffer {
 unsafe impl Send for Buffer {}
 unsafe impl Sync for Buffer {}
 
+impl crate::DynBuffer for Buffer {}
+
 impl Buffer {
     fn as_raw(&self) -> BufferPtr {
         unsafe { NonNull::new_unchecked(self.raw.as_ptr()) }
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index d4be64548a..f329551771 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -78,6 +78,25 @@ impl crate::Api for Api {
     type ComputePipeline = ComputePipeline;
 }
 
+crate::impl_dyn_resource!(
+    BindGroup,
+    BindGroupLayout,
+    Buffer,
+    CommandBuffer,
+    CommandEncoder,
+    ComputePipeline,
+    Fence,
+    PipelineCache,
+    PipelineLayout,
+    QuerySet,
+    RenderPipeline,
+    Sampler,
+    ShaderModule,
+    Surface,
+    Texture,
+    TextureView
+);
+
 struct DebugUtils {
     extension: ext::debug_utils::Instance,
     messenger: vk::DebugUtilsMessengerEXT,
@@ -631,6 +650,8 @@ pub struct Buffer {
     block: Option<Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>>,
 }
 
+impl crate::DynBuffer for Buffer {}
+
 #[derive(Debug)]
 pub struct AccelerationStructure {
     raw: vk::AccelerationStructureKHR,

From cda9d9af65636b68a1f1a0c310da5f020e7bddb6 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 14 Jul 2024 23:05:38 +0200
Subject: [PATCH 170/226] Buffer bindings no longer depend on hal api struct,
 but directly on buffer type

---
 wgpu-core/src/device/resource.rs |  2 +-
 wgpu-hal/src/dx12/command.rs     |  4 ++--
 wgpu-hal/src/dx12/mod.rs         |  2 +-
 wgpu-hal/src/empty.rs            |  8 ++++++--
 wgpu-hal/src/gles/command.rs     |  4 ++--
 wgpu-hal/src/lib.rs              | 16 ++++++++++------
 wgpu-hal/src/metal/command.rs    |  4 ++--
 wgpu-hal/src/metal/mod.rs        |  2 +-
 wgpu-hal/src/vulkan/command.rs   |  4 ++--
 9 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 927358ea2c..de21f0a39a 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -1889,7 +1889,7 @@ impl<A: HalApi> Device<A> {
         used: &mut BindGroupStates<A>,
         limits: &wgt::Limits,
         snatch_guard: &'a SnatchGuard<'a>,
-    ) -> Result<hal::BufferBinding<'a, A>, binding_model::CreateBindGroupError> {
+    ) -> Result<hal::BufferBinding<'a, A::Buffer>, binding_model::CreateBindGroupError> {
         use crate::binding_model::CreateBindGroupError as Error;
 
         let (binding_ty, dynamic, min_size) = match decl.ty {
diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs
index 5e05a3bcf5..ad7c70e285 100644
--- a/wgpu-hal/src/dx12/command.rs
+++ b/wgpu-hal/src/dx12/command.rs
@@ -970,7 +970,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     unsafe fn set_index_buffer<'a>(
         &mut self,
-        binding: crate::BufferBinding<'a, super::Api>,
+        binding: crate::BufferBinding<'a, super::Buffer>,
         format: wgt::IndexFormat,
     ) {
         self.list.as_ref().unwrap().set_index_buffer(
@@ -982,7 +982,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
     unsafe fn set_vertex_buffer<'a>(
         &mut self,
         index: u32,
-        binding: crate::BufferBinding<'a, super::Api>,
+        binding: crate::BufferBinding<'a, super::Buffer>,
     ) {
         let vb = &mut self.pass.vertex_buffers[index as usize];
         vb.BufferLocation = binding.resolve_address();
diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index 165a970efd..d3a9bd0c65 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -428,7 +428,7 @@ unsafe impl Sync for Buffer {}
 
 impl crate::DynBuffer for Buffer {}
 
-impl crate::BufferBinding<'_, Api> {
+impl crate::BufferBinding<'_, Buffer> {
     fn resolve_size(&self) -> wgt::BufferAddress {
         match self.size {
             Some(size) => size.get(),
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 4cee4501ea..65cc488b46 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -391,11 +391,15 @@ impl crate::CommandEncoder for Encoder {
 
     unsafe fn set_index_buffer<'a>(
         &mut self,
-        binding: crate::BufferBinding<'a, Api>,
+        binding: crate::BufferBinding<'a, Resource>,
         format: wgt::IndexFormat,
     ) {
     }
-    unsafe fn set_vertex_buffer<'a>(&mut self, index: u32, binding: crate::BufferBinding<'a, Api>) {
+    unsafe fn set_vertex_buffer<'a>(
+        &mut self,
+        index: u32,
+        binding: crate::BufferBinding<'a, Resource>,
+    ) {
     }
     unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: Range<f32>) {}
     unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) {}
diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs
index 2fcbc7cffe..daad78e52b 100644
--- a/wgpu-hal/src/gles/command.rs
+++ b/wgpu-hal/src/gles/command.rs
@@ -978,7 +978,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     unsafe fn set_index_buffer<'a>(
         &mut self,
-        binding: crate::BufferBinding<'a, super::Api>,
+        binding: crate::BufferBinding<'a, super::Buffer>,
         format: wgt::IndexFormat,
     ) {
         self.state.index_offset = binding.offset;
@@ -990,7 +990,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
     unsafe fn set_vertex_buffer<'a>(
         &mut self,
         index: u32,
-        binding: crate::BufferBinding<'a, super::Api>,
+        binding: crate::BufferBinding<'a, super::Buffer>,
     ) {
         self.state.dirty_vbuf_mask |= 1 << index;
         let (_, ref mut vb) = self.state.vertex_buffers[index as usize];
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 89053b2a74..d8c658c071 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -1235,10 +1235,14 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug {
 
     unsafe fn set_index_buffer<'a>(
         &mut self,
-        binding: BufferBinding<'a, Self::A>,
+        binding: BufferBinding<'a, <Self::A as Api>::Buffer>,
         format: wgt::IndexFormat,
     );
-    unsafe fn set_vertex_buffer<'a>(&mut self, index: u32, binding: BufferBinding<'a, Self::A>);
+    unsafe fn set_vertex_buffer<'a>(
+        &mut self,
+        index: u32,
+        binding: BufferBinding<'a, <Self::A as Api>::Buffer>,
+    );
     unsafe fn set_viewport(&mut self, rect: &Rect<f32>, depth_range: Range<f32>);
     unsafe fn set_scissor_rect(&mut self, rect: &Rect<u32>);
     unsafe fn set_stencil_reference(&mut self, value: u32);
@@ -1736,9 +1740,9 @@ pub struct PipelineLayoutDescriptor<'a, A: Api> {
 }
 
 #[derive(Debug)]
-pub struct BufferBinding<'a, A: Api> {
+pub struct BufferBinding<'a, B: DynBuffer + ?Sized> {
     /// The buffer being bound.
-    pub buffer: &'a A::Buffer,
+    pub buffer: &'a B,
 
     /// The offset at which the bound region starts.
     ///
@@ -1762,7 +1766,7 @@ pub struct BufferBinding<'a, A: Api> {
 }
 
 // Rust gets confused about the impl requirements for `A`
-impl<A: Api> Clone for BufferBinding<'_, A> {
+impl<B: DynBuffer> Clone for BufferBinding<'_, B> {
     fn clone(&self) -> Self {
         Self {
             buffer: self.buffer,
@@ -1808,7 +1812,7 @@ pub struct BindGroupEntry {
 pub struct BindGroupDescriptor<'a, A: Api> {
     pub label: Label<'a>,
     pub layout: &'a A::BindGroupLayout,
-    pub buffers: &'a [BufferBinding<'a, A>],
+    pub buffers: &'a [BufferBinding<'a, A::Buffer>],
     pub samplers: &'a [&'a A::Sampler],
     pub textures: &'a [TextureBinding<'a, A>],
     pub entries: &'a [BindGroupEntry],
diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs
index fafe3478fd..5aeb8fa524 100644
--- a/wgpu-hal/src/metal/command.rs
+++ b/wgpu-hal/src/metal/command.rs
@@ -915,7 +915,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     unsafe fn set_index_buffer<'a>(
         &mut self,
-        binding: crate::BufferBinding<'a, super::Api>,
+        binding: crate::BufferBinding<'a, super::Buffer>,
         format: wgt::IndexFormat,
     ) {
         let (stride, raw_type) = match format {
@@ -933,7 +933,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
     unsafe fn set_vertex_buffer<'a>(
         &mut self,
         index: u32,
-        binding: crate::BufferBinding<'a, super::Api>,
+        binding: crate::BufferBinding<'a, super::Buffer>,
     ) {
         let buffer_index = self.shared.private_caps.max_vertex_buffers as u64 - 1 - index as u64;
         let encoder = self.state.render.as_ref().unwrap();
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index c57137d73a..a7937731d2 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -486,7 +486,7 @@ impl Buffer {
     }
 }
 
-impl crate::BufferBinding<'_, Api> {
+impl crate::BufferBinding<'_, Buffer> {
     fn resolve_size(&self) -> wgt::BufferAddress {
         match self.size {
             Some(size) => size.get(),
diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs
index 4f36e6f86c..2f24efeace 100644
--- a/wgpu-hal/src/vulkan/command.rs
+++ b/wgpu-hal/src/vulkan/command.rs
@@ -870,7 +870,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     unsafe fn set_index_buffer<'a>(
         &mut self,
-        binding: crate::BufferBinding<'a, super::Api>,
+        binding: crate::BufferBinding<'a, super::Buffer>,
         format: wgt::IndexFormat,
     ) {
         unsafe {
@@ -885,7 +885,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
     unsafe fn set_vertex_buffer<'a>(
         &mut self,
         index: u32,
-        binding: crate::BufferBinding<'a, super::Api>,
+        binding: crate::BufferBinding<'a, super::Buffer>,
     ) {
         let vk_buffers = [binding.buffer.raw];
         let vk_offsets = [binding.offset];

From a4d9d38d035a7624a828753987379b642402fad7 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 14 Jul 2024 23:18:18 +0200
Subject: [PATCH 171/226] BufferBarrier no longer depends on hal api struct,
 but directly on buffer type

---
 wgpu-core/src/track/buffer.rs  | 4 ++--
 wgpu-core/src/track/mod.rs     | 2 +-
 wgpu-hal/src/dx12/command.rs   | 2 +-
 wgpu-hal/src/empty.rs          | 2 +-
 wgpu-hal/src/gles/command.rs   | 2 +-
 wgpu-hal/src/lib.rs            | 6 +++---
 wgpu-hal/src/metal/command.rs  | 2 +-
 wgpu-hal/src/vulkan/command.rs | 2 +-
 8 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/wgpu-core/src/track/buffer.rs b/wgpu-core/src/track/buffer.rs
index 8fdcf31674..0f2bc8cef9 100644
--- a/wgpu-core/src/track/buffer.rs
+++ b/wgpu-core/src/track/buffer.rs
@@ -284,7 +284,7 @@ impl<A: HalApi> BufferTracker<A> {
     pub fn drain_transitions<'a, 'b: 'a>(
         &'b mut self,
         snatch_guard: &'a SnatchGuard<'a>,
-    ) -> impl Iterator<Item = BufferBarrier<'a, A>> {
+    ) -> impl Iterator<Item = BufferBarrier<'a, A::Buffer>> {
         let buffer_barriers = self.temp.drain(..).map(|pending| {
             let buf = unsafe { self.metadata.get_resource_unchecked(pending.id as _) };
             pending.into_hal(buf, snatch_guard)
@@ -557,7 +557,7 @@ impl<A: HalApi> DeviceBufferTracker<A> {
         &'a mut self,
         tracker: &'a BufferTracker<A>,
         snatch_guard: &'b SnatchGuard<'b>,
-    ) -> impl Iterator<Item = BufferBarrier<'a, A>> {
+    ) -> impl Iterator<Item = BufferBarrier<'a, A::Buffer>> {
         for index in tracker.metadata.owned_indices() {
             self.tracker_assert_in_bounds(index);
 
diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs
index bb0d8cee78..5f7a868251 100644
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -261,7 +261,7 @@ impl PendingTransition<hal::BufferUses> {
         self,
         buf: &'a resource::Buffer<A>,
         snatch_guard: &'a SnatchGuard<'a>,
-    ) -> hal::BufferBarrier<'a, A> {
+    ) -> hal::BufferBarrier<'a, A::Buffer> {
         let buffer = buf.raw(snatch_guard).expect("Buffer is destroyed");
         hal::BufferBarrier {
             buffer,
diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs
index ad7c70e285..c5978e55e4 100644
--- a/wgpu-hal/src/dx12/command.rs
+++ b/wgpu-hal/src/dx12/command.rs
@@ -310,7 +310,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     unsafe fn transition_buffers<'a, T>(&mut self, barriers: T)
     where
-        T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>,
+        T: Iterator<Item = crate::BufferBarrier<'a, super::Buffer>>,
     {
         self.temp.barriers.clear();
 
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 65cc488b46..c4e2af8567 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -301,7 +301,7 @@ impl crate::CommandEncoder for Encoder {
 
     unsafe fn transition_buffers<'a, T>(&mut self, barriers: T)
     where
-        T: Iterator<Item = crate::BufferBarrier<'a, Api>>,
+        T: Iterator<Item = crate::BufferBarrier<'a, Resource>>,
     {
     }
 
diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs
index daad78e52b..e7e3714038 100644
--- a/wgpu-hal/src/gles/command.rs
+++ b/wgpu-hal/src/gles/command.rs
@@ -273,7 +273,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     unsafe fn transition_buffers<'a, T>(&mut self, barriers: T)
     where
-        T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>,
+        T: Iterator<Item = crate::BufferBarrier<'a, super::Buffer>>,
     {
         if !self
             .private_caps
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index d8c658c071..a3a21f7bf9 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -1103,7 +1103,7 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug {
 
     unsafe fn transition_buffers<'a, T>(&mut self, barriers: T)
     where
-        T: Iterator<Item = BufferBarrier<'a, Self::A>>;
+        T: Iterator<Item = BufferBarrier<'a, <Self::A as Api>::Buffer>>;
 
     unsafe fn transition_textures<'a, T>(&mut self, barriers: T)
     where
@@ -1975,8 +1975,8 @@ pub struct Rect<T> {
 }
 
 #[derive(Debug, Clone)]
-pub struct BufferBarrier<'a, A: Api> {
-    pub buffer: &'a A::Buffer,
+pub struct BufferBarrier<'a, B: DynBuffer + ?Sized> {
+    pub buffer: &'a B,
     pub usage: Range<BufferUses>,
 }
 
diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs
index 5aeb8fa524..db19727a5f 100644
--- a/wgpu-hal/src/metal/command.rs
+++ b/wgpu-hal/src/metal/command.rs
@@ -241,7 +241,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     unsafe fn transition_buffers<'a, T>(&mut self, _barriers: T)
     where
-        T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>,
+        T: Iterator<Item = crate::BufferBarrier<'a, super::Buffer>>,
     {
     }
 
diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs
index 2f24efeace..ddce6c24ed 100644
--- a/wgpu-hal/src/vulkan/command.rs
+++ b/wgpu-hal/src/vulkan/command.rs
@@ -116,7 +116,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     unsafe fn transition_buffers<'a, T>(&mut self, barriers: T)
     where
-        T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>,
+        T: Iterator<Item = crate::BufferBarrier<'a, super::Buffer>>,
     {
         //Note: this is done so that we never end up with empty stage flags
         let mut src_stages = vk::PipelineStageFlags::TOP_OF_PIPE;

From 256ec6e447170b012f86c2cb22d64362b31205d0 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 14 Jul 2024 23:13:55 +0200
Subject: [PATCH 172/226] Introduce DynCommandEncoder, implement index & vertex
 buffer ops on it

---
 wgpu-hal/src/dynamic/command.rs | 37 +++++++++++++++++++++++++++++++++
 wgpu-hal/src/dynamic/mod.rs     | 16 ++++++++++++++
 wgpu-hal/src/lib.rs             |  4 ++--
 3 files changed, 55 insertions(+), 2 deletions(-)
 create mode 100644 wgpu-hal/src/dynamic/command.rs

diff --git a/wgpu-hal/src/dynamic/command.rs b/wgpu-hal/src/dynamic/command.rs
new file mode 100644
index 0000000000..899bd805a6
--- /dev/null
+++ b/wgpu-hal/src/dynamic/command.rs
@@ -0,0 +1,37 @@
+use crate::{BufferBinding, CommandEncoder};
+
+use super::DynBuffer;
+
+pub trait DynCommandEncoder {
+    unsafe fn set_index_buffer<'a>(
+        &mut self,
+        binding: BufferBinding<'a, dyn DynBuffer>,
+        format: wgt::IndexFormat,
+    );
+
+    unsafe fn set_vertex_buffer<'a>(
+        &mut self,
+        index: u32,
+        binding: BufferBinding<'a, dyn DynBuffer>,
+    );
+}
+
+impl<C: CommandEncoder> DynCommandEncoder for C {
+    unsafe fn set_index_buffer<'a>(
+        &mut self,
+        binding: BufferBinding<'a, dyn DynBuffer>,
+        format: wgt::IndexFormat,
+    ) {
+        let binding = binding.expect_downcast();
+        unsafe { self.set_index_buffer(binding, format) };
+    }
+
+    unsafe fn set_vertex_buffer<'a>(
+        &mut self,
+        index: u32,
+        binding: BufferBinding<'a, dyn DynBuffer>,
+    ) {
+        let binding = binding.expect_downcast();
+        unsafe { self.set_vertex_buffer(index, binding) };
+    }
+}
diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
index 9b0ffe6d1c..f93cc82a9a 100644
--- a/wgpu-hal/src/dynamic/mod.rs
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -1,7 +1,13 @@
+mod command;
+
+pub use self::command::DynCommandEncoder;
+
 use std::any::Any;
 
 use wgt::WasmNotSendSync;
 
+use crate::BufferBinding;
+
 /// Base trait for all resources, allows downcasting via [`Any`].
 pub trait DynResource: Any + WasmNotSendSync + 'static {
     fn as_any(&self) -> &dyn Any;
@@ -81,3 +87,13 @@ impl<R: DynResource + ?Sized> DynResourceExt for R {
 }
 
 pub trait DynBuffer: DynResource + std::fmt::Debug {}
+
+impl<'a> BufferBinding<'a, dyn DynBuffer> {
+    pub fn expect_downcast<B: DynBuffer>(self) -> BufferBinding<'a, B> {
+        BufferBinding {
+            buffer: self.buffer.expect_downcast_ref(),
+            offset: self.offset,
+            size: self.size,
+        }
+    }
+}
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index a3a21f7bf9..ba943c5f8b 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -264,8 +264,8 @@ pub mod api {
 
 mod dynamic;
 
-pub use dynamic::DynBuffer;
 pub(crate) use dynamic::{impl_dyn_resource, DynResource};
+pub use dynamic::{DynBuffer, DynCommandEncoder};
 
 use std::{
     borrow::{Borrow, Cow},
@@ -392,7 +392,7 @@ pub trait Api: Clone + fmt::Debug + Sized {
     type Device: Device<A = Self>;
 
     type Queue: Queue<A = Self>;
-    type CommandEncoder: CommandEncoder<A = Self>;
+    type CommandEncoder: DynCommandEncoder + CommandEncoder<A = Self>;
 
     /// This API's command buffer type.
     ///

From 9b20fd8507767fb1edac0e5203e3380efecd4a6b Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Mon, 15 Jul 2024 20:05:31 +0200
Subject: [PATCH 173/226] DynCommandEncoder implement begin/end encoding, debug
 markers, various buffer operations

---
 wgpu-hal/src/dynamic/command.rs | 79 +++++++++++++++++++++++++++++++--
 1 file changed, 76 insertions(+), 3 deletions(-)

diff --git a/wgpu-hal/src/dynamic/command.rs b/wgpu-hal/src/dynamic/command.rs
index 899bd805a6..fbeedd1753 100644
--- a/wgpu-hal/src/dynamic/command.rs
+++ b/wgpu-hal/src/dynamic/command.rs
@@ -1,8 +1,29 @@
-use crate::{BufferBinding, CommandEncoder};
+use crate::{
+    BufferBarrier, BufferBinding, BufferCopy, CommandEncoder, DeviceError, Label, MemoryRange,
+};
 
-use super::DynBuffer;
+use super::{DynBuffer, DynResourceExt as _};
+
+pub trait DynCommandEncoder: std::fmt::Debug {
+    unsafe fn begin_encoding(&mut self, label: Label) -> Result<(), DeviceError>;
+
+    unsafe fn discard_encoding(&mut self);
+
+    unsafe fn transition_buffers(&mut self, barriers: &[BufferBarrier<'_, dyn DynBuffer>]);
+
+    unsafe fn clear_buffer(&mut self, buffer: &dyn DynBuffer, range: MemoryRange);
+
+    unsafe fn copy_buffer_to_buffer(
+        &mut self,
+        src: &dyn DynBuffer,
+        dst: &dyn DynBuffer,
+        regions: &[BufferCopy],
+    );
+
+    unsafe fn insert_debug_marker(&mut self, label: &str);
+    unsafe fn begin_debug_marker(&mut self, group_label: &str);
+    unsafe fn end_debug_marker(&mut self);
 
-pub trait DynCommandEncoder {
     unsafe fn set_index_buffer<'a>(
         &mut self,
         binding: BufferBinding<'a, dyn DynBuffer>,
@@ -17,6 +38,58 @@ pub trait DynCommandEncoder {
 }
 
 impl<C: CommandEncoder> DynCommandEncoder for C {
+    unsafe fn begin_encoding(&mut self, label: Label) -> Result<(), DeviceError> {
+        unsafe { C::begin_encoding(self, label) }
+    }
+
+    unsafe fn discard_encoding(&mut self) {
+        unsafe { C::discard_encoding(self) }
+    }
+
+    unsafe fn transition_buffers(&mut self, barriers: &[BufferBarrier<'_, dyn DynBuffer>]) {
+        let barriers = barriers.iter().map(|barrier| BufferBarrier {
+            buffer: barrier.buffer.expect_downcast_ref(),
+            usage: barrier.usage.clone(),
+        });
+        unsafe { self.transition_buffers(barriers) };
+    }
+
+    unsafe fn clear_buffer(&mut self, buffer: &dyn DynBuffer, range: MemoryRange) {
+        let buffer = buffer.expect_downcast_ref();
+        unsafe { C::clear_buffer(self, buffer, range) };
+    }
+
+    unsafe fn copy_buffer_to_buffer(
+        &mut self,
+        src: &dyn DynBuffer,
+        dst: &dyn DynBuffer,
+        regions: &[BufferCopy],
+    ) {
+        let src = src.expect_downcast_ref();
+        let dst = dst.expect_downcast_ref();
+        unsafe {
+            C::copy_buffer_to_buffer(self, src, dst, regions.iter().copied());
+        }
+    }
+
+    unsafe fn insert_debug_marker(&mut self, label: &str) {
+        unsafe {
+            C::insert_debug_marker(self, label);
+        }
+    }
+
+    unsafe fn begin_debug_marker(&mut self, group_label: &str) {
+        unsafe {
+            C::begin_debug_marker(self, group_label);
+        }
+    }
+
+    unsafe fn end_debug_marker(&mut self) {
+        unsafe {
+            C::end_debug_marker(self);
+        }
+    }
+
     unsafe fn set_index_buffer<'a>(
         &mut self,
         binding: BufferBinding<'a, dyn DynBuffer>,

From f8871e6ed1c85be9bce0edcd6472e219f1b4231c Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Mon, 15 Jul 2024 20:05:31 +0200
Subject: [PATCH 174/226] introduce DynQuerySet, associated DynCommandEncoder
 methods

---
 wgpu-hal/src/dx12/mod.rs        |  2 ++
 wgpu-hal/src/dynamic/command.rs | 51 ++++++++++++++++++++++++++++++++-
 wgpu-hal/src/dynamic/mod.rs     |  1 +
 wgpu-hal/src/empty.rs           |  1 +
 wgpu-hal/src/gles/mod.rs        |  2 ++
 wgpu-hal/src/lib.rs             |  4 +--
 wgpu-hal/src/metal/mod.rs       |  2 ++
 wgpu-hal/src/vulkan/mod.rs      |  2 ++
 8 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index d3a9bd0c65..21fd1e367c 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -508,6 +508,8 @@ pub struct QuerySet {
     raw_ty: d3d12_ty::D3D12_QUERY_TYPE,
 }
 
+impl crate::DynQuerySet for QuerySet {}
+
 unsafe impl Send for QuerySet {}
 unsafe impl Sync for QuerySet {}
 
diff --git a/wgpu-hal/src/dynamic/command.rs b/wgpu-hal/src/dynamic/command.rs
index fbeedd1753..ef4cffb2b9 100644
--- a/wgpu-hal/src/dynamic/command.rs
+++ b/wgpu-hal/src/dynamic/command.rs
@@ -1,5 +1,8 @@
+use std::ops::Range;
+
 use crate::{
-    BufferBarrier, BufferBinding, BufferCopy, CommandEncoder, DeviceError, Label, MemoryRange,
+    BufferBarrier, BufferBinding, BufferCopy, CommandEncoder, DeviceError, DynQuerySet, Label,
+    MemoryRange,
 };
 
 use super::{DynBuffer, DynResourceExt as _};
@@ -24,6 +27,19 @@ pub trait DynCommandEncoder: std::fmt::Debug {
     unsafe fn begin_debug_marker(&mut self, group_label: &str);
     unsafe fn end_debug_marker(&mut self);
 
+    unsafe fn begin_query(&mut self, set: &dyn DynQuerySet, index: u32);
+    unsafe fn end_query(&mut self, set: &dyn DynQuerySet, index: u32);
+    unsafe fn write_timestamp(&mut self, set: &dyn DynQuerySet, index: u32);
+    unsafe fn reset_queries(&mut self, set: &dyn DynQuerySet, range: Range<u32>);
+    unsafe fn copy_query_results(
+        &mut self,
+        set: &dyn DynQuerySet,
+        range: Range<u32>,
+        buffer: &dyn DynBuffer,
+        offset: wgt::BufferAddress,
+        stride: wgt::BufferSize,
+    );
+
     unsafe fn set_index_buffer<'a>(
         &mut self,
         binding: BufferBinding<'a, dyn DynBuffer>,
@@ -90,6 +106,39 @@ impl<C: CommandEncoder> DynCommandEncoder for C {
         }
     }
 
+    unsafe fn begin_query(&mut self, set: &dyn DynQuerySet, index: u32) {
+        let set = set.expect_downcast_ref();
+        unsafe { C::begin_query(self, set, index) };
+    }
+
+    unsafe fn end_query(&mut self, set: &dyn DynQuerySet, index: u32) {
+        let set = set.expect_downcast_ref();
+        unsafe { C::end_query(self, set, index) };
+    }
+
+    unsafe fn write_timestamp(&mut self, set: &dyn DynQuerySet, index: u32) {
+        let set = set.expect_downcast_ref();
+        unsafe { C::write_timestamp(self, set, index) };
+    }
+
+    unsafe fn reset_queries(&mut self, set: &dyn DynQuerySet, range: Range<u32>) {
+        let set = set.expect_downcast_ref();
+        unsafe { C::reset_queries(self, set, range) };
+    }
+
+    unsafe fn copy_query_results(
+        &mut self,
+        set: &dyn DynQuerySet,
+        range: Range<u32>,
+        buffer: &dyn DynBuffer,
+        offset: wgt::BufferAddress,
+        stride: wgt::BufferSize,
+    ) {
+        let set = set.expect_downcast_ref();
+        let buffer = buffer.expect_downcast_ref();
+        unsafe { C::copy_query_results(self, set, range, buffer, offset, stride) };
+    }
+
     unsafe fn set_index_buffer<'a>(
         &mut self,
         binding: BufferBinding<'a, dyn DynBuffer>,
diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
index f93cc82a9a..b3fa3d0965 100644
--- a/wgpu-hal/src/dynamic/mod.rs
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -87,6 +87,7 @@ impl<R: DynResource + ?Sized> DynResourceExt for R {
 }
 
 pub trait DynBuffer: DynResource + std::fmt::Debug {}
+pub trait DynQuerySet: DynResource + std::fmt::Debug {}
 
 impl<'a> BufferBinding<'a, dyn DynBuffer> {
     pub fn expect_downcast<B: DynBuffer>(self) -> BufferBinding<'a, B> {
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index c4e2af8567..4dfb901551 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -43,6 +43,7 @@ impl crate::Api for Api {
 crate::impl_dyn_resource!(Context, Encoder, Resource);
 
 impl crate::DynBuffer for Resource {}
+impl crate::DynQuerySet for Resource {}
 
 impl crate::Instance for Context {
     type A = Api;
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index bf04c3a63c..8e3ff84b11 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -670,6 +670,8 @@ pub struct QuerySet {
     target: BindTarget,
 }
 
+impl crate::DynQuerySet for QuerySet {}
+
 #[derive(Debug)]
 pub struct Fence {
     last_completed: crate::FenceValue,
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index ba943c5f8b..511da781d5 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -265,7 +265,7 @@ pub mod api {
 mod dynamic;
 
 pub(crate) use dynamic::{impl_dyn_resource, DynResource};
-pub use dynamic::{DynBuffer, DynCommandEncoder};
+pub use dynamic::{DynBuffer, DynCommandEncoder, DynQuerySet};
 
 use std::{
     borrow::{Borrow, Cow},
@@ -409,7 +409,7 @@ pub trait Api: Clone + fmt::Debug + Sized {
     type SurfaceTexture: fmt::Debug + WasmNotSendSync + Borrow<Self::Texture>;
     type TextureView: fmt::Debug + WasmNotSendSync;
     type Sampler: fmt::Debug + WasmNotSendSync;
-    type QuerySet: fmt::Debug + WasmNotSendSync;
+    type QuerySet: DynQuerySet;
 
     /// A value you can block on to wait for something to finish.
     ///
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index a7937731d2..82e8cd39e5 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -784,6 +784,8 @@ pub struct QuerySet {
     ty: wgt::QueryType,
 }
 
+impl crate::DynQuerySet for QuerySet {}
+
 unsafe impl Send for QuerySet {}
 unsafe impl Sync for QuerySet {}
 
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index f329551771..cb352382f3 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -833,6 +833,8 @@ pub struct QuerySet {
     raw: vk::QueryPool,
 }
 
+impl crate::DynQuerySet for QuerySet {}
+
 /// The [`Api::Fence`] type for [`vulkan::Api`].
 ///
 /// This is an `enum` because there are two possible implementations of

From 50a181122939bc5997a2a8c6c0bbd6c80c4be6f4 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Mon, 15 Jul 2024 23:28:49 +0200
Subject: [PATCH 175/226] Introduce DynPipelineLayout & DynBindGroup

---
 wgpu-hal/src/dx12/mod.rs        |  4 +++
 wgpu-hal/src/dynamic/command.rs | 43 +++++++++++++++++++++++++++++++--
 wgpu-hal/src/dynamic/mod.rs     |  2 ++
 wgpu-hal/src/empty.rs           |  2 ++
 wgpu-hal/src/gles/mod.rs        |  4 +++
 wgpu-hal/src/lib.rs             |  6 ++---
 wgpu-hal/src/metal/mod.rs       |  4 +++
 wgpu-hal/src/vulkan/mod.rs      |  4 +++
 8 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index 21fd1e367c..aed7f1e9c6 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -550,6 +550,8 @@ pub struct BindGroup {
     dynamic_buffers: Vec<d3d12::GpuAddress>,
 }
 
+impl crate::DynBindGroup for BindGroup {}
+
 bitflags::bitflags! {
     #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
     struct TableTypes: u8 {
@@ -594,6 +596,8 @@ pub struct PipelineLayout {
     naga_options: naga::back::hlsl::Options,
 }
 
+impl crate::DynPipelineLayout for PipelineLayout {}
+
 #[derive(Debug)]
 pub struct ShaderModule {
     naga: crate::NagaShader,
diff --git a/wgpu-hal/src/dynamic/command.rs b/wgpu-hal/src/dynamic/command.rs
index ef4cffb2b9..05e231f1a0 100644
--- a/wgpu-hal/src/dynamic/command.rs
+++ b/wgpu-hal/src/dynamic/command.rs
@@ -1,8 +1,8 @@
 use std::ops::Range;
 
 use crate::{
-    BufferBarrier, BufferBinding, BufferCopy, CommandEncoder, DeviceError, DynQuerySet, Label,
-    MemoryRange,
+    BufferBarrier, BufferBinding, BufferCopy, CommandEncoder, DeviceError, DynBindGroup,
+    DynPipelineLayout, DynQuerySet, Label, MemoryRange,
 };
 
 use super::{DynBuffer, DynResourceExt as _};
@@ -23,6 +23,22 @@ pub trait DynCommandEncoder: std::fmt::Debug {
         regions: &[BufferCopy],
     );
 
+    unsafe fn set_bind_group(
+        &mut self,
+        layout: &dyn DynPipelineLayout,
+        index: u32,
+        group: &dyn DynBindGroup,
+        dynamic_offsets: &[wgt::DynamicOffset],
+    );
+
+    unsafe fn set_push_constants(
+        &mut self,
+        layout: &dyn DynPipelineLayout,
+        stages: wgt::ShaderStages,
+        offset_bytes: u32,
+        data: &[u32],
+    );
+
     unsafe fn insert_debug_marker(&mut self, label: &str);
     unsafe fn begin_debug_marker(&mut self, group_label: &str);
     unsafe fn end_debug_marker(&mut self);
@@ -88,6 +104,29 @@ impl<C: CommandEncoder> DynCommandEncoder for C {
         }
     }
 
+    unsafe fn set_bind_group(
+        &mut self,
+        layout: &dyn DynPipelineLayout,
+        index: u32,
+        group: &dyn DynBindGroup,
+        dynamic_offsets: &[wgt::DynamicOffset],
+    ) {
+        let layout = layout.expect_downcast_ref();
+        let group = group.expect_downcast_ref();
+        unsafe { C::set_bind_group(self, layout, index, group, dynamic_offsets) };
+    }
+
+    unsafe fn set_push_constants(
+        &mut self,
+        layout: &dyn DynPipelineLayout,
+        stages: wgt::ShaderStages,
+        offset_bytes: u32,
+        data: &[u32],
+    ) {
+        let layout = layout.expect_downcast_ref();
+        unsafe { C::set_push_constants(self, layout, stages, offset_bytes, data) };
+    }
+
     unsafe fn insert_debug_marker(&mut self, label: &str) {
         unsafe {
             C::insert_debug_marker(self, label);
diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
index b3fa3d0965..7275c176be 100644
--- a/wgpu-hal/src/dynamic/mod.rs
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -86,7 +86,9 @@ impl<R: DynResource + ?Sized> DynResourceExt for R {
     }
 }
 
+pub trait DynBindGroup: DynResource + std::fmt::Debug {}
 pub trait DynBuffer: DynResource + std::fmt::Debug {}
+pub trait DynPipelineLayout: DynResource + std::fmt::Debug {}
 pub trait DynQuerySet: DynResource + std::fmt::Debug {}
 
 impl<'a> BufferBinding<'a, dyn DynBuffer> {
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 4dfb901551..51f751c213 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -42,7 +42,9 @@ impl crate::Api for Api {
 
 crate::impl_dyn_resource!(Context, Encoder, Resource);
 
+impl crate::DynBindGroup for Resource {}
 impl crate::DynBuffer for Resource {}
+impl crate::DynPipelineLayout for Resource {}
 impl crate::DynQuerySet for Resource {}
 
 impl crate::Instance for Context {
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index 8e3ff84b11..e79a32097b 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -487,6 +487,8 @@ pub struct PipelineLayout {
     naga_options: naga::back::glsl::Options,
 }
 
+impl crate::DynPipelineLayout for PipelineLayout {}
+
 impl PipelineLayout {
     fn get_slot(&self, br: &naga::ResourceBinding) -> u8 {
         let group_info = &self.group_infos[br.group as usize];
@@ -525,6 +527,8 @@ pub struct BindGroup {
     contents: Box<[RawBinding]>,
 }
 
+impl crate::DynBindGroup for BindGroup {}
+
 type ShaderId = u32;
 
 #[derive(Debug)]
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 511da781d5..29e6e4a35f 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -265,7 +265,7 @@ pub mod api {
 mod dynamic;
 
 pub(crate) use dynamic::{impl_dyn_resource, DynResource};
-pub use dynamic::{DynBuffer, DynCommandEncoder, DynQuerySet};
+pub use dynamic::{DynBindGroup, DynBuffer, DynCommandEncoder, DynPipelineLayout, DynQuerySet};
 
 use std::{
     borrow::{Borrow, Cow},
@@ -431,8 +431,8 @@ pub trait Api: Clone + fmt::Debug + Sized {
     type Fence: fmt::Debug + WasmNotSendSync;
 
     type BindGroupLayout: fmt::Debug + WasmNotSendSync;
-    type BindGroup: fmt::Debug + WasmNotSendSync;
-    type PipelineLayout: fmt::Debug + WasmNotSendSync;
+    type BindGroup: DynBindGroup;
+    type PipelineLayout: DynPipelineLayout;
     type ShaderModule: fmt::Debug + WasmNotSendSync;
     type RenderPipeline: fmt::Debug + WasmNotSendSync;
     type ComputePipeline: fmt::Debug + WasmNotSendSync;
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index 82e8cd39e5..e25563a43f 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -624,6 +624,8 @@ pub struct PipelineLayout {
     per_stage_map: MultiStageResources,
 }
 
+impl crate::DynPipelineLayout for PipelineLayout {}
+
 trait AsNative {
     type Native;
     fn from(native: &Self::Native) -> Self;
@@ -697,6 +699,8 @@ pub struct BindGroup {
     textures: Vec<TexturePtr>,
 }
 
+impl crate::DynBindGroup for BindGroup {}
+
 unsafe impl Send for BindGroup {}
 unsafe impl Sync for BindGroup {}
 
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index cb352382f3..ada874d81a 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -716,11 +716,15 @@ pub struct PipelineLayout {
     binding_arrays: naga::back::spv::BindingMap,
 }
 
+impl crate::DynPipelineLayout for PipelineLayout {}
+
 #[derive(Debug)]
 pub struct BindGroup {
     set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
 }
 
+impl crate::DynBindGroup for BindGroup {}
+
 /// Miscellaneous allocation recycling pool for `CommandAllocator`.
 #[derive(Default)]
 struct Temp {

From fc764b4b1742617b0b90a36ef2df6a6332524edd Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Mon, 15 Jul 2024 23:34:17 +0200
Subject: [PATCH 176/226] Introduce DynComputePipeline & DynRenderPipeline

---
 wgpu-hal/src/dx12/mod.rs        |   4 +
 wgpu-hal/src/dynamic/command.rs | 215 +++++++++++++++++++++++++++++++-
 wgpu-hal/src/dynamic/mod.rs     |   2 +
 wgpu-hal/src/empty.rs           |   2 +
 wgpu-hal/src/gles/mod.rs        |   4 +
 wgpu-hal/src/lib.rs             |   9 +-
 wgpu-hal/src/metal/mod.rs       |   4 +
 wgpu-hal/src/vulkan/mod.rs      |   4 +
 8 files changed, 238 insertions(+), 6 deletions(-)

diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index aed7f1e9c6..c9ca02f977 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -629,6 +629,8 @@ pub struct RenderPipeline {
     vertex_strides: [Option<NonZeroU32>; crate::MAX_VERTEX_BUFFERS],
 }
 
+impl crate::DynRenderPipeline for RenderPipeline {}
+
 unsafe impl Send for RenderPipeline {}
 unsafe impl Sync for RenderPipeline {}
 
@@ -638,6 +640,8 @@ pub struct ComputePipeline {
     layout: PipelineLayoutShared,
 }
 
+impl crate::DynComputePipeline for ComputePipeline {}
+
 unsafe impl Send for ComputePipeline {}
 unsafe impl Sync for ComputePipeline {}
 
diff --git a/wgpu-hal/src/dynamic/command.rs b/wgpu-hal/src/dynamic/command.rs
index 05e231f1a0..b95aefa066 100644
--- a/wgpu-hal/src/dynamic/command.rs
+++ b/wgpu-hal/src/dynamic/command.rs
@@ -1,11 +1,13 @@
 use std::ops::Range;
 
 use crate::{
-    BufferBarrier, BufferBinding, BufferCopy, CommandEncoder, DeviceError, DynBindGroup,
-    DynPipelineLayout, DynQuerySet, Label, MemoryRange,
+    BufferBarrier, BufferBinding, BufferCopy, CommandEncoder, DeviceError, Label, MemoryRange, Rect,
 };
 
-use super::{DynBuffer, DynResourceExt as _};
+use super::{
+    DynBindGroup, DynBuffer, DynComputePipeline, DynPipelineLayout, DynQuerySet, DynRenderPipeline,
+    DynResourceExt as _,
+};
 
 pub trait DynCommandEncoder: std::fmt::Debug {
     unsafe fn begin_encoding(&mut self, label: Label) -> Result<(), DeviceError>;
@@ -56,6 +58,8 @@ pub trait DynCommandEncoder: std::fmt::Debug {
         stride: wgt::BufferSize,
     );
 
+    unsafe fn set_render_pipeline(&mut self, pipeline: &dyn DynRenderPipeline);
+
     unsafe fn set_index_buffer<'a>(
         &mut self,
         binding: BufferBinding<'a, dyn DynBuffer>,
@@ -67,6 +71,75 @@ pub trait DynCommandEncoder: std::fmt::Debug {
         index: u32,
         binding: BufferBinding<'a, dyn DynBuffer>,
     );
+    unsafe fn set_viewport(&mut self, rect: &Rect<f32>, depth_range: Range<f32>);
+    unsafe fn set_scissor_rect(&mut self, rect: &Rect<u32>);
+    unsafe fn set_stencil_reference(&mut self, value: u32);
+    unsafe fn set_blend_constants(&mut self, color: &[f32; 4]);
+
+    unsafe fn draw(
+        &mut self,
+        first_vertex: u32,
+        vertex_count: u32,
+        first_instance: u32,
+        instance_count: u32,
+    );
+    unsafe fn draw_indexed(
+        &mut self,
+        first_index: u32,
+        index_count: u32,
+        base_vertex: i32,
+        first_instance: u32,
+        instance_count: u32,
+    );
+    unsafe fn draw_indirect(
+        &mut self,
+        buffer: &dyn DynBuffer,
+        offset: wgt::BufferAddress,
+        draw_count: u32,
+    );
+    unsafe fn draw_indexed_indirect(
+        &mut self,
+        buffer: &dyn DynBuffer,
+        offset: wgt::BufferAddress,
+        draw_count: u32,
+    );
+    unsafe fn draw_indirect_count(
+        &mut self,
+        buffer: &dyn DynBuffer,
+        offset: wgt::BufferAddress,
+        count_buffer: &dyn DynBuffer,
+        count_offset: wgt::BufferAddress,
+        max_count: u32,
+    );
+    unsafe fn draw_indexed_indirect_count(
+        &mut self,
+        buffer: &dyn DynBuffer,
+        offset: wgt::BufferAddress,
+        count_buffer: &dyn DynBuffer,
+        count_offset: wgt::BufferAddress,
+        max_count: u32,
+    );
+
+    // unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor<Self::A>);
+    // unsafe fn end_compute_pass(&mut self);
+
+    unsafe fn set_compute_pipeline(&mut self, pipeline: &dyn DynComputePipeline);
+
+    unsafe fn dispatch(&mut self, count: [u32; 3]);
+    unsafe fn dispatch_indirect(&mut self, buffer: &dyn DynBuffer, offset: wgt::BufferAddress);
+
+    // unsafe fn build_acceleration_structures<'a, T>(
+    //     &mut self,
+    //     descriptor_count: u32,
+    //     descriptors: T,
+    // ) where
+    //     Self::A: 'a,
+    //     T: IntoIterator<Item = BuildAccelerationStructureDescriptor<'a, Self::A>>;
+
+    // unsafe fn place_acceleration_structure_barrier(
+    //     &mut self,
+    //     barrier: AccelerationStructureBarrier,
+    // );
 }
 
 impl<C: CommandEncoder> DynCommandEncoder for C {
@@ -178,6 +251,142 @@ impl<C: CommandEncoder> DynCommandEncoder for C {
         unsafe { C::copy_query_results(self, set, range, buffer, offset, stride) };
     }
 
+    unsafe fn set_viewport(&mut self, rect: &Rect<f32>, depth_range: Range<f32>) {
+        unsafe {
+            C::set_viewport(self, rect, depth_range);
+        }
+    }
+
+    unsafe fn set_scissor_rect(&mut self, rect: &Rect<u32>) {
+        unsafe {
+            C::set_scissor_rect(self, rect);
+        }
+    }
+
+    unsafe fn set_stencil_reference(&mut self, value: u32) {
+        unsafe {
+            C::set_stencil_reference(self, value);
+        }
+    }
+
+    unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) {
+        unsafe { C::set_blend_constants(self, color) };
+    }
+
+    unsafe fn draw(
+        &mut self,
+        first_vertex: u32,
+        vertex_count: u32,
+        first_instance: u32,
+        instance_count: u32,
+    ) {
+        unsafe {
+            C::draw(
+                self,
+                first_vertex,
+                vertex_count,
+                first_instance,
+                instance_count,
+            )
+        };
+    }
+
+    unsafe fn draw_indexed(
+        &mut self,
+        first_index: u32,
+        index_count: u32,
+        base_vertex: i32,
+        first_instance: u32,
+        instance_count: u32,
+    ) {
+        unsafe {
+            C::draw_indexed(
+                self,
+                first_index,
+                index_count,
+                base_vertex,
+                first_instance,
+                instance_count,
+            )
+        };
+    }
+
+    unsafe fn draw_indirect(
+        &mut self,
+        buffer: &dyn DynBuffer,
+        offset: wgt::BufferAddress,
+        draw_count: u32,
+    ) {
+        let buffer = buffer.expect_downcast_ref();
+        unsafe { C::draw_indirect(self, buffer, offset, draw_count) };
+    }
+
+    unsafe fn draw_indexed_indirect(
+        &mut self,
+        buffer: &dyn DynBuffer,
+        offset: wgt::BufferAddress,
+        draw_count: u32,
+    ) {
+        let buffer = buffer.expect_downcast_ref();
+        unsafe { C::draw_indexed_indirect(self, buffer, offset, draw_count) };
+    }
+
+    unsafe fn draw_indirect_count(
+        &mut self,
+        buffer: &dyn DynBuffer,
+        offset: wgt::BufferAddress,
+        count_buffer: &dyn DynBuffer,
+        count_offset: wgt::BufferAddress,
+        max_count: u32,
+    ) {
+        let buffer = buffer.expect_downcast_ref();
+        let count_buffer = count_buffer.expect_downcast_ref();
+        unsafe {
+            C::draw_indirect_count(self, buffer, offset, count_buffer, count_offset, max_count)
+        };
+    }
+
+    unsafe fn draw_indexed_indirect_count(
+        &mut self,
+        buffer: &dyn DynBuffer,
+        offset: wgt::BufferAddress,
+        count_buffer: &dyn DynBuffer,
+        count_offset: wgt::BufferAddress,
+        max_count: u32,
+    ) {
+        let buffer = buffer.expect_downcast_ref();
+        let count_buffer = count_buffer.expect_downcast_ref();
+        unsafe {
+            C::draw_indexed_indirect_count(
+                self,
+                buffer,
+                offset,
+                count_buffer,
+                count_offset,
+                max_count,
+            )
+        };
+    }
+
+    unsafe fn set_compute_pipeline(&mut self, pipeline: &dyn DynComputePipeline) {
+        let pipeline = pipeline.expect_downcast_ref();
+        unsafe { C::set_compute_pipeline(self, pipeline) };
+    }
+
+    unsafe fn dispatch(&mut self, count: [u32; 3]) {
+        unsafe { C::dispatch(self, count) };
+    }
+
+    unsafe fn dispatch_indirect(&mut self, buffer: &dyn DynBuffer, offset: wgt::BufferAddress) {
+        let buffer = buffer.expect_downcast_ref();
+        unsafe { C::dispatch_indirect(self, buffer, offset) };
+    }
+
+    unsafe fn set_render_pipeline(&mut self, pipeline: &dyn DynRenderPipeline) {
+        let pipeline = pipeline.expect_downcast_ref();
+        unsafe { C::set_render_pipeline(self, pipeline) };
+    }
+
     unsafe fn set_index_buffer<'a>(
         &mut self,
         binding: BufferBinding<'a, dyn DynBuffer>,
diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
index 7275c176be..be342c42e9 100644
--- a/wgpu-hal/src/dynamic/mod.rs
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -90,6 +90,8 @@ pub trait DynBindGroup: DynResource + std::fmt::Debug {}
 pub trait DynBuffer: DynResource + std::fmt::Debug {}
 pub trait DynPipelineLayout: DynResource + std::fmt::Debug {}
 pub trait DynQuerySet: DynResource + std::fmt::Debug {}
+pub trait DynRenderPipeline: DynResource + std::fmt::Debug {}
+pub trait DynComputePipeline: DynResource + std::fmt::Debug {}
 
 impl<'a> BufferBinding<'a, dyn DynBuffer> {
     pub fn expect_downcast<B: DynBuffer>(self) -> BufferBinding<'a, B> {
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 51f751c213..06325e9a96 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -46,6 +46,8 @@ impl crate::DynBindGroup for Resource {}
 impl crate::DynBuffer for Resource {}
 impl crate::DynPipelineLayout for Resource {}
 impl crate::DynQuerySet for Resource {}
+impl crate::DynRenderPipeline for Resource {}
+impl crate::DynComputePipeline for Resource {}
 
 impl crate::Instance for Context {
     type A = Api;
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index e79a32097b..940a8e6791 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -653,6 +653,8 @@ pub struct RenderPipeline {
     alpha_to_coverage_enabled: bool,
 }
 
+impl crate::DynRenderPipeline for RenderPipeline {}
+
 #[cfg(send_sync)]
 unsafe impl Sync for RenderPipeline {}
 #[cfg(send_sync)]
@@ -663,6 +665,8 @@ pub struct ComputePipeline {
     inner: Arc<PipelineInner>,
 }
 
+impl crate::DynComputePipeline for ComputePipeline {}
+
 #[cfg(send_sync)]
 unsafe impl Sync for ComputePipeline {}
 #[cfg(send_sync)]
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 29e6e4a35f..e348d86c22 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -265,7 +265,10 @@ pub mod api {
 mod dynamic;
 
 pub(crate) use dynamic::{impl_dyn_resource, DynResource};
-pub use dynamic::{DynBindGroup, DynBuffer, DynCommandEncoder, DynPipelineLayout, DynQuerySet};
+pub use dynamic::{
+    DynBindGroup, DynBuffer, DynCommandEncoder, DynComputePipeline, DynPipelineLayout, DynQuerySet,
+    DynRenderPipeline,
+};
 
 use std::{
     borrow::{Borrow, Cow},
@@ -434,8 +437,8 @@ pub trait Api: Clone + fmt::Debug + Sized {
     type BindGroup: DynBindGroup;
     type PipelineLayout: DynPipelineLayout;
     type ShaderModule: fmt::Debug + WasmNotSendSync;
-    type RenderPipeline: fmt::Debug + WasmNotSendSync;
-    type ComputePipeline: fmt::Debug + WasmNotSendSync;
+    type RenderPipeline: DynRenderPipeline;
+    type ComputePipeline: DynComputePipeline;
     type PipelineCache: fmt::Debug + WasmNotSendSync;
 
     type AccelerationStructure: fmt::Debug + WasmNotSendSync + 'static;
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index e25563a43f..be4b7c23bd 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -767,6 +767,8 @@ pub struct RenderPipeline {
 unsafe impl Send for RenderPipeline {}
 unsafe impl Sync for RenderPipeline {}
 
+impl crate::DynRenderPipeline for RenderPipeline {}
+
 #[derive(Debug)]
 pub struct ComputePipeline {
     raw: metal::ComputePipelineState,
@@ -780,6 +782,8 @@ pub struct ComputePipeline {
 unsafe impl Send for ComputePipeline {}
 unsafe impl Sync for ComputePipeline {}
 
+impl crate::DynComputePipeline for ComputePipeline {}
+
 #[derive(Debug, Clone)]
 pub struct QuerySet {
     raw_buffer: metal::Buffer,
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index ada874d81a..b8b6690574 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -822,11 +822,15 @@ pub struct RenderPipeline {
     raw: vk::Pipeline,
 }
 
+impl crate::DynRenderPipeline for RenderPipeline {}
+
 #[derive(Debug)]
 pub struct ComputePipeline {
     raw: vk::Pipeline,
 }
 
+impl crate::DynComputePipeline for ComputePipeline {}
+
 #[derive(Debug)]
 pub struct PipelineCache {
     raw: vk::PipelineCache,

From bea9a10f9092b87c2b3efb3161200d60f07d35a7 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Mon, 15 Jul 2024 23:42:39 +0200
Subject: [PATCH 177/226] fold ComputePassTimestampWrites &
 RenderPassTimestampWrites into PassTimestampWrites and make it usable with
 DynQuerySet

---
 wgpu-core/src/command/compute.rs |  2 +-
 wgpu-core/src/command/render.rs  |  2 +-
 wgpu-hal/src/lib.rs              | 39 ++++----------------------------
 3 files changed, 7 insertions(+), 36 deletions(-)

diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs
index 39fe1d91d1..c31db544d1 100644
--- a/wgpu-core/src/command/compute.rs
+++ b/wgpu-core/src/command/compute.rs
@@ -511,7 +511,7 @@ impl Global {
                 }
             }
 
-            Some(hal::ComputePassTimestampWrites {
+            Some(hal::PassTimestampWrites {
                 query_set: query_set.raw(),
                 beginning_of_pass_write_index: tw.beginning_of_pass_write_index,
                 end_of_pass_write_index: tw.end_of_pass_write_index,
diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index 86a9eef26f..a943a902af 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -1192,7 +1192,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
                 pending_query_resets.use_query_set(query_set, index);
             }
 
-            Some(hal::RenderPassTimestampWrites {
+            Some(hal::PassTimestampWrites {
                 query_set: query_set.raw(),
                 beginning_of_pass_write_index: tw.beginning_of_pass_write_index,
                 end_of_pass_write_index: tw.end_of_pass_write_index,
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index e348d86c22..07ce7c7e83 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -2074,24 +2074,13 @@ pub struct DepthStencilAttachment<'a, A: Api> {
     pub clear_value: (f32, u32),
 }
 
-#[derive(Debug)]
-pub struct RenderPassTimestampWrites<'a, A: Api> {
-    pub query_set: &'a A::QuerySet,
+#[derive(Clone, Debug)]
+pub struct PassTimestampWrites<'a, Q: DynQuerySet + ?Sized> {
+    pub query_set: &'a Q,
     pub beginning_of_pass_write_index: Option<u32>,
     pub end_of_pass_write_index: Option<u32>,
 }
 
-// Rust gets confused about the impl requirements for `A`
-impl<A: Api> Clone for RenderPassTimestampWrites<'_, A> {
-    fn clone(&self) -> Self {
-        Self {
-            query_set: self.query_set,
-            beginning_of_pass_write_index: self.beginning_of_pass_write_index,
-            end_of_pass_write_index: self.end_of_pass_write_index,
-        }
-    }
-}
-
 #[derive(Clone, Debug)]
 pub struct RenderPassDescriptor<'a, A: Api> {
     pub label: Label<'a>,
@@ -2100,32 +2089,14 @@ pub struct RenderPassDescriptor<'a, A: Api> {
     pub color_attachments: &'a [Option<ColorAttachment<'a, A>>],
     pub depth_stencil_attachment: Option<DepthStencilAttachment<'a, A>>,
     pub multiview: Option<NonZeroU32>,
-    pub timestamp_writes: Option<RenderPassTimestampWrites<'a, A>>,
+    pub timestamp_writes: Option<PassTimestampWrites<'a, A::QuerySet>>,
     pub occlusion_query_set: Option<&'a A::QuerySet>,
 }
 
-#[derive(Debug)]
-pub struct ComputePassTimestampWrites<'a, A: Api> {
-    pub query_set: &'a A::QuerySet,
-    pub beginning_of_pass_write_index: Option<u32>,
-    pub end_of_pass_write_index: Option<u32>,
-}
-
-// Rust gets confused about the impl requirements for `A`
-impl<A: Api> Clone for ComputePassTimestampWrites<'_, A> {
-    fn clone(&self) -> Self {
-        Self {
-            query_set: self.query_set,
-            beginning_of_pass_write_index: self.beginning_of_pass_write_index,
-            end_of_pass_write_index: self.end_of_pass_write_index,
-        }
-    }
-}
-
 #[derive(Clone, Debug)]
 pub struct ComputePassDescriptor<'a, A: Api> {
     pub label: Label<'a>,
-    pub timestamp_writes: Option<ComputePassTimestampWrites<'a, A>>,
+    pub timestamp_writes: Option<PassTimestampWrites<'a, A::QuerySet>>,
 }
 
 /// Stores the text of any validation errors that have occurred since

From bdf6710d588754b38ee976023acfac1e9cb022a8 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Wed, 17 Jul 2024 09:39:29 +0200
Subject: [PATCH 178/226] introduce DynTexture & DynTextureView

---
 wgpu-hal/src/dx12/mod.rs    | 4 ++++
 wgpu-hal/src/dynamic/mod.rs | 4 +++-
 wgpu-hal/src/empty.rs       | 4 +++-
 wgpu-hal/src/gles/mod.rs    | 4 ++++
 wgpu-hal/src/lib.rs         | 6 +++---
 wgpu-hal/src/metal/mod.rs   | 4 ++++
 wgpu-hal/src/vulkan/mod.rs  | 4 ++++
 7 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index c9ca02f977..3dbb58abb0 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -452,6 +452,8 @@ pub struct Texture {
     allocation: Option<suballocation::AllocationWrapper>,
 }
 
+impl crate::DynTexture for Texture {}
+
 unsafe impl Send for Texture {}
 unsafe impl Sync for Texture {}
 
@@ -491,6 +493,8 @@ pub struct TextureView {
     handle_dsv_rw: Option<descriptor::Handle>,
 }
 
+impl crate::DynTextureView for TextureView {}
+
 unsafe impl Send for TextureView {}
 unsafe impl Sync for TextureView {}
 
diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
index be342c42e9..10cea187c5 100644
--- a/wgpu-hal/src/dynamic/mod.rs
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -88,10 +88,12 @@ impl<R: DynResource + ?Sized> DynResourceExt for R {
 
 pub trait DynBindGroup: DynResource + std::fmt::Debug {}
 pub trait DynBuffer: DynResource + std::fmt::Debug {}
+pub trait DynComputePipeline: DynResource + std::fmt::Debug {}
 pub trait DynPipelineLayout: DynResource + std::fmt::Debug {}
 pub trait DynQuerySet: DynResource + std::fmt::Debug {}
 pub trait DynRenderPipeline: DynResource + std::fmt::Debug {}
-pub trait DynComputePipeline: DynResource + std::fmt::Debug {}
+pub trait DynTexture: DynResource + std::fmt::Debug {}
+pub trait DynTextureView: DynResource + std::fmt::Debug {}
 
 impl<'a> BufferBinding<'a, dyn DynBuffer> {
     pub fn expect_downcast<B: DynBuffer>(self) -> BufferBinding<'a, B> {
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 06325e9a96..f63df91e9d 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -44,10 +44,12 @@ crate::impl_dyn_resource!(Context, Encoder, Resource);
 
 impl crate::DynBindGroup for Resource {}
 impl crate::DynBuffer for Resource {}
+impl crate::DynComputePipeline for Resource {}
 impl crate::DynPipelineLayout for Resource {}
 impl crate::DynQuerySet for Resource {}
 impl crate::DynRenderPipeline for Resource {}
-impl crate::DynComputePipeline for Resource {}
+impl crate::DynTexture for Resource {}
+impl crate::DynTextureView for Resource {}
 
 impl crate::Instance for Context {
     type A = Api;
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index 940a8e6791..5709735202 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -373,6 +373,8 @@ pub struct Texture {
     pub copy_size: CopyExtent,
 }
 
+impl crate::DynTexture for Texture {}
+
 impl Texture {
     pub fn default_framebuffer(format: wgt::TextureFormat) -> Self {
         Self {
@@ -460,6 +462,8 @@ pub struct TextureView {
     format: wgt::TextureFormat,
 }
 
+impl crate::DynTextureView for TextureView {}
+
 #[derive(Debug)]
 pub struct Sampler {
     raw: glow::Sampler,
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 07ce7c7e83..ab119fc357 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -267,7 +267,7 @@ mod dynamic;
 pub(crate) use dynamic::{impl_dyn_resource, DynResource};
 pub use dynamic::{
     DynBindGroup, DynBuffer, DynCommandEncoder, DynComputePipeline, DynPipelineLayout, DynQuerySet,
-    DynRenderPipeline,
+    DynRenderPipeline, DynTexture, DynTextureView,
 };
 
 use std::{
@@ -408,9 +408,9 @@ pub trait Api: Clone + fmt::Debug + Sized {
     type CommandBuffer: WasmNotSendSync + fmt::Debug;
 
     type Buffer: DynBuffer;
-    type Texture: fmt::Debug + WasmNotSendSync + 'static;
+    type Texture: DynTexture;
     type SurfaceTexture: fmt::Debug + WasmNotSendSync + Borrow<Self::Texture>;
-    type TextureView: fmt::Debug + WasmNotSendSync;
+    type TextureView: DynTextureView;
     type Sampler: fmt::Debug + WasmNotSendSync;
     type QuerySet: DynQuerySet;
 
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index be4b7c23bd..a7282ca4cd 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -505,6 +505,8 @@ pub struct Texture {
     copy_size: crate::CopyExtent,
 }
 
+impl crate::DynTexture for Texture {}
+
 unsafe impl Send for Texture {}
 unsafe impl Sync for Texture {}
 
@@ -514,6 +516,8 @@ pub struct TextureView {
     aspects: crate::FormatAspects,
 }
 
+impl crate::DynTextureView for TextureView {}
+
 unsafe impl Send for TextureView {}
 unsafe impl Sync for TextureView {}
 
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index b8b6690574..2d4a971739 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -671,6 +671,8 @@ pub struct Texture {
     view_formats: Vec<wgt::TextureFormat>,
 }
 
+impl crate::DynTexture for Texture {}
+
 impl Texture {
     /// # Safety
     ///
@@ -687,6 +689,8 @@ pub struct TextureView {
     attachment: FramebufferAttachment,
 }
 
+impl crate::DynTextureView for TextureView {}
+
 impl TextureView {
     /// # Safety
     ///

From a47a0cb3d9de5f91439751b6a7ce5273d4265436 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Wed, 17 Jul 2024 09:51:53 +0200
Subject: [PATCH 179/226] render/compute pass descriptors now work with dyn
 types

---
 wgpu-core/src/command/render.rs |  2 +-
 wgpu-hal/src/dx12/command.rs    |  7 ++--
 wgpu-hal/src/empty.rs           |  5 +--
 wgpu-hal/src/gles/command.rs    |  7 ++--
 wgpu-hal/src/lib.rs             | 64 +++++++++++++--------------------
 wgpu-hal/src/metal/command.rs   |  7 ++--
 wgpu-hal/src/vulkan/command.rs  | 10 ++++--
 wgpu-hal/src/vulkan/conv.rs     |  4 +--
 8 files changed, 53 insertions(+), 53 deletions(-)

diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index a943a902af..73ce837ba9 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -1041,7 +1041,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
         }
 
         let mut color_attachments_hal =
-            ArrayVec::<Option<hal::ColorAttachment<A>>, { hal::MAX_COLOR_ATTACHMENTS }>::new();
+            ArrayVec::<Option<hal::ColorAttachment<_>>, { hal::MAX_COLOR_ATTACHMENTS }>::new();
         for (index, attachment) in color_attachments.iter().enumerate() {
             let at = if let Some(attachment) = attachment.as_ref() {
                 attachment
diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs
index c5978e55e4..bb50720f9a 100644
--- a/wgpu-hal/src/dx12/command.rs
+++ b/wgpu-hal/src/dx12/command.rs
@@ -661,7 +661,10 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     // render
 
-    unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) {
+    unsafe fn begin_render_pass(
+        &mut self,
+        desc: &crate::RenderPassDescriptor<super::QuerySet, super::TextureView>,
+    ) {
         unsafe { self.begin_pass(super::PassKind::Render, desc.label) };
 
         // Start timestamp if any (before all other commands but after debug marker)
@@ -1130,7 +1133,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     unsafe fn begin_compute_pass<'a>(
         &mut self,
-        desc: &crate::ComputePassDescriptor<'a, super::Api>,
+        desc: &crate::ComputePassDescriptor<'a, super::QuerySet>,
     ) {
         unsafe { self.begin_pass(super::PassKind::Compute, desc.label) };
 
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index f63df91e9d..7a9cc9e714 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -370,7 +370,8 @@ impl crate::CommandEncoder for Encoder {
 
     // render
 
-    unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<Api>) {}
+    unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<Resource, Resource>) {
+    }
     unsafe fn end_render_pass(&mut self) {}
 
     unsafe fn set_bind_group(
@@ -465,7 +466,7 @@ impl crate::CommandEncoder for Encoder {
 
     // compute
 
-    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<Api>) {}
+    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<Resource>) {}
     unsafe fn end_compute_pass(&mut self) {}
 
     unsafe fn set_compute_pipeline(&mut self, pipeline: &Resource) {}
diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs
index e7e3714038..eb452e598b 100644
--- a/wgpu-hal/src/gles/command.rs
+++ b/wgpu-hal/src/gles/command.rs
@@ -494,7 +494,10 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     // render
 
-    unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) {
+    unsafe fn begin_render_pass(
+        &mut self,
+        desc: &crate::RenderPassDescriptor<super::QuerySet, super::TextureView>,
+    ) {
         debug_assert!(self.state.end_of_pass_timestamp.is_none());
         if let Some(ref t) = desc.timestamp_writes {
             if let Some(index) = t.beginning_of_pass_write_index {
@@ -1137,7 +1140,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     // compute
 
-    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<super::Api>) {
+    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<super::QuerySet>) {
         debug_assert!(self.state.end_of_pass_timestamp.is_none());
         if let Some(ref t) = desc.timestamp_writes {
             if let Some(index) = t.beginning_of_pass_write_index {
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index ab119fc357..d14d33771a 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -1231,7 +1231,10 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug {
     // render passes
 
     // Begins a render pass, clears all active bindings.
-    unsafe fn begin_render_pass(&mut self, desc: &RenderPassDescriptor<Self::A>);
+    unsafe fn begin_render_pass(
+        &mut self,
+        desc: &RenderPassDescriptor<<Self::A as Api>::QuerySet, <Self::A as Api>::TextureView>,
+    );
     unsafe fn end_render_pass(&mut self);
 
     unsafe fn set_render_pipeline(&mut self, pipeline: &<Self::A as Api>::RenderPipeline);
@@ -1298,7 +1301,10 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug {
     // compute passes
 
     // Begins a compute pass, clears all active bindings.
-    unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor<Self::A>);
+    unsafe fn begin_compute_pass(
+        &mut self,
+        desc: &ComputePassDescriptor<<Self::A as Api>::QuerySet>,
+    );
     unsafe fn end_compute_pass(&mut self);
 
     unsafe fn set_compute_pipeline(&mut self, pipeline: &<Self::A as Api>::ComputePipeline);
@@ -2028,47 +2034,25 @@ pub struct BufferTextureCopy {
     pub size: CopyExtent,
 }
 
-#[derive(Debug)]
-pub struct Attachment<'a, A: Api> {
-    pub view: &'a A::TextureView,
+#[derive(Clone, Debug)]
+pub struct Attachment<'a, T: DynTextureView + ?Sized> {
+    pub view: &'a T,
     /// Contains either a single mutating usage as a target,
     /// or a valid combination of read-only usages.
     pub usage: TextureUses,
 }
 
-// Rust gets confused about the impl requirements for `A`
-impl<A: Api> Clone for Attachment<'_, A> {
-    fn clone(&self) -> Self {
-        Self {
-            view: self.view,
-            usage: self.usage,
-        }
-    }
-}
-
-#[derive(Debug)]
-pub struct ColorAttachment<'a, A: Api> {
-    pub target: Attachment<'a, A>,
-    pub resolve_target: Option<Attachment<'a, A>>,
+#[derive(Clone, Debug)]
+pub struct ColorAttachment<'a, T: DynTextureView + ?Sized> {
+    pub target: Attachment<'a, T>,
+    pub resolve_target: Option<Attachment<'a, T>>,
     pub ops: AttachmentOps,
     pub clear_value: wgt::Color,
 }
 
-// Rust gets confused about the impl requirements for `A`
-impl<A: Api> Clone for ColorAttachment<'_, A> {
-    fn clone(&self) -> Self {
-        Self {
-            target: self.target.clone(),
-            resolve_target: self.resolve_target.clone(),
-            ops: self.ops,
-            clear_value: self.clear_value,
-        }
-    }
-}
-
 #[derive(Clone, Debug)]
-pub struct DepthStencilAttachment<'a, A: Api> {
-    pub target: Attachment<'a, A>,
+pub struct DepthStencilAttachment<'a, T: DynTextureView + ?Sized> {
+    pub target: Attachment<'a, T>,
     pub depth_ops: AttachmentOps,
     pub stencil_ops: AttachmentOps,
     pub clear_value: (f32, u32),
@@ -2082,21 +2066,21 @@ pub struct PassTimestampWrites<'a, Q: DynQuerySet + ?Sized> {
 }
 
 #[derive(Clone, Debug)]
-pub struct RenderPassDescriptor<'a, A: Api> {
+pub struct RenderPassDescriptor<'a, Q: DynQuerySet + ?Sized, T: DynTextureView + ?Sized> {
     pub label: Label<'a>,
     pub extent: wgt::Extent3d,
     pub sample_count: u32,
-    pub color_attachments: &'a [Option<ColorAttachment<'a, A>>],
-    pub depth_stencil_attachment: Option<DepthStencilAttachment<'a, A>>,
+    pub color_attachments: &'a [Option<ColorAttachment<'a, T>>],
+    pub depth_stencil_attachment: Option<DepthStencilAttachment<'a, T>>,
     pub multiview: Option<NonZeroU32>,
-    pub timestamp_writes: Option<PassTimestampWrites<'a, A::QuerySet>>,
-    pub occlusion_query_set: Option<&'a A::QuerySet>,
+    pub timestamp_writes: Option<PassTimestampWrites<'a, Q>>,
+    pub occlusion_query_set: Option<&'a Q>,
 }
 
 #[derive(Clone, Debug)]
-pub struct ComputePassDescriptor<'a, A: Api> {
+pub struct ComputePassDescriptor<'a, Q: DynQuerySet + ?Sized> {
     pub label: Label<'a>,
-    pub timestamp_writes: Option<PassTimestampWrites<'a, A::QuerySet>>,
+    pub timestamp_writes: Option<PassTimestampWrites<'a, Q>>,
 }
 
 /// Stores the text of any validation errors that have occurred since
diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs
index db19727a5f..204f5328c5 100644
--- a/wgpu-hal/src/metal/command.rs
+++ b/wgpu-hal/src/metal/command.rs
@@ -501,7 +501,10 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     // render
 
-    unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) {
+    unsafe fn begin_render_pass(
+        &mut self,
+        desc: &crate::RenderPassDescriptor<super::QuerySet, super::TextureView>,
+    ) {
         self.begin_pass();
         self.state.index = None;
 
@@ -1128,7 +1131,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     // compute
 
-    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<super::Api>) {
+    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<super::QuerySet>) {
         self.begin_pass();
 
         debug_assert!(self.state.blit.is_none());
diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs
index ddce6c24ed..79cafac7c0 100644
--- a/wgpu-hal/src/vulkan/command.rs
+++ b/wgpu-hal/src/vulkan/command.rs
@@ -644,7 +644,10 @@ impl crate::CommandEncoder for super::CommandEncoder {
     }
     // render
 
-    unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) {
+    unsafe fn begin_render_pass(
+        &mut self,
+        desc: &crate::RenderPassDescriptor<super::QuerySet, super::TextureView>,
+    ) {
         let mut vk_clear_values =
             ArrayVec::<vk::ClearValue, { super::MAX_TOTAL_ATTACHMENTS }>::new();
         let mut vk_image_views = ArrayVec::<vk::ImageView, { super::MAX_TOTAL_ATTACHMENTS }>::new();
@@ -1067,7 +1070,10 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     // compute
 
-    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<'_, super::Api>) {
+    unsafe fn begin_compute_pass(
+        &mut self,
+        desc: &crate::ComputePassDescriptor<'_, super::QuerySet>,
+    ) {
         self.bind_point = vk::PipelineBindPoint::COMPUTE;
         if let Some(label) = desc.label {
             unsafe { self.begin_debug_marker(label) };
diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs
index 87757c42d5..38642ba082 100644
--- a/wgpu-hal/src/vulkan/conv.rs
+++ b/wgpu-hal/src/vulkan/conv.rs
@@ -178,7 +178,7 @@ pub fn map_vk_surface_formats(sf: vk::SurfaceFormatKHR) -> Option<wgt::TextureFo
     })
 }
 
-impl crate::Attachment<'_, super::Api> {
+impl crate::Attachment<'_, super::TextureView> {
     pub(super) fn make_attachment_key(
         &self,
         ops: crate::AttachmentOps,
@@ -192,7 +192,7 @@ impl crate::Attachment<'_, super::Api> {
     }
 }
 
-impl crate::ColorAttachment<'_, super::Api> {
+impl crate::ColorAttachment<'_, super::TextureView> {
     pub(super) unsafe fn make_vk_clear_color(&self) -> vk::ClearColorValue {
         let cv = &self.clear_value;
         match self

From eeaf27749c437d6c0bc9eac5024f2662ee032f0d Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sat, 20 Jul 2024 10:13:17 +0200
Subject: [PATCH 180/226] Implement begin/end render/compute pass for
 DynCommandEncoder

---
 wgpu-hal/src/dynamic/command.rs | 114 ++++++++++++++++++++++++++++++--
 1 file changed, 110 insertions(+), 4 deletions(-)

diff --git a/wgpu-hal/src/dynamic/command.rs b/wgpu-hal/src/dynamic/command.rs
index b95aefa066..6b5b54fd2b 100644
--- a/wgpu-hal/src/dynamic/command.rs
+++ b/wgpu-hal/src/dynamic/command.rs
@@ -1,12 +1,14 @@
 use std::ops::Range;
 
 use crate::{
-    BufferBarrier, BufferBinding, BufferCopy, CommandEncoder, DeviceError, Label, MemoryRange, Rect,
+    Api, Attachment, BufferBarrier, BufferBinding, BufferCopy, ColorAttachment, CommandEncoder,
+    ComputePassDescriptor, DepthStencilAttachment, DeviceError, Label, MemoryRange,
+    PassTimestampWrites, Rect, RenderPassDescriptor,
 };
 
 use super::{
     DynBindGroup, DynBuffer, DynComputePipeline, DynPipelineLayout, DynQuerySet, DynRenderPipeline,
-    DynResourceExt as _,
+    DynResourceExt as _, DynTextureView,
 };
 
 pub trait DynCommandEncoder: std::fmt::Debug {
@@ -58,6 +60,12 @@ pub trait DynCommandEncoder: std::fmt::Debug {
         stride: wgt::BufferSize,
     );
 
+    unsafe fn begin_render_pass(
+        &mut self,
+        desc: &RenderPassDescriptor<dyn DynQuerySet, dyn DynTextureView>,
+    );
+    unsafe fn end_render_pass(&mut self);
+
     unsafe fn set_render_pipeline(&mut self, pipeline: &dyn DynRenderPipeline);
 
     unsafe fn set_index_buffer<'a>(
@@ -120,8 +128,8 @@ pub trait DynCommandEncoder: std::fmt::Debug {
         max_count: u32,
     );
 
-    // unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor<Self::A>);
-    // unsafe fn end_compute_pass(&mut self);
+    unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor<dyn DynQuerySet>);
+    unsafe fn end_compute_pass(&mut self);
 
     unsafe fn set_compute_pipeline(&mut self, pipeline: &dyn DynComputePipeline);
 
@@ -251,6 +259,48 @@ impl<C: CommandEncoder> DynCommandEncoder for C {
         unsafe { C::copy_query_results(self, set, range, buffer, offset, stride) };
     }
 
+    unsafe fn begin_render_pass(
+        &mut self,
+        desc: &RenderPassDescriptor<dyn DynQuerySet, dyn DynTextureView>,
+    ) {
+        let color_attachments = desc
+            .color_attachments
+            .iter()
+            .map(|attachment| {
+                attachment
+                    .as_ref()
+                    .map(|attachment| attachment.expect_downcast())
+            })
+            .collect::<Vec<_>>();
+
+        let desc: RenderPassDescriptor<<C::A as Api>::QuerySet, <C::A as Api>::TextureView> =
+            RenderPassDescriptor {
+                label: desc.label,
+                extent: desc.extent,
+                sample_count: desc.sample_count,
+                color_attachments: &color_attachments,
+                depth_stencil_attachment: desc
+                    .depth_stencil_attachment
+                    .as_ref()
+                    .map(|ds| ds.expect_downcast()),
+                multiview: desc.multiview,
+                timestamp_writes: desc
+                    .timestamp_writes
+                    .as_ref()
+                    .map(|writes| writes.expect_downcast()),
+                occlusion_query_set: desc
+                    .occlusion_query_set
+                    .map(|set| set.expect_downcast_ref()),
+            };
+        unsafe { C::begin_render_pass(self, &desc) };
+    }
+
+    unsafe fn end_render_pass(&mut self) {
+        unsafe {
+            C::end_render_pass(self);
+        }
+    }
+
     unsafe fn set_viewport(&mut self, rect: &Rect<f32>, depth_range: Range<f32>) {
         unsafe {
             C::set_viewport(self, rect, depth_range);
@@ -368,6 +418,21 @@ impl<C: CommandEncoder> DynCommandEncoder for C {
         };
     }
 
+    unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor<dyn DynQuerySet>) {
+        let desc = ComputePassDescriptor {
+            label: desc.label,
+            timestamp_writes: desc
+                .timestamp_writes
+                .as_ref()
+                .map(|writes| writes.expect_downcast()),
+        };
+        unsafe { C::begin_compute_pass(self, &desc) };
+    }
+
+    unsafe fn end_compute_pass(&mut self) {
+        unsafe { C::end_compute_pass(self) };
+    }
+
     unsafe fn set_compute_pipeline(&mut self, pipeline: &dyn DynComputePipeline) {
         let pipeline = pipeline.expect_downcast_ref();
         unsafe { C::set_compute_pipeline(self, pipeline) };
@@ -405,3 +470,44 @@ impl<C: CommandEncoder> DynCommandEncoder for C {
         unsafe { self.set_vertex_buffer(index, binding) };
     }
 }
+
+impl<'a> PassTimestampWrites<'a, dyn DynQuerySet> {
+    pub fn expect_downcast<B: DynQuerySet>(&self) -> PassTimestampWrites<'a, B> {
+        PassTimestampWrites {
+            query_set: self.query_set.expect_downcast_ref(),
+            beginning_of_pass_write_index: self.beginning_of_pass_write_index,
+            end_of_pass_write_index: self.end_of_pass_write_index,
+        }
+    }
+}
+
+impl<'a> Attachment<'a, dyn DynTextureView> {
+    pub fn expect_downcast<B: DynTextureView>(&self) -> Attachment<'a, B> {
+        Attachment {
+            view: self.view.expect_downcast_ref(),
+            usage: self.usage,
+        }
+    }
+}
+
+impl<'a> ColorAttachment<'a, dyn DynTextureView> {
+    pub fn expect_downcast<B: DynTextureView>(&self) -> ColorAttachment<'a, B> {
+        ColorAttachment {
+            target: self.target.expect_downcast(),
+            resolve_target: self.resolve_target.as_ref().map(|rt| rt.expect_downcast()),
+            ops: self.ops,
+            clear_value: self.clear_value,
+        }
+    }
+}
+
+impl<'a> DepthStencilAttachment<'a, dyn DynTextureView> {
+    pub fn expect_downcast<B: DynTextureView>(&self) -> DepthStencilAttachment<'a, B> {
+        DepthStencilAttachment {
+            target: self.target.expect_downcast(),
+            depth_ops: self.depth_ops,
+            stencil_ops: self.stencil_ops,
+            clear_value: self.clear_value,
+        }
+    }
+}

From 39b408218f83bcf2ac076e28e704621f780638b4 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sat, 20 Jul 2024 10:46:13 +0200
Subject: [PATCH 181/226] implement transition_textures for DynCommandEncoder

---
 wgpu-core/src/track/mod.rs      |  5 ++++-
 wgpu-core/src/track/texture.rs  |  4 ++--
 wgpu-hal/src/dx12/command.rs    |  2 +-
 wgpu-hal/src/dynamic/command.rs | 14 ++++++++++++--
 wgpu-hal/src/empty.rs           |  2 +-
 wgpu-hal/src/gles/command.rs    |  2 +-
 wgpu-hal/src/lib.rs             |  6 +++---
 wgpu-hal/src/metal/command.rs   |  2 +-
 wgpu-hal/src/vulkan/command.rs  |  2 +-
 9 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs
index 5f7a868251..4fccb24abe 100644
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -272,7 +272,10 @@ impl PendingTransition<hal::BufferUses> {
 
 impl PendingTransition<hal::TextureUses> {
     /// Produce the hal barrier corresponding to the transition.
-    pub fn into_hal<'a, A: HalApi>(self, texture: &'a A::Texture) -> hal::TextureBarrier<'a, A> {
+    pub fn into_hal<'a, T: hal::DynTexture + ?Sized>(
+        self,
+        texture: &'a T,
+    ) -> hal::TextureBarrier<'a, T> {
         // These showing up in a barrier is always a bug
         strict_assert_ne!(self.usage.start, hal::TextureUses::UNKNOWN);
         strict_assert_ne!(self.usage.end, hal::TextureUses::UNKNOWN);
diff --git a/wgpu-core/src/track/texture.rs b/wgpu-core/src/track/texture.rs
index 243bd25207..f454c3e225 100644
--- a/wgpu-core/src/track/texture.rs
+++ b/wgpu-core/src/track/texture.rs
@@ -754,7 +754,7 @@ impl<A: HalApi> DeviceTextureTracker<A> {
         &'a mut self,
         tracker: &'a TextureTracker<A>,
         snatch_guard: &'b SnatchGuard<'b>,
-    ) -> impl Iterator<Item = TextureBarrier<'a, A>> {
+    ) -> impl Iterator<Item = TextureBarrier<'a, A::Texture>> {
         for index in tracker.metadata.owned_indices() {
             self.tracker_assert_in_bounds(index);
 
@@ -798,7 +798,7 @@ impl<A: HalApi> DeviceTextureTracker<A> {
         &'a mut self,
         scope: &'a TextureUsageScope<A>,
         snatch_guard: &'b SnatchGuard<'b>,
-    ) -> impl Iterator<Item = TextureBarrier<'a, A>> {
+    ) -> impl Iterator<Item = TextureBarrier<'a, A::Texture>> {
         for index in scope.metadata.owned_indices() {
             self.tracker_assert_in_bounds(index);
 
diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs
index bb50720f9a..cf2147d0ca 100644
--- a/wgpu-hal/src/dx12/command.rs
+++ b/wgpu-hal/src/dx12/command.rs
@@ -359,7 +359,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     unsafe fn transition_textures<'a, T>(&mut self, barriers: T)
     where
-        T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>,
+        T: Iterator<Item = crate::TextureBarrier<'a, super::Texture>>,
     {
         self.temp.barriers.clear();
 
diff --git a/wgpu-hal/src/dynamic/command.rs b/wgpu-hal/src/dynamic/command.rs
index 6b5b54fd2b..7f670b3fc7 100644
--- a/wgpu-hal/src/dynamic/command.rs
+++ b/wgpu-hal/src/dynamic/command.rs
@@ -3,12 +3,12 @@ use std::ops::Range;
 use crate::{
     Api, Attachment, BufferBarrier, BufferBinding, BufferCopy, ColorAttachment, CommandEncoder,
     ComputePassDescriptor, DepthStencilAttachment, DeviceError, Label, MemoryRange,
-    PassTimestampWrites, Rect, RenderPassDescriptor,
+    PassTimestampWrites, Rect, RenderPassDescriptor, TextureBarrier,
 };
 
 use super::{
     DynBindGroup, DynBuffer, DynComputePipeline, DynPipelineLayout, DynQuerySet, DynRenderPipeline,
-    DynResourceExt as _, DynTextureView,
+    DynResourceExt as _, DynTexture, DynTextureView,
 };
 
 pub trait DynCommandEncoder: std::fmt::Debug {
@@ -17,6 +17,7 @@ pub trait DynCommandEncoder: std::fmt::Debug {
     unsafe fn discard_encoding(&mut self);
 
     unsafe fn transition_buffers(&mut self, barriers: &[BufferBarrier<'_, dyn DynBuffer>]);
+    unsafe fn transition_textures(&mut self, barriers: &[TextureBarrier<'_, dyn DynTexture>]);
 
     unsafe fn clear_buffer(&mut self, buffer: &dyn DynBuffer, range: MemoryRange);
 
@@ -167,6 +168,15 @@ impl<C: CommandEncoder> DynCommandEncoder for C {
         unsafe { self.transition_buffers(barriers) };
     }
 
+    unsafe fn transition_textures(&mut self, barriers: &[TextureBarrier<'_, dyn DynTexture>]) {
+        let barriers = barriers.iter().map(|barrier| TextureBarrier {
+            texture: barrier.texture.expect_downcast_ref(),
+            usage: barrier.usage.clone(),
+            range: barrier.range,
+        });
+        unsafe { self.transition_textures(barriers) };
+    }
+
     unsafe fn clear_buffer(&mut self, buffer: &dyn DynBuffer, range: MemoryRange) {
         let buffer = buffer.expect_downcast_ref();
         unsafe { C::clear_buffer(self, buffer, range) };
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 7a9cc9e714..d39d41adca 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -314,7 +314,7 @@ impl crate::CommandEncoder for Encoder {
 
     unsafe fn transition_textures<'a, T>(&mut self, barriers: T)
     where
-        T: Iterator<Item = crate::TextureBarrier<'a, Api>>,
+        T: Iterator<Item = crate::TextureBarrier<'a, Resource>>,
     {
     }
 
diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs
index eb452e598b..cd62726050 100644
--- a/wgpu-hal/src/gles/command.rs
+++ b/wgpu-hal/src/gles/command.rs
@@ -298,7 +298,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     unsafe fn transition_textures<'a, T>(&mut self, barriers: T)
     where
-        T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>,
+        T: Iterator<Item = crate::TextureBarrier<'a, super::Texture>>,
     {
         if !self
             .private_caps
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index d14d33771a..60829f5e43 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -1110,7 +1110,7 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug {
 
     unsafe fn transition_textures<'a, T>(&mut self, barriers: T)
     where
-        T: Iterator<Item = TextureBarrier<'a, Self::A>>;
+        T: Iterator<Item = TextureBarrier<'a, <Self::A as Api>::Texture>>;
 
     // copy operations
 
@@ -1990,8 +1990,8 @@ pub struct BufferBarrier<'a, B: DynBuffer + ?Sized> {
 }
 
 #[derive(Debug, Clone)]
-pub struct TextureBarrier<'a, A: Api> {
-    pub texture: &'a A::Texture,
+pub struct TextureBarrier<'a, T: DynTexture + ?Sized> {
+    pub texture: &'a T,
     pub range: wgt::ImageSubresourceRange,
     pub usage: Range<TextureUses>,
 }
diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs
index 204f5328c5..22a72739d6 100644
--- a/wgpu-hal/src/metal/command.rs
+++ b/wgpu-hal/src/metal/command.rs
@@ -247,7 +247,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     unsafe fn transition_textures<'a, T>(&mut self, _barriers: T)
     where
-        T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>,
+        T: Iterator<Item = crate::TextureBarrier<'a, super::Texture>>,
     {
     }
 
diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs
index 79cafac7c0..b7f4306f69 100644
--- a/wgpu-hal/src/vulkan/command.rs
+++ b/wgpu-hal/src/vulkan/command.rs
@@ -156,7 +156,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
 
     unsafe fn transition_textures<'a, T>(&mut self, barriers: T)
     where
-        T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>,
+        T: Iterator<Item = crate::TextureBarrier<'a, super::Texture>>,
     {
         let mut src_stages = vk::PipelineStageFlags::empty();
         let mut dst_stages = vk::PipelineStageFlags::empty();

From c02ee6270b7d3b49fb5336daaadcfa2c787ab6e1 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sat, 20 Jul 2024 10:55:03 +0200
Subject: [PATCH 182/226] buffer / texture copy operations for
 DynCommandEncoder

---
 wgpu-hal/src/dynamic/command.rs | 70 +++++++++++++++++++++++++++++++--
 1 file changed, 67 insertions(+), 3 deletions(-)

diff --git a/wgpu-hal/src/dynamic/command.rs b/wgpu-hal/src/dynamic/command.rs
index 7f670b3fc7..4100f33ac9 100644
--- a/wgpu-hal/src/dynamic/command.rs
+++ b/wgpu-hal/src/dynamic/command.rs
@@ -1,9 +1,9 @@
 use std::ops::Range;
 
 use crate::{
-    Api, Attachment, BufferBarrier, BufferBinding, BufferCopy, ColorAttachment, CommandEncoder,
-    ComputePassDescriptor, DepthStencilAttachment, DeviceError, Label, MemoryRange,
-    PassTimestampWrites, Rect, RenderPassDescriptor, TextureBarrier,
+    Api, Attachment, BufferBarrier, BufferBinding, BufferCopy, BufferTextureCopy, ColorAttachment,
+    CommandEncoder, ComputePassDescriptor, DepthStencilAttachment, DeviceError, Label, MemoryRange,
+    PassTimestampWrites, Rect, RenderPassDescriptor, TextureBarrier, TextureCopy, TextureUses,
 };
 
 use super::{
@@ -28,6 +28,29 @@ pub trait DynCommandEncoder: std::fmt::Debug {
         regions: &[BufferCopy],
     );
 
+    unsafe fn copy_texture_to_texture(
+        &mut self,
+        src: &dyn DynTexture,
+        src_usage: TextureUses,
+        dst: &dyn DynTexture,
+        regions: &[TextureCopy],
+    );
+
+    unsafe fn copy_buffer_to_texture(
+        &mut self,
+        src: &dyn DynBuffer,
+        dst: &dyn DynTexture,
+        regions: &[BufferTextureCopy],
+    );
+
+    unsafe fn copy_texture_to_buffer(
+        &mut self,
+        src: &dyn DynTexture,
+        src_usage: TextureUses,
+        dst: &dyn DynBuffer,
+        regions: &[BufferTextureCopy],
+    );
+
     unsafe fn set_bind_group(
         &mut self,
         layout: &dyn DynPipelineLayout,
@@ -195,6 +218,47 @@ impl<C: CommandEncoder> DynCommandEncoder for C {
         }
     }
 
+    unsafe fn copy_texture_to_texture(
+        &mut self,
+        src: &dyn DynTexture,
+        src_usage: TextureUses,
+        dst: &dyn DynTexture,
+        regions: &[TextureCopy],
+    ) {
+        let src = src.expect_downcast_ref();
+        let dst = dst.expect_downcast_ref();
+        unsafe {
+            C::copy_texture_to_texture(self, src, src_usage, dst, regions.iter().cloned());
+        }
+    }
+
+    unsafe fn copy_buffer_to_texture(
+        &mut self,
+        src: &dyn DynBuffer,
+        dst: &dyn DynTexture,
+        regions: &[BufferTextureCopy],
+    ) {
+        let src = src.expect_downcast_ref();
+        let dst = dst.expect_downcast_ref();
+        unsafe {
+            C::copy_buffer_to_texture(self, src, dst, regions.iter().cloned());
+        }
+    }
+
+    unsafe fn copy_texture_to_buffer(
+        &mut self,
+        src: &dyn DynTexture,
+        src_usage: TextureUses,
+        dst: &dyn DynBuffer,
+        regions: &[BufferTextureCopy],
+    ) {
+        let src = src.expect_downcast_ref();
+        let dst = dst.expect_downcast_ref();
+        unsafe {
+            C::copy_texture_to_buffer(self, src, src_usage, dst, regions.iter().cloned());
+        }
+    }
+
     unsafe fn set_bind_group(
         &mut self,
         layout: &dyn DynPipelineLayout,

From b7e11c76df46eda6d3f44c020d5c133a08ece182 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sat, 20 Jul 2024 11:45:22 +0200
Subject: [PATCH 183/226] Texture & buffer transitioning now always uses
 DynCommandEncoder; collateral: ComputePass uses DynCommandEncoder during
 recording

---
 wgpu-hal/src/dynamic/command.rs | 6 +++---
 wgpu-hal/src/lib.rs             | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/wgpu-hal/src/dynamic/command.rs b/wgpu-hal/src/dynamic/command.rs
index 4100f33ac9..ae41b1bb7b 100644
--- a/wgpu-hal/src/dynamic/command.rs
+++ b/wgpu-hal/src/dynamic/command.rs
@@ -8,10 +8,10 @@ use crate::{
 
 use super::{
     DynBindGroup, DynBuffer, DynComputePipeline, DynPipelineLayout, DynQuerySet, DynRenderPipeline,
-    DynResourceExt as _, DynTexture, DynTextureView,
+    DynResource, DynResourceExt as _, DynTexture, DynTextureView,
 };
 
-pub trait DynCommandEncoder: std::fmt::Debug {
+pub trait DynCommandEncoder: DynResource + std::fmt::Debug {
     unsafe fn begin_encoding(&mut self, label: Label) -> Result<(), DeviceError>;
 
     unsafe fn discard_encoding(&mut self);
@@ -174,7 +174,7 @@ pub trait DynCommandEncoder: std::fmt::Debug {
     // );
 }
 
-impl<C: CommandEncoder> DynCommandEncoder for C {
+impl<C: CommandEncoder + DynResource> DynCommandEncoder for C {
     unsafe fn begin_encoding(&mut self, label: Label) -> Result<(), DeviceError> {
         unsafe { C::begin_encoding(self, label) }
     }
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 60829f5e43..ca6313342c 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -264,10 +264,10 @@ pub mod api {
 
 mod dynamic;
 
-pub(crate) use dynamic::{impl_dyn_resource, DynResource};
+pub(crate) use dynamic::impl_dyn_resource;
 pub use dynamic::{
     DynBindGroup, DynBuffer, DynCommandEncoder, DynComputePipeline, DynPipelineLayout, DynQuerySet,
-    DynRenderPipeline, DynTexture, DynTextureView,
+    DynRenderPipeline, DynResource, DynTexture, DynTextureView,
 };
 
 use std::{

From add54f19d885a7eb422e13d8003f799b0d4cace4 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sat, 20 Jul 2024 20:57:42 +0200
Subject: [PATCH 184/226] Introduce DynDevice

---
 wgpu-hal/src/dynamic/device.rs | 13 +++++++++++++
 wgpu-hal/src/dynamic/mod.rs    |  2 ++
 wgpu-hal/src/lib.rs            |  4 ++--
 3 files changed, 17 insertions(+), 2 deletions(-)
 create mode 100644 wgpu-hal/src/dynamic/device.rs

diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs
new file mode 100644
index 0000000000..30e245867c
--- /dev/null
+++ b/wgpu-hal/src/dynamic/device.rs
@@ -0,0 +1,13 @@
+use crate::{Device, DynBuffer};
+
+use super::DynResourceExt;
+
+pub trait DynDevice {
+    unsafe fn destroy_buffer(&self, buffer: Box<dyn DynBuffer>);
+}
+
+impl<D: Device> DynDevice for D {
+    unsafe fn destroy_buffer(&self, buffer: Box<dyn DynBuffer>) {
+        unsafe { D::destroy_buffer(self, buffer.unbox()) };
+    }
+}
diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
index 10cea187c5..405f5c5e89 100644
--- a/wgpu-hal/src/dynamic/mod.rs
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -1,6 +1,8 @@
 mod command;
+mod device;
 
 pub use self::command::DynCommandEncoder;
+pub use self::device::DynDevice;
 
 use std::any::Any;
 
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index ca6313342c..2b628b2bc1 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -266,8 +266,8 @@ mod dynamic;
 
 pub(crate) use dynamic::impl_dyn_resource;
 pub use dynamic::{
-    DynBindGroup, DynBuffer, DynCommandEncoder, DynComputePipeline, DynPipelineLayout, DynQuerySet,
-    DynRenderPipeline, DynResource, DynTexture, DynTextureView,
+    DynBindGroup, DynBuffer, DynCommandEncoder, DynComputePipeline, DynDevice, DynPipelineLayout,
+    DynQuerySet, DynRenderPipeline, DynResource, DynTexture, DynTextureView,
 };
 
 use std::{

From b4c674197041d2510961dd277a4b3d5388de2d70 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sat, 20 Jul 2024 21:12:42 +0200
Subject: [PATCH 185/226] add most remaining dyn type traits

---
 wgpu-hal/src/dynamic/mod.rs | 6 ++++++
 wgpu-hal/src/lib.rs         | 5 +++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
index 405f5c5e89..cb114af9f1 100644
--- a/wgpu-hal/src/dynamic/mod.rs
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -88,12 +88,18 @@ impl<R: DynResource + ?Sized> DynResourceExt for R {
     }
 }
 
+pub trait DynAccelerationStructure: DynResource + std::fmt::Debug {}
 pub trait DynBindGroup: DynResource + std::fmt::Debug {}
+pub trait DynBindGroupLayout: DynResource + std::fmt::Debug {}
 pub trait DynBuffer: DynResource + std::fmt::Debug {}
 pub trait DynComputePipeline: DynResource + std::fmt::Debug {}
+pub trait DynFence: DynResource + std::fmt::Debug {}
+pub trait DynPipelineCache: DynResource + std::fmt::Debug {}
 pub trait DynPipelineLayout: DynResource + std::fmt::Debug {}
 pub trait DynQuerySet: DynResource + std::fmt::Debug {}
 pub trait DynRenderPipeline: DynResource + std::fmt::Debug {}
+pub trait DynSampler: DynResource + std::fmt::Debug {}
+pub trait DynShaderModule: DynResource + std::fmt::Debug {}
 pub trait DynTexture: DynResource + std::fmt::Debug {}
 pub trait DynTextureView: DynResource + std::fmt::Debug {}
 
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 2b628b2bc1..67e24a70e9 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -266,8 +266,9 @@ mod dynamic;
 
 pub(crate) use dynamic::impl_dyn_resource;
 pub use dynamic::{
-    DynBindGroup, DynBuffer, DynCommandEncoder, DynComputePipeline, DynDevice, DynPipelineLayout,
-    DynQuerySet, DynRenderPipeline, DynResource, DynTexture, DynTextureView,
+    DynAccelerationStructure, DynBindGroup, DynBindGroupLayout, DynBuffer, DynCommandEncoder,
+    DynComputePipeline, DynDevice, DynFence, DynPipelineCache, DynPipelineLayout, DynQuerySet,
+    DynRenderPipeline, DynResource, DynSampler, DynShaderModule, DynTexture, DynTextureView,
 };
 
 use std::{

From a9cb7fc855f23971d0e3dce09596f139040fd6ae Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sat, 20 Jul 2024 21:14:48 +0200
Subject: [PATCH 186/226] impl DynShaderModule for all backends

---
 wgpu-hal/src/dx12/mod.rs   | 2 ++
 wgpu-hal/src/empty.rs      | 1 +
 wgpu-hal/src/gles/mod.rs   | 2 ++
 wgpu-hal/src/lib.rs        | 2 +-
 wgpu-hal/src/metal/mod.rs  | 2 ++
 wgpu-hal/src/vulkan/mod.rs | 2 ++
 6 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index 3dbb58abb0..8f2e57d6e7 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -608,6 +608,8 @@ pub struct ShaderModule {
     raw_name: Option<ffi::CString>,
 }
 
+impl crate::DynShaderModule for ShaderModule {}
+
 pub(super) enum CompiledShader {
     #[allow(unused)]
     Dxc(Vec<u8>),
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index d39d41adca..068874948c 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -48,6 +48,7 @@ impl crate::DynComputePipeline for Resource {}
 impl crate::DynPipelineLayout for Resource {}
 impl crate::DynQuerySet for Resource {}
 impl crate::DynRenderPipeline for Resource {}
+impl crate::DynShaderModule for Resource {}
 impl crate::DynTexture for Resource {}
 impl crate::DynTextureView for Resource {}
 
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index 5709735202..433ff48f08 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -542,6 +542,8 @@ pub struct ShaderModule {
     id: ShaderId,
 }
 
+impl crate::DynShaderModule for ShaderModule {}
+
 #[derive(Clone, Debug, Default)]
 struct VertexFormatDesc {
     element_count: i32,
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 67e24a70e9..1716357c2b 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -437,7 +437,7 @@ pub trait Api: Clone + fmt::Debug + Sized {
     type BindGroupLayout: fmt::Debug + WasmNotSendSync;
     type BindGroup: DynBindGroup;
     type PipelineLayout: DynPipelineLayout;
-    type ShaderModule: fmt::Debug + WasmNotSendSync;
+    type ShaderModule: DynShaderModule;
     type RenderPipeline: DynRenderPipeline;
     type ComputePipeline: DynComputePipeline;
     type PipelineCache: fmt::Debug + WasmNotSendSync;
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index a7282ca4cd..d8d9b3fcb6 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -714,6 +714,8 @@ pub struct ShaderModule {
     runtime_checks: bool,
 }
 
+impl crate::DynShaderModule for ShaderModule {}
+
 #[derive(Debug, Default)]
 struct PipelineStageInfo {
     push_constants: Option<PushConstantsInfo>,
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index 2d4a971739..8d007b9f70 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -821,6 +821,8 @@ pub enum ShaderModule {
     },
 }
 
+impl crate::DynShaderModule for ShaderModule {}
+
 #[derive(Debug)]
 pub struct RenderPipeline {
     raw: vk::Pipeline,

From df5cc1293577c86fb37c3e44852d4e0690b233da Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 21 Jul 2024 11:18:31 +0200
Subject: [PATCH 187/226] impl DynCommandBuffer

---
 wgpu-hal/src/dx12/mod.rs    | 2 ++
 wgpu-hal/src/dynamic/mod.rs | 1 +
 wgpu-hal/src/empty.rs       | 1 +
 wgpu-hal/src/gles/mod.rs    | 2 ++
 wgpu-hal/src/lib.rs         | 9 +++++----
 wgpu-hal/src/metal/mod.rs   | 2 ++
 wgpu-hal/src/vulkan/mod.rs  | 2 ++
 7 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index 8f2e57d6e7..4519e5bc3e 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -413,6 +413,8 @@ pub struct CommandBuffer {
     raw: d3d12::GraphicsCommandList,
 }
 
+impl crate::DynCommandBuffer for CommandBuffer {}
+
 unsafe impl Send for CommandBuffer {}
 unsafe impl Sync for CommandBuffer {}
 
diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
index cb114af9f1..0262d4fbb1 100644
--- a/wgpu-hal/src/dynamic/mod.rs
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -92,6 +92,7 @@ pub trait DynAccelerationStructure: DynResource + std::fmt::Debug {}
 pub trait DynBindGroup: DynResource + std::fmt::Debug {}
 pub trait DynBindGroupLayout: DynResource + std::fmt::Debug {}
 pub trait DynBuffer: DynResource + std::fmt::Debug {}
+pub trait DynCommandBuffer: DynResource + std::fmt::Debug {}
 pub trait DynComputePipeline: DynResource + std::fmt::Debug {}
 pub trait DynFence: DynResource + std::fmt::Debug {}
 pub trait DynPipelineCache: DynResource + std::fmt::Debug {}
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 068874948c..6dbe60c767 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -44,6 +44,7 @@ crate::impl_dyn_resource!(Context, Encoder, Resource);
 
 impl crate::DynBindGroup for Resource {}
 impl crate::DynBuffer for Resource {}
+impl crate::DynCommandBuffer for Resource {}
 impl crate::DynComputePipeline for Resource {}
 impl crate::DynPipelineLayout for Resource {}
 impl crate::DynQuerySet for Resource {}
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index 433ff48f08..93b7ba92f4 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -988,6 +988,8 @@ pub struct CommandBuffer {
     queries: Vec<glow::Query>,
 }
 
+impl crate::DynCommandBuffer for CommandBuffer {}
+
 impl fmt::Debug for CommandBuffer {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         let mut builder = f.debug_struct("CommandBuffer");
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 1716357c2b..91883e3421 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -266,9 +266,10 @@ mod dynamic;
 
 pub(crate) use dynamic::impl_dyn_resource;
 pub use dynamic::{
-    DynAccelerationStructure, DynBindGroup, DynBindGroupLayout, DynBuffer, DynCommandEncoder,
-    DynComputePipeline, DynDevice, DynFence, DynPipelineCache, DynPipelineLayout, DynQuerySet,
-    DynRenderPipeline, DynResource, DynSampler, DynShaderModule, DynTexture, DynTextureView,
+    DynAccelerationStructure, DynBindGroup, DynBindGroupLayout, DynBuffer, DynCommandBuffer,
+    DynCommandEncoder, DynComputePipeline, DynDevice, DynFence, DynPipelineCache,
+    DynPipelineLayout, DynQuerySet, DynRenderPipeline, DynResource, DynSampler, DynShaderModule,
+    DynTexture, DynTextureView,
 };
 
 use std::{
@@ -406,7 +407,7 @@ pub trait Api: Clone + fmt::Debug + Sized {
     /// them to [`CommandEncoder::reset_all`].
     ///
     /// [`CommandEncoder`]: Api::CommandEncoder
-    type CommandBuffer: WasmNotSendSync + fmt::Debug;
+    type CommandBuffer: DynCommandBuffer + fmt::Debug;
 
     type Buffer: DynBuffer;
     type Texture: DynTexture;
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index d8d9b3fcb6..6ed68e9121 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -907,6 +907,8 @@ pub struct CommandBuffer {
     raw: metal::CommandBuffer,
 }
 
+impl crate::DynCommandBuffer for CommandBuffer {}
+
 unsafe impl Send for CommandBuffer {}
 unsafe impl Sync for CommandBuffer {}
 
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index 8d007b9f70..1b22a9628b 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -811,6 +811,8 @@ pub struct CommandBuffer {
     raw: vk::CommandBuffer,
 }
 
+impl crate::DynCommandBuffer for CommandBuffer {}
+
 #[derive(Debug)]
 #[allow(clippy::large_enum_variant)]
 pub enum ShaderModule {

From 3faed2bf522261deb7c57e43cfcb75b8fba6be29 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 21 Jul 2024 11:51:45 +0200
Subject: [PATCH 188/226] Device now has to implement DynResource

---
 wgpu-hal/src/dx12/mod.rs       | 1 +
 wgpu-hal/src/dynamic/device.rs | 6 +++---
 wgpu-hal/src/gles/mod.rs       | 1 +
 wgpu-hal/src/lib.rs            | 2 +-
 wgpu-hal/src/metal/mod.rs      | 1 +
 wgpu-hal/src/vulkan/mod.rs     | 1 +
 6 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index 4519e5bc3e..ec8e79e822 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -94,6 +94,7 @@ crate::impl_dyn_resource!(
     CommandBuffer,
     CommandEncoder,
     ComputePipeline,
+    Device,
     Fence,
     PipelineCache,
     PipelineLayout,
diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs
index 30e245867c..b92139d417 100644
--- a/wgpu-hal/src/dynamic/device.rs
+++ b/wgpu-hal/src/dynamic/device.rs
@@ -1,12 +1,12 @@
-use crate::{Device, DynBuffer};
+use crate::{Device, DynBuffer, DynResource};
 
 use super::DynResourceExt;
 
-pub trait DynDevice {
+pub trait DynDevice: DynResource {
     unsafe fn destroy_buffer(&self, buffer: Box<dyn DynBuffer>);
 }
 
-impl<D: Device> DynDevice for D {
+impl<D: Device + DynResource> DynDevice for D {
     unsafe fn destroy_buffer(&self, buffer: Box<dyn DynBuffer>) {
         unsafe { D::destroy_buffer(self, buffer.unbox()) };
     }
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index 93b7ba92f4..c1abe2d2c0 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -171,6 +171,7 @@ crate::impl_dyn_resource!(
     CommandBuffer,
     CommandEncoder,
     ComputePipeline,
+    Device,
     Fence,
     PipelineLayout,
     QuerySet,
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 91883e3421..6dbff8db78 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -394,7 +394,7 @@ pub trait Api: Clone + fmt::Debug + Sized {
     type Instance: Instance<A = Self>;
     type Surface: Surface<A = Self>;
     type Adapter: Adapter<A = Self>;
-    type Device: Device<A = Self>;
+    type Device: DynDevice + Device<A = Self>;
 
     type Queue: Queue<A = Self>;
     type CommandEncoder: DynCommandEncoder + CommandEncoder<A = Self>;
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index 6ed68e9121..4263e0d488 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -78,6 +78,7 @@ crate::impl_dyn_resource!(
     CommandBuffer,
     CommandEncoder,
     ComputePipeline,
+    Device,
     Fence,
     PipelineLayout,
     QuerySet,
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index 1b22a9628b..5ba81a17ae 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -85,6 +85,7 @@ crate::impl_dyn_resource!(
     CommandBuffer,
     CommandEncoder,
     ComputePipeline,
+    Device,
     Fence,
     PipelineCache,
     PipelineLayout,

From 1d19a205a5caec00d7ac1bea840013c4f6f4fedb Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 21 Jul 2024 11:59:03 +0200
Subject: [PATCH 189/226] impl DynFence for all fences

---
 wgpu-hal/src/dx12/mod.rs   | 2 ++
 wgpu-hal/src/empty.rs      | 1 +
 wgpu-hal/src/gles/mod.rs   | 2 ++
 wgpu-hal/src/lib.rs        | 2 +-
 wgpu-hal/src/metal/mod.rs  | 2 ++
 wgpu-hal/src/vulkan/mod.rs | 2 ++
 6 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index ec8e79e822..8267f7f4bf 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -525,6 +525,8 @@ pub struct Fence {
     raw: d3d12::Fence,
 }
 
+impl crate::DynFence for Fence {}
+
 unsafe impl Send for Fence {}
 unsafe impl Sync for Fence {}
 
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 6dbe60c767..0e7f43073b 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -46,6 +46,7 @@ impl crate::DynBindGroup for Resource {}
 impl crate::DynBuffer for Resource {}
 impl crate::DynCommandBuffer for Resource {}
 impl crate::DynComputePipeline for Resource {}
+impl crate::DynFence for Resource {}
 impl crate::DynPipelineLayout for Resource {}
 impl crate::DynQuerySet for Resource {}
 impl crate::DynRenderPipeline for Resource {}
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index c1abe2d2c0..412424a851 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -693,6 +693,8 @@ pub struct Fence {
     pending: Vec<(crate::FenceValue, glow::Fence)>,
 }
 
+impl crate::DynFence for Fence {}
+
 #[cfg(any(
     not(target_arch = "wasm32"),
     all(
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 6dbff8db78..6ae5d6bdc7 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -433,7 +433,7 @@ pub trait Api: Clone + fmt::Debug + Sized {
     /// before a lower-valued operation, then waiting for the fence to reach the
     /// lower value could return before the lower-valued operation has actually
     /// finished.
-    type Fence: fmt::Debug + WasmNotSendSync;
+    type Fence: DynFence + fmt::Debug;
 
     type BindGroupLayout: fmt::Debug + WasmNotSendSync;
     type BindGroup: DynBindGroup;
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index 4263e0d488..fdcf6550b6 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -811,6 +811,8 @@ pub struct Fence {
     pending_command_buffers: Vec<(crate::FenceValue, metal::CommandBuffer)>,
 }
 
+impl crate::DynFence for Fence {}
+
 unsafe impl Send for Fence {}
 unsafe impl Sync for Fence {}
 
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index 5ba81a17ae..360abdfee6 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -901,6 +901,8 @@ pub enum Fence {
     },
 }
 
+impl crate::DynFence for Fence {}
+
 impl Fence {
     /// Return the highest [`FenceValue`] among the signalled fences in `active`.
     ///

From 70f76411938923f659f72e7c3fcbf653237d873a Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 21 Jul 2024 12:04:44 +0200
Subject: [PATCH 190/226] impl DynSurfaceTexture for all surface textures

---
 wgpu-hal/src/dx12/mod.rs    | 1 +
 wgpu-hal/src/dynamic/mod.rs | 1 +
 wgpu-hal/src/empty.rs       | 1 +
 wgpu-hal/src/gles/mod.rs    | 1 +
 wgpu-hal/src/lib.rs         | 4 ++--
 wgpu-hal/src/metal/mod.rs   | 3 +++
 wgpu-hal/src/vulkan/mod.rs  | 3 +++
 7 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index 8267f7f4bf..44a56a244e 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -456,6 +456,7 @@ pub struct Texture {
 }
 
 impl crate::DynTexture for Texture {}
+impl crate::DynSurfaceTexture for Texture {}
 
 unsafe impl Send for Texture {}
 unsafe impl Sync for Texture {}
diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
index 0262d4fbb1..a7e343ad49 100644
--- a/wgpu-hal/src/dynamic/mod.rs
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -101,6 +101,7 @@ pub trait DynQuerySet: DynResource + std::fmt::Debug {}
 pub trait DynRenderPipeline: DynResource + std::fmt::Debug {}
 pub trait DynSampler: DynResource + std::fmt::Debug {}
 pub trait DynShaderModule: DynResource + std::fmt::Debug {}
+pub trait DynSurfaceTexture: DynResource + std::fmt::Debug {}
 pub trait DynTexture: DynResource + std::fmt::Debug {}
 pub trait DynTextureView: DynResource + std::fmt::Debug {}
 
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 0e7f43073b..37723326bc 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -51,6 +51,7 @@ impl crate::DynPipelineLayout for Resource {}
 impl crate::DynQuerySet for Resource {}
 impl crate::DynRenderPipeline for Resource {}
 impl crate::DynShaderModule for Resource {}
+impl crate::DynSurfaceTexture for Resource {}
 impl crate::DynTexture for Resource {}
 impl crate::DynTextureView for Resource {}
 
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index 412424a851..2e76871b1d 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -375,6 +375,7 @@ pub struct Texture {
 }
 
 impl crate::DynTexture for Texture {}
+impl crate::DynSurfaceTexture for Texture {}
 
 impl Texture {
     pub fn default_framebuffer(format: wgt::TextureFormat) -> Self {
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 6ae5d6bdc7..9a80eea6c3 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -269,7 +269,7 @@ pub use dynamic::{
     DynAccelerationStructure, DynBindGroup, DynBindGroupLayout, DynBuffer, DynCommandBuffer,
     DynCommandEncoder, DynComputePipeline, DynDevice, DynFence, DynPipelineCache,
     DynPipelineLayout, DynQuerySet, DynRenderPipeline, DynResource, DynSampler, DynShaderModule,
-    DynTexture, DynTextureView,
+    DynSurfaceTexture, DynTexture, DynTextureView,
 };
 
 use std::{
@@ -411,7 +411,7 @@ pub trait Api: Clone + fmt::Debug + Sized {
 
     type Buffer: DynBuffer;
     type Texture: DynTexture;
-    type SurfaceTexture: fmt::Debug + WasmNotSendSync + Borrow<Self::Texture>;
+    type SurfaceTexture: DynSurfaceTexture + fmt::Debug + Borrow<Self::Texture>;
     type TextureView: DynTextureView;
     type Sampler: fmt::Debug + WasmNotSendSync;
     type QuerySet: DynQuerySet;
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index fdcf6550b6..35a4d8159a 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -86,6 +86,7 @@ crate::impl_dyn_resource!(
     Sampler,
     ShaderModule,
     Surface,
+    SurfaceTexture,
     Texture,
     TextureView
 );
@@ -381,6 +382,8 @@ pub struct SurfaceTexture {
     present_with_transaction: bool,
 }
 
+impl crate::DynSurfaceTexture for SurfaceTexture {}
+
 impl std::borrow::Borrow<Texture> for SurfaceTexture {
     fn borrow(&self) -> &Texture {
         &self.texture
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index 360abdfee6..f0fb84f153 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -94,6 +94,7 @@ crate::impl_dyn_resource!(
     Sampler,
     ShaderModule,
     Surface,
+    SurfaceTexture,
     Texture,
     TextureView
 );
@@ -377,6 +378,8 @@ pub struct SurfaceTexture {
     surface_semaphores: Arc<Mutex<SwapchainImageSemaphores>>,
 }
 
+impl crate::DynSurfaceTexture for SurfaceTexture {}
+
 impl Borrow<Texture> for SurfaceTexture {
     fn borrow(&self) -> &Texture {
         &self.texture

From 4e5721350f75e883bb395502dceaf2d90f1de718 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 21 Jul 2024 12:13:23 +0200
Subject: [PATCH 191/226] introduce DynSurface

---
 wgpu-hal/src/dynamic/mod.rs     |  6 ++-
 wgpu-hal/src/dynamic/surface.rs | 71 +++++++++++++++++++++++++++++++++
 wgpu-hal/src/lib.rs             |  8 ++--
 3 files changed, 79 insertions(+), 6 deletions(-)
 create mode 100644 wgpu-hal/src/dynamic/surface.rs

diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
index a7e343ad49..f51bf742c7 100644
--- a/wgpu-hal/src/dynamic/mod.rs
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -1,8 +1,10 @@
 mod command;
 mod device;
+mod surface;
 
-pub use self::command::DynCommandEncoder;
-pub use self::device::DynDevice;
+pub use command::DynCommandEncoder;
+pub use device::DynDevice;
+pub use surface::{DynAcquiredSurfaceTexture, DynSurface};
 
 use std::any::Any;
 
diff --git a/wgpu-hal/src/dynamic/surface.rs b/wgpu-hal/src/dynamic/surface.rs
new file mode 100644
index 0000000000..d6c3dad623
--- /dev/null
+++ b/wgpu-hal/src/dynamic/surface.rs
@@ -0,0 +1,71 @@
+use crate::{
+    DynDevice, DynFence, DynResource, DynSurfaceTexture, Surface, SurfaceConfiguration,
+    SurfaceError,
+};
+
+use super::DynResourceExt as _;
+
+#[derive(Debug)]
+pub struct DynAcquiredSurfaceTexture {
+    pub texture: Box<dyn DynSurfaceTexture>,
+    /// The presentation configuration no longer matches
+    /// the surface properties exactly, but can still be used to present
+    /// to the surface successfully.
+    pub suboptimal: bool,
+}
+
+pub trait DynSurface: DynResource {
+    unsafe fn configure(
+        &self,
+        device: &dyn DynDevice,
+        config: &SurfaceConfiguration,
+    ) -> Result<(), SurfaceError>;
+
+    unsafe fn unconfigure(&self, device: &dyn DynDevice);
+
+    unsafe fn acquire_texture(
+        &self,
+        timeout: Option<std::time::Duration>,
+        fence: &dyn DynFence,
+    ) -> Result<Option<DynAcquiredSurfaceTexture>, SurfaceError>;
+
+    unsafe fn discard_texture(&self, texture: Box<dyn DynSurfaceTexture>);
+}
+
+impl<S: Surface + DynResource> DynSurface for S {
+    unsafe fn configure(
+        &self,
+        device: &dyn DynDevice,
+        config: &SurfaceConfiguration,
+    ) -> Result<(), SurfaceError> {
+        let device = device.expect_downcast_ref();
+        unsafe { S::configure(self, device, config) }
+    }
+
+    unsafe fn unconfigure(&self, device: &dyn DynDevice) {
+        let device = device.expect_downcast_ref();
+        unsafe { S::unconfigure(self, device) }
+    }
+
+    unsafe fn acquire_texture(
+        &self,
+        timeout: Option<std::time::Duration>,
+        fence: &dyn DynFence,
+    ) -> Result<Option<DynAcquiredSurfaceTexture>, SurfaceError> {
+        let fence = fence.expect_downcast_ref();
+        unsafe { S::acquire_texture(self, timeout, fence) }.map(|acquired| {
+            acquired.map(|ast| {
+                let texture = Box::new(ast.texture);
+                let suboptimal = ast.suboptimal;
+                DynAcquiredSurfaceTexture {
+                    texture,
+                    suboptimal,
+                }
+            })
+        })
+    }
+
+    unsafe fn discard_texture(&self, texture: Box<dyn DynSurfaceTexture>) {
+        unsafe { S::discard_texture(self, texture.unbox()) }
+    }
+}
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 9a80eea6c3..c97c232a6c 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -266,10 +266,10 @@ mod dynamic;
 
 pub(crate) use dynamic::impl_dyn_resource;
 pub use dynamic::{
-    DynAccelerationStructure, DynBindGroup, DynBindGroupLayout, DynBuffer, DynCommandBuffer,
-    DynCommandEncoder, DynComputePipeline, DynDevice, DynFence, DynPipelineCache,
-    DynPipelineLayout, DynQuerySet, DynRenderPipeline, DynResource, DynSampler, DynShaderModule,
-    DynSurfaceTexture, DynTexture, DynTextureView,
+    DynAccelerationStructure, DynAcquiredSurfaceTexture, DynBindGroup, DynBindGroupLayout,
+    DynBuffer, DynCommandBuffer, DynCommandEncoder, DynComputePipeline, DynDevice, DynFence,
+    DynPipelineCache, DynPipelineLayout, DynQuerySet, DynRenderPipeline, DynResource, DynSampler,
+    DynShaderModule, DynSurface, DynSurfaceTexture, DynTexture, DynTextureView,
 };
 
 use std::{

From 72f30a34f504c0109772684d1960cb2619e1f82a Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 21 Jul 2024 12:19:47 +0200
Subject: [PATCH 192/226] introduce DynQueue

---
 wgpu-hal/src/dx12/mod.rs      |  6 ++++
 wgpu-hal/src/dynamic/mod.rs   |  7 ++++-
 wgpu-hal/src/dynamic/queue.rs | 54 +++++++++++++++++++++++++++++++++++
 wgpu-hal/src/empty.rs         |  6 ++++
 wgpu-hal/src/gles/mod.rs      |  6 ++++
 wgpu-hal/src/lib.rs           |  6 ++--
 wgpu-hal/src/metal/mod.rs     |  6 ++++
 wgpu-hal/src/vulkan/mod.rs    |  6 ++++
 8 files changed, 93 insertions(+), 4 deletions(-)
 create mode 100644 wgpu-hal/src/dynamic/queue.rs

diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index 44a56a244e..345624da64 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -458,6 +458,12 @@ pub struct Texture {
 impl crate::DynTexture for Texture {}
 impl crate::DynSurfaceTexture for Texture {}
 
+impl std::borrow::Borrow<dyn crate::DynTexture> for Texture {
+    fn borrow(&self) -> &dyn crate::DynTexture {
+        self
+    }
+}
+
 unsafe impl Send for Texture {}
 unsafe impl Sync for Texture {}
 
diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
index f51bf742c7..2607ba44c3 100644
--- a/wgpu-hal/src/dynamic/mod.rs
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -1,9 +1,11 @@
 mod command;
 mod device;
+mod queue;
 mod surface;
 
 pub use command::DynCommandEncoder;
 pub use device::DynDevice;
+pub use queue::DynQueue;
 pub use surface::{DynAcquiredSurfaceTexture, DynSurface};
 
 use std::any::Any;
@@ -103,7 +105,10 @@ pub trait DynQuerySet: DynResource + std::fmt::Debug {}
 pub trait DynRenderPipeline: DynResource + std::fmt::Debug {}
 pub trait DynSampler: DynResource + std::fmt::Debug {}
 pub trait DynShaderModule: DynResource + std::fmt::Debug {}
-pub trait DynSurfaceTexture: DynResource + std::fmt::Debug {}
+pub trait DynSurfaceTexture:
+    DynResource + std::borrow::Borrow<dyn DynTexture> + std::fmt::Debug
+{
+}
 pub trait DynTexture: DynResource + std::fmt::Debug {}
 pub trait DynTextureView: DynResource + std::fmt::Debug {}
 
diff --git a/wgpu-hal/src/dynamic/queue.rs b/wgpu-hal/src/dynamic/queue.rs
new file mode 100644
index 0000000000..14d7e5a969
--- /dev/null
+++ b/wgpu-hal/src/dynamic/queue.rs
@@ -0,0 +1,54 @@
+use crate::{
+    DeviceError, DynCommandBuffer, DynFence, DynResource, DynSurface, DynSurfaceTexture,
+    FenceValue, Queue, SurfaceError,
+};
+
+use super::DynResourceExt as _;
+
+pub trait DynQueue: DynResource {
+    unsafe fn submit(
+        &self,
+        command_buffers: &[&dyn DynCommandBuffer],
+        surface_textures: &[&dyn DynSurfaceTexture],
+        signal_fence: (&mut dyn DynFence, FenceValue),
+    ) -> Result<(), DeviceError>;
+    unsafe fn present(
+        &self,
+        surface: &dyn DynSurface,
+        texture: Box<dyn DynSurfaceTexture>,
+    ) -> Result<(), SurfaceError>;
+    unsafe fn get_timestamp_period(&self) -> f32;
+}
+
+impl<Q: Queue + DynResource> DynQueue for Q {
+    unsafe fn submit(
+        &self,
+        command_buffers: &[&dyn DynCommandBuffer],
+        surface_textures: &[&dyn DynSurfaceTexture],
+        signal_fence: (&mut dyn DynFence, FenceValue),
+    ) -> Result<(), DeviceError> {
+        let command_buffers = command_buffers
+            .iter()
+            .map(|cb| (*cb).expect_downcast_ref())
+            .collect::<Vec<_>>();
+        let surface_textures = surface_textures
+            .iter()
+            .map(|surface| (*surface).expect_downcast_ref())
+            .collect::<Vec<_>>();
+        let signal_fence = (signal_fence.0.expect_downcast_mut(), signal_fence.1);
+        unsafe { Q::submit(self, &command_buffers, &surface_textures, signal_fence) }
+    }
+
+    unsafe fn present(
+        &self,
+        surface: &dyn DynSurface,
+        texture: Box<dyn DynSurfaceTexture>,
+    ) -> Result<(), SurfaceError> {
+        let surface = surface.expect_downcast_ref();
+        unsafe { Q::present(self, surface, texture.unbox()) }
+    }
+
+    unsafe fn get_timestamp_period(&self) -> f32 {
+        unsafe { Q::get_timestamp_period(self) }
+    }
+}
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 37723326bc..9b97b16b80 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -55,6 +55,12 @@ impl crate::DynSurfaceTexture for Resource {}
 impl crate::DynTexture for Resource {}
 impl crate::DynTextureView for Resource {}
 
+impl std::borrow::Borrow<dyn crate::DynTexture> for Resource {
+    fn borrow(&self) -> &dyn crate::DynTexture {
+        self
+    }
+}
+
 impl crate::Instance for Context {
     type A = Api;
 
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index 2e76871b1d..7208f5c7e4 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -377,6 +377,12 @@ pub struct Texture {
 impl crate::DynTexture for Texture {}
 impl crate::DynSurfaceTexture for Texture {}
 
+impl std::borrow::Borrow<dyn crate::DynTexture> for Texture {
+    fn borrow(&self) -> &dyn crate::DynTexture {
+        self
+    }
+}
+
 impl Texture {
     pub fn default_framebuffer(format: wgt::TextureFormat) -> Self {
         Self {
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index c97c232a6c..c7939066c2 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -268,8 +268,8 @@ pub(crate) use dynamic::impl_dyn_resource;
 pub use dynamic::{
     DynAccelerationStructure, DynAcquiredSurfaceTexture, DynBindGroup, DynBindGroupLayout,
     DynBuffer, DynCommandBuffer, DynCommandEncoder, DynComputePipeline, DynDevice, DynFence,
-    DynPipelineCache, DynPipelineLayout, DynQuerySet, DynRenderPipeline, DynResource, DynSampler,
-    DynShaderModule, DynSurface, DynSurfaceTexture, DynTexture, DynTextureView,
+    DynPipelineCache, DynPipelineLayout, DynQuerySet, DynQueue, DynRenderPipeline, DynResource,
+    DynSampler, DynShaderModule, DynSurface, DynSurfaceTexture, DynTexture, DynTextureView,
 };
 
 use std::{
@@ -392,7 +392,7 @@ impl InstanceError {
 
 pub trait Api: Clone + fmt::Debug + Sized {
     type Instance: Instance<A = Self>;
-    type Surface: Surface<A = Self>;
+    type Surface: DynSurface + Surface<A = Self>;
     type Adapter: Adapter<A = Self>;
     type Device: DynDevice + Device<A = Self>;
 
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index 35a4d8159a..322e627597 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -390,6 +390,12 @@ impl std::borrow::Borrow<Texture> for SurfaceTexture {
     }
 }
 
+impl std::borrow::Borrow<dyn crate::DynTexture> for SurfaceTexture {
+    fn borrow(&self) -> &dyn crate::DynTexture {
+        &self.texture
+    }
+}
+
 unsafe impl Send for SurfaceTexture {}
 unsafe impl Sync for SurfaceTexture {}
 
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index f0fb84f153..d8692d7205 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -386,6 +386,12 @@ impl Borrow<Texture> for SurfaceTexture {
     }
 }
 
+impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
+    fn borrow(&self) -> &dyn crate::DynTexture {
+        &self.texture
+    }
+}
+
 pub struct Adapter {
     raw: vk::PhysicalDevice,
     instance: Arc<InstanceShared>,

From b599d97243c06d7dd499bee28ea3df45bfdf2814 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 21 Jul 2024 12:52:25 +0200
Subject: [PATCH 193/226] DynDevice buffer operations

---
 wgpu-hal/src/dynamic/device.rs | 53 ++++++++++++++++++++++++++++++++--
 1 file changed, 51 insertions(+), 2 deletions(-)

diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs
index b92139d417..65944acb54 100644
--- a/wgpu-hal/src/dynamic/device.rs
+++ b/wgpu-hal/src/dynamic/device.rs
@@ -1,13 +1,62 @@
-use crate::{Device, DynBuffer, DynResource};
+use crate::{
+    BufferDescriptor, BufferMapping, Device, DeviceError, DynBuffer, DynResource, MemoryRange,
+};
 
-use super::DynResourceExt;
+use super::DynResourceExt as _;
 
 pub trait DynDevice: DynResource {
+    unsafe fn create_buffer(
+        &self,
+        desc: &BufferDescriptor,
+    ) -> Result<Box<dyn DynBuffer>, DeviceError>;
+
     unsafe fn destroy_buffer(&self, buffer: Box<dyn DynBuffer>);
+
+    unsafe fn map_buffer(
+        &self,
+        buffer: &dyn DynBuffer,
+        range: MemoryRange,
+    ) -> Result<BufferMapping, DeviceError>;
+
+    unsafe fn unmap_buffer(&self, buffer: &dyn DynBuffer);
+
+    unsafe fn flush_mapped_ranges(&self, buffer: &dyn DynBuffer, ranges: &[MemoryRange]);
+    unsafe fn invalidate_mapped_ranges(&self, buffer: &dyn DynBuffer, ranges: &[MemoryRange]);
 }
 
 impl<D: Device + DynResource> DynDevice for D {
+    unsafe fn create_buffer(
+        &self,
+        desc: &BufferDescriptor,
+    ) -> Result<Box<dyn DynBuffer>, DeviceError> {
+        unsafe { D::create_buffer(self, desc) }.map(|b| -> Box<dyn DynBuffer> { Box::new(b) })
+    }
+
     unsafe fn destroy_buffer(&self, buffer: Box<dyn DynBuffer>) {
         unsafe { D::destroy_buffer(self, buffer.unbox()) };
     }
+
+    unsafe fn map_buffer(
+        &self,
+        buffer: &dyn DynBuffer,
+        range: MemoryRange,
+    ) -> Result<BufferMapping, DeviceError> {
+        let buffer = buffer.expect_downcast_ref();
+        unsafe { D::map_buffer(self, buffer, range) }
+    }
+
+    unsafe fn unmap_buffer(&self, buffer: &dyn DynBuffer) {
+        let buffer = buffer.expect_downcast_ref();
+        unsafe { D::unmap_buffer(self, buffer) }
+    }
+
+    unsafe fn flush_mapped_ranges(&self, buffer: &dyn DynBuffer, ranges: &[MemoryRange]) {
+        let buffer = buffer.expect_downcast_ref();
+        unsafe { D::flush_mapped_ranges(self, buffer, ranges.iter().cloned()) }
+    }
+
+    unsafe fn invalidate_mapped_ranges(&self, buffer: &dyn DynBuffer, ranges: &[MemoryRange]) {
+        let buffer = buffer.expect_downcast_ref();
+        unsafe { D::invalidate_mapped_ranges(self, buffer, ranges.iter().cloned()) }
+    }
 }

From ea0df25d5e03c6226efcd1805a2750f316b1c0b0 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Wed, 24 Jul 2024 23:51:24 +0200
Subject: [PATCH 194/226] DynDevice create/destroy for
 texture/textureview/sampler

---
 wgpu-hal/src/dx12/mod.rs       |  2 +
 wgpu-hal/src/dynamic/device.rs | 69 +++++++++++++++++++++++++++++++++-
 wgpu-hal/src/empty.rs          |  1 +
 wgpu-hal/src/gles/mod.rs       |  2 +
 wgpu-hal/src/lib.rs            |  2 +-
 wgpu-hal/src/metal/mod.rs      |  2 +
 wgpu-hal/src/vulkan/mod.rs     |  2 +
 7 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index 345624da64..a0aadb795f 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -513,6 +513,8 @@ pub struct Sampler {
     handle: descriptor::Handle,
 }
 
+impl crate::DynSampler for Sampler {}
+
 unsafe impl Send for Sampler {}
 unsafe impl Sync for Sampler {}
 
diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs
index 65944acb54..75de0413ac 100644
--- a/wgpu-hal/src/dynamic/device.rs
+++ b/wgpu-hal/src/dynamic/device.rs
@@ -1,8 +1,9 @@
 use crate::{
-    BufferDescriptor, BufferMapping, Device, DeviceError, DynBuffer, DynResource, MemoryRange,
+    Api, BufferDescriptor, BufferMapping, Device, DeviceError, DynBuffer, DynResource, MemoryRange,
+    SamplerDescriptor, TextureDescriptor, TextureViewDescriptor,
 };
 
-use super::DynResourceExt as _;
+use super::{DynResourceExt as _, DynSampler, DynTexture, DynTextureView};
 
 pub trait DynDevice: DynResource {
     unsafe fn create_buffer(
@@ -22,6 +23,23 @@ pub trait DynDevice: DynResource {
 
     unsafe fn flush_mapped_ranges(&self, buffer: &dyn DynBuffer, ranges: &[MemoryRange]);
     unsafe fn invalidate_mapped_ranges(&self, buffer: &dyn DynBuffer, ranges: &[MemoryRange]);
+
+    unsafe fn create_texture(
+        &self,
+        desc: &TextureDescriptor,
+    ) -> Result<Box<dyn DynTexture>, DeviceError>;
+    unsafe fn destroy_texture(&self, texture: Box<dyn DynTexture>);
+    unsafe fn create_texture_view(
+        &self,
+        texture: &dyn DynTexture,
+        desc: &TextureViewDescriptor,
+    ) -> Result<Box<dyn DynTextureView>, DeviceError>;
+    unsafe fn destroy_texture_view(&self, view: Box<dyn DynTextureView>);
+    unsafe fn create_sampler(
+        &self,
+        desc: &SamplerDescriptor,
+    ) -> Result<Box<dyn DynSampler>, DeviceError>;
+    unsafe fn destroy_sampler(&self, sampler: Box<dyn DynSampler>);
 }
 
 impl<D: Device + DynResource> DynDevice for D {
@@ -59,4 +77,51 @@ impl<D: Device + DynResource> DynDevice for D {
         let buffer = buffer.expect_downcast_ref();
         unsafe { D::invalidate_mapped_ranges(self, buffer, ranges.iter().cloned()) }
     }
+
+    unsafe fn create_texture(
+        &self,
+        desc: &TextureDescriptor,
+    ) -> Result<Box<dyn DynTexture>, DeviceError> {
+        unsafe { D::create_texture(self, desc) }.map(|b| {
+            let boxed_texture: Box<<D::A as Api>::Texture> = Box::new(b);
+            let boxed_texture: Box<dyn DynTexture> = boxed_texture;
+            boxed_texture
+        })
+    }
+
+    unsafe fn destroy_texture(&self, texture: Box<dyn DynTexture>) {
+        unsafe { D::destroy_texture(self, texture.unbox()) };
+    }
+
+    unsafe fn create_texture_view(
+        &self,
+        texture: &dyn DynTexture,
+        desc: &TextureViewDescriptor,
+    ) -> Result<Box<dyn DynTextureView>, DeviceError> {
+        let texture = texture.expect_downcast_ref();
+        unsafe { D::create_texture_view(self, texture, desc) }.map(|b| {
+            let boxed_texture_view: Box<<D::A as Api>::TextureView> = Box::new(b);
+            let boxed_texture_view: Box<dyn DynTextureView> = boxed_texture_view;
+            boxed_texture_view
+        })
+    }
+
+    unsafe fn destroy_texture_view(&self, view: Box<dyn DynTextureView>) {
+        unsafe { D::destroy_texture_view(self, view.unbox()) };
+    }
+
+    unsafe fn create_sampler(
+        &self,
+        desc: &SamplerDescriptor,
+    ) -> Result<Box<dyn DynSampler>, DeviceError> {
+        unsafe { D::create_sampler(self, desc) }.map(|b| {
+            let boxed_sampler: Box<<D::A as Api>::Sampler> = Box::new(b);
+            let boxed_sampler: Box<dyn DynSampler> = boxed_sampler;
+            boxed_sampler
+        })
+    }
+
+    unsafe fn destroy_sampler(&self, sampler: Box<dyn DynSampler>) {
+        unsafe { D::destroy_sampler(self, sampler.unbox()) };
+    }
 }
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 9b97b16b80..e800324024 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -50,6 +50,7 @@ impl crate::DynFence for Resource {}
 impl crate::DynPipelineLayout for Resource {}
 impl crate::DynQuerySet for Resource {}
 impl crate::DynRenderPipeline for Resource {}
+impl crate::DynSampler for Resource {}
 impl crate::DynShaderModule for Resource {}
 impl crate::DynSurfaceTexture for Resource {}
 impl crate::DynTexture for Resource {}
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index 7208f5c7e4..26693d8e60 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -477,6 +477,8 @@ pub struct Sampler {
     raw: glow::Sampler,
 }
 
+impl crate::DynSampler for Sampler {}
+
 #[derive(Debug)]
 pub struct BindGroupLayout {
     entries: Arc<[wgt::BindGroupLayoutEntry]>,
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index c7939066c2..d41137ad1a 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -413,7 +413,7 @@ pub trait Api: Clone + fmt::Debug + Sized {
     type Texture: DynTexture;
     type SurfaceTexture: DynSurfaceTexture + fmt::Debug + Borrow<Self::Texture>;
     type TextureView: DynTextureView;
-    type Sampler: fmt::Debug + WasmNotSendSync;
+    type Sampler: DynSampler + fmt::Debug;
     type QuerySet: DynQuerySet;
 
     /// A value you can block on to wait for something to finish.
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index 322e627597..e353f85a0d 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -542,6 +542,8 @@ pub struct Sampler {
     raw: metal::SamplerState,
 }
 
+impl crate::DynSampler for Sampler {}
+
 unsafe impl Send for Sampler {}
 unsafe impl Sync for Sampler {}
 
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index d8692d7205..1898e59539 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -715,6 +715,8 @@ pub struct Sampler {
     raw: vk::Sampler,
 }
 
+impl crate::DynSampler for Sampler {}
+
 #[derive(Debug)]
 pub struct BindGroupLayout {
     raw: vk::DescriptorSetLayout,

From 05a09338cd69c00152951a1eabc72811adacd52c Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sat, 27 Jul 2024 12:30:59 +0200
Subject: [PATCH 195/226] DynDevice create/destroy command encoder

---
 wgpu-hal/src/dx12/device.rs    |  2 +-
 wgpu-hal/src/dx12/mod.rs       |  1 +
 wgpu-hal/src/dynamic/device.rs | 33 ++++++++++++++++++++++++++++++---
 wgpu-hal/src/empty.rs          |  2 +-
 wgpu-hal/src/gles/device.rs    |  2 +-
 wgpu-hal/src/gles/mod.rs       |  1 +
 wgpu-hal/src/lib.rs            |  8 ++++----
 wgpu-hal/src/metal/device.rs   |  2 +-
 wgpu-hal/src/metal/mod.rs      |  1 +
 wgpu-hal/src/vulkan/device.rs  |  2 +-
 wgpu-hal/src/vulkan/mod.rs     |  1 +
 11 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index b3204a8cc0..b74c9ebca2 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -689,7 +689,7 @@ impl crate::Device for super::Device {
 
     unsafe fn create_command_encoder(
         &self,
-        desc: &crate::CommandEncoderDescriptor<super::Api>,
+        desc: &crate::CommandEncoderDescriptor<super::Queue>,
     ) -> Result<super::CommandEncoder, DeviceError> {
         let allocator = self
             .raw
diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index a0aadb795f..addb2a32ec 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -99,6 +99,7 @@ crate::impl_dyn_resource!(
     PipelineCache,
     PipelineLayout,
     QuerySet,
+    Queue,
     RenderPipeline,
     Sampler,
     ShaderModule,
diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs
index 75de0413ac..de6c93b8bb 100644
--- a/wgpu-hal/src/dynamic/device.rs
+++ b/wgpu-hal/src/dynamic/device.rs
@@ -1,9 +1,14 @@
+// Box casts are needed, alternative would be a temporaries which are more verbose and not more expressive.
+#![allow(trivial_casts)]
+
 use crate::{
-    Api, BufferDescriptor, BufferMapping, Device, DeviceError, DynBuffer, DynResource, MemoryRange,
-    SamplerDescriptor, TextureDescriptor, TextureViewDescriptor,
+    Api, BufferDescriptor, BufferMapping, CommandEncoderDescriptor, Device, DeviceError, DynBuffer,
+    DynResource, MemoryRange, SamplerDescriptor, TextureDescriptor, TextureViewDescriptor,
 };
 
-use super::{DynResourceExt as _, DynSampler, DynTexture, DynTextureView};
+use super::{
+    DynCommandEncoder, DynQueue, DynResourceExt as _, DynSampler, DynTexture, DynTextureView,
+};
 
 pub trait DynDevice: DynResource {
     unsafe fn create_buffer(
@@ -40,6 +45,12 @@ pub trait DynDevice: DynResource {
         desc: &SamplerDescriptor,
     ) -> Result<Box<dyn DynSampler>, DeviceError>;
     unsafe fn destroy_sampler(&self, sampler: Box<dyn DynSampler>);
+
+    unsafe fn create_command_encoder(
+        &self,
+        desc: &CommandEncoderDescriptor<dyn DynQueue>,
+    ) -> Result<Box<dyn DynCommandEncoder>, DeviceError>;
+    unsafe fn destroy_command_encoder(&self, pool: Box<dyn DynCommandEncoder>);
 }
 
 impl<D: Device + DynResource> DynDevice for D {
@@ -124,4 +135,20 @@ impl<D: Device + DynResource> DynDevice for D {
     unsafe fn destroy_sampler(&self, sampler: Box<dyn DynSampler>) {
         unsafe { D::destroy_sampler(self, sampler.unbox()) };
     }
+
+    unsafe fn create_command_encoder(
+        &self,
+        desc: &CommandEncoderDescriptor<'_, dyn DynQueue>,
+    ) -> Result<Box<dyn DynCommandEncoder>, DeviceError> {
+        let desc = CommandEncoderDescriptor {
+            label: desc.label,
+            queue: desc.queue.expect_downcast_ref(),
+        };
+        unsafe { D::create_command_encoder(self, &desc) }
+            .map(|b| Box::new(b) as Box<dyn DynCommandEncoder>)
+    }
+
+    unsafe fn destroy_command_encoder(&self, encoder: Box<dyn DynCommandEncoder>) {
+        unsafe { D::destroy_command_encoder(self, encoder.unbox()) };
+    }
 }
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index e800324024..f33dc7b4b0 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -195,7 +195,7 @@ impl crate::Device for Context {
 
     unsafe fn create_command_encoder(
         &self,
-        desc: &crate::CommandEncoderDescriptor<Api>,
+        desc: &crate::CommandEncoderDescriptor<Context>,
     ) -> DeviceResult<Encoder> {
         Ok(Encoder)
     }
diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs
index c651da6828..6ce15269b0 100644
--- a/wgpu-hal/src/gles/device.rs
+++ b/wgpu-hal/src/gles/device.rs
@@ -1109,7 +1109,7 @@ impl crate::Device for super::Device {
 
     unsafe fn create_command_encoder(
         &self,
-        _desc: &crate::CommandEncoderDescriptor<super::Api>,
+        _desc: &crate::CommandEncoderDescriptor<super::Queue>,
     ) -> Result<super::CommandEncoder, crate::DeviceError> {
         self.counters.command_encoders.add(1);
 
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index 26693d8e60..2306e6ea3f 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -175,6 +175,7 @@ crate::impl_dyn_resource!(
     Fence,
     PipelineLayout,
     QuerySet,
+    Queue,
     RenderPipeline,
     Sampler,
     ShaderModule,
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index d41137ad1a..0c83739d82 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -396,7 +396,7 @@ pub trait Api: Clone + fmt::Debug + Sized {
     type Adapter: Adapter<A = Self>;
     type Device: DynDevice + Device<A = Self>;
 
-    type Queue: Queue<A = Self>;
+    type Queue: DynQueue + Queue<A = Self>;
     type CommandEncoder: DynCommandEncoder + CommandEncoder<A = Self>;
 
     /// This API's command buffer type.
@@ -789,7 +789,7 @@ pub trait Device: WasmNotSendSync {
     /// The new `CommandEncoder` is in the "closed" state.
     unsafe fn create_command_encoder(
         &self,
-        desc: &CommandEncoderDescriptor<Self::A>,
+        desc: &CommandEncoderDescriptor<<Self::A as Api>::Queue>,
     ) -> Result<<Self::A as Api>::CommandEncoder, DeviceError>;
     unsafe fn destroy_command_encoder(&self, pool: <Self::A as Api>::CommandEncoder);
 
@@ -1831,9 +1831,9 @@ pub struct BindGroupDescriptor<'a, A: Api> {
 }
 
 #[derive(Clone, Debug)]
-pub struct CommandEncoderDescriptor<'a, A: Api> {
+pub struct CommandEncoderDescriptor<'a, Q: DynQueue + ?Sized> {
     pub label: Label<'a>,
-    pub queue: &'a A::Queue,
+    pub queue: &'a Q,
 }
 
 /// Naga shader module.
diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs
index 4ca392bc1f..77631ab937 100644
--- a/wgpu-hal/src/metal/device.rs
+++ b/wgpu-hal/src/metal/device.rs
@@ -569,7 +569,7 @@ impl crate::Device for super::Device {
 
     unsafe fn create_command_encoder(
         &self,
-        desc: &crate::CommandEncoderDescriptor<super::Api>,
+        desc: &crate::CommandEncoderDescriptor<super::Queue>,
     ) -> Result<super::CommandEncoder, crate::DeviceError> {
         self.counters.command_encoders.add(1);
         Ok(super::CommandEncoder {
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index e353f85a0d..5c836afd94 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -82,6 +82,7 @@ crate::impl_dyn_resource!(
     Fence,
     PipelineLayout,
     QuerySet,
+    Queue,
     RenderPipeline,
     Sampler,
     ShaderModule,
diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs
index a71263df50..9e9c50171f 100644
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@@ -1215,7 +1215,7 @@ impl crate::Device for super::Device {
 
     unsafe fn create_command_encoder(
         &self,
-        desc: &crate::CommandEncoderDescriptor<super::Api>,
+        desc: &crate::CommandEncoderDescriptor<super::Queue>,
     ) -> Result<super::CommandEncoder, crate::DeviceError> {
         let vk_info = vk::CommandPoolCreateInfo::default()
             .queue_family_index(desc.queue.family_index)
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index 1898e59539..3f1ba28bc7 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -90,6 +90,7 @@ crate::impl_dyn_resource!(
     PipelineCache,
     PipelineLayout,
     QuerySet,
+    Queue,
     RenderPipeline,
     Sampler,
     ShaderModule,

From 1da319f804da07799ea9ac5f4d4ba268710ce218 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sat, 27 Jul 2024 12:35:22 +0200
Subject: [PATCH 196/226] DynDevice create/destroy bind group layout

---
 wgpu-hal/src/dx12/mod.rs       |  2 ++
 wgpu-hal/src/dynamic/device.rs | 26 +++++++++++++++++++++++---
 wgpu-hal/src/empty.rs          |  1 +
 wgpu-hal/src/gles/mod.rs       |  2 ++
 wgpu-hal/src/lib.rs            |  2 +-
 wgpu-hal/src/metal/mod.rs      |  2 ++
 wgpu-hal/src/vulkan/mod.rs     |  2 ++
 7 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index addb2a32ec..fb0d9a1997 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -555,6 +555,8 @@ pub struct BindGroupLayout {
     copy_counts: Vec<u32>, // all 1's
 }
 
+impl crate::DynBindGroupLayout for BindGroupLayout {}
+
 #[derive(Debug, Clone, Copy)]
 enum BufferViewKind {
     Constant,
diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs
index de6c93b8bb..a9adc36227 100644
--- a/wgpu-hal/src/dynamic/device.rs
+++ b/wgpu-hal/src/dynamic/device.rs
@@ -2,12 +2,14 @@
 #![allow(trivial_casts)]
 
 use crate::{
-    Api, BufferDescriptor, BufferMapping, CommandEncoderDescriptor, Device, DeviceError, DynBuffer,
-    DynResource, MemoryRange, SamplerDescriptor, TextureDescriptor, TextureViewDescriptor,
+    Api, BindGroupLayoutDescriptor, BufferDescriptor, BufferMapping, CommandEncoderDescriptor,
+    Device, DeviceError, DynBuffer, DynResource, MemoryRange, SamplerDescriptor, TextureDescriptor,
+    TextureViewDescriptor,
 };
 
 use super::{
-    DynCommandEncoder, DynQueue, DynResourceExt as _, DynSampler, DynTexture, DynTextureView,
+    DynBindGroupLayout, DynCommandEncoder, DynQueue, DynResourceExt as _, DynSampler, DynTexture,
+    DynTextureView,
 };
 
 pub trait DynDevice: DynResource {
@@ -51,6 +53,12 @@ pub trait DynDevice: DynResource {
         desc: &CommandEncoderDescriptor<dyn DynQueue>,
     ) -> Result<Box<dyn DynCommandEncoder>, DeviceError>;
     unsafe fn destroy_command_encoder(&self, pool: Box<dyn DynCommandEncoder>);
+
+    unsafe fn create_bind_group_layout(
+        &self,
+        desc: &BindGroupLayoutDescriptor,
+    ) -> Result<Box<dyn DynBindGroupLayout>, DeviceError>;
+    unsafe fn destroy_bind_group_layout(&self, bg_layout: Box<dyn DynBindGroupLayout>);
 }
 
 impl<D: Device + DynResource> DynDevice for D {
@@ -151,4 +159,16 @@ impl<D: Device + DynResource> DynDevice for D {
     unsafe fn destroy_command_encoder(&self, encoder: Box<dyn DynCommandEncoder>) {
         unsafe { D::destroy_command_encoder(self, encoder.unbox()) };
     }
+
+    unsafe fn create_bind_group_layout(
+        &self,
+        desc: &BindGroupLayoutDescriptor,
+    ) -> Result<Box<dyn DynBindGroupLayout>, DeviceError> {
+        unsafe { D::create_bind_group_layout(self, desc) }
+            .map(|b| Box::new(b) as Box<dyn DynBindGroupLayout>)
+    }
+
+    unsafe fn destroy_bind_group_layout(&self, bg_layout: Box<dyn DynBindGroupLayout>) {
+        unsafe { D::destroy_bind_group_layout(self, bg_layout.unbox()) };
+    }
 }
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index f33dc7b4b0..b1d0eed6c4 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -43,6 +43,7 @@ impl crate::Api for Api {
 crate::impl_dyn_resource!(Context, Encoder, Resource);
 
 impl crate::DynBindGroup for Resource {}
+impl crate::DynBindGroupLayout for Resource {}
 impl crate::DynBuffer for Resource {}
 impl crate::DynCommandBuffer for Resource {}
 impl crate::DynComputePipeline for Resource {}
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index 2306e6ea3f..9fd40d4d26 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -485,6 +485,8 @@ pub struct BindGroupLayout {
     entries: Arc<[wgt::BindGroupLayoutEntry]>,
 }
 
+impl crate::DynBindGroupLayout for BindGroupLayout {}
+
 #[derive(Debug)]
 struct BindGroupLayoutInfo {
     entries: Arc<[wgt::BindGroupLayoutEntry]>,
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 0c83739d82..26f4918fbf 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -435,7 +435,7 @@ pub trait Api: Clone + fmt::Debug + Sized {
     /// finished.
     type Fence: DynFence + fmt::Debug;
 
-    type BindGroupLayout: fmt::Debug + WasmNotSendSync;
+    type BindGroupLayout: DynBindGroupLayout + fmt::Debug;
     type BindGroup: DynBindGroup;
     type PipelineLayout: DynPipelineLayout;
     type ShaderModule: DynShaderModule;
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index 5c836afd94..7f2d31a9f7 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -560,6 +560,8 @@ pub struct BindGroupLayout {
     entries: Arc<[wgt::BindGroupLayoutEntry]>,
 }
 
+impl crate::DynBindGroupLayout for BindGroupLayout {}
+
 #[derive(Clone, Debug, Default)]
 struct ResourceData<T> {
     buffers: T,
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index 3f1ba28bc7..ba68e38d9c 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -727,6 +727,8 @@ pub struct BindGroupLayout {
     binding_arrays: Vec<(u32, NonZeroU32)>,
 }
 
+impl crate::DynBindGroupLayout for BindGroupLayout {}
+
 #[derive(Debug)]
 pub struct PipelineLayout {
     raw: vk::PipelineLayout,

From 33f57e23059605efcb360213b513516fdf25f043 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sat, 27 Jul 2024 12:43:04 +0200
Subject: [PATCH 197/226] DynDevice create/destroy pipeline layout

---
 wgpu-hal/src/dx12/device.rs    |  2 +-
 wgpu-hal/src/dynamic/device.rs | 38 ++++++++++++++++++++++++++++++----
 wgpu-hal/src/empty.rs          |  2 +-
 wgpu-hal/src/gles/device.rs    |  2 +-
 wgpu-hal/src/lib.rs            |  6 +++---
 wgpu-hal/src/metal/device.rs   |  2 +-
 wgpu-hal/src/vulkan/device.rs  |  2 +-
 7 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index b74c9ebca2..d99554241c 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -776,7 +776,7 @@ impl crate::Device for super::Device {
 
     unsafe fn create_pipeline_layout(
         &self,
-        desc: &crate::PipelineLayoutDescriptor<super::Api>,
+        desc: &crate::PipelineLayoutDescriptor<super::BindGroupLayout>,
     ) -> Result<super::PipelineLayout, DeviceError> {
         use naga::back::hlsl;
         // Pipeline layouts are implemented as RootSignature for D3D12.
diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs
index a9adc36227..79b8f0d275 100644
--- a/wgpu-hal/src/dynamic/device.rs
+++ b/wgpu-hal/src/dynamic/device.rs
@@ -3,13 +3,13 @@
 
 use crate::{
     Api, BindGroupLayoutDescriptor, BufferDescriptor, BufferMapping, CommandEncoderDescriptor,
-    Device, DeviceError, DynBuffer, DynResource, MemoryRange, SamplerDescriptor, TextureDescriptor,
-    TextureViewDescriptor,
+    Device, DeviceError, DynBuffer, DynResource, MemoryRange, PipelineLayoutDescriptor,
+    SamplerDescriptor, TextureDescriptor, TextureViewDescriptor,
 };
 
 use super::{
-    DynBindGroupLayout, DynCommandEncoder, DynQueue, DynResourceExt as _, DynSampler, DynTexture,
-    DynTextureView,
+    DynBindGroupLayout, DynCommandEncoder, DynPipelineLayout, DynQueue, DynResourceExt as _,
+    DynSampler, DynTexture, DynTextureView,
 };
 
 pub trait DynDevice: DynResource {
@@ -59,6 +59,12 @@ pub trait DynDevice: DynResource {
         desc: &BindGroupLayoutDescriptor,
     ) -> Result<Box<dyn DynBindGroupLayout>, DeviceError>;
     unsafe fn destroy_bind_group_layout(&self, bg_layout: Box<dyn DynBindGroupLayout>);
+
+    unsafe fn create_pipeline_layout(
+        &self,
+        desc: &PipelineLayoutDescriptor<dyn DynBindGroupLayout>,
+    ) -> Result<Box<dyn DynPipelineLayout>, DeviceError>;
+    unsafe fn destroy_pipeline_layout(&self, pipeline_layout: Box<dyn DynPipelineLayout>);
 }
 
 impl<D: Device + DynResource> DynDevice for D {
@@ -171,4 +177,28 @@ impl<D: Device + DynResource> DynDevice for D {
     unsafe fn destroy_bind_group_layout(&self, bg_layout: Box<dyn DynBindGroupLayout>) {
         unsafe { D::destroy_bind_group_layout(self, bg_layout.unbox()) };
     }
+
+    unsafe fn create_pipeline_layout(
+        &self,
+        desc: &PipelineLayoutDescriptor<dyn DynBindGroupLayout>,
+    ) -> Result<Box<dyn DynPipelineLayout>, DeviceError> {
+        let bind_group_layouts: Vec<_> = desc
+            .bind_group_layouts
+            .iter()
+            .map(|bgl| bgl.expect_downcast_ref())
+            .collect();
+        let desc = PipelineLayoutDescriptor {
+            label: desc.label,
+            bind_group_layouts: &bind_group_layouts,
+            push_constant_ranges: desc.push_constant_ranges,
+            flags: desc.flags,
+        };
+
+        unsafe { D::create_pipeline_layout(self, &desc) }
+            .map(|b| Box::new(b) as Box<dyn DynPipelineLayout>)
+    }
+
+    unsafe fn destroy_pipeline_layout(&self, pipeline_layout: Box<dyn DynPipelineLayout>) {
+        unsafe { D::destroy_pipeline_layout(self, pipeline_layout.unbox()) };
+    }
 }
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index b1d0eed6c4..3847756032 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -211,7 +211,7 @@ impl crate::Device for Context {
     unsafe fn destroy_bind_group_layout(&self, bg_layout: Resource) {}
     unsafe fn create_pipeline_layout(
         &self,
-        desc: &crate::PipelineLayoutDescriptor<Api>,
+        desc: &crate::PipelineLayoutDescriptor<Resource>,
     ) -> DeviceResult<Resource> {
         Ok(Resource)
     }
diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs
index 6ce15269b0..81c17cae52 100644
--- a/wgpu-hal/src/gles/device.rs
+++ b/wgpu-hal/src/gles/device.rs
@@ -1140,7 +1140,7 @@ impl crate::Device for super::Device {
 
     unsafe fn create_pipeline_layout(
         &self,
-        desc: &crate::PipelineLayoutDescriptor<super::Api>,
+        desc: &crate::PipelineLayoutDescriptor<super::BindGroupLayout>,
     ) -> Result<super::PipelineLayout, crate::DeviceError> {
         use naga::back::glsl;
 
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 26f4918fbf..c65b34a031 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -801,7 +801,7 @@ pub trait Device: WasmNotSendSync {
     unsafe fn destroy_bind_group_layout(&self, bg_layout: <Self::A as Api>::BindGroupLayout);
     unsafe fn create_pipeline_layout(
         &self,
-        desc: &PipelineLayoutDescriptor<Self::A>,
+        desc: &PipelineLayoutDescriptor<<Self::A as Api>::BindGroupLayout>,
     ) -> Result<<Self::A as Api>::PipelineLayout, DeviceError>;
     unsafe fn destroy_pipeline_layout(&self, pipeline_layout: <Self::A as Api>::PipelineLayout);
     unsafe fn create_bind_group(
@@ -1743,10 +1743,10 @@ pub struct BindGroupLayoutDescriptor<'a> {
 }
 
 #[derive(Clone, Debug)]
-pub struct PipelineLayoutDescriptor<'a, A: Api> {
+pub struct PipelineLayoutDescriptor<'a, B: DynBindGroupLayout + ?Sized> {
     pub label: Label<'a>,
     pub flags: PipelineLayoutFlags,
-    pub bind_group_layouts: &'a [&'a A::BindGroupLayout],
+    pub bind_group_layouts: &'a [&'a B],
     pub push_constant_ranges: &'a [wgt::PushConstantRange],
 }
 
diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs
index 77631ab937..d1e58e5ce2 100644
--- a/wgpu-hal/src/metal/device.rs
+++ b/wgpu-hal/src/metal/device.rs
@@ -602,7 +602,7 @@ impl crate::Device for super::Device {
 
     unsafe fn create_pipeline_layout(
         &self,
-        desc: &crate::PipelineLayoutDescriptor<super::Api>,
+        desc: &crate::PipelineLayoutDescriptor<super::BindGroupLayout>,
     ) -> DeviceResult<super::PipelineLayout> {
         #[derive(Debug)]
         struct StageInfo {
diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs
index 9e9c50171f..f95cfdfec2 100644
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@@ -1387,7 +1387,7 @@ impl crate::Device for super::Device {
 
     unsafe fn create_pipeline_layout(
         &self,
-        desc: &crate::PipelineLayoutDescriptor<super::Api>,
+        desc: &crate::PipelineLayoutDescriptor<super::BindGroupLayout>,
     ) -> Result<super::PipelineLayout, crate::DeviceError> {
         //Note: not bothering with on stack array here as it's low frequency
         let vk_set_layouts = desc

From 276753f9630330ff1c62e20e90e3832f42b737db Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sat, 27 Jul 2024 13:12:47 +0200
Subject: [PATCH 198/226] DynDevice create/destroy bind group

bindgroup fixup
---
 wgpu-core/src/device/resource.rs |  2 +-
 wgpu-hal/src/dx12/device.rs      |  8 +++-
 wgpu-hal/src/dx12/mod.rs         |  3 ++
 wgpu-hal/src/dynamic/device.rs   | 70 +++++++++++++++++++++++++++++---
 wgpu-hal/src/dynamic/mod.rs      | 11 ++++-
 wgpu-hal/src/empty.rs            |  3 +-
 wgpu-hal/src/gles/device.rs      | 18 ++++++--
 wgpu-hal/src/gles/mod.rs         |  8 +++-
 wgpu-hal/src/lib.rs              | 45 ++++++++++++--------
 wgpu-hal/src/metal/device.rs     |  8 +++-
 wgpu-hal/src/metal/mod.rs        |  3 ++
 wgpu-hal/src/vulkan/device.rs    |  8 +++-
 wgpu-hal/src/vulkan/mod.rs       |  3 ++
 13 files changed, 158 insertions(+), 32 deletions(-)

diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index de21f0a39a..104f54a40a 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -2072,7 +2072,7 @@ impl<A: HalApi> Device<A> {
         used: &mut BindGroupStates<A>,
         used_texture_ranges: &mut Vec<TextureInitTrackerAction<A>>,
         snatch_guard: &'a SnatchGuard<'a>,
-    ) -> Result<hal::TextureBinding<'a, A>, binding_model::CreateBindGroupError> {
+    ) -> Result<hal::TextureBinding<'a, A::TextureView>, binding_model::CreateBindGroupError> {
         view.same_device(self)?;
 
         let (pub_usage, internal_use) = self.texture_use_parameters(
diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index d99554241c..a5120b0a41 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -1104,7 +1104,13 @@ impl crate::Device for super::Device {
 
     unsafe fn create_bind_group(
         &self,
-        desc: &crate::BindGroupDescriptor<super::Api>,
+        desc: &crate::BindGroupDescriptor<
+            super::BindGroupLayout,
+            super::Buffer,
+            super::Sampler,
+            super::TextureView,
+            super::AccelerationStructure,
+        >,
     ) -> Result<super::BindGroup, DeviceError> {
         let mut cpu_views = desc
             .layout
diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index fb0d9a1997..471e90f2b7 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -88,6 +88,7 @@ impl crate::Api for Api {
 }
 
 crate::impl_dyn_resource!(
+    AccelerationStructure,
     BindGroup,
     BindGroupLayout,
     Buffer,
@@ -671,6 +672,8 @@ unsafe impl Sync for ComputePipeline {}
 #[derive(Debug)]
 pub struct AccelerationStructure {}
 
+impl crate::DynAccelerationStructure for AccelerationStructure {}
+
 impl SwapChain {
     unsafe fn release_resources(self) -> d3d12::ComPtr<dxgi1_4::IDXGISwapChain3> {
         self.raw
diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs
index 79b8f0d275..788c6dc175 100644
--- a/wgpu-hal/src/dynamic/device.rs
+++ b/wgpu-hal/src/dynamic/device.rs
@@ -2,14 +2,14 @@
 #![allow(trivial_casts)]
 
 use crate::{
-    Api, BindGroupLayoutDescriptor, BufferDescriptor, BufferMapping, CommandEncoderDescriptor,
-    Device, DeviceError, DynBuffer, DynResource, MemoryRange, PipelineLayoutDescriptor,
-    SamplerDescriptor, TextureDescriptor, TextureViewDescriptor,
+    Api, BindGroupDescriptor, BindGroupLayoutDescriptor, BufferDescriptor, BufferMapping,
+    CommandEncoderDescriptor, Device, DeviceError, DynBuffer, DynResource, MemoryRange,
+    PipelineLayoutDescriptor, SamplerDescriptor, TextureDescriptor, TextureViewDescriptor,
 };
 
 use super::{
-    DynBindGroupLayout, DynCommandEncoder, DynPipelineLayout, DynQueue, DynResourceExt as _,
-    DynSampler, DynTexture, DynTextureView,
+    DynAccelerationStructure, DynBindGroup, DynBindGroupLayout, DynCommandEncoder,
+    DynPipelineLayout, DynQueue, DynResourceExt as _, DynSampler, DynTexture, DynTextureView,
 };
 
 pub trait DynDevice: DynResource {
@@ -65,6 +65,18 @@ pub trait DynDevice: DynResource {
         desc: &PipelineLayoutDescriptor<dyn DynBindGroupLayout>,
     ) -> Result<Box<dyn DynPipelineLayout>, DeviceError>;
     unsafe fn destroy_pipeline_layout(&self, pipeline_layout: Box<dyn DynPipelineLayout>);
+
+    unsafe fn create_bind_group(
+        &self,
+        desc: &BindGroupDescriptor<
+            dyn DynBindGroupLayout,
+            dyn DynBuffer,
+            dyn DynSampler,
+            dyn DynTextureView,
+            dyn DynAccelerationStructure,
+        >,
+    ) -> Result<Box<dyn DynBindGroup>, DeviceError>;
+    unsafe fn destroy_bind_group(&self, group: Box<dyn DynBindGroup>);
 }
 
 impl<D: Device + DynResource> DynDevice for D {
@@ -201,4 +213,52 @@ impl<D: Device + DynResource> DynDevice for D {
     unsafe fn destroy_pipeline_layout(&self, pipeline_layout: Box<dyn DynPipelineLayout>) {
         unsafe { D::destroy_pipeline_layout(self, pipeline_layout.unbox()) };
     }
+
+    unsafe fn create_bind_group(
+        &self,
+        desc: &BindGroupDescriptor<
+            dyn DynBindGroupLayout,
+            dyn DynBuffer,
+            dyn DynSampler,
+            dyn DynTextureView,
+            dyn DynAccelerationStructure,
+        >,
+    ) -> Result<Box<dyn DynBindGroup>, DeviceError> {
+        let buffers: Vec<_> = desc
+            .buffers
+            .iter()
+            .map(|b| b.clone().expect_downcast())
+            .collect();
+        let samplers: Vec<_> = desc
+            .samplers
+            .iter()
+            .map(|s| s.expect_downcast_ref())
+            .collect();
+        let textures: Vec<_> = desc
+            .textures
+            .iter()
+            .map(|t| t.clone().expect_downcast())
+            .collect();
+        let acceleration_structures: Vec<_> = desc
+            .acceleration_structures
+            .iter()
+            .map(|a| a.expect_downcast_ref())
+            .collect();
+
+        let desc = BindGroupDescriptor {
+            label: desc.label.to_owned(),
+            layout: desc.layout.expect_downcast_ref(),
+            buffers: &buffers,
+            samplers: &samplers,
+            textures: &textures,
+            entries: desc.entries,
+            acceleration_structures: &acceleration_structures,
+        };
+
+        unsafe { D::create_bind_group(self, &desc) }.map(|b| Box::new(b) as Box<dyn DynBindGroup>)
+    }
+
+    unsafe fn destroy_bind_group(&self, group: Box<dyn DynBindGroup>) {
+        unsafe { D::destroy_bind_group(self, group.unbox()) };
+    }
 }
diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
index 2607ba44c3..3b7312fecf 100644
--- a/wgpu-hal/src/dynamic/mod.rs
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -12,7 +12,7 @@ use std::any::Any;
 
 use wgt::WasmNotSendSync;
 
-use crate::BufferBinding;
+use crate::{BufferBinding, TextureBinding};
 
 /// Base trait for all resources, allows downcasting via [`Any`].
 pub trait DynResource: Any + WasmNotSendSync + 'static {
@@ -121,3 +121,12 @@ impl<'a> BufferBinding<'a, dyn DynBuffer> {
         }
     }
 }
+
+impl<'a> TextureBinding<'a, dyn DynTextureView> {
+    pub fn expect_downcast<T: DynTextureView>(self) -> TextureBinding<'a, T> {
+        TextureBinding {
+            view: self.view.expect_downcast_ref(),
+            usage: self.usage,
+        }
+    }
+}
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 3847756032..1ca1ae6545 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -42,6 +42,7 @@ impl crate::Api for Api {
 
 crate::impl_dyn_resource!(Context, Encoder, Resource);
 
+impl crate::DynAccelerationStructure for Resource {}
 impl crate::DynBindGroup for Resource {}
 impl crate::DynBindGroupLayout for Resource {}
 impl crate::DynBuffer for Resource {}
@@ -218,7 +219,7 @@ impl crate::Device for Context {
     unsafe fn destroy_pipeline_layout(&self, pipeline_layout: Resource) {}
     unsafe fn create_bind_group(
         &self,
-        desc: &crate::BindGroupDescriptor<Api>,
+        desc: &crate::BindGroupDescriptor<Resource, Resource, Resource, Resource, Resource>,
     ) -> DeviceResult<Resource> {
         Ok(Resource)
     }
diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs
index 81c17cae52..777225edfc 100644
--- a/wgpu-hal/src/gles/device.rs
+++ b/wgpu-hal/src/gles/device.rs
@@ -1232,7 +1232,13 @@ impl crate::Device for super::Device {
 
     unsafe fn create_bind_group(
         &self,
-        desc: &crate::BindGroupDescriptor<super::Api>,
+        desc: &crate::BindGroupDescriptor<
+            super::BindGroupLayout,
+            super::Buffer,
+            super::Sampler,
+            super::TextureView,
+            super::AccelerationStructure,
+        >,
     ) -> Result<super::BindGroup, crate::DeviceError> {
         let mut contents = Vec::new();
 
@@ -1589,7 +1595,7 @@ impl crate::Device for super::Device {
     unsafe fn create_acceleration_structure(
         &self,
         _desc: &crate::AccelerationStructureDescriptor,
-    ) -> Result<(), crate::DeviceError> {
+    ) -> Result<super::AccelerationStructure, crate::DeviceError> {
         unimplemented!()
     }
     unsafe fn get_acceleration_structure_build_sizes<'a>(
@@ -1600,11 +1606,15 @@ impl crate::Device for super::Device {
     }
     unsafe fn get_acceleration_structure_device_address(
         &self,
-        _acceleration_structure: &(),
+        _acceleration_structure: &super::AccelerationStructure,
     ) -> wgt::BufferAddress {
         unimplemented!()
     }
-    unsafe fn destroy_acceleration_structure(&self, _acceleration_structure: ()) {}
+    unsafe fn destroy_acceleration_structure(
+        &self,
+        _acceleration_structure: super::AccelerationStructure,
+    ) {
+    }
 
     fn get_internal_counters(&self) -> wgt::HalCounters {
         self.counters.clone()
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index 9fd40d4d26..3ef0699c08 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -153,7 +153,7 @@ impl crate::Api for Api {
     type Sampler = Sampler;
     type QuerySet = QuerySet;
     type Fence = Fence;
-    type AccelerationStructure = ();
+    type AccelerationStructure = AccelerationStructure;
     type PipelineCache = ();
 
     type BindGroupLayout = BindGroupLayout;
@@ -165,6 +165,7 @@ impl crate::Api for Api {
 }
 
 crate::impl_dyn_resource!(
+    AccelerationStructure,
     BindGroup,
     BindGroupLayout,
     Buffer,
@@ -750,6 +751,11 @@ impl Fence {
     }
 }
 
+#[derive(Debug)]
+pub struct AccelerationStructure;
+
+impl crate::DynAccelerationStructure for AccelerationStructure {}
+
 #[derive(Clone, Debug, PartialEq)]
 struct StencilOps {
     pass: u32,
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index c65b34a031..016421b4ad 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -443,7 +443,7 @@ pub trait Api: Clone + fmt::Debug + Sized {
     type ComputePipeline: DynComputePipeline;
     type PipelineCache: fmt::Debug + WasmNotSendSync;
 
-    type AccelerationStructure: fmt::Debug + WasmNotSendSync + 'static;
+    type AccelerationStructure: DynAccelerationStructure + fmt::Debug + 'static;
 }
 
 pub trait Instance: Sized + WasmNotSendSync {
@@ -804,9 +804,17 @@ pub trait Device: WasmNotSendSync {
         desc: &PipelineLayoutDescriptor<<Self::A as Api>::BindGroupLayout>,
     ) -> Result<<Self::A as Api>::PipelineLayout, DeviceError>;
     unsafe fn destroy_pipeline_layout(&self, pipeline_layout: <Self::A as Api>::PipelineLayout);
+
+    #[allow(clippy::type_complexity)]
     unsafe fn create_bind_group(
         &self,
-        desc: &BindGroupDescriptor<Self::A>,
+        desc: &BindGroupDescriptor<
+            <Self::A as Api>::BindGroupLayout,
+            <Self::A as Api>::Buffer,
+            <Self::A as Api>::Sampler,
+            <Self::A as Api>::TextureView,
+            <Self::A as Api>::AccelerationStructure,
+        >,
     ) -> Result<<Self::A as Api>::BindGroup, DeviceError>;
     unsafe fn destroy_bind_group(&self, group: <Self::A as Api>::BindGroup);
 
@@ -1776,10 +1784,9 @@ pub struct BufferBinding<'a, B: DynBuffer + ?Sized> {
     pub size: Option<wgt::BufferSize>,
 }
 
-// Rust gets confused about the impl requirements for `A`
-impl<B: DynBuffer> Clone for BufferBinding<'_, B> {
+impl<'a, T: DynBuffer + ?Sized> Clone for BufferBinding<'a, T> {
     fn clone(&self) -> Self {
-        Self {
+        BufferBinding {
             buffer: self.buffer,
             offset: self.offset,
             size: self.size,
@@ -1788,15 +1795,14 @@ impl<B: DynBuffer> Clone for BufferBinding<'_, B> {
 }
 
 #[derive(Debug)]
-pub struct TextureBinding<'a, A: Api> {
-    pub view: &'a A::TextureView,
+pub struct TextureBinding<'a, T: DynTextureView + ?Sized> {
+    pub view: &'a T,
     pub usage: TextureUses,
 }
 
-// Rust gets confused about the impl requirements for `A`
-impl<A: Api> Clone for TextureBinding<'_, A> {
+impl<'a, T: DynTextureView + ?Sized> Clone for TextureBinding<'a, T> {
     fn clone(&self) -> Self {
-        Self {
+        TextureBinding {
             view: self.view,
             usage: self.usage,
         }
@@ -1820,14 +1826,21 @@ pub struct BindGroupEntry {
 ///    of the corresponding resource array, selected by the relevant
 ///    `BindGroupLayoutEntry`.
 #[derive(Clone, Debug)]
-pub struct BindGroupDescriptor<'a, A: Api> {
+pub struct BindGroupDescriptor<
+    'a,
+    Bgl: DynBindGroupLayout + ?Sized,
+    B: DynBuffer + ?Sized,
+    S: DynSampler + ?Sized,
+    T: DynTextureView + ?Sized,
+    A: DynAccelerationStructure + ?Sized,
+> {
     pub label: Label<'a>,
-    pub layout: &'a A::BindGroupLayout,
-    pub buffers: &'a [BufferBinding<'a, A::Buffer>],
-    pub samplers: &'a [&'a A::Sampler],
-    pub textures: &'a [TextureBinding<'a, A>],
+    pub layout: &'a Bgl,
+    pub buffers: &'a [BufferBinding<'a, B>],
+    pub samplers: &'a [&'a S],
+    pub textures: &'a [TextureBinding<'a, T>],
     pub entries: &'a [BindGroupEntry],
-    pub acceleration_structures: &'a [&'a A::AccelerationStructure],
+    pub acceleration_structures: &'a [&'a A],
 }
 
 #[derive(Clone, Debug)]
diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs
index d1e58e5ce2..7fb7b5132b 100644
--- a/wgpu-hal/src/metal/device.rs
+++ b/wgpu-hal/src/metal/device.rs
@@ -776,7 +776,13 @@ impl crate::Device for super::Device {
 
     unsafe fn create_bind_group(
         &self,
-        desc: &crate::BindGroupDescriptor<super::Api>,
+        desc: &crate::BindGroupDescriptor<
+            super::BindGroupLayout,
+            super::Buffer,
+            super::Sampler,
+            super::TextureView,
+            super::AccelerationStructure,
+        >,
     ) -> DeviceResult<super::BindGroup> {
         let mut bg = super::BindGroup::default();
         for (&stage, counter) in super::NAGA_STAGES.iter().zip(bg.counters.iter_mut()) {
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index 7f2d31a9f7..7b4dc8b22e 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -72,6 +72,7 @@ impl crate::Api for Api {
 }
 
 crate::impl_dyn_resource!(
+    AccelerationStructure,
     BindGroup,
     BindGroupLayout,
     Buffer,
@@ -931,3 +932,5 @@ unsafe impl Sync for CommandBuffer {}
 
 #[derive(Debug)]
 pub struct AccelerationStructure;
+
+impl crate::DynAccelerationStructure for AccelerationStructure {}
diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs
index f95cfdfec2..5d0aa39760 100644
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@@ -1453,7 +1453,13 @@ impl crate::Device for super::Device {
 
     unsafe fn create_bind_group(
         &self,
-        desc: &crate::BindGroupDescriptor<super::Api>,
+        desc: &crate::BindGroupDescriptor<
+            super::BindGroupLayout,
+            super::Buffer,
+            super::Sampler,
+            super::TextureView,
+            super::AccelerationStructure,
+        >,
     ) -> Result<super::BindGroup, crate::DeviceError> {
         let mut vk_sets = unsafe {
             self.desc_allocator.lock().allocate(
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index ba68e38d9c..816bf37e5f 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -79,6 +79,7 @@ impl crate::Api for Api {
 }
 
 crate::impl_dyn_resource!(
+    AccelerationStructure,
     BindGroup,
     BindGroupLayout,
     Buffer,
@@ -670,6 +671,8 @@ pub struct AccelerationStructure {
     block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
 }
 
+impl crate::DynAccelerationStructure for AccelerationStructure {}
+
 #[derive(Debug)]
 pub struct Texture {
     raw: vk::Image,

From ad5c7fe50e751ed5764e531fa1c5d7962ce94c4f Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sat, 27 Jul 2024 18:16:41 +0200
Subject: [PATCH 199/226] DynDevice create/destroy ShaderModule

---
 wgpu-hal/src/dynamic/device.rs | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs
index 788c6dc175..0e13273852 100644
--- a/wgpu-hal/src/dynamic/device.rs
+++ b/wgpu-hal/src/dynamic/device.rs
@@ -4,12 +4,14 @@
 use crate::{
     Api, BindGroupDescriptor, BindGroupLayoutDescriptor, BufferDescriptor, BufferMapping,
     CommandEncoderDescriptor, Device, DeviceError, DynBuffer, DynResource, MemoryRange,
-    PipelineLayoutDescriptor, SamplerDescriptor, TextureDescriptor, TextureViewDescriptor,
+    PipelineLayoutDescriptor, SamplerDescriptor, ShaderError, ShaderInput, ShaderModuleDescriptor,
+    TextureDescriptor, TextureViewDescriptor,
 };
 
 use super::{
     DynAccelerationStructure, DynBindGroup, DynBindGroupLayout, DynCommandEncoder,
-    DynPipelineLayout, DynQueue, DynResourceExt as _, DynSampler, DynTexture, DynTextureView,
+    DynPipelineLayout, DynQueue, DynResourceExt as _, DynSampler, DynShaderModule, DynTexture,
+    DynTextureView,
 };
 
 pub trait DynDevice: DynResource {
@@ -77,6 +79,13 @@ pub trait DynDevice: DynResource {
         >,
     ) -> Result<Box<dyn DynBindGroup>, DeviceError>;
     unsafe fn destroy_bind_group(&self, group: Box<dyn DynBindGroup>);
+
+    unsafe fn create_shader_module(
+        &self,
+        desc: &ShaderModuleDescriptor,
+        shader: ShaderInput,
+    ) -> Result<Box<dyn DynShaderModule>, ShaderError>;
+    unsafe fn destroy_shader_module(&self, module: Box<dyn DynShaderModule>);
 }
 
 impl<D: Device + DynResource> DynDevice for D {
@@ -261,4 +270,17 @@ impl<D: Device + DynResource> DynDevice for D {
     unsafe fn destroy_bind_group(&self, group: Box<dyn DynBindGroup>) {
         unsafe { D::destroy_bind_group(self, group.unbox()) };
     }
+
+    unsafe fn create_shader_module(
+        &self,
+        desc: &ShaderModuleDescriptor,
+        shader: ShaderInput,
+    ) -> Result<Box<dyn DynShaderModule>, ShaderError> {
+        unsafe { D::create_shader_module(self, desc, shader) }
+            .map(|b| Box::new(b) as Box<dyn DynShaderModule>)
+    }
+
+    unsafe fn destroy_shader_module(&self, module: Box<dyn DynShaderModule>) {
+        unsafe { D::destroy_shader_module(self, module.unbox()) };
+    }
 }

From 3932f8b8543e1cc15a836515609e2c9312e4343a Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 28 Jul 2024 09:56:20 +0200
Subject: [PATCH 200/226] DynDevice create/destroy compute/render pipeline

---
 wgpu-hal/src/dx12/device.rs    | 22 ++++++---
 wgpu-hal/src/dx12/mod.rs       |  7 ++-
 wgpu-hal/src/dynamic/device.rs | 84 ++++++++++++++++++++++++++++++++--
 wgpu-hal/src/dynamic/mod.rs    | 13 +++++-
 wgpu-hal/src/empty.rs          |  5 +-
 wgpu-hal/src/gles/device.rs    | 22 ++++++---
 wgpu-hal/src/gles/mod.rs       |  8 +++-
 wgpu-hal/src/lib.rs            | 54 +++++++++++++++-------
 wgpu-hal/src/metal/device.rs   | 20 +++++---
 wgpu-hal/src/metal/mod.rs      |  8 +++-
 wgpu-hal/src/vulkan/device.rs  | 25 ++++++----
 wgpu-hal/src/vulkan/mod.rs     |  2 +
 12 files changed, 215 insertions(+), 55 deletions(-)

diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index a5120b0a41..b58b882b44 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -211,10 +211,10 @@ impl super::Device {
     /// allowed to be a subset of the vertex outputs.
     fn load_shader(
         &self,
-        stage: &crate::ProgrammableStage<super::Api>,
+        stage: &crate::ProgrammableStage<super::ShaderModule>,
         layout: &super::PipelineLayout,
         naga_stage: naga::ShaderStage,
-        fragment_stage: Option<&crate::ProgrammableStage<super::Api>>,
+        fragment_stage: Option<&crate::ProgrammableStage<super::ShaderModule>>,
     ) -> Result<super::CompiledShader, crate::PipelineError> {
         use naga::back::hlsl;
 
@@ -1320,7 +1320,11 @@ impl crate::Device for super::Device {
 
     unsafe fn create_render_pipeline(
         &self,
-        desc: &crate::RenderPipelineDescriptor<super::Api>,
+        desc: &crate::RenderPipelineDescriptor<
+            super::PipelineLayout,
+            super::ShaderModule,
+            super::PipelineCache,
+        >,
     ) -> Result<super::RenderPipeline, crate::PipelineError> {
         let (topology_class, topology) = conv::map_topology(desc.primitive.topology);
         let mut shader_stages = wgt::ShaderStages::VERTEX;
@@ -1515,7 +1519,11 @@ impl crate::Device for super::Device {
 
     unsafe fn create_compute_pipeline(
         &self,
-        desc: &crate::ComputePipelineDescriptor<super::Api>,
+        desc: &crate::ComputePipelineDescriptor<
+            super::PipelineLayout,
+            super::ShaderModule,
+            super::PipelineCache,
+        >,
     ) -> Result<super::ComputePipeline, crate::PipelineError> {
         let blob_cs =
             self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute, None)?;
@@ -1559,10 +1567,10 @@ impl crate::Device for super::Device {
     unsafe fn create_pipeline_cache(
         &self,
         _desc: &crate::PipelineCacheDescriptor<'_>,
-    ) -> Result<(), crate::PipelineCacheError> {
-        Ok(())
+    ) -> Result<super::PipelineCache, crate::PipelineCacheError> {
+        Ok(super::PipelineCache)
     }
-    unsafe fn destroy_pipeline_cache(&self, (): ()) {}
+    unsafe fn destroy_pipeline_cache(&self, _: super::PipelineCache) {}
 
     unsafe fn create_query_set(
         &self,
diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index 471e90f2b7..e5db9fc234 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -82,7 +82,7 @@ impl crate::Api for Api {
     type ShaderModule = ShaderModule;
     type RenderPipeline = RenderPipeline;
     type ComputePipeline = ComputePipeline;
-    type PipelineCache = ();
+    type PipelineCache = PipelineCache;
 
     type AccelerationStructure = AccelerationStructure;
 }
@@ -669,6 +669,11 @@ impl crate::DynComputePipeline for ComputePipeline {}
 unsafe impl Send for ComputePipeline {}
 unsafe impl Sync for ComputePipeline {}
 
+#[derive(Debug)]
+pub struct PipelineCache;
+
+impl crate::DynPipelineCache for PipelineCache {}
+
 #[derive(Debug)]
 pub struct AccelerationStructure {}
 
diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs
index 0e13273852..4d3b978267 100644
--- a/wgpu-hal/src/dynamic/device.rs
+++ b/wgpu-hal/src/dynamic/device.rs
@@ -3,15 +3,16 @@
 
 use crate::{
     Api, BindGroupDescriptor, BindGroupLayoutDescriptor, BufferDescriptor, BufferMapping,
-    CommandEncoderDescriptor, Device, DeviceError, DynBuffer, DynResource, MemoryRange,
-    PipelineLayoutDescriptor, SamplerDescriptor, ShaderError, ShaderInput, ShaderModuleDescriptor,
-    TextureDescriptor, TextureViewDescriptor,
+    CommandEncoderDescriptor, ComputePipelineDescriptor, Device, DeviceError, DynBuffer,
+    DynResource, MemoryRange, PipelineError, PipelineLayoutDescriptor, RenderPipelineDescriptor,
+    SamplerDescriptor, ShaderError, ShaderInput, ShaderModuleDescriptor, TextureDescriptor,
+    TextureViewDescriptor,
 };
 
 use super::{
     DynAccelerationStructure, DynBindGroup, DynBindGroupLayout, DynCommandEncoder,
-    DynPipelineLayout, DynQueue, DynResourceExt as _, DynSampler, DynShaderModule, DynTexture,
-    DynTextureView,
+    DynComputePipeline, DynPipelineCache, DynPipelineLayout, DynQueue, DynRenderPipeline,
+    DynResourceExt as _, DynSampler, DynShaderModule, DynTexture, DynTextureView,
 };
 
 pub trait DynDevice: DynResource {
@@ -86,6 +87,26 @@ pub trait DynDevice: DynResource {
         shader: ShaderInput,
     ) -> Result<Box<dyn DynShaderModule>, ShaderError>;
     unsafe fn destroy_shader_module(&self, module: Box<dyn DynShaderModule>);
+
+    unsafe fn create_render_pipeline(
+        &self,
+        desc: &RenderPipelineDescriptor<
+            dyn DynPipelineLayout,
+            dyn DynShaderModule,
+            dyn DynPipelineCache,
+        >,
+    ) -> Result<Box<dyn DynRenderPipeline>, PipelineError>;
+    unsafe fn destroy_render_pipeline(&self, pipeline: Box<dyn DynRenderPipeline>);
+
+    unsafe fn create_compute_pipeline(
+        &self,
+        desc: &ComputePipelineDescriptor<
+            dyn DynPipelineLayout,
+            dyn DynShaderModule,
+            dyn DynPipelineCache,
+        >,
+    ) -> Result<Box<dyn DynComputePipeline>, PipelineError>;
+    unsafe fn destroy_compute_pipeline(&self, pipeline: Box<dyn DynComputePipeline>);
 }
 
 impl<D: Device + DynResource> DynDevice for D {
@@ -283,4 +304,57 @@ impl<D: Device + DynResource> DynDevice for D {
     unsafe fn destroy_shader_module(&self, module: Box<dyn DynShaderModule>) {
         unsafe { D::destroy_shader_module(self, module.unbox()) };
     }
+
+    unsafe fn create_render_pipeline(
+        &self,
+        desc: &RenderPipelineDescriptor<
+            dyn DynPipelineLayout,
+            dyn DynShaderModule,
+            dyn DynPipelineCache,
+        >,
+    ) -> Result<Box<dyn DynRenderPipeline>, PipelineError> {
+        let desc = RenderPipelineDescriptor {
+            label: desc.label,
+            layout: desc.layout.expect_downcast_ref(),
+            vertex_buffers: desc.vertex_buffers,
+            vertex_stage: desc.vertex_stage.clone().expect_downcast(),
+            primitive: desc.primitive,
+            depth_stencil: desc.depth_stencil.clone(),
+            multisample: desc.multisample,
+            fragment_stage: desc.fragment_stage.clone().map(|f| f.expect_downcast()),
+            color_targets: desc.color_targets,
+            multiview: desc.multiview,
+            cache: desc.cache.map(|c| c.expect_downcast_ref()),
+        };
+
+        unsafe { D::create_render_pipeline(self, &desc) }
+            .map(|b| Box::new(b) as Box<dyn DynRenderPipeline>)
+    }
+
+    unsafe fn destroy_render_pipeline(&self, pipeline: Box<dyn DynRenderPipeline>) {
+        unsafe { D::destroy_render_pipeline(self, pipeline.unbox()) };
+    }
+
+    unsafe fn create_compute_pipeline(
+        &self,
+        desc: &ComputePipelineDescriptor<
+            dyn DynPipelineLayout,
+            dyn DynShaderModule,
+            dyn DynPipelineCache,
+        >,
+    ) -> Result<Box<dyn DynComputePipeline>, PipelineError> {
+        let desc = ComputePipelineDescriptor {
+            label: desc.label,
+            layout: desc.layout.expect_downcast_ref(),
+            stage: desc.stage.clone().expect_downcast(),
+            cache: desc.cache.as_ref().map(|c| c.expect_downcast_ref()),
+        };
+
+        unsafe { D::create_compute_pipeline(self, &desc) }
+            .map(|b| Box::new(b) as Box<dyn DynComputePipeline>)
+    }
+
+    unsafe fn destroy_compute_pipeline(&self, pipeline: Box<dyn DynComputePipeline>) {
+        unsafe { D::destroy_compute_pipeline(self, pipeline.unbox()) };
+    }
 }
diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
index 3b7312fecf..9b4875bc76 100644
--- a/wgpu-hal/src/dynamic/mod.rs
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -12,7 +12,7 @@ use std::any::Any;
 
 use wgt::WasmNotSendSync;
 
-use crate::{BufferBinding, TextureBinding};
+use crate::{BufferBinding, ProgrammableStage, TextureBinding};
 
 /// Base trait for all resources, allows downcasting via [`Any`].
 pub trait DynResource: Any + WasmNotSendSync + 'static {
@@ -130,3 +130,14 @@ impl<'a> TextureBinding<'a, dyn DynTextureView> {
         }
     }
 }
+
+impl<'a> ProgrammableStage<'a, dyn DynShaderModule> {
+    fn expect_downcast<T: DynShaderModule>(self) -> ProgrammableStage<'a, T> {
+        ProgrammableStage {
+            module: self.module.expect_downcast_ref(),
+            entry_point: self.entry_point,
+            constants: self.constants,
+            zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory,
+        }
+    }
+}
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 1ca1ae6545..87139ce0f0 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -49,6 +49,7 @@ impl crate::DynBuffer for Resource {}
 impl crate::DynCommandBuffer for Resource {}
 impl crate::DynComputePipeline for Resource {}
 impl crate::DynFence for Resource {}
+impl crate::DynPipelineCache for Resource {}
 impl crate::DynPipelineLayout for Resource {}
 impl crate::DynQuerySet for Resource {}
 impl crate::DynRenderPipeline for Resource {}
@@ -235,14 +236,14 @@ impl crate::Device for Context {
     unsafe fn destroy_shader_module(&self, module: Resource) {}
     unsafe fn create_render_pipeline(
         &self,
-        desc: &crate::RenderPipelineDescriptor<Api>,
+        desc: &crate::RenderPipelineDescriptor<Resource, Resource, Resource>,
     ) -> Result<Resource, crate::PipelineError> {
         Ok(Resource)
     }
     unsafe fn destroy_render_pipeline(&self, pipeline: Resource) {}
     unsafe fn create_compute_pipeline(
         &self,
-        desc: &crate::ComputePipelineDescriptor<Api>,
+        desc: &crate::ComputePipelineDescriptor<Resource, Resource, Resource>,
     ) -> Result<Resource, crate::PipelineError> {
         Ok(Resource)
     }
diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs
index 777225edfc..f459d970fc 100644
--- a/wgpu-hal/src/gles/device.rs
+++ b/wgpu-hal/src/gles/device.rs
@@ -13,7 +13,7 @@ use std::sync::atomic::Ordering;
 
 type ShaderStage<'a> = (
     naga::ShaderStage,
-    &'a crate::ProgrammableStage<'a, super::Api>,
+    &'a crate::ProgrammableStage<'a, super::ShaderModule>,
 );
 type NameBindingMap = rustc_hash::FxHashMap<String, (super::BindingRegister, u8)>;
 
@@ -205,7 +205,7 @@ impl super::Device {
     fn create_shader(
         gl: &glow::Context,
         naga_stage: naga::ShaderStage,
-        stage: &crate::ProgrammableStage<super::Api>,
+        stage: &crate::ProgrammableStage<super::ShaderModule>,
         context: CompilationContext,
         program: glow::Program,
     ) -> Result<glow::Shader, crate::PipelineError> {
@@ -1346,7 +1346,11 @@ impl crate::Device for super::Device {
 
     unsafe fn create_render_pipeline(
         &self,
-        desc: &crate::RenderPipelineDescriptor<super::Api>,
+        desc: &crate::RenderPipelineDescriptor<
+            super::PipelineLayout,
+            super::ShaderModule,
+            super::PipelineCache,
+        >,
     ) -> Result<super::RenderPipeline, crate::PipelineError> {
         let gl = &self.shared.context.lock();
         let mut shaders = ArrayVec::new();
@@ -1436,7 +1440,11 @@ impl crate::Device for super::Device {
 
     unsafe fn create_compute_pipeline(
         &self,
-        desc: &crate::ComputePipelineDescriptor<super::Api>,
+        desc: &crate::ComputePipelineDescriptor<
+            super::PipelineLayout,
+            super::ShaderModule,
+            super::PipelineCache,
+        >,
     ) -> Result<super::ComputePipeline, crate::PipelineError> {
         let gl = &self.shared.context.lock();
         let mut shaders = ArrayVec::new();
@@ -1469,12 +1477,12 @@ impl crate::Device for super::Device {
     unsafe fn create_pipeline_cache(
         &self,
         _: &crate::PipelineCacheDescriptor<'_>,
-    ) -> Result<(), crate::PipelineCacheError> {
+    ) -> Result<super::PipelineCache, crate::PipelineCacheError> {
         // Even though the cache doesn't do anything, we still return something here
         // as the least bad option
-        Ok(())
+        Ok(super::PipelineCache)
     }
-    unsafe fn destroy_pipeline_cache(&self, (): ()) {}
+    unsafe fn destroy_pipeline_cache(&self, _: super::PipelineCache) {}
 
     #[cfg_attr(target_arch = "wasm32", allow(unused))]
     unsafe fn create_query_set(
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index 3ef0699c08..617d3f0729 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -154,7 +154,7 @@ impl crate::Api for Api {
     type QuerySet = QuerySet;
     type Fence = Fence;
     type AccelerationStructure = AccelerationStructure;
-    type PipelineCache = ();
+    type PipelineCache = PipelineCache;
 
     type BindGroupLayout = BindGroupLayout;
     type BindGroup = BindGroup;
@@ -174,6 +174,7 @@ crate::impl_dyn_resource!(
     ComputePipeline,
     Device,
     Fence,
+    PipelineCache,
     PipelineLayout,
     QuerySet,
     Queue,
@@ -756,6 +757,11 @@ pub struct AccelerationStructure;
 
 impl crate::DynAccelerationStructure for AccelerationStructure {}
 
+#[derive(Debug)]
+pub struct PipelineCache;
+
+impl crate::DynPipelineCache for PipelineCache {}
+
 #[derive(Clone, Debug, PartialEq)]
 struct StencilOps {
     pass: u32,
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 016421b4ad..38e0cd1f5b 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -441,7 +441,7 @@ pub trait Api: Clone + fmt::Debug + Sized {
     type ShaderModule: DynShaderModule;
     type RenderPipeline: DynRenderPipeline;
     type ComputePipeline: DynComputePipeline;
-    type PipelineCache: fmt::Debug + WasmNotSendSync;
+    type PipelineCache: DynPipelineCache + fmt::Debug;
 
     type AccelerationStructure: DynAccelerationStructure + fmt::Debug + 'static;
 }
@@ -824,16 +824,29 @@ pub trait Device: WasmNotSendSync {
         shader: ShaderInput,
     ) -> Result<<Self::A as Api>::ShaderModule, ShaderError>;
     unsafe fn destroy_shader_module(&self, module: <Self::A as Api>::ShaderModule);
+
+    #[allow(clippy::type_complexity)]
     unsafe fn create_render_pipeline(
         &self,
-        desc: &RenderPipelineDescriptor<Self::A>,
+        desc: &RenderPipelineDescriptor<
+            <Self::A as Api>::PipelineLayout,
+            <Self::A as Api>::ShaderModule,
+            <Self::A as Api>::PipelineCache,
+        >,
     ) -> Result<<Self::A as Api>::RenderPipeline, PipelineError>;
     unsafe fn destroy_render_pipeline(&self, pipeline: <Self::A as Api>::RenderPipeline);
+
+    #[allow(clippy::type_complexity)]
     unsafe fn create_compute_pipeline(
         &self,
-        desc: &ComputePipelineDescriptor<Self::A>,
+        desc: &ComputePipelineDescriptor<
+            <Self::A as Api>::PipelineLayout,
+            <Self::A as Api>::ShaderModule,
+            <Self::A as Api>::PipelineCache,
+        >,
     ) -> Result<<Self::A as Api>::ComputePipeline, PipelineError>;
     unsafe fn destroy_compute_pipeline(&self, pipeline: <Self::A as Api>::ComputePipeline);
+
     unsafe fn create_pipeline_cache(
         &self,
         desc: &PipelineCacheDescriptor<'_>,
@@ -1887,9 +1900,9 @@ pub struct DebugSource {
 
 /// Describes a programmable pipeline stage.
 #[derive(Debug)]
-pub struct ProgrammableStage<'a, A: Api> {
+pub struct ProgrammableStage<'a, M: DynShaderModule + ?Sized> {
     /// The compiled shader module for this stage.
-    pub module: &'a A::ShaderModule,
+    pub module: &'a M,
     /// The name of the entry point in the compiled shader. There must be a function with this name
     ///  in the shader.
     pub entry_point: &'a str,
@@ -1902,8 +1915,7 @@ pub struct ProgrammableStage<'a, A: Api> {
     pub zero_initialize_workgroup_memory: bool,
 }
 
-// Rust gets confused about the impl requirements for `A`
-impl<A: Api> Clone for ProgrammableStage<'_, A> {
+impl<M: DynShaderModule + ?Sized> Clone for ProgrammableStage<'_, M> {
     fn clone(&self) -> Self {
         Self {
             module: self.module,
@@ -1916,14 +1928,19 @@ impl<A: Api> Clone for ProgrammableStage<'_, A> {
 
 /// Describes a compute pipeline.
 #[derive(Clone, Debug)]
-pub struct ComputePipelineDescriptor<'a, A: Api> {
+pub struct ComputePipelineDescriptor<
+    'a,
+    Pl: DynPipelineLayout + ?Sized,
+    M: DynShaderModule + ?Sized,
+    Pc: DynPipelineCache + ?Sized,
+> {
     pub label: Label<'a>,
     /// The layout of bind groups for this pipeline.
-    pub layout: &'a A::PipelineLayout,
+    pub layout: &'a Pl,
     /// The compiled compute stage and its entry point.
-    pub stage: ProgrammableStage<'a, A>,
+    pub stage: ProgrammableStage<'a, M>,
     /// The cache which will be used and filled when compiling this pipeline
-    pub cache: Option<&'a A::PipelineCache>,
+    pub cache: Option<&'a Pc>,
 }
 
 pub struct PipelineCacheDescriptor<'a> {
@@ -1944,14 +1961,19 @@ pub struct VertexBufferLayout<'a> {
 
 /// Describes a render (graphics) pipeline.
 #[derive(Clone, Debug)]
-pub struct RenderPipelineDescriptor<'a, A: Api> {
+pub struct RenderPipelineDescriptor<
+    'a,
+    Pl: DynPipelineLayout + ?Sized,
+    M: DynShaderModule + ?Sized,
+    Pc: DynPipelineCache + ?Sized,
+> {
     pub label: Label<'a>,
     /// The layout of bind groups for this pipeline.
-    pub layout: &'a A::PipelineLayout,
+    pub layout: &'a Pl,
     /// The format of any vertex buffers used with this pipeline.
     pub vertex_buffers: &'a [VertexBufferLayout<'a>],
     /// The vertex stage for this pipeline.
-    pub vertex_stage: ProgrammableStage<'a, A>,
+    pub vertex_stage: ProgrammableStage<'a, M>,
     /// The properties of the pipeline at the primitive assembly and rasterization level.
     pub primitive: wgt::PrimitiveState,
     /// The effect of draw calls on the depth and stencil aspects of the output target, if any.
@@ -1959,14 +1981,14 @@ pub struct RenderPipelineDescriptor<'a, A: Api> {
     /// The multi-sampling properties of the pipeline.
     pub multisample: wgt::MultisampleState,
     /// The fragment stage for this pipeline.
-    pub fragment_stage: Option<ProgrammableStage<'a, A>>,
+    pub fragment_stage: Option<ProgrammableStage<'a, M>>,
     /// The effect of draw calls on the color aspect of the output target.
     pub color_targets: &'a [Option<wgt::ColorTargetState>],
     /// If the pipeline will be used with a multiview render pass, this indicates how many array
     /// layers the attachments will have.
     pub multiview: Option<NonZeroU32>,
     /// The cache which will be used and filled when compiling this pipeline
-    pub cache: Option<&'a A::PipelineCache>,
+    pub cache: Option<&'a Pc>,
 }
 
 #[derive(Debug, Clone)]
diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs
index 7fb7b5132b..a17ca2ec42 100644
--- a/wgpu-hal/src/metal/device.rs
+++ b/wgpu-hal/src/metal/device.rs
@@ -99,7 +99,7 @@ const fn convert_vertex_format_to_naga(format: wgt::VertexFormat) -> naga::back:
 impl super::Device {
     fn load_shader(
         &self,
-        stage: &crate::ProgrammableStage<super::Api>,
+        stage: &crate::ProgrammableStage<super::ShaderModule>,
         vertex_buffer_mappings: &[naga::back::msl::VertexBufferMapping],
         layout: &super::PipelineLayout,
         primitive_class: metal::MTLPrimitiveTopologyClass,
@@ -898,7 +898,11 @@ impl crate::Device for super::Device {
 
     unsafe fn create_render_pipeline(
         &self,
-        desc: &crate::RenderPipelineDescriptor<super::Api>,
+        desc: &crate::RenderPipelineDescriptor<
+            super::PipelineLayout,
+            super::ShaderModule,
+            super::PipelineCache,
+        >,
     ) -> Result<super::RenderPipeline, crate::PipelineError> {
         objc::rc::autoreleasepool(|| {
             let descriptor = metal::RenderPipelineDescriptor::new();
@@ -1169,7 +1173,11 @@ impl crate::Device for super::Device {
 
     unsafe fn create_compute_pipeline(
         &self,
-        desc: &crate::ComputePipelineDescriptor<super::Api>,
+        desc: &crate::ComputePipelineDescriptor<
+            super::PipelineLayout,
+            super::ShaderModule,
+            super::PipelineCache,
+        >,
     ) -> Result<super::ComputePipeline, crate::PipelineError> {
         objc::rc::autoreleasepool(|| {
             let descriptor = metal::ComputePipelineDescriptor::new();
@@ -1232,10 +1240,10 @@ impl crate::Device for super::Device {
     unsafe fn create_pipeline_cache(
         &self,
         _desc: &crate::PipelineCacheDescriptor<'_>,
-    ) -> Result<(), crate::PipelineCacheError> {
-        Ok(())
+    ) -> Result<super::PipelineCache, crate::PipelineCacheError> {
+        Ok(super::PipelineCache)
     }
-    unsafe fn destroy_pipeline_cache(&self, (): ()) {}
+    unsafe fn destroy_pipeline_cache(&self, _: super::PipelineCache) {}
 
     unsafe fn create_query_set(
         &self,
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index 7b4dc8b22e..f861474f8a 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -66,7 +66,7 @@ impl crate::Api for Api {
     type ShaderModule = ShaderModule;
     type RenderPipeline = RenderPipeline;
     type ComputePipeline = ComputePipeline;
-    type PipelineCache = ();
+    type PipelineCache = PipelineCache;
 
     type AccelerationStructure = AccelerationStructure;
 }
@@ -81,6 +81,7 @@ crate::impl_dyn_resource!(
     ComputePipeline,
     Device,
     Fence,
+    PipelineCache,
     PipelineLayout,
     QuerySet,
     Queue,
@@ -930,6 +931,11 @@ impl crate::DynCommandBuffer for CommandBuffer {}
 unsafe impl Send for CommandBuffer {}
 unsafe impl Sync for CommandBuffer {}
 
+#[derive(Debug)]
+pub struct PipelineCache;
+
+impl crate::DynPipelineCache for PipelineCache {}
+
 #[derive(Debug)]
 pub struct AccelerationStructure;
 
diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs
index 5d0aa39760..e7be52a097 100644
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@@ -1,4 +1,4 @@
-use super::{conv, PipelineCache};
+use super::conv;
 
 use arrayvec::ArrayVec;
 use ash::{khr, vk};
@@ -709,7 +709,7 @@ impl super::Device {
 
     fn compile_stage(
         &self,
-        stage: &crate::ProgrammableStage<super::Api>,
+        stage: &crate::ProgrammableStage<super::ShaderModule>,
         naga_stage: naga::ShaderStage,
         binding_map: &naga::back::spv::BindingMap,
     ) -> Result<CompiledStage, crate::PipelineError> {
@@ -1725,7 +1725,11 @@ impl crate::Device for super::Device {
 
     unsafe fn create_render_pipeline(
         &self,
-        desc: &crate::RenderPipelineDescriptor<super::Api>,
+        desc: &crate::RenderPipelineDescriptor<
+            super::PipelineLayout,
+            super::ShaderModule,
+            super::PipelineCache,
+        >,
     ) -> Result<super::RenderPipeline, crate::PipelineError> {
         let dynamic_states = [
             vk::DynamicState::VIEWPORT,
@@ -1955,6 +1959,7 @@ impl crate::Device for super::Device {
 
         Ok(super::RenderPipeline { raw })
     }
+
     unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) {
         unsafe { self.shared.raw.destroy_pipeline(pipeline.raw, None) };
 
@@ -1963,7 +1968,11 @@ impl crate::Device for super::Device {
 
     unsafe fn create_compute_pipeline(
         &self,
-        desc: &crate::ComputePipelineDescriptor<super::Api>,
+        desc: &crate::ComputePipelineDescriptor<
+            super::PipelineLayout,
+            super::ShaderModule,
+            super::PipelineCache,
+        >,
     ) -> Result<super::ComputePipeline, crate::PipelineError> {
         let compiled = self.compile_stage(
             &desc.stage,
@@ -2015,7 +2024,7 @@ impl crate::Device for super::Device {
     unsafe fn create_pipeline_cache(
         &self,
         desc: &crate::PipelineCacheDescriptor<'_>,
-    ) -> Result<PipelineCache, crate::PipelineCacheError> {
+    ) -> Result<super::PipelineCache, crate::PipelineCacheError> {
         let mut info = vk::PipelineCacheCreateInfo::default();
         if let Some(data) = desc.data {
             info = info.initial_data(data)
@@ -2024,12 +2033,12 @@ impl crate::Device for super::Device {
         let raw = unsafe { self.shared.raw.create_pipeline_cache(&info, None) }
             .map_err(crate::DeviceError::from)?;
 
-        Ok(PipelineCache { raw })
+        Ok(super::PipelineCache { raw })
     }
     fn pipeline_cache_validation_key(&self) -> Option<[u8; 16]> {
         Some(self.shared.pipeline_cache_validation_key)
     }
-    unsafe fn destroy_pipeline_cache(&self, cache: PipelineCache) {
+    unsafe fn destroy_pipeline_cache(&self, cache: super::PipelineCache) {
         unsafe { self.shared.raw.destroy_pipeline_cache(cache.raw, None) }
     }
     unsafe fn create_query_set(
@@ -2160,7 +2169,7 @@ impl crate::Device for super::Device {
         }
     }
 
-    unsafe fn pipeline_cache_get_data(&self, cache: &PipelineCache) -> Option<Vec<u8>> {
+    unsafe fn pipeline_cache_get_data(&self, cache: &super::PipelineCache) -> Option<Vec<u8>> {
         let data = unsafe { self.raw_device().get_pipeline_cache_data(cache.raw) };
         data.ok()
     }
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index 816bf37e5f..a77ff444b6 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -862,6 +862,8 @@ pub struct PipelineCache {
     raw: vk::PipelineCache,
 }
 
+impl crate::DynPipelineCache for PipelineCache {}
+
 #[derive(Debug)]
 pub struct QuerySet {
     raw: vk::QueryPool,

From 4d355bf30ed9548cf56946ad5ba2acb5c1b22d72 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 28 Jul 2024 10:07:40 +0200
Subject: [PATCH 201/226] DynDevice pipeline cache

---
 wgpu-hal/src/dynamic/device.rs | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs
index 4d3b978267..bf5a4d40e6 100644
--- a/wgpu-hal/src/dynamic/device.rs
+++ b/wgpu-hal/src/dynamic/device.rs
@@ -4,9 +4,9 @@
 use crate::{
     Api, BindGroupDescriptor, BindGroupLayoutDescriptor, BufferDescriptor, BufferMapping,
     CommandEncoderDescriptor, ComputePipelineDescriptor, Device, DeviceError, DynBuffer,
-    DynResource, MemoryRange, PipelineError, PipelineLayoutDescriptor, RenderPipelineDescriptor,
-    SamplerDescriptor, ShaderError, ShaderInput, ShaderModuleDescriptor, TextureDescriptor,
-    TextureViewDescriptor,
+    DynResource, MemoryRange, PipelineCacheDescriptor, PipelineCacheError, PipelineError,
+    PipelineLayoutDescriptor, RenderPipelineDescriptor, SamplerDescriptor, ShaderError,
+    ShaderInput, ShaderModuleDescriptor, TextureDescriptor, TextureViewDescriptor,
 };
 
 use super::{
@@ -107,6 +107,15 @@ pub trait DynDevice: DynResource {
         >,
     ) -> Result<Box<dyn DynComputePipeline>, PipelineError>;
     unsafe fn destroy_compute_pipeline(&self, pipeline: Box<dyn DynComputePipeline>);
+
+    unsafe fn create_pipeline_cache(
+        &self,
+        desc: &PipelineCacheDescriptor<'_>,
+    ) -> Result<Box<dyn DynPipelineCache>, PipelineCacheError>;
+    fn pipeline_cache_validation_key(&self) -> Option<[u8; 16]> {
+        None
+    }
+    unsafe fn destroy_pipeline_cache(&self, cache: Box<dyn DynPipelineCache>);
 }
 
 impl<D: Device + DynResource> DynDevice for D {
@@ -357,4 +366,20 @@ impl<D: Device + DynResource> DynDevice for D {
     unsafe fn destroy_compute_pipeline(&self, pipeline: Box<dyn DynComputePipeline>) {
         unsafe { D::destroy_compute_pipeline(self, pipeline.unbox()) };
     }
+
+    unsafe fn create_pipeline_cache(
+        &self,
+        desc: &PipelineCacheDescriptor<'_>,
+    ) -> Result<Box<dyn DynPipelineCache>, PipelineCacheError> {
+        unsafe { D::create_pipeline_cache(self, desc) }
+            .map(|b| Box::new(b) as Box<dyn DynPipelineCache>)
+    }
+
+    fn pipeline_cache_validation_key(&self) -> Option<[u8; 16]> {
+        D::pipeline_cache_validation_key(self)
+    }
+
+    unsafe fn destroy_pipeline_cache(&self, pipeline_cache: Box<dyn DynPipelineCache>) {
+        unsafe { D::destroy_pipeline_cache(self, pipeline_cache.unbox()) };
+    }
 }

From 84c4811f8173c0cc6226aa06855d23cc312ddab9 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 28 Jul 2024 10:11:41 +0200
Subject: [PATCH 202/226] DynDevice create/destroy query set

---
 wgpu-hal/src/dynamic/device.rs | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs
index bf5a4d40e6..3bfd9df787 100644
--- a/wgpu-hal/src/dynamic/device.rs
+++ b/wgpu-hal/src/dynamic/device.rs
@@ -4,15 +4,16 @@
 use crate::{
     Api, BindGroupDescriptor, BindGroupLayoutDescriptor, BufferDescriptor, BufferMapping,
     CommandEncoderDescriptor, ComputePipelineDescriptor, Device, DeviceError, DynBuffer,
-    DynResource, MemoryRange, PipelineCacheDescriptor, PipelineCacheError, PipelineError,
+    DynResource, Label, MemoryRange, PipelineCacheDescriptor, PipelineCacheError, PipelineError,
     PipelineLayoutDescriptor, RenderPipelineDescriptor, SamplerDescriptor, ShaderError,
     ShaderInput, ShaderModuleDescriptor, TextureDescriptor, TextureViewDescriptor,
 };
 
 use super::{
     DynAccelerationStructure, DynBindGroup, DynBindGroupLayout, DynCommandEncoder,
-    DynComputePipeline, DynPipelineCache, DynPipelineLayout, DynQueue, DynRenderPipeline,
-    DynResourceExt as _, DynSampler, DynShaderModule, DynTexture, DynTextureView,
+    DynComputePipeline, DynPipelineCache, DynPipelineLayout, DynQuerySet, DynQueue,
+    DynRenderPipeline, DynResourceExt as _, DynSampler, DynShaderModule, DynTexture,
+    DynTextureView,
 };
 
 pub trait DynDevice: DynResource {
@@ -116,6 +117,12 @@ pub trait DynDevice: DynResource {
         None
     }
     unsafe fn destroy_pipeline_cache(&self, cache: Box<dyn DynPipelineCache>);
+
+    unsafe fn create_query_set(
+        &self,
+        desc: &wgt::QuerySetDescriptor<Label>,
+    ) -> Result<Box<dyn DynQuerySet>, DeviceError>;
+    unsafe fn destroy_query_set(&self, set: Box<dyn DynQuerySet>);
 }
 
 impl<D: Device + DynResource> DynDevice for D {
@@ -382,4 +389,15 @@ impl<D: Device + DynResource> DynDevice for D {
     unsafe fn destroy_pipeline_cache(&self, pipeline_cache: Box<dyn DynPipelineCache>) {
         unsafe { D::destroy_pipeline_cache(self, pipeline_cache.unbox()) };
     }
+
+    unsafe fn create_query_set(
+        &self,
+        desc: &wgt::QuerySetDescriptor<Label>,
+    ) -> Result<Box<dyn DynQuerySet>, DeviceError> {
+        unsafe { D::create_query_set(self, desc) }.map(|b| Box::new(b) as Box<dyn DynQuerySet>)
+    }
+
+    unsafe fn destroy_query_set(&self, query_set: Box<dyn DynQuerySet>) {
+        unsafe { D::destroy_query_set(self, query_set.unbox()) };
+    }
 }

From dbf9aea0bad930006776fe0aa85c39ba9a5d36fd Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 28 Jul 2024 10:21:40 +0200
Subject: [PATCH 203/226] DynDevice fence

---
 wgpu-hal/src/dynamic/device.rs | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs
index 3bfd9df787..c2dd42ac77 100644
--- a/wgpu-hal/src/dynamic/device.rs
+++ b/wgpu-hal/src/dynamic/device.rs
@@ -4,14 +4,14 @@
 use crate::{
     Api, BindGroupDescriptor, BindGroupLayoutDescriptor, BufferDescriptor, BufferMapping,
     CommandEncoderDescriptor, ComputePipelineDescriptor, Device, DeviceError, DynBuffer,
-    DynResource, Label, MemoryRange, PipelineCacheDescriptor, PipelineCacheError, PipelineError,
-    PipelineLayoutDescriptor, RenderPipelineDescriptor, SamplerDescriptor, ShaderError,
-    ShaderInput, ShaderModuleDescriptor, TextureDescriptor, TextureViewDescriptor,
+    DynResource, FenceValue, Label, MemoryRange, PipelineCacheDescriptor, PipelineCacheError,
+    PipelineError, PipelineLayoutDescriptor, RenderPipelineDescriptor, SamplerDescriptor,
+    ShaderError, ShaderInput, ShaderModuleDescriptor, TextureDescriptor, TextureViewDescriptor,
 };
 
 use super::{
     DynAccelerationStructure, DynBindGroup, DynBindGroupLayout, DynCommandEncoder,
-    DynComputePipeline, DynPipelineCache, DynPipelineLayout, DynQuerySet, DynQueue,
+    DynComputePipeline, DynFence, DynPipelineCache, DynPipelineLayout, DynQuerySet, DynQueue,
     DynRenderPipeline, DynResourceExt as _, DynSampler, DynShaderModule, DynTexture,
     DynTextureView,
 };
@@ -123,6 +123,10 @@ pub trait DynDevice: DynResource {
         desc: &wgt::QuerySetDescriptor<Label>,
     ) -> Result<Box<dyn DynQuerySet>, DeviceError>;
     unsafe fn destroy_query_set(&self, set: Box<dyn DynQuerySet>);
+
+    unsafe fn create_fence(&self) -> Result<Box<dyn DynFence>, DeviceError>;
+    unsafe fn destroy_fence(&self, fence: Box<dyn DynFence>);
+    unsafe fn get_fence_value(&self, fence: &dyn DynFence) -> Result<FenceValue, DeviceError>;
 }
 
 impl<D: Device + DynResource> DynDevice for D {
@@ -400,4 +404,17 @@ impl<D: Device + DynResource> DynDevice for D {
     unsafe fn destroy_query_set(&self, query_set: Box<dyn DynQuerySet>) {
         unsafe { D::destroy_query_set(self, query_set.unbox()) };
     }
+
+    unsafe fn create_fence(&self) -> Result<Box<dyn DynFence>, DeviceError> {
+        unsafe { D::create_fence(self) }.map(|f| Box::new(f) as Box<dyn DynFence>)
+    }
+
+    unsafe fn destroy_fence(&self, fence: Box<dyn DynFence>) {
+        unsafe { D::destroy_fence(self, fence.unbox()) };
+    }
+
+    unsafe fn get_fence_value(&self, fence: &dyn DynFence) -> Result<FenceValue, DeviceError> {
+        let fence = fence.expect_downcast_ref();
+        unsafe { D::get_fence_value(self, fence) }
+    }
 }

From 522b98c582b69a3d964adc779b3a8901cf112179 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 28 Jul 2024 10:25:36 +0200
Subject: [PATCH 204/226] DynDevice wait/capture/pipeline_cache_get_data

---
 wgpu-hal/src/dynamic/device.rs | 35 ++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs
index c2dd42ac77..2ede8a42a0 100644
--- a/wgpu-hal/src/dynamic/device.rs
+++ b/wgpu-hal/src/dynamic/device.rs
@@ -127,6 +127,18 @@ pub trait DynDevice: DynResource {
     unsafe fn create_fence(&self) -> Result<Box<dyn DynFence>, DeviceError>;
     unsafe fn destroy_fence(&self, fence: Box<dyn DynFence>);
     unsafe fn get_fence_value(&self, fence: &dyn DynFence) -> Result<FenceValue, DeviceError>;
+
+    unsafe fn wait(
+        &self,
+        fence: &dyn DynFence,
+        value: FenceValue,
+        timeout_ms: u32,
+    ) -> Result<bool, DeviceError>;
+
+    unsafe fn start_capture(&self) -> bool;
+    unsafe fn stop_capture(&self);
+
+    unsafe fn pipeline_cache_get_data(&self, cache: &dyn DynPipelineCache) -> Option<Vec<u8>>;
 }
 
 impl<D: Device + DynResource> DynDevice for D {
@@ -417,4 +429,27 @@ impl<D: Device + DynResource> DynDevice for D {
         let fence = fence.expect_downcast_ref();
         unsafe { D::get_fence_value(self, fence) }
     }
+
+    unsafe fn wait(
+        &self,
+        fence: &dyn DynFence,
+        value: FenceValue,
+        timeout_ms: u32,
+    ) -> Result<bool, DeviceError> {
+        let fence = fence.expect_downcast_ref();
+        unsafe { D::wait(self, fence, value, timeout_ms) }
+    }
+
+    unsafe fn start_capture(&self) -> bool {
+        unsafe { D::start_capture(self) }
+    }
+
+    unsafe fn stop_capture(&self) {
+        unsafe { D::stop_capture(self) }
+    }
+
+    unsafe fn pipeline_cache_get_data(&self, cache: &dyn DynPipelineCache) -> Option<Vec<u8>> {
+        let cache = cache.expect_downcast_ref();
+        unsafe { D::pipeline_cache_get_data(self, cache) }
+    }
 }

From 9e1f1487b4d4afbaeaba75dbf21c33c669f04eb9 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 28 Jul 2024 10:55:18 +0200
Subject: [PATCH 205/226] DynDevice acceleration structure handling

---
 wgpu-hal/src/dx12/command.rs   |   8 ++-
 wgpu-hal/src/dx12/device.rs    |   2 +-
 wgpu-hal/src/dynamic/device.rs | 120 +++++++++++++++++++++++++++++++--
 wgpu-hal/src/empty.rs          |   4 +-
 wgpu-hal/src/gles/command.rs   |   8 ++-
 wgpu-hal/src/gles/device.rs    |   2 +-
 wgpu-hal/src/lib.rs            |  60 ++++++++++-------
 wgpu-hal/src/metal/command.rs  |   8 ++-
 wgpu-hal/src/metal/device.rs   |   2 +-
 wgpu-hal/src/vulkan/command.rs |   8 ++-
 wgpu-hal/src/vulkan/device.rs  |   2 +-
 11 files changed, 182 insertions(+), 42 deletions(-)

diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs
index cf2147d0ca..0356b91978 100644
--- a/wgpu-hal/src/dx12/command.rs
+++ b/wgpu-hal/src/dx12/command.rs
@@ -1190,7 +1190,13 @@ impl crate::CommandEncoder for super::CommandEncoder {
         _descriptors: T,
     ) where
         super::Api: 'a,
-        T: IntoIterator<Item = crate::BuildAccelerationStructureDescriptor<'a, super::Api>>,
+        T: IntoIterator<
+            Item = crate::BuildAccelerationStructureDescriptor<
+                'a,
+                super::Buffer,
+                super::AccelerationStructure,
+            >,
+        >,
     {
         // Implement using `BuildRaytracingAccelerationStructure`:
         // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#buildraytracingaccelerationstructure
diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index b58b882b44..8cfd8deaee 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -1745,7 +1745,7 @@ impl crate::Device for super::Device {
 
     unsafe fn get_acceleration_structure_build_sizes<'a>(
         &self,
-        _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Api>,
+        _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Buffer>,
     ) -> crate::AccelerationStructureBuildSizes {
         // Implement using `GetRaytracingAccelerationStructurePrebuildInfo`:
         // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#getraytracingaccelerationstructureprebuildinfo
diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs
index 2ede8a42a0..3e83838778 100644
--- a/wgpu-hal/src/dynamic/device.rs
+++ b/wgpu-hal/src/dynamic/device.rs
@@ -2,17 +2,21 @@
 #![allow(trivial_casts)]
 
 use crate::{
-    Api, BindGroupDescriptor, BindGroupLayoutDescriptor, BufferDescriptor, BufferMapping,
-    CommandEncoderDescriptor, ComputePipelineDescriptor, Device, DeviceError, DynBuffer,
-    DynResource, FenceValue, Label, MemoryRange, PipelineCacheDescriptor, PipelineCacheError,
-    PipelineError, PipelineLayoutDescriptor, RenderPipelineDescriptor, SamplerDescriptor,
-    ShaderError, ShaderInput, ShaderModuleDescriptor, TextureDescriptor, TextureViewDescriptor,
+    AccelerationStructureAABBs, AccelerationStructureBuildSizes, AccelerationStructureDescriptor,
+    AccelerationStructureEntries, AccelerationStructureInstances,
+    AccelerationStructureTriangleIndices, AccelerationStructureTriangleTransform,
+    AccelerationStructureTriangles, Api, BindGroupDescriptor, BindGroupLayoutDescriptor,
+    BufferDescriptor, BufferMapping, CommandEncoderDescriptor, ComputePipelineDescriptor, Device,
+    DeviceError, FenceValue, GetAccelerationStructureBuildSizesDescriptor, Label, MemoryRange,
+    PipelineCacheDescriptor, PipelineCacheError, PipelineError, PipelineLayoutDescriptor,
+    RenderPipelineDescriptor, SamplerDescriptor, ShaderError, ShaderInput, ShaderModuleDescriptor,
+    TextureDescriptor, TextureViewDescriptor,
 };
 
 use super::{
-    DynAccelerationStructure, DynBindGroup, DynBindGroupLayout, DynCommandEncoder,
+    DynAccelerationStructure, DynBindGroup, DynBindGroupLayout, DynBuffer, DynCommandEncoder,
     DynComputePipeline, DynFence, DynPipelineCache, DynPipelineLayout, DynQuerySet, DynQueue,
-    DynRenderPipeline, DynResourceExt as _, DynSampler, DynShaderModule, DynTexture,
+    DynRenderPipeline, DynResource, DynResourceExt as _, DynSampler, DynShaderModule, DynTexture,
     DynTextureView,
 };
 
@@ -139,6 +143,23 @@ pub trait DynDevice: DynResource {
     unsafe fn stop_capture(&self);
 
     unsafe fn pipeline_cache_get_data(&self, cache: &dyn DynPipelineCache) -> Option<Vec<u8>>;
+
+    unsafe fn create_acceleration_structure(
+        &self,
+        desc: &AccelerationStructureDescriptor,
+    ) -> Result<Box<dyn DynAccelerationStructure>, DeviceError>;
+    unsafe fn get_acceleration_structure_build_sizes(
+        &self,
+        desc: &GetAccelerationStructureBuildSizesDescriptor<dyn DynBuffer>,
+    ) -> AccelerationStructureBuildSizes;
+    unsafe fn get_acceleration_structure_device_address(
+        &self,
+        acceleration_structure: &dyn DynAccelerationStructure,
+    ) -> wgt::BufferAddress;
+    unsafe fn destroy_acceleration_structure(
+        &self,
+        acceleration_structure: Box<dyn DynAccelerationStructure>,
+    );
 }
 
 impl<D: Device + DynResource> DynDevice for D {
@@ -452,4 +473,89 @@ impl<D: Device + DynResource> DynDevice for D {
         let cache = cache.expect_downcast_ref();
         unsafe { D::pipeline_cache_get_data(self, cache) }
     }
+
+    unsafe fn create_acceleration_structure(
+        &self,
+        desc: &AccelerationStructureDescriptor,
+    ) -> Result<Box<dyn DynAccelerationStructure>, DeviceError> {
+        unsafe { D::create_acceleration_structure(self, desc) }
+            .map(|b| Box::new(b) as Box<dyn DynAccelerationStructure>)
+    }
+
+    unsafe fn get_acceleration_structure_build_sizes(
+        &self,
+        desc: &GetAccelerationStructureBuildSizesDescriptor<dyn DynBuffer>,
+    ) -> AccelerationStructureBuildSizes {
+        let entries = match &desc.entries {
+            AccelerationStructureEntries::Instances(instances) => {
+                AccelerationStructureEntries::Instances(AccelerationStructureInstances {
+                    buffer: instances.buffer.map(|b| b.expect_downcast_ref()),
+                    offset: instances.offset,
+                    count: instances.count,
+                })
+            }
+            AccelerationStructureEntries::Triangles(triangles) => {
+                AccelerationStructureEntries::Triangles(
+                    triangles
+                        .iter()
+                        .map(|t| AccelerationStructureTriangles {
+                            vertex_buffer: t.vertex_buffer.map(|b| b.expect_downcast_ref()),
+                            vertex_format: t.vertex_format,
+                            first_vertex: t.first_vertex,
+                            vertex_count: t.vertex_count,
+                            vertex_stride: t.vertex_stride,
+                            indices: t.indices.as_ref().map(|i| {
+                                AccelerationStructureTriangleIndices {
+                                    buffer: i.buffer.map(|b| b.expect_downcast_ref()),
+                                    format: i.format,
+                                    offset: i.offset,
+                                    count: i.count,
+                                }
+                            }),
+                            transform: t.transform.as_ref().map(|t| {
+                                AccelerationStructureTriangleTransform {
+                                    buffer: t.buffer.expect_downcast_ref(),
+                                    offset: t.offset,
+                                }
+                            }),
+                            flags: t.flags,
+                        })
+                        .collect(),
+                )
+            }
+            AccelerationStructureEntries::AABBs(entries) => AccelerationStructureEntries::AABBs(
+                entries
+                    .iter()
+                    .map(|e| AccelerationStructureAABBs {
+                        buffer: e.buffer.map(|b| b.expect_downcast_ref()),
+                        offset: e.offset,
+                        count: e.count,
+                        stride: e.stride,
+                        flags: e.flags,
+                    })
+                    .collect(),
+            ),
+        };
+
+        let desc = GetAccelerationStructureBuildSizesDescriptor {
+            entries: &entries,
+            flags: desc.flags,
+        };
+        unsafe { D::get_acceleration_structure_build_sizes(self, &desc) }
+    }
+
+    unsafe fn get_acceleration_structure_device_address(
+        &self,
+        acceleration_structure: &dyn DynAccelerationStructure,
+    ) -> wgt::BufferAddress {
+        let acceleration_structure = acceleration_structure.expect_downcast_ref();
+        unsafe { D::get_acceleration_structure_device_address(self, acceleration_structure) }
+    }
+
+    unsafe fn destroy_acceleration_structure(
+        &self,
+        acceleration_structure: Box<dyn DynAccelerationStructure>,
+    ) {
+        unsafe { D::destroy_acceleration_structure(self, acceleration_structure.unbox()) }
+    }
 }
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 87139ce0f0..4d8868c360 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -291,7 +291,7 @@ impl crate::Device for Context {
     }
     unsafe fn get_acceleration_structure_build_sizes<'a>(
         &self,
-        _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, Api>,
+        _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, Resource>,
     ) -> crate::AccelerationStructureBuildSizes {
         Default::default()
     }
@@ -494,7 +494,7 @@ impl crate::CommandEncoder for Encoder {
         descriptors: T,
     ) where
         Api: 'a,
-        T: IntoIterator<Item = crate::BuildAccelerationStructureDescriptor<'a, Api>>,
+        T: IntoIterator<Item = crate::BuildAccelerationStructureDescriptor<'a, Resource, Resource>>,
     {
     }
 
diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs
index cd62726050..c002e76c1b 100644
--- a/wgpu-hal/src/gles/command.rs
+++ b/wgpu-hal/src/gles/command.rs
@@ -1188,7 +1188,13 @@ impl crate::CommandEncoder for super::CommandEncoder {
         _descriptors: T,
     ) where
         super::Api: 'a,
-        T: IntoIterator<Item = crate::BuildAccelerationStructureDescriptor<'a, super::Api>>,
+        T: IntoIterator<
+            Item = crate::BuildAccelerationStructureDescriptor<
+                'a,
+                super::Buffer,
+                super::AccelerationStructure,
+            >,
+        >,
     {
         unimplemented!()
     }
diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs
index f459d970fc..ad092307e9 100644
--- a/wgpu-hal/src/gles/device.rs
+++ b/wgpu-hal/src/gles/device.rs
@@ -1608,7 +1608,7 @@ impl crate::Device for super::Device {
     }
     unsafe fn get_acceleration_structure_build_sizes<'a>(
         &self,
-        _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Api>,
+        _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Buffer>,
     ) -> crate::AccelerationStructureBuildSizes {
         unimplemented!()
     }
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 38e0cd1f5b..dacd21049a 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -910,7 +910,7 @@ pub trait Device: WasmNotSendSync {
     ) -> Result<<Self::A as Api>::AccelerationStructure, DeviceError>;
     unsafe fn get_acceleration_structure_build_sizes(
         &self,
-        desc: &GetAccelerationStructureBuildSizesDescriptor<Self::A>,
+        desc: &GetAccelerationStructureBuildSizesDescriptor<<Self::A as Api>::Buffer>,
     ) -> AccelerationStructureBuildSizes;
     unsafe fn get_acceleration_structure_device_address(
         &self,
@@ -1352,7 +1352,13 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug {
         descriptors: T,
     ) where
         Self::A: 'a,
-        T: IntoIterator<Item = BuildAccelerationStructureDescriptor<'a, Self::A>>;
+        T: IntoIterator<
+            Item = BuildAccelerationStructureDescriptor<
+                'a,
+                <Self::A as Api>::Buffer,
+                <Self::A as Api>::AccelerationStructure,
+            >,
+        >;
 
     unsafe fn place_acceleration_structure_barrier(
         &mut self,
@@ -2192,13 +2198,17 @@ pub struct AccelerationStructureBuildSizes {
 /// Updates use source_acceleration_structure if present, else the update will be performed in place.
 /// For updates, only the data is allowed to change (not the meta data or sizes).
 #[derive(Clone, Debug)]
-pub struct BuildAccelerationStructureDescriptor<'a, A: Api> {
-    pub entries: &'a AccelerationStructureEntries<'a, A>,
+pub struct BuildAccelerationStructureDescriptor<
+    'a,
+    B: DynBuffer + ?Sized,
+    A: DynAccelerationStructure + ?Sized,
+> {
+    pub entries: &'a AccelerationStructureEntries<'a, B>,
     pub mode: AccelerationStructureBuildMode,
     pub flags: AccelerationStructureBuildFlags,
-    pub source_acceleration_structure: Option<&'a A::AccelerationStructure>,
-    pub destination_acceleration_structure: &'a A::AccelerationStructure,
-    pub scratch_buffer: &'a A::Buffer,
+    pub source_acceleration_structure: Option<&'a A>,
+    pub destination_acceleration_structure: &'a A,
+    pub scratch_buffer: &'a B,
     pub scratch_buffer_offset: wgt::BufferAddress,
 }
 
@@ -2208,8 +2218,8 @@ pub struct BuildAccelerationStructureDescriptor<'a, A: Api> {
 ///   may result in reduced size requirements.
 /// - Any other change may result in a bigger or smaller size requirement.
 #[derive(Clone, Debug)]
-pub struct GetAccelerationStructureBuildSizesDescriptor<'a, A: Api> {
-    pub entries: &'a AccelerationStructureEntries<'a, A>,
+pub struct GetAccelerationStructureBuildSizesDescriptor<'a, B: DynBuffer + ?Sized> {
+    pub entries: &'a AccelerationStructureEntries<'a, B>,
     pub flags: AccelerationStructureBuildFlags,
 }
 
@@ -2218,31 +2228,31 @@ pub struct GetAccelerationStructureBuildSizesDescriptor<'a, A: Api> {
 /// * `Triangles` - Multiple triangle meshes for a bottom level acceleration structure
 /// * `AABBs` - List of list of axis aligned bounding boxes for a bottom level acceleration structure
 #[derive(Debug)]
-pub enum AccelerationStructureEntries<'a, A: Api> {
-    Instances(AccelerationStructureInstances<'a, A>),
-    Triangles(Vec<AccelerationStructureTriangles<'a, A>>),
-    AABBs(Vec<AccelerationStructureAABBs<'a, A>>),
+pub enum AccelerationStructureEntries<'a, B: DynBuffer + ?Sized> {
+    Instances(AccelerationStructureInstances<'a, B>),
+    Triangles(Vec<AccelerationStructureTriangles<'a, B>>),
+    AABBs(Vec<AccelerationStructureAABBs<'a, B>>),
 }
 
 /// * `first_vertex` - offset in the vertex buffer (as number of vertices)
 /// * `indices` - optional index buffer with attributes
 /// * `transform` - optional transform
 #[derive(Clone, Debug)]
-pub struct AccelerationStructureTriangles<'a, A: Api> {
-    pub vertex_buffer: Option<&'a A::Buffer>,
+pub struct AccelerationStructureTriangles<'a, B: DynBuffer + ?Sized> {
+    pub vertex_buffer: Option<&'a B>,
     pub vertex_format: wgt::VertexFormat,
     pub first_vertex: u32,
     pub vertex_count: u32,
     pub vertex_stride: wgt::BufferAddress,
-    pub indices: Option<AccelerationStructureTriangleIndices<'a, A>>,
-    pub transform: Option<AccelerationStructureTriangleTransform<'a, A>>,
+    pub indices: Option<AccelerationStructureTriangleIndices<'a, B>>,
+    pub transform: Option<AccelerationStructureTriangleTransform<'a, B>>,
     pub flags: AccelerationStructureGeometryFlags,
 }
 
 /// * `offset` - offset in bytes
 #[derive(Clone, Debug)]
-pub struct AccelerationStructureAABBs<'a, A: Api> {
-    pub buffer: Option<&'a A::Buffer>,
+pub struct AccelerationStructureAABBs<'a, B: DynBuffer + ?Sized> {
+    pub buffer: Option<&'a B>,
     pub offset: u32,
     pub count: u32,
     pub stride: wgt::BufferAddress,
@@ -2251,25 +2261,25 @@ pub struct AccelerationStructureAABBs<'a, A: Api> {
 
 /// * `offset` - offset in bytes
 #[derive(Clone, Debug)]
-pub struct AccelerationStructureInstances<'a, A: Api> {
-    pub buffer: Option<&'a A::Buffer>,
+pub struct AccelerationStructureInstances<'a, B: DynBuffer + ?Sized> {
+    pub buffer: Option<&'a B>,
     pub offset: u32,
     pub count: u32,
 }
 
 /// * `offset` - offset in bytes
 #[derive(Clone, Debug)]
-pub struct AccelerationStructureTriangleIndices<'a, A: Api> {
+pub struct AccelerationStructureTriangleIndices<'a, B: DynBuffer + ?Sized> {
     pub format: wgt::IndexFormat,
-    pub buffer: Option<&'a A::Buffer>,
+    pub buffer: Option<&'a B>,
     pub offset: u32,
     pub count: u32,
 }
 
 /// * `offset` - offset in bytes
 #[derive(Clone, Debug)]
-pub struct AccelerationStructureTriangleTransform<'a, A: Api> {
-    pub buffer: &'a A::Buffer,
+pub struct AccelerationStructureTriangleTransform<'a, B: DynBuffer + ?Sized> {
+    pub buffer: &'a B,
     pub offset: u32,
 }
 
diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs
index 22a72739d6..7eea069a81 100644
--- a/wgpu-hal/src/metal/command.rs
+++ b/wgpu-hal/src/metal/command.rs
@@ -1260,7 +1260,13 @@ impl crate::CommandEncoder for super::CommandEncoder {
         _descriptors: T,
     ) where
         super::Api: 'a,
-        T: IntoIterator<Item = crate::BuildAccelerationStructureDescriptor<'a, super::Api>>,
+        T: IntoIterator<
+            Item = crate::BuildAccelerationStructureDescriptor<
+                'a,
+                super::Buffer,
+                super::AccelerationStructure,
+            >,
+        >,
     {
         unimplemented!()
     }
diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs
index a17ca2ec42..077c10f517 100644
--- a/wgpu-hal/src/metal/device.rs
+++ b/wgpu-hal/src/metal/device.rs
@@ -1394,7 +1394,7 @@ impl crate::Device for super::Device {
 
     unsafe fn get_acceleration_structure_build_sizes(
         &self,
-        _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<super::Api>,
+        _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<super::Buffer>,
     ) -> crate::AccelerationStructureBuildSizes {
         unimplemented!()
     }
diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs
index b7f4306f69..0c81321c93 100644
--- a/wgpu-hal/src/vulkan/command.rs
+++ b/wgpu-hal/src/vulkan/command.rs
@@ -408,7 +408,13 @@ impl crate::CommandEncoder for super::CommandEncoder {
     unsafe fn build_acceleration_structures<'a, T>(&mut self, descriptor_count: u32, descriptors: T)
     where
         super::Api: 'a,
-        T: IntoIterator<Item = crate::BuildAccelerationStructureDescriptor<'a, super::Api>>,
+        T: IntoIterator<
+            Item = crate::BuildAccelerationStructureDescriptor<
+                'a,
+                super::Buffer,
+                super::AccelerationStructure,
+            >,
+        >,
     {
         const CAPACITY_OUTER: usize = 8;
         const CAPACITY_INNER: usize = 1;
diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs
index e7be52a097..c42cace857 100644
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@@ -2176,7 +2176,7 @@ impl crate::Device for super::Device {
 
     unsafe fn get_acceleration_structure_build_sizes<'a>(
         &self,
-        desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Api>,
+        desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Buffer>,
     ) -> crate::AccelerationStructureBuildSizes {
         const CAPACITY: usize = 8;
 

From 1a9170e57b930af191609061ef175907b5b71816 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 28 Jul 2024 11:00:59 +0200
Subject: [PATCH 206/226] DynDevice exit, counters, report

---
 wgpu-hal/src/dynamic/device.rs | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs
index 3e83838778..36be5dca79 100644
--- a/wgpu-hal/src/dynamic/device.rs
+++ b/wgpu-hal/src/dynamic/device.rs
@@ -21,6 +21,8 @@ use super::{
 };
 
 pub trait DynDevice: DynResource {
+    unsafe fn exit(self: Box<Self>, queue: Box<dyn DynQueue>);
+
     unsafe fn create_buffer(
         &self,
         desc: &BufferDescriptor,
@@ -160,9 +162,16 @@ pub trait DynDevice: DynResource {
         &self,
         acceleration_structure: Box<dyn DynAccelerationStructure>,
     );
+
+    fn get_internal_counters(&self) -> wgt::HalCounters;
+    fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport>;
 }
 
 impl<D: Device + DynResource> DynDevice for D {
+    unsafe fn exit(self: Box<Self>, queue: Box<dyn DynQueue>) {
+        unsafe { D::exit(*self, queue.unbox()) }
+    }
+
     unsafe fn create_buffer(
         &self,
         desc: &BufferDescriptor,
@@ -558,4 +567,12 @@ impl<D: Device + DynResource> DynDevice for D {
     ) {
         unsafe { D::destroy_acceleration_structure(self, acceleration_structure.unbox()) }
     }
+
+    fn get_internal_counters(&self) -> wgt::HalCounters {
+        D::get_internal_counters(self)
+    }
+
+    fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
+        D::generate_allocator_report(self)
+    }
 }

From d87aa3635e5fea1c9c676fa81038d869c3a35cbc Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 28 Jul 2024 11:43:28 +0200
Subject: [PATCH 207/226] DynCommandEncoder end_encoding, reset_all

---
 wgpu-hal/src/dynamic/command.rs | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/wgpu-hal/src/dynamic/command.rs b/wgpu-hal/src/dynamic/command.rs
index ae41b1bb7b..0fb741220e 100644
--- a/wgpu-hal/src/dynamic/command.rs
+++ b/wgpu-hal/src/dynamic/command.rs
@@ -7,8 +7,8 @@ use crate::{
 };
 
 use super::{
-    DynBindGroup, DynBuffer, DynComputePipeline, DynPipelineLayout, DynQuerySet, DynRenderPipeline,
-    DynResource, DynResourceExt as _, DynTexture, DynTextureView,
+    DynBindGroup, DynBuffer, DynCommandBuffer, DynComputePipeline, DynPipelineLayout, DynQuerySet,
+    DynRenderPipeline, DynResource, DynResourceExt as _, DynTexture, DynTextureView,
 };
 
 pub trait DynCommandEncoder: DynResource + std::fmt::Debug {
@@ -16,6 +16,10 @@ pub trait DynCommandEncoder: DynResource + std::fmt::Debug {
 
     unsafe fn discard_encoding(&mut self);
 
+    unsafe fn end_encoding(&mut self) -> Result<Box<dyn DynCommandBuffer>, DeviceError>;
+
+    unsafe fn reset_all(&mut self, command_buffers: Vec<Box<dyn DynCommandBuffer>>);
+
     unsafe fn transition_buffers(&mut self, barriers: &[BufferBarrier<'_, dyn DynBuffer>]);
     unsafe fn transition_textures(&mut self, barriers: &[TextureBarrier<'_, dyn DynTexture>]);
 
@@ -183,6 +187,18 @@ impl<C: CommandEncoder + DynResource> DynCommandEncoder for C {
         unsafe { C::discard_encoding(self) }
     }
 
+    unsafe fn end_encoding(&mut self) -> Result<Box<dyn DynCommandBuffer>, DeviceError> {
+        unsafe { C::end_encoding(self) }.map(|cb| {
+            let boxed_command_buffer: Box<<C::A as Api>::CommandBuffer> = Box::new(cb);
+            let boxed_command_buffer: Box<dyn DynCommandBuffer> = boxed_command_buffer;
+            boxed_command_buffer
+        })
+    }
+
+    unsafe fn reset_all(&mut self, command_buffers: Vec<Box<dyn DynCommandBuffer>>) {
+        unsafe { C::reset_all(self, command_buffers.into_iter().map(|cb| cb.unbox())) }
+    }
+
     unsafe fn transition_buffers(&mut self, barriers: &[BufferBarrier<'_, dyn DynBuffer>]) {
         let barriers = barriers.iter().map(|barrier| BufferBarrier {
             buffer: barrier.buffer.expect_downcast_ref(),

From 27944debb1afc9a91241837a48593a3cf8b38352 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 28 Jul 2024 12:17:37 +0200
Subject: [PATCH 208/226] DynCommandEncoder acceleration structure building

---
 wgpu-hal/src/dynamic/command.rs | 78 ++++++++++++++++++++++++++-------
 wgpu-hal/src/dynamic/device.rs  | 68 ++++------------------------
 wgpu-hal/src/dynamic/mod.rs     | 61 +++++++++++++++++++++++++-
 3 files changed, 130 insertions(+), 77 deletions(-)

diff --git a/wgpu-hal/src/dynamic/command.rs b/wgpu-hal/src/dynamic/command.rs
index 0fb741220e..6c0f1cb02d 100644
--- a/wgpu-hal/src/dynamic/command.rs
+++ b/wgpu-hal/src/dynamic/command.rs
@@ -1,14 +1,16 @@
 use std::ops::Range;
 
 use crate::{
-    Api, Attachment, BufferBarrier, BufferBinding, BufferCopy, BufferTextureCopy, ColorAttachment,
-    CommandEncoder, ComputePassDescriptor, DepthStencilAttachment, DeviceError, Label, MemoryRange,
+    AccelerationStructureBarrier, Api, Attachment, BufferBarrier, BufferBinding, BufferCopy,
+    BufferTextureCopy, BuildAccelerationStructureDescriptor, ColorAttachment, CommandEncoder,
+    ComputePassDescriptor, DepthStencilAttachment, DeviceError, Label, MemoryRange,
     PassTimestampWrites, Rect, RenderPassDescriptor, TextureBarrier, TextureCopy, TextureUses,
 };
 
 use super::{
-    DynBindGroup, DynBuffer, DynCommandBuffer, DynComputePipeline, DynPipelineLayout, DynQuerySet,
-    DynRenderPipeline, DynResource, DynResourceExt as _, DynTexture, DynTextureView,
+    DynAccelerationStructure, DynBindGroup, DynBuffer, DynCommandBuffer, DynComputePipeline,
+    DynPipelineLayout, DynQuerySet, DynRenderPipeline, DynResource, DynResourceExt as _,
+    DynTexture, DynTextureView,
 };
 
 pub trait DynCommandEncoder: DynResource + std::fmt::Debug {
@@ -164,18 +166,19 @@ pub trait DynCommandEncoder: DynResource + std::fmt::Debug {
     unsafe fn dispatch(&mut self, count: [u32; 3]);
     unsafe fn dispatch_indirect(&mut self, buffer: &dyn DynBuffer, offset: wgt::BufferAddress);
 
-    // unsafe fn build_acceleration_structures<'a, T>(
-    //     &mut self,
-    //     descriptor_count: u32,
-    //     descriptors: T,
-    // ) where
-    //     Self::A: 'a,
-    //     T: IntoIterator<Item = BuildAccelerationStructureDescriptor<'a, Self::A>>;
-
-    // unsafe fn place_acceleration_structure_barrier(
-    //     &mut self,
-    //     barrier: AccelerationStructureBarrier,
-    // );
+    unsafe fn build_acceleration_structures<'a>(
+        &mut self,
+        descriptors: &'a [BuildAccelerationStructureDescriptor<
+            'a,
+            dyn DynBuffer,
+            dyn DynAccelerationStructure,
+        >],
+    );
+
+    unsafe fn place_acceleration_structure_barrier(
+        &mut self,
+        barrier: AccelerationStructureBarrier,
+    );
 }
 
 impl<C: CommandEncoder + DynResource> DynCommandEncoder for C {
@@ -559,6 +562,49 @@ impl<C: CommandEncoder + DynResource> DynCommandEncoder for C {
         let binding = binding.expect_downcast();
         unsafe { self.set_vertex_buffer(index, binding) };
     }
+
+    unsafe fn build_acceleration_structures<'a>(
+        &mut self,
+        descriptors: &'a [BuildAccelerationStructureDescriptor<
+            'a,
+            dyn DynBuffer,
+            dyn DynAccelerationStructure,
+        >],
+    ) {
+        // Need to collect entries here so we can reference them in the descriptor.
+        // TODO: API should be redesigned to avoid this and other descriptor copies that happen due to the dyn api.
+        let descriptor_entries = descriptors
+            .iter()
+            .map(|d| d.entries.expect_downcast())
+            .collect::<Vec<_>>();
+        let descriptors = descriptors
+            .iter()
+            .zip(descriptor_entries.iter())
+            .map(|(d, entries)| BuildAccelerationStructureDescriptor::<
+                <C::A as Api>::Buffer,
+                <C::A as Api>::AccelerationStructure,
+            > {
+                entries,
+                mode: d.mode,
+                flags: d.flags,
+                source_acceleration_structure: d
+                    .source_acceleration_structure
+                    .map(|a| a.expect_downcast_ref()),
+                destination_acceleration_structure: d
+                    .destination_acceleration_structure
+                    .expect_downcast_ref(),
+                scratch_buffer: d.scratch_buffer.expect_downcast_ref(),
+                scratch_buffer_offset: d.scratch_buffer_offset,
+            });
+        unsafe { C::build_acceleration_structures(self, descriptors.len() as _, descriptors) };
+    }
+
+    unsafe fn place_acceleration_structure_barrier(
+        &mut self,
+        barrier: AccelerationStructureBarrier,
+    ) {
+        unsafe { C::place_acceleration_structure_barrier(self, barrier) };
+    }
 }
 
 impl<'a> PassTimestampWrites<'a, dyn DynQuerySet> {
diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs
index 36be5dca79..c1baf5b76d 100644
--- a/wgpu-hal/src/dynamic/device.rs
+++ b/wgpu-hal/src/dynamic/device.rs
@@ -2,15 +2,13 @@
 #![allow(trivial_casts)]
 
 use crate::{
-    AccelerationStructureAABBs, AccelerationStructureBuildSizes, AccelerationStructureDescriptor,
-    AccelerationStructureEntries, AccelerationStructureInstances,
-    AccelerationStructureTriangleIndices, AccelerationStructureTriangleTransform,
-    AccelerationStructureTriangles, Api, BindGroupDescriptor, BindGroupLayoutDescriptor,
-    BufferDescriptor, BufferMapping, CommandEncoderDescriptor, ComputePipelineDescriptor, Device,
-    DeviceError, FenceValue, GetAccelerationStructureBuildSizesDescriptor, Label, MemoryRange,
-    PipelineCacheDescriptor, PipelineCacheError, PipelineError, PipelineLayoutDescriptor,
-    RenderPipelineDescriptor, SamplerDescriptor, ShaderError, ShaderInput, ShaderModuleDescriptor,
-    TextureDescriptor, TextureViewDescriptor,
+    AccelerationStructureBuildSizes, AccelerationStructureDescriptor, Api, BindGroupDescriptor,
+    BindGroupLayoutDescriptor, BufferDescriptor, BufferMapping, CommandEncoderDescriptor,
+    ComputePipelineDescriptor, Device, DeviceError, FenceValue,
+    GetAccelerationStructureBuildSizesDescriptor, Label, MemoryRange, PipelineCacheDescriptor,
+    PipelineCacheError, PipelineError, PipelineLayoutDescriptor, RenderPipelineDescriptor,
+    SamplerDescriptor, ShaderError, ShaderInput, ShaderModuleDescriptor, TextureDescriptor,
+    TextureViewDescriptor,
 };
 
 use super::{
@@ -495,57 +493,7 @@ impl<D: Device + DynResource> DynDevice for D {
         &self,
         desc: &GetAccelerationStructureBuildSizesDescriptor<dyn DynBuffer>,
     ) -> AccelerationStructureBuildSizes {
-        let entries = match &desc.entries {
-            AccelerationStructureEntries::Instances(instances) => {
-                AccelerationStructureEntries::Instances(AccelerationStructureInstances {
-                    buffer: instances.buffer.map(|b| b.expect_downcast_ref()),
-                    offset: instances.offset,
-                    count: instances.count,
-                })
-            }
-            AccelerationStructureEntries::Triangles(triangles) => {
-                AccelerationStructureEntries::Triangles(
-                    triangles
-                        .iter()
-                        .map(|t| AccelerationStructureTriangles {
-                            vertex_buffer: t.vertex_buffer.map(|b| b.expect_downcast_ref()),
-                            vertex_format: t.vertex_format,
-                            first_vertex: t.first_vertex,
-                            vertex_count: t.vertex_count,
-                            vertex_stride: t.vertex_stride,
-                            indices: t.indices.as_ref().map(|i| {
-                                AccelerationStructureTriangleIndices {
-                                    buffer: i.buffer.map(|b| b.expect_downcast_ref()),
-                                    format: i.format,
-                                    offset: i.offset,
-                                    count: i.count,
-                                }
-                            }),
-                            transform: t.transform.as_ref().map(|t| {
-                                AccelerationStructureTriangleTransform {
-                                    buffer: t.buffer.expect_downcast_ref(),
-                                    offset: t.offset,
-                                }
-                            }),
-                            flags: t.flags,
-                        })
-                        .collect(),
-                )
-            }
-            AccelerationStructureEntries::AABBs(entries) => AccelerationStructureEntries::AABBs(
-                entries
-                    .iter()
-                    .map(|e| AccelerationStructureAABBs {
-                        buffer: e.buffer.map(|b| b.expect_downcast_ref()),
-                        offset: e.offset,
-                        count: e.count,
-                        stride: e.stride,
-                        flags: e.flags,
-                    })
-                    .collect(),
-            ),
-        };
-
+        let entries = desc.entries.expect_downcast();
         let desc = GetAccelerationStructureBuildSizesDescriptor {
             entries: &entries,
             flags: desc.flags,
diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
index 9b4875bc76..b8fc8a1f6c 100644
--- a/wgpu-hal/src/dynamic/mod.rs
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -12,7 +12,11 @@ use std::any::Any;
 
 use wgt::WasmNotSendSync;
 
-use crate::{BufferBinding, ProgrammableStage, TextureBinding};
+use crate::{
+    AccelerationStructureAABBs, AccelerationStructureEntries, AccelerationStructureInstances,
+    AccelerationStructureTriangleIndices, AccelerationStructureTriangleTransform,
+    AccelerationStructureTriangles, BufferBinding, ProgrammableStage, TextureBinding,
+};
 
 /// Base trait for all resources, allows downcasting via [`Any`].
 pub trait DynResource: Any + WasmNotSendSync + 'static {
@@ -141,3 +145,58 @@ impl<'a> ProgrammableStage<'a, dyn DynShaderModule> {
         }
     }
 }
+
+impl<'a> AccelerationStructureEntries<'a, dyn DynBuffer> {
+    fn expect_downcast<B: DynBuffer>(&self) -> AccelerationStructureEntries<'a, B> {
+        match self {
+            AccelerationStructureEntries::Instances(instances) => {
+                AccelerationStructureEntries::Instances(AccelerationStructureInstances {
+                    buffer: instances.buffer.map(|b| b.expect_downcast_ref()),
+                    offset: instances.offset,
+                    count: instances.count,
+                })
+            }
+            AccelerationStructureEntries::Triangles(triangles) => {
+                AccelerationStructureEntries::Triangles(
+                    triangles
+                        .iter()
+                        .map(|t| AccelerationStructureTriangles {
+                            vertex_buffer: t.vertex_buffer.map(|b| b.expect_downcast_ref()),
+                            vertex_format: t.vertex_format,
+                            first_vertex: t.first_vertex,
+                            vertex_count: t.vertex_count,
+                            vertex_stride: t.vertex_stride,
+                            indices: t.indices.as_ref().map(|i| {
+                                AccelerationStructureTriangleIndices {
+                                    buffer: i.buffer.map(|b| b.expect_downcast_ref()),
+                                    format: i.format,
+                                    offset: i.offset,
+                                    count: i.count,
+                                }
+                            }),
+                            transform: t.transform.as_ref().map(|t| {
+                                AccelerationStructureTriangleTransform {
+                                    buffer: t.buffer.expect_downcast_ref(),
+                                    offset: t.offset,
+                                }
+                            }),
+                            flags: t.flags,
+                        })
+                        .collect(),
+                )
+            }
+            AccelerationStructureEntries::AABBs(entries) => AccelerationStructureEntries::AABBs(
+                entries
+                    .iter()
+                    .map(|e| AccelerationStructureAABBs {
+                        buffer: e.buffer.map(|b| b.expect_downcast_ref()),
+                        offset: e.offset,
+                        count: e.count,
+                        stride: e.stride,
+                        flags: e.flags,
+                    })
+                    .collect(),
+            ),
+        }
+    }
+}

From 99ffc129eb75e0332354e5523d85cde5348e788a Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 28 Jul 2024 12:20:43 +0200
Subject: [PATCH 209/226] remove unnecessary debug constraints from Api 
 (handled by Dyn traits)

---
 wgpu-hal/src/lib.rs | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index dacd21049a..1f4ee79b47 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -407,13 +407,13 @@ pub trait Api: Clone + fmt::Debug + Sized {
     /// them to [`CommandEncoder::reset_all`].
     ///
     /// [`CommandEncoder`]: Api::CommandEncoder
-    type CommandBuffer: DynCommandBuffer + fmt::Debug;
+    type CommandBuffer: DynCommandBuffer;
 
     type Buffer: DynBuffer;
     type Texture: DynTexture;
-    type SurfaceTexture: DynSurfaceTexture + fmt::Debug + Borrow<Self::Texture>;
+    type SurfaceTexture: DynSurfaceTexture + Borrow<Self::Texture>;
     type TextureView: DynTextureView;
-    type Sampler: DynSampler + fmt::Debug;
+    type Sampler: DynSampler;
     type QuerySet: DynQuerySet;
 
     /// A value you can block on to wait for something to finish.
@@ -433,17 +433,17 @@ pub trait Api: Clone + fmt::Debug + Sized {
     /// before a lower-valued operation, then waiting for the fence to reach the
     /// lower value could return before the lower-valued operation has actually
     /// finished.
-    type Fence: DynFence + fmt::Debug;
+    type Fence: DynFence;
 
-    type BindGroupLayout: DynBindGroupLayout + fmt::Debug;
+    type BindGroupLayout: DynBindGroupLayout;
     type BindGroup: DynBindGroup;
     type PipelineLayout: DynPipelineLayout;
     type ShaderModule: DynShaderModule;
     type RenderPipeline: DynRenderPipeline;
     type ComputePipeline: DynComputePipeline;
-    type PipelineCache: DynPipelineCache + fmt::Debug;
+    type PipelineCache: DynPipelineCache;
 
-    type AccelerationStructure: DynAccelerationStructure + fmt::Debug + 'static;
+    type AccelerationStructure: DynAccelerationStructure + 'static;
 }
 
 pub trait Instance: Sized + WasmNotSendSync {

From 6f01cbff061e4ba6376b7939980543111b91d006 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 4 Aug 2024 17:17:03 +0200
Subject: [PATCH 210/226] introduce DynAdapter

---
 wgpu-hal/src/dx12/mod.rs        |  1 +
 wgpu-hal/src/dynamic/adapter.rs | 56 +++++++++++++++++++++++++++++++++
 wgpu-hal/src/dynamic/mod.rs     |  2 ++
 wgpu-hal/src/gles/mod.rs        |  1 +
 wgpu-hal/src/lib.rs             | 11 ++++---
 wgpu-hal/src/metal/mod.rs       |  1 +
 wgpu-hal/src/vulkan/mod.rs      |  1 +
 7 files changed, 68 insertions(+), 5 deletions(-)
 create mode 100644 wgpu-hal/src/dynamic/adapter.rs

diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index e5db9fc234..fc3f2fbd12 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -88,6 +88,7 @@ impl crate::Api for Api {
 }
 
 crate::impl_dyn_resource!(
+    Adapter,
     AccelerationStructure,
     BindGroup,
     BindGroupLayout,
diff --git a/wgpu-hal/src/dynamic/adapter.rs b/wgpu-hal/src/dynamic/adapter.rs
new file mode 100644
index 0000000000..7f9b63a838
--- /dev/null
+++ b/wgpu-hal/src/dynamic/adapter.rs
@@ -0,0 +1,56 @@
+use crate::{Adapter, DeviceError, SurfaceCapabilities, TextureFormatCapabilities};
+
+use super::{DynDevice, DynQueue, DynResource, DynResourceExt, DynSurface};
+
+pub struct DynOpenDevice {
+    pub device: Box<dyn DynDevice>,
+    pub queue: Box<dyn DynQueue>,
+}
+
+pub trait DynAdapter: DynResource {
+    unsafe fn open(
+        &self,
+        features: wgt::Features,
+        limits: &wgt::Limits,
+        memory_hints: &wgt::MemoryHints,
+    ) -> Result<DynOpenDevice, DeviceError>;
+
+    unsafe fn texture_format_capabilities(
+        &self,
+        format: wgt::TextureFormat,
+    ) -> TextureFormatCapabilities;
+
+    unsafe fn surface_capabilities(&self, surface: &dyn DynSurface) -> Option<SurfaceCapabilities>;
+
+    unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp;
+}
+
+impl<A: Adapter + DynResource> DynAdapter for A {
+    unsafe fn open(
+        &self,
+        features: wgt::Features,
+        limits: &wgt::Limits,
+        memory_hints: &wgt::MemoryHints,
+    ) -> Result<DynOpenDevice, DeviceError> {
+        unsafe { A::open(self, features, limits, memory_hints) }.map(|open_device| DynOpenDevice {
+            device: Box::new(open_device.device),
+            queue: Box::new(open_device.queue),
+        })
+    }
+
+    unsafe fn texture_format_capabilities(
+        &self,
+        format: wgt::TextureFormat,
+    ) -> TextureFormatCapabilities {
+        unsafe { A::texture_format_capabilities(self, format) }
+    }
+
+    unsafe fn surface_capabilities(&self, surface: &dyn DynSurface) -> Option<SurfaceCapabilities> {
+        let surface = surface.expect_downcast_ref();
+        unsafe { A::surface_capabilities(self, surface) }
+    }
+
+    unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp {
+        unsafe { A::get_presentation_timestamp(self) }
+    }
+}
diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
index b8fc8a1f6c..490251a511 100644
--- a/wgpu-hal/src/dynamic/mod.rs
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -1,8 +1,10 @@
+mod adapter;
 mod command;
 mod device;
 mod queue;
 mod surface;
 
+pub use adapter::{DynAdapter, DynOpenDevice};
 pub use command::DynCommandEncoder;
 pub use device::DynDevice;
 pub use queue::DynQueue;
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index 617d3f0729..86ee6df29d 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -165,6 +165,7 @@ impl crate::Api for Api {
 }
 
 crate::impl_dyn_resource!(
+    Adapter,
     AccelerationStructure,
     BindGroup,
     BindGroupLayout,
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 1f4ee79b47..0ba8bef8d0 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -266,10 +266,11 @@ mod dynamic;
 
 pub(crate) use dynamic::impl_dyn_resource;
 pub use dynamic::{
-    DynAccelerationStructure, DynAcquiredSurfaceTexture, DynBindGroup, DynBindGroupLayout,
-    DynBuffer, DynCommandBuffer, DynCommandEncoder, DynComputePipeline, DynDevice, DynFence,
-    DynPipelineCache, DynPipelineLayout, DynQuerySet, DynQueue, DynRenderPipeline, DynResource,
-    DynSampler, DynShaderModule, DynSurface, DynSurfaceTexture, DynTexture, DynTextureView,
+    DynAccelerationStructure, DynAcquiredSurfaceTexture, DynAdapter, DynBindGroup,
+    DynBindGroupLayout, DynBuffer, DynCommandBuffer, DynCommandEncoder, DynComputePipeline,
+    DynDevice, DynFence, DynOpenDevice, DynPipelineCache, DynPipelineLayout, DynQuerySet, DynQueue,
+    DynRenderPipeline, DynResource, DynSampler, DynShaderModule, DynSurface, DynSurfaceTexture,
+    DynTexture, DynTextureView,
 };
 
 use std::{
@@ -393,7 +394,7 @@ impl InstanceError {
 pub trait Api: Clone + fmt::Debug + Sized {
     type Instance: Instance<A = Self>;
     type Surface: DynSurface + Surface<A = Self>;
-    type Adapter: Adapter<A = Self>;
+    type Adapter: DynAdapter + Adapter<A = Self>;
     type Device: DynDevice + Device<A = Self>;
 
     type Queue: DynQueue + Queue<A = Self>;
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index f861474f8a..728ee8f496 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -72,6 +72,7 @@ impl crate::Api for Api {
 }
 
 crate::impl_dyn_resource!(
+    Adapter,
     AccelerationStructure,
     BindGroup,
     BindGroupLayout,
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index a77ff444b6..90c2cf2a0c 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -79,6 +79,7 @@ impl crate::Api for Api {
 }
 
 crate::impl_dyn_resource!(
+    Adapter,
     AccelerationStructure,
     BindGroup,
     BindGroupLayout,

From 5b9198fd43790f60219a47ac65aa2864d4d60db1 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 4 Aug 2024 17:37:02 +0200
Subject: [PATCH 211/226] introduce DynInstance

---
 wgpu-hal/src/dx12/mod.rs         |  1 +
 wgpu-hal/src/dynamic/instance.rs | 53 ++++++++++++++++++++++++++++++++
 wgpu-hal/src/dynamic/mod.rs      |  2 ++
 wgpu-hal/src/gles/mod.rs         |  1 +
 wgpu-hal/src/lib.rs              |  8 ++---
 wgpu-hal/src/metal/mod.rs        |  1 +
 wgpu-hal/src/vulkan/mod.rs       |  1 +
 7 files changed, 63 insertions(+), 4 deletions(-)
 create mode 100644 wgpu-hal/src/dynamic/instance.rs

diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index fc3f2fbd12..8401bbe1eb 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -98,6 +98,7 @@ crate::impl_dyn_resource!(
     ComputePipeline,
     Device,
     Fence,
+    Instance,
     PipelineCache,
     PipelineLayout,
     QuerySet,
diff --git a/wgpu-hal/src/dynamic/instance.rs b/wgpu-hal/src/dynamic/instance.rs
new file mode 100644
index 0000000000..80d834544d
--- /dev/null
+++ b/wgpu-hal/src/dynamic/instance.rs
@@ -0,0 +1,53 @@
+// Box casts are needed, alternative would be a temporaries which are more verbose and not more expressive.
+#![allow(trivial_casts)]
+
+use crate::{Capabilities, Instance, InstanceError};
+
+use super::{DynAdapter, DynResource, DynResourceExt as _, DynSurface};
+
+pub struct DynExposedAdapter {
+    pub adapter: Box<dyn DynAdapter>,
+    pub info: wgt::AdapterInfo,
+    pub features: wgt::Features,
+    pub capabilities: Capabilities,
+}
+
+pub trait DynInstance: DynResource {
+    unsafe fn create_surface(
+        &self,
+        display_handle: raw_window_handle::RawDisplayHandle,
+        window_handle: raw_window_handle::RawWindowHandle,
+    ) -> Result<Box<dyn DynSurface>, InstanceError>;
+
+    unsafe fn enumerate_adapters(
+        &self,
+        surface_hint: Option<&dyn DynSurface>,
+    ) -> Vec<DynExposedAdapter>;
+}
+
+impl<I: Instance + DynResource> DynInstance for I {
+    unsafe fn create_surface(
+        &self,
+        display_handle: raw_window_handle::RawDisplayHandle,
+        window_handle: raw_window_handle::RawWindowHandle,
+    ) -> Result<Box<dyn DynSurface>, InstanceError> {
+        unsafe { I::create_surface(self, display_handle, window_handle) }
+            .map(|surface| Box::new(surface) as Box<dyn DynSurface>)
+    }
+
+    unsafe fn enumerate_adapters(
+        &self,
+        surface_hint: Option<&dyn DynSurface>,
+    ) -> Vec<DynExposedAdapter> {
+        let surface_hint = surface_hint.map(|s| s.expect_downcast_ref());
+        unsafe { I::enumerate_adapters(self, surface_hint) }
+            .into_iter()
+            .map(|exposed| DynExposedAdapter {
+                adapter: Box::new(exposed.adapter),
+                info: exposed.info,
+                features: exposed.features,
+                capabilities: exposed.capabilities,
+            })
+            .collect()
+    }
+}
diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
index 490251a511..5509d7cce6 100644
--- a/wgpu-hal/src/dynamic/mod.rs
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -1,12 +1,14 @@
 mod adapter;
 mod command;
 mod device;
+mod instance;
 mod queue;
 mod surface;
 
 pub use adapter::{DynAdapter, DynOpenDevice};
 pub use command::DynCommandEncoder;
 pub use device::DynDevice;
+pub use instance::{DynExposedAdapter, DynInstance};
 pub use queue::DynQueue;
 pub use surface::{DynAcquiredSurfaceTexture, DynSurface};
 
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index 86ee6df29d..df59778065 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -175,6 +175,7 @@ crate::impl_dyn_resource!(
     ComputePipeline,
     Device,
     Fence,
+    Instance,
     PipelineCache,
     PipelineLayout,
     QuerySet,
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 0ba8bef8d0..f26b6925cc 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -268,9 +268,9 @@ pub(crate) use dynamic::impl_dyn_resource;
 pub use dynamic::{
     DynAccelerationStructure, DynAcquiredSurfaceTexture, DynAdapter, DynBindGroup,
     DynBindGroupLayout, DynBuffer, DynCommandBuffer, DynCommandEncoder, DynComputePipeline,
-    DynDevice, DynFence, DynOpenDevice, DynPipelineCache, DynPipelineLayout, DynQuerySet, DynQueue,
-    DynRenderPipeline, DynResource, DynSampler, DynShaderModule, DynSurface, DynSurfaceTexture,
-    DynTexture, DynTextureView,
+    DynDevice, DynExposedAdapter, DynFence, DynInstance, DynOpenDevice, DynPipelineCache,
+    DynPipelineLayout, DynQuerySet, DynQueue, DynRenderPipeline, DynResource, DynSampler,
+    DynShaderModule, DynSurface, DynSurfaceTexture, DynTexture, DynTextureView,
 };
 
 use std::{
@@ -392,7 +392,7 @@ impl InstanceError {
 }
 
 pub trait Api: Clone + fmt::Debug + Sized {
-    type Instance: Instance<A = Self>;
+    type Instance: DynInstance + Instance<A = Self>;
     type Surface: DynSurface + Surface<A = Self>;
     type Adapter: DynAdapter + Adapter<A = Self>;
     type Device: DynDevice + Device<A = Self>;
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index 728ee8f496..62d409a8ff 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -82,6 +82,7 @@ crate::impl_dyn_resource!(
     ComputePipeline,
     Device,
     Fence,
+    Instance,
     PipelineCache,
     PipelineLayout,
     QuerySet,
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index 90c2cf2a0c..0b024b80a7 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -89,6 +89,7 @@ crate::impl_dyn_resource!(
     ComputePipeline,
     Device,
     Fence,
+    Instance,
     PipelineCache,
     PipelineLayout,
     QuerySet,

From 7c7e4164f178d951c80cb937786f3ae4b1022e32 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Thu, 11 Jul 2024 00:35:36 +0200
Subject: [PATCH 212/226] The big unraveling: core device now has a boxed
 `DynDevice`, ripple effects from there leading to boxing of almost all hal
 resources

---
 wgpu-core/src/binding_model.rs       |  20 ++--
 wgpu-core/src/command/allocator.rs   |  18 ++--
 wgpu-core/src/command/bundle.rs      |   6 +-
 wgpu-core/src/command/clear.rs       |  32 +++---
 wgpu-core/src/command/compute.rs     |  67 ++++++------
 wgpu-core/src/command/memory_init.rs |  12 +--
 wgpu-core/src/command/mod.rs         |  60 ++++++-----
 wgpu-core/src/command/query.rs       |  16 ++-
 wgpu-core/src/command/render.rs      |  17 ++-
 wgpu-core/src/command/transfer.rs    | 115 ++++++++++++---------
 wgpu-core/src/device/global.rs       |  14 ++-
 wgpu-core/src/device/life.rs         |   4 +-
 wgpu-core/src/device/mod.rs          |  10 +-
 wgpu-core/src/device/queue.rs        | 110 +++++++++++---------
 wgpu-core/src/device/resource.rs     |  95 ++++++++---------
 wgpu-core/src/hub.rs                 |   3 +-
 wgpu-core/src/instance.rs            |   6 +-
 wgpu-core/src/pipeline.rs            |  28 +++--
 wgpu-core/src/present.rs             |  15 ++-
 wgpu-core/src/resource.rs            | 148 ++++++++++++++-------------
 wgpu-core/src/track/buffer.rs        |   4 +-
 wgpu-core/src/track/mod.rs           |   8 +-
 wgpu-core/src/track/texture.rs       |   6 +-
 wgpu/src/backend/wgpu_core.rs        |   2 +-
 24 files changed, 419 insertions(+), 397 deletions(-)

diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs
index 0687e6e0f0..825a96418c 100644
--- a/wgpu-core/src/binding_model.rs
+++ b/wgpu-core/src/binding_model.rs
@@ -499,7 +499,7 @@ impl<A: HalApi> std::fmt::Display for ExclusivePipeline<A> {
 /// Bind group layout.
 #[derive(Debug)]
 pub struct BindGroupLayout<A: HalApi> {
-    pub(crate) raw: ManuallyDrop<A::BindGroupLayout>,
+    pub(crate) raw: ManuallyDrop<Box<dyn hal::DynBindGroupLayout>>,
     pub(crate) device: Arc<Device<A>>,
     pub(crate) entries: bgl::EntryMap,
     /// It is very important that we know if the bind group comes from the BGL pool.
@@ -525,7 +525,6 @@ impl<A: HalApi> Drop for BindGroupLayout<A> {
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
         let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
         unsafe {
-            use hal::Device;
             self.device.raw().destroy_bind_group_layout(raw);
         }
     }
@@ -537,8 +536,8 @@ crate::impl_parent_device!(BindGroupLayout);
 crate::impl_storage_item!(BindGroupLayout);
 
 impl<A: HalApi> BindGroupLayout<A> {
-    pub(crate) fn raw(&self) -> &A::BindGroupLayout {
-        &self.raw
+    pub(crate) fn raw(&self) -> &dyn hal::DynBindGroupLayout {
+        self.raw.as_ref()
     }
 }
 
@@ -652,7 +651,7 @@ pub struct ResolvedPipelineLayoutDescriptor<'a, A: HalApi> {
 
 #[derive(Debug)]
 pub struct PipelineLayout<A: HalApi> {
-    pub(crate) raw: ManuallyDrop<A::PipelineLayout>,
+    pub(crate) raw: ManuallyDrop<Box<dyn hal::DynPipelineLayout>>,
     pub(crate) device: Arc<Device<A>>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
@@ -666,15 +665,14 @@ impl<A: HalApi> Drop for PipelineLayout<A> {
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
         let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
         unsafe {
-            use hal::Device;
             self.device.raw().destroy_pipeline_layout(raw);
         }
     }
 }
 
 impl<A: HalApi> PipelineLayout<A> {
-    pub(crate) fn raw(&self) -> &A::PipelineLayout {
-        &self.raw
+    pub(crate) fn raw(&self) -> &dyn hal::DynPipelineLayout {
+        self.raw.as_ref()
     }
 
     pub(crate) fn get_binding_maps(&self) -> ArrayVec<&bgl::EntryMap, { hal::MAX_BIND_GROUPS }> {
@@ -889,7 +887,7 @@ pub(crate) fn buffer_binding_type_alignment(
 
 #[derive(Debug)]
 pub struct BindGroup<A: HalApi> {
-    pub(crate) raw: Snatchable<A::BindGroup>,
+    pub(crate) raw: Snatchable<Box<dyn hal::DynBindGroup>>,
     pub(crate) device: Arc<Device<A>>,
     pub(crate) layout: Arc<BindGroupLayout<A>>,
     /// The `label` from the descriptor used to create the resource.
@@ -909,7 +907,6 @@ impl<A: HalApi> Drop for BindGroup<A> {
         if let Some(raw) = self.raw.take() {
             resource_log!("Destroy raw {}", self.error_ident());
             unsafe {
-                use hal::Device;
                 self.device.raw().destroy_bind_group(raw);
             }
         }
@@ -920,7 +917,7 @@ impl<A: HalApi> BindGroup<A> {
     pub(crate) fn try_raw<'a>(
         &'a self,
         guard: &'a SnatchGuard,
-    ) -> Result<&A::BindGroup, DestroyedResourceError> {
+    ) -> Result<&dyn hal::DynBindGroup, DestroyedResourceError> {
         // Clippy insist on writing it this way. The idea is to return None
         // if any of the raw buffer is not valid anymore.
         for buffer in &self.used_buffer_ranges {
@@ -932,6 +929,7 @@ impl<A: HalApi> BindGroup<A> {
 
         self.raw
             .get(guard)
+            .map(|raw| raw.as_ref())
             .ok_or_else(|| DestroyedResourceError(self.error_ident()))
     }
 
diff --git a/wgpu-core/src/command/allocator.rs b/wgpu-core/src/command/allocator.rs
index e17fd08d76..b05898a577 100644
--- a/wgpu-core/src/command/allocator.rs
+++ b/wgpu-core/src/command/allocator.rs
@@ -1,6 +1,4 @@
-use crate::hal_api::HalApi;
 use crate::resource_log;
-use hal::Device as _;
 
 use crate::lock::{rank, Mutex};
 
@@ -14,11 +12,11 @@ use crate::lock::{rank, Mutex};
 /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder
 /// [ce]: hal::CommandEncoder
 /// [cb]: hal::Api::CommandBuffer
-pub(crate) struct CommandAllocator<A: HalApi> {
-    free_encoders: Mutex<Vec<A::CommandEncoder>>,
+pub(crate) struct CommandAllocator {
+    free_encoders: Mutex<Vec<Box<dyn hal::DynCommandEncoder>>>,
 }
 
-impl<A: HalApi> CommandAllocator<A> {
+impl CommandAllocator {
     pub(crate) fn new() -> Self {
         Self {
             free_encoders: Mutex::new(rank::COMMAND_ALLOCATOR_FREE_ENCODERS, Vec::new()),
@@ -33,9 +31,9 @@ impl<A: HalApi> CommandAllocator<A> {
     /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder
     pub(crate) fn acquire_encoder(
         &self,
-        device: &A::Device,
-        queue: &A::Queue,
-    ) -> Result<A::CommandEncoder, hal::DeviceError> {
+        device: &dyn hal::DynDevice,
+        queue: &dyn hal::DynQueue,
+    ) -> Result<Box<dyn hal::DynCommandEncoder>, hal::DeviceError> {
         let mut free_encoders = self.free_encoders.lock();
         match free_encoders.pop() {
             Some(encoder) => Ok(encoder),
@@ -47,7 +45,7 @@ impl<A: HalApi> CommandAllocator<A> {
     }
 
     /// Add `encoder` back to the free pool.
-    pub(crate) fn release_encoder(&self, encoder: A::CommandEncoder) {
+    pub(crate) fn release_encoder(&self, encoder: Box<dyn hal::DynCommandEncoder>) {
         let mut free_encoders = self.free_encoders.lock();
         free_encoders.push(encoder);
     }
@@ -55,7 +53,7 @@ impl<A: HalApi> CommandAllocator<A> {
     /// Free the pool of command encoders.
     ///
     /// This is only called when the `Device` is dropped.
-    pub(crate) fn dispose(&self, device: &A::Device) {
+    pub(crate) fn dispose(&self, device: &dyn hal::DynDevice) {
         let mut free_encoders = self.free_encoders.lock();
         resource_log!("CommandAllocator::dispose encoders {}", free_encoders.len());
         for cmd_encoder in free_encoders.drain(..) {
diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs
index 542c52b886..2f040e615b 100644
--- a/wgpu-core/src/command/bundle.rs
+++ b/wgpu-core/src/command/bundle.rs
@@ -104,8 +104,6 @@ use arrayvec::ArrayVec;
 use std::{borrow::Cow, mem, num::NonZeroU32, ops::Range, sync::Arc};
 use thiserror::Error;
 
-use hal::CommandEncoder as _;
-
 use super::{
     render_command::{ArcRenderCommand, RenderCommand},
     DrawKind,
@@ -965,7 +963,7 @@ impl<A: HalApi> RenderBundle<A> {
     /// The only failure condition is if some of the used buffers are destroyed.
     pub(super) unsafe fn execute(
         &self,
-        raw: &mut A::CommandEncoder,
+        raw: &mut dyn hal::DynCommandEncoder,
         snatch_guard: &SnatchGuard,
     ) -> Result<(), ExecutionError> {
         let mut offsets = self.base.dynamic_offsets.as_slice();
@@ -1006,7 +1004,7 @@ impl<A: HalApi> RenderBundle<A> {
                     offset,
                     size,
                 } => {
-                    let buffer: &A::Buffer = buffer.try_raw(snatch_guard)?;
+                    let buffer = buffer.try_raw(snatch_guard)?;
                     let bb = hal::BufferBinding {
                         buffer,
                         offset: *offset,
diff --git a/wgpu-core/src/command/clear.rs b/wgpu-core/src/command/clear.rs
index a93fe8345d..487bdf756b 100644
--- a/wgpu-core/src/command/clear.rs
+++ b/wgpu-core/src/command/clear.rs
@@ -19,7 +19,6 @@ use crate::{
     track::{TextureSelector, TextureTrackerSetSingle},
 };
 
-use hal::CommandEncoder as _;
 use thiserror::Error;
 use wgt::{math::align_to, BufferAddress, BufferUsages, ImageSubresourceRange, TextureAspect};
 
@@ -167,7 +166,7 @@ impl Global {
         let dst_barrier = dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard));
         let cmd_buf_raw = cmd_buf_data.encoder.open()?;
         unsafe {
-            cmd_buf_raw.transition_buffers(dst_barrier.into_iter());
+            cmd_buf_raw.transition_buffers(dst_barrier.as_slice());
             cmd_buf_raw.clear_buffer(dst_raw, offset..end_offset);
         }
         Ok(())
@@ -263,7 +262,7 @@ impl Global {
             encoder,
             &mut tracker.textures,
             &device.alignments,
-            &device.zero_buffer,
+            device.zero_buffer.as_ref(),
             &snatch_guard,
         )
     }
@@ -272,10 +271,10 @@ impl Global {
 pub(crate) fn clear_texture<A: HalApi, T: TextureTrackerSetSingle<A>>(
     dst_texture: &Arc<Texture<A>>,
     range: TextureInitRange,
-    encoder: &mut A::CommandEncoder,
+    encoder: &mut dyn hal::DynCommandEncoder,
     texture_tracker: &mut T,
     alignments: &hal::Alignments,
-    zero_buffer: &A::Buffer,
+    zero_buffer: &dyn hal::DynBuffer,
     snatch_guard: &SnatchGuard<'_>,
 ) -> Result<(), ClearError> {
     let dst_raw = dst_texture.try_raw(snatch_guard)?;
@@ -316,14 +315,15 @@ pub(crate) fn clear_texture<A: HalApi, T: TextureTrackerSetSingle<A>>(
     // change_replace_tracked whenever possible.
     let dst_barrier = texture_tracker
         .set_single(dst_texture, selector, clear_usage)
-        .map(|pending| pending.into_hal(dst_raw));
+        .map(|pending| pending.into_hal(dst_raw))
+        .collect::<Vec<_>>();
     unsafe {
-        encoder.transition_textures(dst_barrier.into_iter());
+        encoder.transition_textures(&dst_barrier);
     }
 
     // Record actual clearing
     match dst_texture.clear_mode {
-        TextureClearMode::BufferCopy => clear_texture_via_buffer_copies::<A>(
+        TextureClearMode::BufferCopy => clear_texture_via_buffer_copies(
             &dst_texture.desc,
             alignments,
             zero_buffer,
@@ -346,13 +346,13 @@ pub(crate) fn clear_texture<A: HalApi, T: TextureTrackerSetSingle<A>>(
     Ok(())
 }
 
-fn clear_texture_via_buffer_copies<A: HalApi>(
+fn clear_texture_via_buffer_copies(
     texture_desc: &wgt::TextureDescriptor<(), Vec<wgt::TextureFormat>>,
     alignments: &hal::Alignments,
-    zero_buffer: &A::Buffer, // Buffer of size device::ZERO_BUFFER_SIZE
+    zero_buffer: &dyn hal::DynBuffer, // Buffer of size device::ZERO_BUFFER_SIZE
     range: TextureInitRange,
-    encoder: &mut A::CommandEncoder,
-    dst_raw: &A::Texture,
+    encoder: &mut dyn hal::DynCommandEncoder,
+    dst_raw: &dyn hal::DynTexture,
 ) {
     assert!(!texture_desc.format.is_depth_stencil_format());
 
@@ -436,7 +436,7 @@ fn clear_texture_via_buffer_copies<A: HalApi>(
     }
 
     unsafe {
-        encoder.copy_buffer_to_texture(zero_buffer, dst_raw, zero_buffer_copy_regions.into_iter());
+        encoder.copy_buffer_to_texture(zero_buffer, dst_raw, &zero_buffer_copy_regions);
     }
 }
 
@@ -444,7 +444,7 @@ fn clear_texture_via_render_passes<A: HalApi>(
     dst_texture: &Texture<A>,
     range: TextureInitRange,
     is_color: bool,
-    encoder: &mut A::CommandEncoder,
+    encoder: &mut dyn hal::DynCommandEncoder,
 ) {
     assert_eq!(dst_texture.desc.dimension, wgt::TextureDimension::D2);
 
@@ -461,7 +461,7 @@ fn clear_texture_via_render_passes<A: HalApi>(
             let (color_attachments, depth_stencil_attachment) = if is_color {
                 color_attachments_tmp = [Some(hal::ColorAttachment {
                     target: hal::Attachment {
-                        view: Texture::get_clear_view(
+                        view: Texture::<A>::get_clear_view(
                             &dst_texture.clear_mode,
                             &dst_texture.desc,
                             mip_level,
@@ -479,7 +479,7 @@ fn clear_texture_via_render_passes<A: HalApi>(
                     &[][..],
                     Some(hal::DepthStencilAttachment {
                         target: hal::Attachment {
-                            view: Texture::get_clear_view(
+                            view: Texture::<A>::get_clear_view(
                                 &dst_texture.clear_mode,
                                 &dst_texture.desc,
                                 mip_level,
diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs
index c31db544d1..a23370527f 100644
--- a/wgpu-core/src/command/compute.rs
+++ b/wgpu-core/src/command/compute.rs
@@ -26,8 +26,6 @@ use crate::{
     Label,
 };
 
-use hal::CommandEncoder as _;
-
 use thiserror::Error;
 use wgt::{BufferAddress, DynamicOffset};
 
@@ -212,7 +210,7 @@ struct State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder, A: HalApi> {
 
     device: &'cmd_buf Arc<Device<A>>,
 
-    raw_encoder: &'raw_encoder mut A::CommandEncoder,
+    raw_encoder: &'raw_encoder mut dyn hal::DynCommandEncoder,
 
     tracker: &'cmd_buf mut Tracker<A>,
     buffer_memory_init_actions: &'cmd_buf mut Vec<BufferInitTrackerAction<A>>,
@@ -485,40 +483,41 @@ impl Global {
         state.tracker.buffers.set_size(indices.buffers.size());
         state.tracker.textures.set_size(indices.textures.size());
 
-        let timestamp_writes = if let Some(tw) = timestamp_writes.take() {
-            tw.query_set
-                .same_device_as(cmd_buf)
-                .map_pass_err(pass_scope)?;
-
-            let query_set = state.tracker.query_sets.insert_single(tw.query_set);
+        let timestamp_writes: Option<hal::PassTimestampWrites<'_, dyn hal::DynQuerySet>> =
+            if let Some(tw) = timestamp_writes.take() {
+                tw.query_set
+                    .same_device_as(cmd_buf)
+                    .map_pass_err(pass_scope)?;
+
+                let query_set = state.tracker.query_sets.insert_single(tw.query_set);
+
+                // Unlike in render passes we can't delay resetting the query sets since
+                // there is no auxiliary pass.
+                let range = if let (Some(index_a), Some(index_b)) =
+                    (tw.beginning_of_pass_write_index, tw.end_of_pass_write_index)
+                {
+                    Some(index_a.min(index_b)..index_a.max(index_b) + 1)
+                } else {
+                    tw.beginning_of_pass_write_index
+                        .or(tw.end_of_pass_write_index)
+                        .map(|i| i..i + 1)
+                };
+                // Range should always be Some, both values being None should lead to a validation error.
+                // But no point in erroring over that nuance here!
+                if let Some(range) = range {
+                    unsafe {
+                        state.raw_encoder.reset_queries(query_set.raw(), range);
+                    }
+                }
 
-            // Unlike in render passes we can't delay resetting the query sets since
-            // there is no auxiliary pass.
-            let range = if let (Some(index_a), Some(index_b)) =
-                (tw.beginning_of_pass_write_index, tw.end_of_pass_write_index)
-            {
-                Some(index_a.min(index_b)..index_a.max(index_b) + 1)
+                Some(hal::PassTimestampWrites {
+                    query_set: query_set.raw(),
+                    beginning_of_pass_write_index: tw.beginning_of_pass_write_index,
+                    end_of_pass_write_index: tw.end_of_pass_write_index,
+                })
             } else {
-                tw.beginning_of_pass_write_index
-                    .or(tw.end_of_pass_write_index)
-                    .map(|i| i..i + 1)
+                None
             };
-            // Range should always be Some, both values being None should lead to a validation error.
-            // But no point in erroring over that nuance here!
-            if let Some(range) = range {
-                unsafe {
-                    state.raw_encoder.reset_queries(query_set.raw(), range);
-                }
-            }
-
-            Some(hal::PassTimestampWrites {
-                query_set: query_set.raw(),
-                beginning_of_pass_write_index: tw.beginning_of_pass_write_index,
-                end_of_pass_write_index: tw.end_of_pass_write_index,
-            })
-        } else {
-            None
-        };
 
         let hal_desc = hal::ComputePassDescriptor {
             label: hal_label(base.label.as_deref(), self.instance.flags),
diff --git a/wgpu-core/src/command/memory_init.rs b/wgpu-core/src/command/memory_init.rs
index 96427eacc7..7e672393f1 100644
--- a/wgpu-core/src/command/memory_init.rs
+++ b/wgpu-core/src/command/memory_init.rs
@@ -1,7 +1,5 @@
 use std::{collections::hash_map::Entry, ops::Range, sync::Arc, vec::Drain};
 
-use hal::CommandEncoder;
-
 use crate::{
     device::Device,
     hal_api::HalApi,
@@ -140,7 +138,7 @@ pub(crate) fn fixup_discarded_surfaces<
     InitIter: Iterator<Item = TextureSurfaceDiscard<A>>,
 >(
     inits: InitIter,
-    encoder: &mut A::CommandEncoder,
+    encoder: &mut dyn hal::DynCommandEncoder,
     texture_tracker: &mut TextureTracker<A>,
     device: &Device<A>,
     snatch_guard: &SnatchGuard<'_>,
@@ -155,7 +153,7 @@ pub(crate) fn fixup_discarded_surfaces<
             encoder,
             texture_tracker,
             &device.alignments,
-            &device.zero_buffer,
+            device.zero_buffer.as_ref(),
             snatch_guard,
         )
         .unwrap();
@@ -233,7 +231,7 @@ impl<A: HalApi> BakedCommands<A> {
                 self.encoder.transition_buffers(
                     transition
                         .map(|pending| pending.into_hal(&buffer, snatch_guard))
-                        .into_iter(),
+                        .as_slice(),
                 );
             }
 
@@ -307,10 +305,10 @@ impl<A: HalApi> BakedCommands<A> {
                 let clear_result = clear_texture(
                     &texture_use.texture,
                     range,
-                    &mut self.encoder,
+                    self.encoder.as_mut(),
                     &mut device_tracker.textures,
                     &device.alignments,
-                    &device.zero_buffer,
+                    device.zero_buffer.as_ref(),
                     snatch_guard,
                 );
 
diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs
index 7314e8f04c..d16e7f6d05 100644
--- a/wgpu-core/src/command/mod.rs
+++ b/wgpu-core/src/command/mod.rs
@@ -39,7 +39,6 @@ use crate::track::{DeviceTracker, Tracker, UsageScope};
 use crate::LabelHelpers;
 use crate::{api_log, global::Global, hal_api::HalApi, id, resource_log, Label};
 
-use hal::CommandEncoder as _;
 use thiserror::Error;
 
 #[cfg(feature = "trace")]
@@ -115,7 +114,7 @@ pub(crate) enum CommandEncoderStatus {
 /// [rce]: hal::Api::CommandEncoder
 /// [rcb]: hal::Api::CommandBuffer
 /// [`CommandEncoderId`]: crate::id::CommandEncoderId
-pub(crate) struct CommandEncoder<A: HalApi> {
+pub(crate) struct CommandEncoder {
     /// The underlying `wgpu_hal` [`CommandEncoder`].
     ///
     /// Successfully executed command buffers' encoders are saved in a
@@ -123,7 +122,7 @@ pub(crate) struct CommandEncoder<A: HalApi> {
     ///
     /// [`CommandEncoder`]: hal::Api::CommandEncoder
     /// [`CommandAllocator`]: crate::command::CommandAllocator
-    raw: A::CommandEncoder,
+    raw: Box<dyn hal::DynCommandEncoder>,
 
     /// All the raw command buffers for our owning [`CommandBuffer`], in
     /// submission order.
@@ -136,7 +135,7 @@ pub(crate) struct CommandEncoder<A: HalApi> {
     ///
     /// [CE::ra]: hal::CommandEncoder::reset_all
     /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder
-    list: Vec<A::CommandBuffer>,
+    list: Vec<Box<dyn hal::DynCommandBuffer>>,
 
     /// True if `raw` is in the "recording" state.
     ///
@@ -150,7 +149,7 @@ pub(crate) struct CommandEncoder<A: HalApi> {
 }
 
 //TODO: handle errors better
-impl<A: HalApi> CommandEncoder<A> {
+impl CommandEncoder {
     /// Finish the current command buffer, if any, and place it
     /// at the second-to-last position in our list.
     ///
@@ -219,14 +218,14 @@ impl<A: HalApi> CommandEncoder<A> {
     /// Begin recording a new command buffer, if we haven't already.
     ///
     /// The underlying hal encoder is put in the "recording" state.
-    pub(crate) fn open(&mut self) -> Result<&mut A::CommandEncoder, DeviceError> {
+    pub(crate) fn open(&mut self) -> Result<&mut dyn hal::DynCommandEncoder, DeviceError> {
         if !self.is_open {
             self.is_open = true;
             let hal_label = self.hal_label.as_deref();
             unsafe { self.raw.begin_encoding(hal_label)? };
         }
 
-        Ok(&mut self.raw)
+        Ok(self.raw.as_mut())
     }
 
     /// Begin recording a new command buffer for a render pass, with
@@ -242,8 +241,8 @@ impl<A: HalApi> CommandEncoder<A> {
 }
 
 pub(crate) struct BakedCommands<A: HalApi> {
-    pub(crate) encoder: A::CommandEncoder,
-    pub(crate) list: Vec<A::CommandBuffer>,
+    pub(crate) encoder: Box<dyn hal::DynCommandEncoder>,
+    pub(crate) list: Vec<Box<dyn hal::DynCommandBuffer>>,
     pub(crate) trackers: Tracker<A>,
     buffer_memory_init_actions: Vec<BufferInitTrackerAction<A>>,
     texture_memory_actions: CommandBufferTextureMemoryActions<A>,
@@ -255,7 +254,7 @@ pub struct CommandBufferMutable<A: HalApi> {
     /// they belong to.
     ///
     /// [`wgpu_hal::Api::CommandBuffer`]: hal::Api::CommandBuffer
-    pub(crate) encoder: CommandEncoder<A>,
+    pub(crate) encoder: CommandEncoder,
 
     /// The current state of this command buffer's encoder.
     status: CommandEncoderStatus,
@@ -280,7 +279,7 @@ pub struct CommandBufferMutable<A: HalApi> {
 impl<A: HalApi> CommandBufferMutable<A> {
     pub(crate) fn open_encoder_and_tracker(
         &mut self,
-    ) -> Result<(&mut A::CommandEncoder, &mut Tracker<A>), DeviceError> {
+    ) -> Result<(&mut dyn hal::DynCommandEncoder, &mut Tracker<A>), DeviceError> {
         let encoder = self.encoder.open()?;
         let tracker = &mut self.trackers;
 
@@ -329,17 +328,20 @@ impl<A: HalApi> Drop for CommandBuffer<A> {
         }
         let mut baked = self.extract_baked_commands();
         unsafe {
-            baked.encoder.reset_all(baked.list.into_iter());
+            baked.encoder.reset_all(baked.list);
         }
         unsafe {
-            use hal::Device;
             self.device.raw().destroy_command_encoder(baked.encoder);
         }
     }
 }
 
 impl<A: HalApi> CommandBuffer<A> {
-    pub(crate) fn new(encoder: A::CommandEncoder, device: &Arc<Device<A>>, label: &Label) -> Self {
+    pub(crate) fn new(
+        encoder: Box<dyn hal::DynCommandEncoder>,
+        device: &Arc<Device<A>>,
+        label: &Label,
+    ) -> Self {
         CommandBuffer {
             device: device.clone(),
             support_clear_texture: device.features.contains(wgt::Features::CLEAR_TEXTURE),
@@ -370,7 +372,7 @@ impl<A: HalApi> CommandBuffer<A> {
     }
 
     pub(crate) fn insert_barriers_from_tracker(
-        raw: &mut A::CommandEncoder,
+        raw: &mut dyn hal::DynCommandEncoder,
         base: &mut Tracker<A>,
         head: &Tracker<A>,
         snatch_guard: &SnatchGuard,
@@ -384,7 +386,7 @@ impl<A: HalApi> CommandBuffer<A> {
     }
 
     pub(crate) fn insert_barriers_from_scope(
-        raw: &mut A::CommandEncoder,
+        raw: &mut dyn hal::DynCommandEncoder,
         base: &mut Tracker<A>,
         head: &UsageScope<A>,
         snatch_guard: &SnatchGuard,
@@ -398,27 +400,31 @@ impl<A: HalApi> CommandBuffer<A> {
     }
 
     pub(crate) fn drain_barriers(
-        raw: &mut A::CommandEncoder,
+        raw: &mut dyn hal::DynCommandEncoder,
         base: &mut Tracker<A>,
         snatch_guard: &SnatchGuard,
     ) {
         profiling::scope!("drain_barriers");
 
-        let buffer_barriers = base.buffers.drain_transitions(snatch_guard);
+        let buffer_barriers = base
+            .buffers
+            .drain_transitions(snatch_guard)
+            .collect::<Vec<_>>();
         let (transitions, textures) = base.textures.drain_transitions(snatch_guard);
         let texture_barriers = transitions
             .into_iter()
             .enumerate()
-            .map(|(i, p)| p.into_hal(textures[i].unwrap().raw()));
+            .map(|(i, p)| p.into_hal(textures[i].unwrap().raw()))
+            .collect::<Vec<_>>();
 
         unsafe {
-            raw.transition_buffers(buffer_barriers);
-            raw.transition_textures(texture_barriers);
+            raw.transition_buffers(&buffer_barriers);
+            raw.transition_textures(&texture_barriers);
         }
     }
 
     pub(crate) fn insert_barriers_from_device_tracker(
-        raw: &mut A::CommandEncoder,
+        raw: &mut dyn hal::DynCommandEncoder,
         base: &mut DeviceTracker<A>,
         head: &Tracker<A>,
         snatch_guard: &SnatchGuard,
@@ -427,15 +433,17 @@ impl<A: HalApi> CommandBuffer<A> {
 
         let buffer_barriers = base
             .buffers
-            .set_from_tracker_and_drain_transitions(&head.buffers, snatch_guard);
+            .set_from_tracker_and_drain_transitions(&head.buffers, snatch_guard)
+            .collect::<Vec<_>>();
 
         let texture_barriers = base
             .textures
-            .set_from_tracker_and_drain_transitions(&head.textures, snatch_guard);
+            .set_from_tracker_and_drain_transitions(&head.textures, snatch_guard)
+            .collect::<Vec<_>>();
 
         unsafe {
-            raw.transition_buffers(buffer_barriers);
-            raw.transition_textures(texture_barriers);
+            raw.transition_buffers(&buffer_barriers);
+            raw.transition_textures(&texture_barriers);
         }
     }
 }
diff --git a/wgpu-core/src/command/query.rs b/wgpu-core/src/command/query.rs
index 382fa2d296..26997ebd8b 100644
--- a/wgpu-core/src/command/query.rs
+++ b/wgpu-core/src/command/query.rs
@@ -1,5 +1,3 @@
-use hal::CommandEncoder as _;
-
 #[cfg(feature = "trace")]
 use crate::device::trace::Command as TraceCommand;
 use crate::{
@@ -44,7 +42,7 @@ impl<A: HalApi> QueryResetMap<A> {
         std::mem::replace(&mut vec_pair.0[query as usize], true)
     }
 
-    pub fn reset_queries(&mut self, raw_encoder: &mut A::CommandEncoder) {
+    pub fn reset_queries(&mut self, raw_encoder: &mut dyn hal::DynCommandEncoder) {
         for (_, (state, query_set)) in self.map.drain() {
             debug_assert_eq!(state.len(), query_set.desc.count as usize);
 
@@ -199,7 +197,7 @@ impl<A: HalApi> QuerySet<A> {
 
     pub(super) fn validate_and_write_timestamp(
         self: &Arc<Self>,
-        raw_encoder: &mut A::CommandEncoder,
+        raw_encoder: &mut dyn hal::DynCommandEncoder,
         query_index: u32,
         reset_state: Option<&mut QueryResetMap<A>>,
     ) -> Result<(), QueryUseError> {
@@ -220,7 +218,7 @@ impl<A: HalApi> QuerySet<A> {
 
 pub(super) fn validate_and_begin_occlusion_query<A: HalApi>(
     query_set: Arc<QuerySet<A>>,
-    raw_encoder: &mut A::CommandEncoder,
+    raw_encoder: &mut dyn hal::DynCommandEncoder,
     tracker: &mut StatelessTracker<QuerySet<A>>,
     query_index: u32,
     reset_state: Option<&mut QueryResetMap<A>>,
@@ -251,7 +249,7 @@ pub(super) fn validate_and_begin_occlusion_query<A: HalApi>(
 }
 
 pub(super) fn end_occlusion_query<A: HalApi>(
-    raw_encoder: &mut A::CommandEncoder,
+    raw_encoder: &mut dyn hal::DynCommandEncoder,
     active_query: &mut Option<(Arc<QuerySet<A>>, u32)>,
 ) -> Result<(), QueryUseError> {
     if let Some((query_set, query_index)) = active_query.take() {
@@ -264,7 +262,7 @@ pub(super) fn end_occlusion_query<A: HalApi>(
 
 pub(super) fn validate_and_begin_pipeline_statistics_query<A: HalApi>(
     query_set: Arc<QuerySet<A>>,
-    raw_encoder: &mut A::CommandEncoder,
+    raw_encoder: &mut dyn hal::DynCommandEncoder,
     tracker: &mut StatelessTracker<QuerySet<A>>,
     cmd_buf: &CommandBuffer<A>,
     query_index: u32,
@@ -302,7 +300,7 @@ pub(super) fn validate_and_begin_pipeline_statistics_query<A: HalApi>(
 }
 
 pub(super) fn end_pipeline_statistics_query<A: HalApi>(
-    raw_encoder: &mut A::CommandEncoder,
+    raw_encoder: &mut dyn hal::DynCommandEncoder,
     active_query: &mut Option<(Arc<QuerySet<A>>, u32)>,
 ) -> Result<(), QueryUseError> {
     if let Some((query_set, query_index)) = active_query.take() {
@@ -477,7 +475,7 @@ impl Global {
         let raw_dst_buffer = dst_buffer.try_raw(&snatch_guard)?;
 
         unsafe {
-            raw_encoder.transition_buffers(dst_barrier.into_iter());
+            raw_encoder.transition_buffers(dst_barrier.as_slice());
             raw_encoder.copy_query_results(
                 query_set.raw(),
                 start_query..end_query,
diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index 73ce837ba9..7e7f9a1af8 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -34,7 +34,6 @@ use crate::{
 };
 
 use arrayvec::ArrayVec;
-use hal::CommandEncoder as _;
 use thiserror::Error;
 use wgt::{
     BufferAddress, BufferSize, BufferUsages, Color, DynamicOffset, IndexFormat, ShaderStages,
@@ -461,7 +460,7 @@ struct State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder, A: HalApi> {
 
     device: &'cmd_buf Arc<Device<A>>,
 
-    raw_encoder: &'raw_encoder mut A::CommandEncoder,
+    raw_encoder: &'raw_encoder mut dyn hal::DynCommandEncoder,
 
     tracker: &'cmd_buf mut Tracker<A>,
     buffer_memory_init_actions: &'cmd_buf mut Vec<BufferInitTrackerAction<A>>,
@@ -826,7 +825,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
         mut depth_stencil_attachment: Option<ArcRenderPassDepthStencilAttachment<A>>,
         mut timestamp_writes: Option<ArcPassTimestampWrites<A>>,
         mut occlusion_query_set: Option<Arc<QuerySet<A>>>,
-        encoder: &mut CommandEncoder<A>,
+        encoder: &mut CommandEncoder,
         trackers: &mut Tracker<A>,
         texture_memory_actions: &mut CommandBufferTextureMemoryActions<A>,
         pending_query_resets: &mut QueryResetMap<A>,
@@ -1255,7 +1254,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
 
     fn finish(
         mut self,
-        raw: &mut A::CommandEncoder,
+        raw: &mut dyn hal::DynCommandEncoder,
         snatch_guard: &SnatchGuard,
     ) -> Result<(UsageScope<'d, A>, SurfacesInDiscardState<A>), RenderPassErrorInner> {
         profiling::scope!("RenderPassInfo::finish");
@@ -1298,7 +1297,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
                     hal::AttachmentOps::STORE,                            // clear depth
                 )
             };
-            let desc = hal::RenderPassDescriptor {
+            let desc = hal::RenderPassDescriptor::<'_, _, dyn hal::DynTextureView> {
                 label: Some("(wgpu internal) Zero init discarded depth/stencil aspect"),
                 extent: view.render_extent.unwrap(),
                 sample_count: view.samples,
@@ -1632,8 +1631,6 @@ impl Global {
             tracker.buffers.set_size(indices.buffers.size());
             tracker.textures.set_size(indices.textures.size());
 
-            let raw = &mut encoder.raw;
-
             let mut state = State {
                 pipeline_flags: PipelineFlags::empty(),
                 binder: Binder::new(),
@@ -1649,7 +1646,7 @@ impl Global {
                 snatch_guard,
 
                 device,
-                raw_encoder: raw,
+                raw_encoder: encoder.raw.as_mut(),
                 tracker,
                 buffer_memory_init_actions,
                 texture_memory_actions,
@@ -2179,7 +2176,7 @@ fn set_index_buffer<A: HalApi>(
         size,
     };
     unsafe {
-        state.raw_encoder.set_index_buffer(bb, index_format);
+        hal::DynCommandEncoder::set_index_buffer(state.raw_encoder, bb, index_format);
     }
     Ok(())
 }
@@ -2244,7 +2241,7 @@ fn set_vertex_buffer<A: HalApi>(
         size,
     };
     unsafe {
-        state.raw_encoder.set_vertex_buffer(slot, bb);
+        hal::DynCommandEncoder::set_vertex_buffer(state.raw_encoder, slot, bb);
     }
     state.vertex.update_limits();
     Ok(())
diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs
index b8208f5dd0..4ccc762720 100644
--- a/wgpu-core/src/command/transfer.rs
+++ b/wgpu-core/src/command/transfer.rs
@@ -21,11 +21,10 @@ use crate::{
 };
 
 use arrayvec::ArrayVec;
-use hal::CommandEncoder as _;
 use thiserror::Error;
 use wgt::{BufferAddress, BufferUsages, Extent3d, TextureUsages};
 
-use std::{iter, sync::Arc};
+use std::sync::Arc;
 
 use super::{memory_init::CommandBufferTextureMemoryActions, ClearError, CommandEncoder};
 
@@ -410,7 +409,7 @@ pub(crate) fn validate_texture_copy_range(
 
 fn handle_texture_init<A: HalApi>(
     init_kind: MemoryInitKind,
-    encoder: &mut CommandEncoder<A>,
+    encoder: &mut CommandEncoder,
     trackers: &mut Tracker<A>,
     texture_memory_actions: &mut CommandBufferTextureMemoryActions<A>,
     device: &Device<A>,
@@ -445,7 +444,7 @@ fn handle_texture_init<A: HalApi>(
                 cmd_buf_raw,
                 &mut trackers.textures,
                 &device.alignments,
-                &device.zero_buffer,
+                device.zero_buffer.as_ref(),
                 snatch_guard,
             )?;
         }
@@ -459,7 +458,7 @@ fn handle_texture_init<A: HalApi>(
 /// Ensure the source texture of a transfer is in the right initialization
 /// state, and record the state for after the transfer operation.
 fn handle_src_texture_init<A: HalApi>(
-    encoder: &mut CommandEncoder<A>,
+    encoder: &mut CommandEncoder,
     trackers: &mut Tracker<A>,
     texture_memory_actions: &mut CommandBufferTextureMemoryActions<A>,
     device: &Device<A>,
@@ -487,7 +486,7 @@ fn handle_src_texture_init<A: HalApi>(
 /// Ensure the destination texture of a transfer is in the right initialization
 /// state, and record the state for after the transfer operation.
 fn handle_dst_texture_init<A: HalApi>(
-    encoder: &mut CommandEncoder<A>,
+    encoder: &mut CommandEncoder,
     trackers: &mut Tracker<A>,
     texture_memory_actions: &mut CommandBufferTextureMemoryActions<A>,
     device: &Device<A>,
@@ -687,9 +686,13 @@ impl Global {
             size: wgt::BufferSize::new(size).unwrap(),
         };
         let cmd_buf_raw = cmd_buf_data.encoder.open()?;
+        let barriers = src_barrier
+            .into_iter()
+            .chain(dst_barrier)
+            .collect::<Vec<_>>();
         unsafe {
-            cmd_buf_raw.transition_buffers(src_barrier.into_iter().chain(dst_barrier));
-            cmd_buf_raw.copy_buffer_to_buffer(src_raw, dst_raw, iter::once(region));
+            cmd_buf_raw.transition_buffers(&barriers);
+            cmd_buf_raw.copy_buffer_to_buffer(src_raw, dst_raw, &[region]);
         }
         Ok(())
     }
@@ -801,7 +804,9 @@ impl Global {
         dst_texture
             .check_usage(TextureUsages::COPY_DST)
             .map_err(TransferError::MissingTextureUsage)?;
-        let dst_barrier = dst_pending.map(|pending| pending.into_hal(dst_raw));
+        let dst_barrier = dst_pending
+            .map(|pending| pending.into_hal(dst_raw))
+            .collect::<Vec<_>>();
 
         if !dst_base.aspect.is_one() {
             return Err(TransferError::CopyAspectNotOne.into());
@@ -837,23 +842,25 @@ impl Global {
             MemoryInitKind::NeedsInitializedMemory,
         ));
 
-        let regions = (0..array_layer_count).map(|rel_array_layer| {
-            let mut texture_base = dst_base.clone();
-            texture_base.array_layer += rel_array_layer;
-            let mut buffer_layout = source.layout;
-            buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer;
-            hal::BufferTextureCopy {
-                buffer_layout,
-                texture_base,
-                size: hal_copy_size,
-            }
-        });
+        let regions = (0..array_layer_count)
+            .map(|rel_array_layer| {
+                let mut texture_base = dst_base.clone();
+                texture_base.array_layer += rel_array_layer;
+                let mut buffer_layout = source.layout;
+                buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer;
+                hal::BufferTextureCopy {
+                    buffer_layout,
+                    texture_base,
+                    size: hal_copy_size,
+                }
+            })
+            .collect::<Vec<_>>();
 
         let cmd_buf_raw = encoder.open()?;
         unsafe {
-            cmd_buf_raw.transition_textures(dst_barrier.into_iter());
-            cmd_buf_raw.transition_buffers(src_barrier.into_iter());
-            cmd_buf_raw.copy_buffer_to_texture(src_raw, dst_raw, regions);
+            cmd_buf_raw.transition_textures(&dst_barrier);
+            cmd_buf_raw.transition_buffers(src_barrier.as_slice());
+            cmd_buf_raw.copy_buffer_to_texture(src_raw, dst_raw, &regions);
         }
         Ok(())
     }
@@ -956,7 +963,9 @@ impl Global {
             }
             .into());
         }
-        let src_barrier = src_pending.map(|pending| pending.into_hal(src_raw));
+        let src_barrier = src_pending
+            .map(|pending| pending.into_hal(src_raw))
+            .collect::<Vec<_>>();
 
         let dst_buffer = hub
             .buffers
@@ -1009,26 +1018,28 @@ impl Global {
             MemoryInitKind::ImplicitlyInitialized,
         ));
 
-        let regions = (0..array_layer_count).map(|rel_array_layer| {
-            let mut texture_base = src_base.clone();
-            texture_base.array_layer += rel_array_layer;
-            let mut buffer_layout = destination.layout;
-            buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer;
-            hal::BufferTextureCopy {
-                buffer_layout,
-                texture_base,
-                size: hal_copy_size,
-            }
-        });
+        let regions = (0..array_layer_count)
+            .map(|rel_array_layer| {
+                let mut texture_base = src_base.clone();
+                texture_base.array_layer += rel_array_layer;
+                let mut buffer_layout = destination.layout;
+                buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer;
+                hal::BufferTextureCopy {
+                    buffer_layout,
+                    texture_base,
+                    size: hal_copy_size,
+                }
+            })
+            .collect::<Vec<_>>();
         let cmd_buf_raw = encoder.open()?;
         unsafe {
-            cmd_buf_raw.transition_buffers(dst_barrier.into_iter());
-            cmd_buf_raw.transition_textures(src_barrier.into_iter());
+            cmd_buf_raw.transition_buffers(dst_barrier.as_slice());
+            cmd_buf_raw.transition_textures(&src_barrier);
             cmd_buf_raw.copy_texture_to_buffer(
                 src_raw,
                 hal::TextureUses::COPY_SRC,
                 dst_raw,
-                regions,
+                &regions,
             );
         }
         Ok(())
@@ -1186,25 +1197,27 @@ impl Global {
             height: src_copy_size.height.min(dst_copy_size.height),
             depth: src_copy_size.depth.min(dst_copy_size.depth),
         };
-        let regions = (0..array_layer_count).map(|rel_array_layer| {
-            let mut src_base = src_tex_base.clone();
-            let mut dst_base = dst_tex_base.clone();
-            src_base.array_layer += rel_array_layer;
-            dst_base.array_layer += rel_array_layer;
-            hal::TextureCopy {
-                src_base,
-                dst_base,
-                size: hal_copy_size,
-            }
-        });
+        let regions = (0..array_layer_count)
+            .map(|rel_array_layer| {
+                let mut src_base = src_tex_base.clone();
+                let mut dst_base = dst_tex_base.clone();
+                src_base.array_layer += rel_array_layer;
+                dst_base.array_layer += rel_array_layer;
+                hal::TextureCopy {
+                    src_base,
+                    dst_base,
+                    size: hal_copy_size,
+                }
+            })
+            .collect::<Vec<_>>();
         let cmd_buf_raw = cmd_buf_data.encoder.open()?;
         unsafe {
-            cmd_buf_raw.transition_textures(barriers.into_iter());
+            cmd_buf_raw.transition_textures(&barriers);
             cmd_buf_raw.copy_texture_to_texture(
                 src_raw,
                 hal::TextureUses::COPY_SRC,
                 dst_raw,
-                regions,
+                &regions,
             );
         }
 
diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 7fd82e8cee..cb9f62ea03 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -24,8 +24,6 @@ use crate::{
     Label,
 };
 
-use hal::Device as _;
-
 use wgt::{BufferAddress, TextureFormat};
 
 use std::{borrow::Cow, ptr::NonNull, sync::atomic::Ordering};
@@ -282,10 +280,10 @@ impl Global {
                 .map_err(DeviceError::from)?;
             std::ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), data.len());
             if !mapping.is_coherent {
-                device.raw().flush_mapped_ranges(
-                    raw_buf,
-                    std::iter::once(offset..offset + data.len() as u64),
-                );
+                #[allow(clippy::single_range_in_vec_init)]
+                device
+                    .raw()
+                    .flush_mapped_ranges(raw_buf, &[offset..offset + data.len() as u64]);
             }
             device.raw().unmap_buffer(raw_buf);
         }
@@ -391,7 +389,7 @@ impl Global {
     /// - `hal_texture` must be initialized
     pub unsafe fn create_texture_from_hal<A: HalApi>(
         &self,
-        hal_texture: A::Texture,
+        hal_texture: Box<dyn hal::DynTexture>,
         device_id: DeviceId,
         desc: &resource::TextureDescriptor,
         id_in: Option<id::TextureId>,
@@ -1995,7 +1993,7 @@ impl Global {
                 match unsafe {
                     A::surface_as_hal(surface)
                         .unwrap()
-                        .configure(device.raw(), &hal_config)
+                        .configure(device.raw().as_any().downcast_ref().unwrap(), &hal_config)
                 } {
                     Ok(()) => (),
                     Err(error) => {
diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index 7408c184dc..1ee84be933 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -268,7 +268,7 @@ impl<A: HalApi> LifetimeTracker<A> {
     pub fn triage_submissions(
         &mut self,
         last_done: SubmissionIndex,
-        command_allocator: &crate::command::CommandAllocator<A>,
+        command_allocator: &crate::command::CommandAllocator,
     ) -> SmallVec<[SubmittedWorkDoneClosure; 1]> {
         profiling::scope!("triage_submissions");
 
@@ -351,7 +351,7 @@ impl<A: HalApi> LifetimeTracker<A> {
     #[must_use]
     pub(crate) fn handle_mapping(
         &mut self,
-        raw: &A::Device,
+        raw: &dyn hal::DynDevice,
         snatch_guard: &SnatchGuard,
     ) -> Vec<super::BufferMapPendingClosure> {
         if self.ready_to_map.is_empty() {
diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index 1f890de902..c6f88b2634 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -12,13 +12,12 @@ use crate::{
 };
 
 use arrayvec::ArrayVec;
-use hal::Device as _;
 use smallvec::SmallVec;
 use std::os::raw::c_char;
 use thiserror::Error;
 use wgt::{BufferAddress, DeviceLostReason, TextureFormat};
 
-use std::{iter, num::NonZeroU32};
+use std::num::NonZeroU32;
 
 pub mod any_device;
 pub(crate) mod bgl;
@@ -301,7 +300,7 @@ impl DeviceLostClosure {
 }
 
 fn map_buffer<A: HalApi>(
-    raw: &A::Device,
+    raw: &dyn hal::DynDevice,
     buffer: &Buffer<A>,
     offset: BufferAddress,
     size: BufferAddress,
@@ -315,8 +314,9 @@ fn map_buffer<A: HalApi>(
     };
 
     if !mapping.is_coherent && kind == HostMap::Read {
+        #[allow(clippy::single_range_in_vec_init)]
         unsafe {
-            raw.invalidate_mapped_ranges(raw_buffer, iter::once(offset..offset + size));
+            raw.invalidate_mapped_ranges(raw_buffer, &[offset..offset + size]);
         }
     }
 
@@ -350,7 +350,7 @@ fn map_buffer<A: HalApi>(
         mapped[fill_range].fill(0);
 
         if !mapping.is_coherent && kind == HostMap::Read {
-            unsafe { raw.flush_mapped_ranges(raw_buffer, iter::once(uninitialized)) };
+            unsafe { raw.flush_mapped_ranges(raw_buffer, &[uninitialized]) };
         }
     }
 
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 27f13e2f46..fbcbbdcbed 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -25,7 +25,6 @@ use crate::{
     FastHashMap, SubmissionIndex,
 };
 
-use hal::{CommandEncoder as _, Device as _, Queue as _};
 use smallvec::SmallVec;
 
 use std::{
@@ -39,20 +38,20 @@ use thiserror::Error;
 use super::Device;
 
 pub struct Queue<A: HalApi> {
-    raw: ManuallyDrop<A::Queue>,
+    raw: ManuallyDrop<Box<dyn hal::DynQueue>>,
     pub(crate) device: Arc<Device<A>>,
 }
 
 impl<A: HalApi> Queue<A> {
-    pub(crate) fn new(device: Arc<Device<A>>, raw: A::Queue) -> Self {
+    pub(crate) fn new(device: Arc<Device<A>>, raw: Box<dyn hal::DynQueue>) -> Self {
         Queue {
             raw: ManuallyDrop::new(raw),
             device,
         }
     }
 
-    pub(crate) fn raw(&self) -> &A::Queue {
-        &self.raw
+    pub(crate) fn raw(&self) -> &dyn hal::DynQueue {
+        self.raw.as_ref()
     }
 }
 
@@ -154,8 +153,8 @@ pub enum TempResource<A: HalApi> {
 /// [`CommandBuffer`]: hal::Api::CommandBuffer
 /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder
 pub(crate) struct EncoderInFlight<A: HalApi> {
-    raw: A::CommandEncoder,
-    cmd_buffers: Vec<A::CommandBuffer>,
+    raw: Box<dyn hal::DynCommandEncoder>,
+    cmd_buffers: Vec<Box<dyn hal::DynCommandBuffer>>,
     pub(crate) trackers: Tracker<A>,
 
     /// These are the buffers that have been tracked by `PendingWrites`.
@@ -169,8 +168,8 @@ impl<A: HalApi> EncoderInFlight<A> {
     ///
     /// Return the command encoder, fully reset and ready to be
     /// reused.
-    pub(crate) unsafe fn land(mut self) -> A::CommandEncoder {
-        unsafe { self.raw.reset_all(self.cmd_buffers.into_iter()) };
+    pub(crate) unsafe fn land(mut self) -> Box<dyn hal::DynCommandEncoder> {
+        unsafe { self.raw.reset_all(self.cmd_buffers) };
         {
             // This involves actually decrementing the ref count of all command buffer
             // resources, so can be _very_ expensive.
@@ -205,7 +204,7 @@ impl<A: HalApi> EncoderInFlight<A> {
 /// All uses of [`StagingBuffer`]s end up here.
 #[derive(Debug)]
 pub(crate) struct PendingWrites<A: HalApi> {
-    pub command_encoder: A::CommandEncoder,
+    pub command_encoder: Box<dyn hal::DynCommandEncoder>,
 
     /// True if `command_encoder` is in the "recording" state, as
     /// described in the docs for the [`wgpu_hal::CommandEncoder`]
@@ -220,7 +219,7 @@ pub(crate) struct PendingWrites<A: HalApi> {
 }
 
 impl<A: HalApi> PendingWrites<A> {
-    pub fn new(command_encoder: A::CommandEncoder) -> Self {
+    pub fn new(command_encoder: Box<dyn hal::DynCommandEncoder>) -> Self {
         Self {
             command_encoder,
             is_recording: false,
@@ -230,7 +229,7 @@ impl<A: HalApi> PendingWrites<A> {
         }
     }
 
-    pub fn dispose(mut self, device: &A::Device) {
+    pub fn dispose(mut self, device: &dyn hal::DynDevice) {
         unsafe {
             if self.is_recording {
                 self.command_encoder.discard_encoding();
@@ -270,9 +269,9 @@ impl<A: HalApi> PendingWrites<A> {
 
     fn pre_submit(
         &mut self,
-        command_allocator: &CommandAllocator<A>,
-        device: &A::Device,
-        queue: &A::Queue,
+        command_allocator: &CommandAllocator,
+        device: &dyn hal::DynDevice,
+        queue: &dyn hal::DynQueue,
     ) -> Result<Option<EncoderInFlight<A>>, DeviceError> {
         if self.is_recording {
             let pending_buffers = mem::take(&mut self.dst_buffers);
@@ -298,7 +297,7 @@ impl<A: HalApi> PendingWrites<A> {
         }
     }
 
-    pub fn activate(&mut self) -> &mut A::CommandEncoder {
+    pub fn activate(&mut self) -> &mut dyn hal::DynCommandEncoder {
         if !self.is_recording {
             unsafe {
                 self.command_encoder
@@ -307,7 +306,7 @@ impl<A: HalApi> PendingWrites<A> {
             }
             self.is_recording = true;
         }
-        &mut self.command_encoder
+        self.command_encoder.as_mut()
     }
 
     pub fn deactivate(&mut self) {
@@ -586,11 +585,12 @@ impl Global {
             buffer: staging_buffer.raw(),
             usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC,
         })
-        .chain(transition.map(|pending| pending.into_hal(&dst, &snatch_guard)));
+        .chain(transition.map(|pending| pending.into_hal(&dst, &snatch_guard)))
+        .collect::<Vec<_>>();
         let encoder = pending_writes.activate();
         unsafe {
-            encoder.transition_buffers(barriers);
-            encoder.copy_buffer_to_buffer(staging_buffer.raw(), dst_raw, iter::once(region));
+            encoder.transition_buffers(&barriers);
+            encoder.copy_buffer_to_buffer(staging_buffer.raw(), dst_raw, &[region]);
         }
 
         pending_writes.insert_buffer(&dst);
@@ -723,7 +723,7 @@ impl Global {
                         encoder,
                         &mut trackers.textures,
                         &device.alignments,
-                        &device.zero_buffer,
+                        device.zero_buffer.as_ref(),
                         &device.snatchable_lock.read(),
                     )
                     .map_err(QueueWriteError::from)?;
@@ -802,24 +802,26 @@ impl Global {
 
         let staging_buffer = staging_buffer.flush();
 
-        let regions = (0..array_layer_count).map(|array_layer_offset| {
-            let mut texture_base = dst_base.clone();
-            texture_base.array_layer += array_layer_offset;
-            hal::BufferTextureCopy {
-                buffer_layout: wgt::ImageDataLayout {
-                    offset: array_layer_offset as u64
-                        * rows_per_image as u64
-                        * stage_bytes_per_row as u64,
-                    bytes_per_row: Some(stage_bytes_per_row),
-                    rows_per_image: Some(rows_per_image),
-                },
-                texture_base,
-                size: hal_copy_size,
-            }
-        });
+        let regions = (0..array_layer_count)
+            .map(|array_layer_offset| {
+                let mut texture_base = dst_base.clone();
+                texture_base.array_layer += array_layer_offset;
+                hal::BufferTextureCopy {
+                    buffer_layout: wgt::ImageDataLayout {
+                        offset: array_layer_offset as u64
+                            * rows_per_image as u64
+                            * stage_bytes_per_row as u64,
+                        bytes_per_row: Some(stage_bytes_per_row),
+                        rows_per_image: Some(rows_per_image),
+                    },
+                    texture_base,
+                    size: hal_copy_size,
+                }
+            })
+            .collect::<Vec<_>>();
 
         {
-            let barrier = hal::BufferBarrier {
+            let buffer_barrier = hal::BufferBarrier {
                 buffer: staging_buffer.raw(),
                 usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC,
             };
@@ -829,10 +831,14 @@ impl Global {
                 trackers
                     .textures
                     .set_single(&dst, selector, hal::TextureUses::COPY_DST);
+            let texture_barriers = transition
+                .map(|pending| pending.into_hal(dst_raw))
+                .collect::<Vec<_>>();
+
             unsafe {
-                encoder.transition_textures(transition.map(|pending| pending.into_hal(dst_raw)));
-                encoder.transition_buffers(iter::once(barrier));
-                encoder.copy_buffer_to_texture(staging_buffer.raw(), dst_raw, regions);
+                encoder.transition_textures(&texture_barriers);
+                encoder.transition_buffers(&[buffer_barrier]);
+                encoder.copy_buffer_to_texture(staging_buffer.raw(), dst_raw, &regions);
             }
         }
 
@@ -990,7 +996,7 @@ impl Global {
                         encoder,
                         &mut trackers.textures,
                         &device.alignments,
-                        &device.zero_buffer,
+                        device.zero_buffer.as_ref(),
                         &device.snatchable_lock.read(),
                     )
                     .map_err(QueueWriteError::from)?;
@@ -1185,7 +1191,7 @@ impl Global {
                         //Note: stateless trackers are not merged:
                         // device already knows these resources exist.
                         CommandBuffer::insert_barriers_from_device_tracker(
-                            &mut baked.encoder,
+                            baked.encoder.as_mut(),
                             &mut *trackers,
                             &baked.trackers,
                             &snatch_guard,
@@ -1212,9 +1218,10 @@ impl Global {
                                 .set_from_usage_scope_and_drain_transitions(
                                     &used_surface_textures,
                                     &snatch_guard,
-                                );
+                                )
+                                .collect::<Vec<_>>();
                             let present = unsafe {
-                                baked.encoder.transition_textures(texture_barriers);
+                                baked.encoder.transition_textures(&texture_barriers);
                                 baked.encoder.end_encoding().unwrap()
                             };
                             baked.list.push(present);
@@ -1262,11 +1269,12 @@ impl Global {
                         .set_from_usage_scope_and_drain_transitions(
                             &used_surface_textures,
                             &snatch_guard,
-                        );
+                        )
+                        .collect::<Vec<_>>();
                     unsafe {
                         pending_writes
                             .command_encoder
-                            .transition_textures(texture_barriers);
+                            .transition_textures(&texture_barriers);
                     };
                 }
             }
@@ -1279,16 +1287,18 @@ impl Global {
 
             let hal_command_buffers = active_executions
                 .iter()
-                .flat_map(|e| e.cmd_buffers.iter())
+                .flat_map(|e| e.cmd_buffers.iter().map(|b| b.as_ref()))
                 .collect::<Vec<_>>();
 
             {
                 let mut submit_surface_textures =
-                    SmallVec::<[_; 2]>::with_capacity(submit_surface_textures_owned.len());
+                    SmallVec::<[&dyn hal::DynSurfaceTexture; 2]>::with_capacity(
+                        submit_surface_textures_owned.len(),
+                    );
 
                 for texture in submit_surface_textures_owned.values() {
                     let raw = match texture.inner.get(&snatch_guard) {
-                        Some(TextureInner::Surface { raw, .. }) => raw,
+                        Some(TextureInner::Surface { raw, .. }) => raw.as_ref(),
                         _ => unreachable!(),
                     };
                     submit_surface_textures.push(raw);
@@ -1300,7 +1310,7 @@ impl Global {
                         .submit(
                             &hal_command_buffers,
                             &submit_surface_textures,
-                            (&mut fence, submit_index),
+                            (fence.as_mut(), submit_index),
                         )
                         .map_err(DeviceError::from)?;
                 }
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 104f54a40a..7801ccd059 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -36,7 +36,6 @@ use crate::{
 };
 
 use arrayvec::ArrayVec;
-use hal::{CommandEncoder as _, Device as _};
 use once_cell::sync::OnceCell;
 
 use smallvec::SmallVec;
@@ -45,7 +44,6 @@ use wgt::{DeviceLostReason, TextureFormat, TextureSampleType, TextureViewDimensi
 
 use std::{
     borrow::Cow,
-    iter,
     mem::ManuallyDrop,
     num::NonZeroU32,
     sync::{
@@ -80,15 +78,15 @@ use super::{
 /// When locking pending_writes please check that trackers is not locked
 /// trackers should be locked only when needed for the shortest time possible
 pub struct Device<A: HalApi> {
-    raw: ManuallyDrop<A::Device>,
+    raw: ManuallyDrop<Box<dyn hal::DynDevice>>,
     pub(crate) adapter: Arc<Adapter<A>>,
     pub(crate) queue: OnceCell<Weak<Queue<A>>>,
-    queue_to_drop: OnceCell<A::Queue>,
-    pub(crate) zero_buffer: ManuallyDrop<A::Buffer>,
+    queue_to_drop: OnceCell<Box<dyn hal::DynQueue>>,
+    pub(crate) zero_buffer: ManuallyDrop<Box<dyn hal::DynBuffer>>,
     /// The `label` from the descriptor used to create the resource.
     label: String,
 
-    pub(crate) command_allocator: command::CommandAllocator<A>,
+    pub(crate) command_allocator: command::CommandAllocator,
 
     /// The index of the last command submission that was attempted.
     ///
@@ -112,7 +110,7 @@ pub struct Device<A: HalApi> {
 
     // NOTE: if both are needed, the `snatchable_lock` must be consistently acquired before the
     // `fence` lock to avoid deadlocks.
-    pub(crate) fence: RwLock<ManuallyDrop<A::Fence>>,
+    pub(crate) fence: RwLock<ManuallyDrop<Box<dyn hal::DynFence>>>,
     pub(crate) snatchable_lock: SnatchLock,
 
     /// Is this device valid? Valid is closely associated with "lose the device",
@@ -177,8 +175,8 @@ impl<A: HalApi> Drop for Device<A> {
         let pending_writes = unsafe { ManuallyDrop::take(&mut self.pending_writes.lock()) };
         // SAFETY: We are in the Drop impl and we don't use self.fence anymore after this point.
         let fence = unsafe { ManuallyDrop::take(&mut self.fence.write()) };
-        pending_writes.dispose(&raw);
-        self.command_allocator.dispose(&raw);
+        pending_writes.dispose(raw.as_ref());
+        self.command_allocator.dispose(raw.as_ref());
         unsafe {
             raw.destroy_buffer(zero_buffer);
             raw.destroy_fence(fence);
@@ -197,8 +195,8 @@ pub enum CreateDeviceError {
 }
 
 impl<A: HalApi> Device<A> {
-    pub(crate) fn raw(&self) -> &A::Device {
-        &self.raw
+    pub(crate) fn raw(&self) -> &dyn hal::DynDevice {
+        self.raw.as_ref()
     }
     pub(crate) fn require_features(&self, feature: wgt::Features) -> Result<(), MissingFeatures> {
         if self.features.contains(feature) {
@@ -222,8 +220,8 @@ impl<A: HalApi> Device<A> {
 
 impl<A: HalApi> Device<A> {
     pub(crate) fn new(
-        raw_device: A::Device,
-        raw_queue: &A::Queue,
+        raw_device: Box<dyn hal::DynDevice>,
+        raw_queue: &dyn hal::DynQueue,
         adapter: &Arc<Adapter<A>>,
         desc: &DeviceDescriptor,
         trace_path: Option<&std::path::Path>,
@@ -238,7 +236,7 @@ impl<A: HalApi> Device<A> {
 
         let command_allocator = command::CommandAllocator::new();
         let pending_encoder = command_allocator
-            .acquire_encoder(&raw_device, raw_queue)
+            .acquire_encoder(raw_device.as_ref(), raw_queue)
             .map_err(|_| CreateDeviceError::OutOfMemory)?;
         let mut pending_writes = PendingWrites::<A>::new(pending_encoder);
 
@@ -257,19 +255,19 @@ impl<A: HalApi> Device<A> {
         unsafe {
             pending_writes
                 .command_encoder
-                .transition_buffers(iter::once(hal::BufferBarrier {
-                    buffer: &zero_buffer,
+                .transition_buffers(&[hal::BufferBarrier {
+                    buffer: zero_buffer.as_ref(),
                     usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST,
-                }));
+                }]);
             pending_writes
                 .command_encoder
-                .clear_buffer(&zero_buffer, 0..ZERO_BUFFER_SIZE);
+                .clear_buffer(zero_buffer.as_ref(), 0..ZERO_BUFFER_SIZE);
             pending_writes
                 .command_encoder
-                .transition_buffers(iter::once(hal::BufferBarrier {
-                    buffer: &zero_buffer,
+                .transition_buffers(&[hal::BufferBarrier {
+                    buffer: zero_buffer.as_ref(),
                     usage: hal::BufferUses::COPY_DST..hal::BufferUses::COPY_SRC,
-                }));
+                }]);
         }
 
         let alignments = adapter.raw.capabilities.alignments.clone();
@@ -335,7 +333,7 @@ impl<A: HalApi> Device<A> {
         }
     }
 
-    pub(crate) fn release_queue(&self, queue: A::Queue) {
+    pub(crate) fn release_queue(&self, queue: Box<dyn hal::DynQueue>) {
         assert!(self.queue_to_drop.set(queue).is_ok());
     }
 
@@ -364,7 +362,6 @@ impl<A: HalApi> Device<A> {
                     resource_log!("Destroy raw {}", view.error_ident());
 
                     unsafe {
-                        use hal::Device;
                         self.raw().destroy_texture_view(raw_view);
                     }
                 }
@@ -380,7 +377,6 @@ impl<A: HalApi> Device<A> {
                     resource_log!("Destroy raw {}", bind_group.error_ident());
 
                     unsafe {
-                        use hal::Device;
                         self.raw().destroy_bind_group(raw_bind_group);
                     }
                 }
@@ -411,7 +407,7 @@ impl<A: HalApi> Device<A> {
     ///   return it to our callers.)
     pub(crate) fn maintain<'this>(
         &'this self,
-        fence: crate::lock::RwLockReadGuard<ManuallyDrop<A::Fence>>,
+        fence: crate::lock::RwLockReadGuard<ManuallyDrop<Box<dyn hal::DynFence>>>,
         maintain: wgt::Maintain<crate::SubmissionIndex>,
         snatch_guard: SnatchGuard,
     ) -> Result<(UserClosures, bool), WaitIdleError> {
@@ -440,7 +436,7 @@ impl<A: HalApi> Device<A> {
                 .load(Ordering::Acquire),
             wgt::Maintain::Poll => unsafe {
                 self.raw()
-                    .get_fence_value(&fence)
+                    .get_fence_value(fence.as_ref())
                     .map_err(DeviceError::from)?
             },
         };
@@ -449,7 +445,7 @@ impl<A: HalApi> Device<A> {
         if maintain.is_wait() {
             unsafe {
                 self.raw()
-                    .wait(&fence, submission_index, CLEANUP_WAIT_MS)
+                    .wait(fence.as_ref(), submission_index, CLEANUP_WAIT_MS)
                     .map_err(DeviceError::from)?
             };
         }
@@ -654,7 +650,7 @@ impl<A: HalApi> Device<A> {
 
     pub(crate) fn create_texture_from_hal(
         self: &Arc<Self>,
-        hal_texture: A::Texture,
+        hal_texture: Box<dyn hal::DynTexture>,
         desc: &resource::TextureDescriptor,
     ) -> Result<Arc<Texture<A>>, resource::CreateTextureError> {
         let format_features = self
@@ -687,7 +683,7 @@ impl<A: HalApi> Device<A> {
         desc: &resource::BufferDescriptor,
     ) -> Arc<Buffer<A>> {
         let buffer = Buffer {
-            raw: Snatchable::new(hal_buffer),
+            raw: Snatchable::new(Box::new(hal_buffer)),
             device: self.clone(),
             usage: desc.usage,
             size: desc.size,
@@ -972,8 +968,10 @@ impl<A: HalApi> Device<A> {
                                 },
                             };
                             clear_views.push(ManuallyDrop::new(
-                                unsafe { self.raw().create_texture_view(&raw_texture, &desc) }
-                                    .map_err(DeviceError::from)?,
+                                unsafe {
+                                    self.raw().create_texture_view(raw_texture.as_ref(), &desc)
+                                }
+                                .map_err(DeviceError::from)?,
                             ));
                         };
                     }
@@ -1889,7 +1887,8 @@ impl<A: HalApi> Device<A> {
         used: &mut BindGroupStates<A>,
         limits: &wgt::Limits,
         snatch_guard: &'a SnatchGuard<'a>,
-    ) -> Result<hal::BufferBinding<'a, A::Buffer>, binding_model::CreateBindGroupError> {
+    ) -> Result<hal::BufferBinding<'a, dyn hal::DynBuffer>, binding_model::CreateBindGroupError>
+    {
         use crate::binding_model::CreateBindGroupError as Error;
 
         let (binding_ty, dynamic, min_size) = match decl.ty {
@@ -2021,7 +2020,7 @@ impl<A: HalApi> Device<A> {
         binding: u32,
         decl: &wgt::BindGroupLayoutEntry,
         sampler: &'a Arc<Sampler<A>>,
-    ) -> Result<&'a A::Sampler, binding_model::CreateBindGroupError> {
+    ) -> Result<&'a dyn hal::DynSampler, binding_model::CreateBindGroupError> {
         use crate::binding_model::CreateBindGroupError as Error;
 
         used.samplers.insert_single(sampler.clone());
@@ -2072,7 +2071,8 @@ impl<A: HalApi> Device<A> {
         used: &mut BindGroupStates<A>,
         used_texture_ranges: &mut Vec<TextureInitTrackerAction<A>>,
         snatch_guard: &'a SnatchGuard<'a>,
-    ) -> Result<hal::TextureBinding<'a, A::TextureView>, binding_model::CreateBindGroupError> {
+    ) -> Result<hal::TextureBinding<'a, dyn hal::DynTextureView>, binding_model::CreateBindGroupError>
+    {
         view.same_device(self)?;
 
         let (pub_usage, internal_use) = self.texture_use_parameters(
@@ -2389,14 +2389,14 @@ impl<A: HalApi> Device<A> {
                     .unwrap();
                 match (sample_type, compat_sample_type) {
                     (Tst::Uint, Tst::Uint) |
-                    (Tst::Sint, Tst::Sint) |
-                    (Tst::Depth, Tst::Depth) |
-                    // if we expect non-filterable, accept anything float
-                    (Tst::Float { filterable: false }, Tst::Float { .. }) |
-                    // if we expect filterable, require it
-                    (Tst::Float { filterable: true }, Tst::Float { filterable: true }) |
-                    // if we expect non-filterable, also accept depth
-                    (Tst::Float { filterable: false }, Tst::Depth) => {}
+                        (Tst::Sint, Tst::Sint) |
+                        (Tst::Depth, Tst::Depth) |
+                        // if we expect non-filterable, accept anything float
+                        (Tst::Float { filterable: false }, Tst::Float { .. }) |
+                        // if we expect filterable, require it
+                        (Tst::Float { filterable: true }, Tst::Float { filterable: true }) |
+                        // if we expect non-filterable, also accept depth
+                        (Tst::Float { filterable: false }, Tst::Depth) => {}
                     // if we expect filterable, also accept Float that is defined as
                     // unfilterable if filterable feature is explicitly enabled (only hit
                     // if wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES is
@@ -2999,7 +2999,7 @@ impl<A: HalApi> Device<A> {
                                     break;
                                 } else {
                                     return Err(pipeline::CreateRenderPipelineError
-                            ::BlendFactorOnUnsupportedTarget { factor, target: i as u32 });
+                                        ::BlendFactorOnUnsupportedTarget { factor, target: i as u32 });
                                 }
                             }
                         }
@@ -3491,9 +3491,9 @@ impl<A: HalApi> Device<A> {
         submission_index: crate::SubmissionIndex,
     ) -> Result<(), DeviceError> {
         let fence = self.fence.read();
-        let last_done_index = unsafe { self.raw().get_fence_value(&fence)? };
+        let last_done_index = unsafe { self.raw().get_fence_value(fence.as_ref())? };
         if last_done_index < submission_index {
-            unsafe { self.raw().wait(&fence, submission_index, !0)? };
+            unsafe { self.raw().wait(fence.as_ref(), submission_index, !0)? };
             drop(fence);
             let closures = self
                 .lock_life()
@@ -3622,7 +3622,7 @@ impl<A: HalApi> Device<A> {
     pub(crate) fn destroy_command_buffer(&self, mut cmd_buf: command::CommandBuffer<A>) {
         let mut baked = cmd_buf.extract_baked_commands();
         unsafe {
-            baked.encoder.reset_all(baked.list.into_iter());
+            baked.encoder.reset_all(baked.list);
         }
         unsafe {
             self.raw().destroy_command_encoder(baked.encoder);
@@ -3637,7 +3637,8 @@ impl<A: HalApi> Device<A> {
             .load(Ordering::Acquire);
         if let Err(error) = unsafe {
             let fence = self.fence.read();
-            self.raw().wait(&fence, current_index, CLEANUP_WAIT_MS)
+            self.raw()
+                .wait(fence.as_ref(), current_index, CLEANUP_WAIT_MS)
         } {
             log::error!("failed to wait for the device: {error}");
         }
diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs
index 1357a2e423..02049a4c31 100644
--- a/wgpu-core/src/hub.rs
+++ b/wgpu-core/src/hub.rs
@@ -244,7 +244,8 @@ impl<A: HalApi> Hub<A> {
                     if let Some(device) = present.device.downcast_ref::<A>() {
                         let suf = A::surface_as_hal(surface);
                         unsafe {
-                            suf.unwrap().unconfigure(device.raw());
+                            suf.unwrap()
+                                .unconfigure(device.raw().as_any().downcast_ref().unwrap());
                         }
                     }
                 }
diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index 9ddbaae2d5..4c1b9960c1 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -267,7 +267,7 @@ impl<A: HalApi> Adapter<A> {
         api_log!("Adapter::create_device");
 
         if let Ok(device) = Device::new(
-            hal_device.device,
+            Box::new(hal_device.device),
             &hal_device.queue,
             self,
             desc,
@@ -275,7 +275,7 @@ impl<A: HalApi> Adapter<A> {
             instance_flags,
         ) {
             let device = Arc::new(device);
-            let queue = Arc::new(Queue::new(device.clone(), hal_device.queue));
+            let queue = Arc::new(Queue::new(device.clone(), Box::new(hal_device.queue)));
             device.set_queue(&queue);
             return Ok((device, queue));
         }
@@ -662,7 +662,7 @@ impl Global {
             if let Some(surface) = surface {
                 if let Some(device) = present.device.downcast_ref::<A>() {
                     use hal::Surface;
-                    unsafe { surface.unconfigure(device.raw()) };
+                    unsafe { surface.unconfigure(device.raw().as_any().downcast_ref().unwrap()) };
                 }
             }
         }
diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs
index 59226051e5..80929c3b87 100644
--- a/wgpu-core/src/pipeline.rs
+++ b/wgpu-core/src/pipeline.rs
@@ -47,7 +47,7 @@ pub struct ShaderModuleDescriptor<'a> {
 
 #[derive(Debug)]
 pub struct ShaderModule<A: HalApi> {
-    pub(crate) raw: ManuallyDrop<A::ShaderModule>,
+    pub(crate) raw: ManuallyDrop<Box<dyn hal::DynShaderModule>>,
     pub(crate) device: Arc<Device<A>>,
     pub(crate) interface: Option<validation::Interface>,
     /// The `label` from the descriptor used to create the resource.
@@ -60,7 +60,6 @@ impl<A: HalApi> Drop for ShaderModule<A> {
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
         let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
         unsafe {
-            use hal::Device;
             self.device.raw().destroy_shader_module(raw);
         }
     }
@@ -72,8 +71,8 @@ crate::impl_parent_device!(ShaderModule);
 crate::impl_storage_item!(ShaderModule);
 
 impl<A: HalApi> ShaderModule<A> {
-    pub(crate) fn raw(&self) -> &A::ShaderModule {
-        &self.raw
+    pub(crate) fn raw(&self) -> &dyn hal::DynShaderModule {
+        self.raw.as_ref()
     }
 
     pub(crate) fn finalize_entry_point_name(
@@ -242,7 +241,7 @@ pub enum CreateComputePipelineError {
 
 #[derive(Debug)]
 pub struct ComputePipeline<A: HalApi> {
-    pub(crate) raw: ManuallyDrop<A::ComputePipeline>,
+    pub(crate) raw: ManuallyDrop<Box<dyn hal::DynComputePipeline>>,
     pub(crate) layout: Arc<PipelineLayout<A>>,
     pub(crate) device: Arc<Device<A>>,
     pub(crate) _shader_module: Arc<ShaderModule<A>>,
@@ -258,7 +257,6 @@ impl<A: HalApi> Drop for ComputePipeline<A> {
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
         let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
         unsafe {
-            use hal::Device;
             self.device.raw().destroy_compute_pipeline(raw);
         }
     }
@@ -271,8 +269,8 @@ crate::impl_storage_item!(ComputePipeline);
 crate::impl_trackable!(ComputePipeline);
 
 impl<A: HalApi> ComputePipeline<A> {
-    pub(crate) fn raw(&self) -> &A::ComputePipeline {
-        &self.raw
+    pub(crate) fn raw(&self) -> &dyn hal::DynComputePipeline {
+        self.raw.as_ref()
     }
 }
 
@@ -301,7 +299,7 @@ impl From<hal::PipelineCacheError> for CreatePipelineCacheError {
 
 #[derive(Debug)]
 pub struct PipelineCache<A: HalApi> {
-    pub(crate) raw: ManuallyDrop<A::PipelineCache>,
+    pub(crate) raw: ManuallyDrop<Box<dyn hal::DynPipelineCache>>,
     pub(crate) device: Arc<Device<A>>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
@@ -313,7 +311,6 @@ impl<A: HalApi> Drop for PipelineCache<A> {
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
         let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
         unsafe {
-            use hal::Device;
             self.device.raw().destroy_pipeline_cache(raw);
         }
     }
@@ -325,8 +322,8 @@ crate::impl_parent_device!(PipelineCache);
 crate::impl_storage_item!(PipelineCache);
 
 impl<A: HalApi> PipelineCache<A> {
-    pub(crate) fn raw(&self) -> &A::PipelineCache {
-        &self.raw
+    pub(crate) fn raw(&self) -> &dyn hal::DynPipelineCache {
+        self.raw.as_ref()
     }
 }
 
@@ -592,7 +589,7 @@ impl Default for VertexStep {
 
 #[derive(Debug)]
 pub struct RenderPipeline<A: HalApi> {
-    pub(crate) raw: ManuallyDrop<A::RenderPipeline>,
+    pub(crate) raw: ManuallyDrop<Box<dyn hal::DynRenderPipeline>>,
     pub(crate) device: Arc<Device<A>>,
     pub(crate) layout: Arc<PipelineLayout<A>>,
     pub(crate) _shader_modules:
@@ -613,7 +610,6 @@ impl<A: HalApi> Drop for RenderPipeline<A> {
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
         let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
         unsafe {
-            use hal::Device;
             self.device.raw().destroy_render_pipeline(raw);
         }
     }
@@ -626,7 +622,7 @@ crate::impl_storage_item!(RenderPipeline);
 crate::impl_trackable!(RenderPipeline);
 
 impl<A: HalApi> RenderPipeline<A> {
-    pub(crate) fn raw(&self) -> &A::RenderPipeline {
-        &self.raw
+    pub(crate) fn raw(&self) -> &dyn hal::DynRenderPipeline {
+        self.raw.as_ref()
     }
 }
diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs
index 3521f04388..d0f09a97f7 100644
--- a/wgpu-core/src/present.rs
+++ b/wgpu-core/src/present.rs
@@ -9,7 +9,7 @@ When this texture is presented, we remove it from the device tracker as well as
 extract it from the hub.
 !*/
 
-use std::{borrow::Borrow, mem::ManuallyDrop, sync::Arc};
+use std::{mem::ManuallyDrop, sync::Arc};
 
 #[cfg(feature = "trace")]
 use crate::device::trace::Action;
@@ -23,7 +23,6 @@ use crate::{
     resource::{self, Trackable},
 };
 
-use hal::{Queue as _, Surface as _};
 use thiserror::Error;
 use wgt::SurfaceStatus as Status;
 
@@ -156,9 +155,10 @@ impl Global {
 
         let suf = A::surface_as_hal(surface.as_ref());
         let (texture_id, status) = match unsafe {
+            use hal::DynSurface;
             suf.unwrap().acquire_texture(
                 Some(std::time::Duration::from_millis(FRAME_TIMEOUT_MS as u64)),
-                &fence,
+                fence.as_ref(),
             )
         } {
             Ok(Some(ast)) => {
@@ -195,11 +195,9 @@ impl Global {
                     range: wgt::ImageSubresourceRange::default(),
                 };
                 let clear_view = unsafe {
-                    hal::Device::create_texture_view(
-                        device.raw(),
-                        ast.texture.borrow(),
-                        &clear_view_desc,
-                    )
+                    device
+                        .raw()
+                        .create_texture_view(ast.texture.as_ref().borrow(), &clear_view_desc)
                 }
                 .map_err(DeviceError::from)?;
 
@@ -386,6 +384,7 @@ impl Global {
                 match texture.inner.snatch(exclusive_snatch_guard).unwrap() {
                     resource::TextureInner::Surface { raw, parent_id } => {
                         if surface_id == parent_id {
+                            use hal::DynSurface;
                             unsafe { suf.unwrap().discard_texture(raw) };
                         } else {
                             log::warn!("Surface texture is outdated");
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index f6742ba825..6a3c02ece4 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -17,14 +17,12 @@ use crate::{
     Label, LabelHelpers,
 };
 
-use hal::CommandEncoder;
 use smallvec::SmallVec;
 use thiserror::Error;
 
 use std::{
     borrow::{Borrow, Cow},
     fmt::Debug,
-    iter,
     mem::{self, ManuallyDrop},
     ops::Range,
     ptr::NonNull,
@@ -426,7 +424,7 @@ pub type BufferDescriptor<'a> = wgt::BufferDescriptor<Label<'a>>;
 
 #[derive(Debug)]
 pub struct Buffer<A: HalApi> {
-    pub(crate) raw: Snatchable<A::Buffer>,
+    pub(crate) raw: Snatchable<Box<dyn hal::DynBuffer>>,
     pub(crate) device: Arc<Device<A>>,
     pub(crate) usage: wgt::BufferUsages,
     pub(crate) size: wgt::BufferAddress,
@@ -443,7 +441,6 @@ impl<A: HalApi> Drop for Buffer<A> {
         if let Some(raw) = self.raw.take() {
             resource_log!("Destroy raw {}", self.error_ident());
             unsafe {
-                use hal::Device;
                 self.device.raw().destroy_buffer(raw);
             }
         }
@@ -451,16 +448,17 @@ impl<A: HalApi> Drop for Buffer<A> {
 }
 
 impl<A: HalApi> Buffer<A> {
-    pub(crate) fn raw<'a>(&'a self, guard: &'a SnatchGuard) -> Option<&'a A::Buffer> {
-        self.raw.get(guard)
+    pub(crate) fn raw<'a>(&'a self, guard: &'a SnatchGuard) -> Option<&'a dyn hal::DynBuffer> {
+        self.raw.get(guard).map(|b| b.as_ref())
     }
 
     pub(crate) fn try_raw<'a>(
         &'a self,
         guard: &'a SnatchGuard,
-    ) -> Result<&A::Buffer, DestroyedResourceError> {
+    ) -> Result<&dyn hal::DynBuffer, DestroyedResourceError> {
         self.raw
             .get(guard)
+            .map(|raw| raw.as_ref())
             .ok_or_else(|| DestroyedResourceError(self.error_ident()))
     }
 
@@ -611,8 +609,6 @@ impl<A: HalApi> Buffer<A> {
         self: &Arc<Self>,
         #[cfg(feature = "trace")] buffer_id: BufferId,
     ) -> Result<Option<BufferMapPendingClosure>, BufferAccessError> {
-        use hal::Device;
-
         let device = &self.device;
         let snatch_guard = device.snatchable_lock.read();
         let raw_buf = self.try_raw(&snatch_guard)?;
@@ -642,20 +638,18 @@ impl<A: HalApi> Buffer<A> {
                     buffer: staging_buffer.raw(),
                     usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC,
                 };
-                let transition_dst = hal::BufferBarrier {
+                let transition_dst = hal::BufferBarrier::<dyn hal::DynBuffer> {
                     buffer: raw_buf,
                     usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST,
                 };
                 let encoder = pending_writes.activate();
                 unsafe {
-                    encoder.transition_buffers(
-                        iter::once(transition_src).chain(iter::once(transition_dst)),
-                    );
+                    encoder.transition_buffers(&[transition_src, transition_dst]);
                     if self.size > 0 {
                         encoder.copy_buffer_to_buffer(
                             staging_buffer.raw(),
                             raw_buf,
-                            region.into_iter(),
+                            region.as_slice(),
                         );
                     }
                 }
@@ -689,7 +683,7 @@ impl<A: HalApi> Buffer<A> {
                         });
                     }
                     if !mapping.is_coherent {
-                        unsafe { device.raw().flush_mapped_ranges(raw_buf, iter::once(range)) };
+                        unsafe { device.raw().flush_mapped_ranges(raw_buf, &[range]) };
                     }
                 }
                 unsafe { device.raw().unmap_buffer(raw_buf) };
@@ -766,7 +760,7 @@ crate::impl_trackable!(Buffer);
 /// A buffer that has been marked as destroyed and is staged for actual deletion soon.
 #[derive(Debug)]
 pub struct DestroyedBuffer<A: HalApi> {
-    raw: ManuallyDrop<A::Buffer>,
+    raw: ManuallyDrop<Box<dyn hal::DynBuffer>>,
     device: Arc<Device<A>>,
     label: String,
     bind_groups: Vec<Weak<BindGroup<A>>>,
@@ -790,8 +784,7 @@ impl<A: HalApi> Drop for DestroyedBuffer<A> {
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
         let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
         unsafe {
-            use hal::Device;
-            self.device.raw().destroy_buffer(raw);
+            hal::DynDevice::destroy_buffer(self.device.raw(), raw);
         }
     }
 }
@@ -822,7 +815,7 @@ unsafe impl<A: HalApi> Sync for StagingBuffer<A> {}
 /// [`Device::pending_writes`]: crate::device::Device
 #[derive(Debug)]
 pub struct StagingBuffer<A: HalApi> {
-    raw: A::Buffer,
+    raw: Box<dyn hal::DynBuffer>,
     device: Arc<Device<A>>,
     pub(crate) size: wgt::BufferSize,
     is_coherent: bool,
@@ -831,7 +824,6 @@ pub struct StagingBuffer<A: HalApi> {
 
 impl<A: HalApi> StagingBuffer<A> {
     pub(crate) fn new(device: &Arc<Device<A>>, size: wgt::BufferSize) -> Result<Self, DeviceError> {
-        use hal::Device;
         profiling::scope!("StagingBuffer::new");
         let stage_desc = hal::BufferDescriptor {
             label: crate::hal_label(Some("(wgpu internal) Staging"), device.instance_flags),
@@ -841,7 +833,7 @@ impl<A: HalApi> StagingBuffer<A> {
         };
 
         let raw = unsafe { device.raw().create_buffer(&stage_desc)? };
-        let mapping = unsafe { device.raw().map_buffer(&raw, 0..size.get()) }?;
+        let mapping = unsafe { device.raw().map_buffer(raw.as_ref(), 0..size.get()) }?;
 
         let staging_buffer = StagingBuffer {
             raw,
@@ -900,12 +892,14 @@ impl<A: HalApi> StagingBuffer<A> {
     }
 
     pub(crate) fn flush(self) -> FlushedStagingBuffer<A> {
-        use hal::Device;
         let device = self.device.raw();
         if !self.is_coherent {
-            unsafe { device.flush_mapped_ranges(&self.raw, iter::once(0..self.size.get())) };
+            #[allow(clippy::single_range_in_vec_init)]
+            unsafe {
+                device.flush_mapped_ranges(self.raw.as_ref(), &[0..self.size.get()])
+            };
         }
-        unsafe { device.unmap_buffer(&self.raw) };
+        unsafe { device.unmap_buffer(self.raw.as_ref()) };
 
         let StagingBuffer {
             raw, device, size, ..
@@ -924,20 +918,19 @@ crate::impl_storage_item!(StagingBuffer);
 
 #[derive(Debug)]
 pub struct FlushedStagingBuffer<A: HalApi> {
-    raw: ManuallyDrop<A::Buffer>,
+    raw: ManuallyDrop<Box<dyn hal::DynBuffer>>,
     device: Arc<Device<A>>,
     pub(crate) size: wgt::BufferSize,
 }
 
 impl<A: HalApi> FlushedStagingBuffer<A> {
-    pub(crate) fn raw(&self) -> &A::Buffer {
-        &self.raw
+    pub(crate) fn raw(&self) -> &dyn hal::DynBuffer {
+        self.raw.as_ref()
     }
 }
 
 impl<A: HalApi> Drop for FlushedStagingBuffer<A> {
     fn drop(&mut self) {
-        use hal::Device;
         resource_log!("Destroy raw StagingBuffer");
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
         let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
@@ -948,35 +941,35 @@ impl<A: HalApi> Drop for FlushedStagingBuffer<A> {
 pub type TextureDescriptor<'a> = wgt::TextureDescriptor<Label<'a>, Vec<wgt::TextureFormat>>;
 
 #[derive(Debug)]
-pub(crate) enum TextureInner<A: HalApi> {
+pub(crate) enum TextureInner {
     Native {
-        raw: A::Texture,
+        raw: Box<dyn hal::DynTexture>,
     },
     Surface {
-        raw: A::SurfaceTexture,
+        raw: Box<dyn hal::DynSurfaceTexture>,
         parent_id: SurfaceId,
     },
 }
 
-impl<A: HalApi> TextureInner<A> {
-    pub(crate) fn raw(&self) -> &A::Texture {
+impl TextureInner {
+    pub(crate) fn raw(&self) -> &dyn hal::DynTexture {
         match self {
-            Self::Native { raw } => raw,
-            Self::Surface { raw, .. } => raw.borrow(),
+            Self::Native { raw } => raw.as_ref(),
+            Self::Surface { raw, .. } => raw.as_ref().borrow(),
         }
     }
 }
 
 #[derive(Debug)]
-pub enum TextureClearMode<A: HalApi> {
+pub enum TextureClearMode {
     BufferCopy,
     // View for clear via RenderPass for every subsurface (mip/layer/slice)
     RenderPass {
-        clear_views: SmallVec<[ManuallyDrop<A::TextureView>; 1]>,
+        clear_views: SmallVec<[ManuallyDrop<Box<dyn hal::DynTextureView>>; 1]>,
         is_color: bool,
     },
     Surface {
-        clear_view: ManuallyDrop<A::TextureView>,
+        clear_view: ManuallyDrop<Box<dyn hal::DynTextureView>>,
     },
     // Texture can't be cleared, attempting to do so will cause panic.
     // (either because it is impossible for the type of texture or it is being destroyed)
@@ -985,7 +978,7 @@ pub enum TextureClearMode<A: HalApi> {
 
 #[derive(Debug)]
 pub struct Texture<A: HalApi> {
-    pub(crate) inner: Snatchable<TextureInner<A>>,
+    pub(crate) inner: Snatchable<TextureInner>,
     pub(crate) device: Arc<Device<A>>,
     pub(crate) desc: wgt::TextureDescriptor<(), Vec<wgt::TextureFormat>>,
     pub(crate) hal_usage: hal::TextureUses,
@@ -995,7 +988,7 @@ pub struct Texture<A: HalApi> {
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
     pub(crate) tracking_data: TrackingData,
-    pub(crate) clear_mode: TextureClearMode<A>,
+    pub(crate) clear_mode: TextureClearMode,
     pub(crate) views: Mutex<Vec<Weak<TextureView<A>>>>,
     pub(crate) bind_groups: Mutex<Vec<Weak<BindGroup<A>>>>,
 }
@@ -1003,11 +996,11 @@ pub struct Texture<A: HalApi> {
 impl<A: HalApi> Texture<A> {
     pub(crate) fn new(
         device: &Arc<Device<A>>,
-        inner: TextureInner<A>,
+        inner: TextureInner,
         hal_usage: hal::TextureUses,
         desc: &TextureDescriptor,
         format_features: wgt::TextureFormatFeatures,
-        clear_mode: TextureClearMode<A>,
+        clear_mode: TextureClearMode,
         init: bool,
     ) -> Self {
         Texture {
@@ -1055,7 +1048,6 @@ impl<A: HalApi> Texture<A> {
 
 impl<A: HalApi> Drop for Texture<A> {
     fn drop(&mut self) {
-        use hal::Device;
         match self.clear_mode {
             TextureClearMode::Surface {
                 ref mut clear_view, ..
@@ -1094,20 +1086,23 @@ impl<A: HalApi> Texture<A> {
     pub(crate) fn try_inner<'a>(
         &'a self,
         guard: &'a SnatchGuard,
-    ) -> Result<&'a TextureInner<A>, DestroyedResourceError> {
+    ) -> Result<&'a TextureInner, DestroyedResourceError> {
         self.inner
             .get(guard)
             .ok_or_else(|| DestroyedResourceError(self.error_ident()))
     }
 
-    pub(crate) fn raw<'a>(&'a self, snatch_guard: &'a SnatchGuard) -> Option<&'a A::Texture> {
+    pub(crate) fn raw<'a>(
+        &'a self,
+        snatch_guard: &'a SnatchGuard,
+    ) -> Option<&'a dyn hal::DynTexture> {
         Some(self.inner.get(snatch_guard)?.raw())
     }
 
     pub(crate) fn try_raw<'a>(
         &'a self,
         guard: &'a SnatchGuard,
-    ) -> Result<&'a A::Texture, DestroyedResourceError> {
+    ) -> Result<&'a dyn hal::DynTexture, DestroyedResourceError> {
         self.inner
             .get(guard)
             .map(|t| t.raw())
@@ -1115,11 +1110,11 @@ impl<A: HalApi> Texture<A> {
     }
 
     pub(crate) fn get_clear_view<'a>(
-        clear_mode: &'a TextureClearMode<A>,
+        clear_mode: &'a TextureClearMode,
         desc: &'a wgt::TextureDescriptor<(), Vec<wgt::TextureFormat>>,
         mip_level: u32,
         depth_or_layer: u32,
-    ) -> &'a A::TextureView {
+    ) -> &'a dyn hal::DynTextureView {
         match *clear_mode {
             TextureClearMode::BufferCopy => {
                 panic!("Given texture is cleared with buffer copies, not render passes")
@@ -1127,7 +1122,7 @@ impl<A: HalApi> Texture<A> {
             TextureClearMode::None => {
                 panic!("Given texture can't be cleared")
             }
-            TextureClearMode::Surface { ref clear_view, .. } => clear_view,
+            TextureClearMode::Surface { ref clear_view, .. } => clear_view.as_ref(),
             TextureClearMode::RenderPass {
                 ref clear_views, ..
             } => {
@@ -1138,7 +1133,7 @@ impl<A: HalApi> Texture<A> {
                 } else {
                     mip_level * desc.size.depth_or_array_layers
                 } + depth_or_layer;
-                &clear_views[index as usize]
+                clear_views[index as usize].as_ref()
             }
         }
     }
@@ -1207,7 +1202,9 @@ impl Global {
 
         if let Ok(buffer) = hub.buffers.get(id) {
             let snatch_guard = buffer.device.snatchable_lock.read();
-            let hal_buffer = buffer.raw(&snatch_guard);
+            let hal_buffer = buffer
+                .raw(&snatch_guard)
+                .and_then(|b| b.as_any().downcast_ref());
             hal_buffer_callback(hal_buffer)
         } else {
             hal_buffer_callback(None)
@@ -1229,6 +1226,9 @@ impl Global {
         if let Ok(texture) = hub.textures.get(id) {
             let snatch_guard = texture.device.snatchable_lock.read();
             let hal_texture = texture.raw(&snatch_guard);
+            let hal_texture = hal_texture
+                .as_ref()
+                .and_then(|it| it.as_any().downcast_ref());
             hal_texture_callback(hal_texture)
         } else {
             hal_texture_callback(None)
@@ -1250,6 +1250,9 @@ impl Global {
         if let Ok(texture_view) = hub.texture_views.get(id) {
             let snatch_guard = texture_view.device.snatchable_lock.read();
             let hal_texture_view = texture_view.raw(&snatch_guard);
+            let hal_texture_view = hal_texture_view
+                .as_ref()
+                .and_then(|it| it.as_any().downcast_ref());
             hal_texture_view_callback(hal_texture_view)
         } else {
             hal_texture_view_callback(None)
@@ -1285,7 +1288,10 @@ impl Global {
 
         let hub = A::hub(self);
         let device = hub.devices.get(id).ok();
-        let hal_device = device.as_ref().map(|device| device.raw());
+        let hal_device = device
+            .as_ref()
+            .map(|device| device.raw())
+            .and_then(|device| device.as_any().downcast_ref());
 
         hal_device_callback(hal_device)
     }
@@ -1304,7 +1310,7 @@ impl Global {
 
         if let Ok(device) = hub.devices.get(id) {
             let fence = device.fence.read();
-            hal_fence_callback(Some(&fence))
+            hal_fence_callback(fence.as_any().downcast_ref())
         } else {
             hal_fence_callback(None)
         }
@@ -1346,7 +1352,11 @@ impl Global {
         if let Ok(cmd_buf) = hub.command_buffers.get(id.into_command_buffer_id()) {
             let mut cmd_buf_data = cmd_buf.data.lock();
             let cmd_buf_data = cmd_buf_data.as_mut().unwrap();
-            let cmd_buf_raw = cmd_buf_data.encoder.open().ok();
+            let cmd_buf_raw = cmd_buf_data
+                .encoder
+                .open()
+                .ok()
+                .and_then(|encoder| encoder.as_any_mut().downcast_mut());
             hal_command_encoder_callback(cmd_buf_raw)
         } else {
             hal_command_encoder_callback(None)
@@ -1357,7 +1367,7 @@ impl Global {
 /// A texture that has been marked as destroyed and is staged for actual deletion soon.
 #[derive(Debug)]
 pub struct DestroyedTexture<A: HalApi> {
-    raw: ManuallyDrop<A::Texture>,
+    raw: ManuallyDrop<Box<dyn hal::DynTexture>>,
     views: Vec<Weak<TextureView<A>>>,
     bind_groups: Vec<Weak<BindGroup<A>>>,
     device: Arc<Device<A>>,
@@ -1387,7 +1397,6 @@ impl<A: HalApi> Drop for DestroyedTexture<A> {
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
         let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
         unsafe {
-            use hal::Device;
             self.device.raw().destroy_texture(raw);
         }
     }
@@ -1553,7 +1562,7 @@ pub enum TextureViewNotRenderableReason {
 
 #[derive(Debug)]
 pub struct TextureView<A: HalApi> {
-    pub(crate) raw: Snatchable<A::TextureView>,
+    pub(crate) raw: Snatchable<Box<dyn hal::DynTextureView>>,
     // if it's a surface texture - it's none
     pub(crate) parent: Arc<Texture<A>>,
     pub(crate) device: Arc<Device<A>>,
@@ -1573,7 +1582,6 @@ impl<A: HalApi> Drop for TextureView<A> {
         if let Some(raw) = self.raw.take() {
             resource_log!("Destroy raw {}", self.error_ident());
             unsafe {
-                use hal::Device;
                 self.device.raw().destroy_texture_view(raw);
             }
         }
@@ -1581,16 +1589,20 @@ impl<A: HalApi> Drop for TextureView<A> {
 }
 
 impl<A: HalApi> TextureView<A> {
-    pub(crate) fn raw<'a>(&'a self, snatch_guard: &'a SnatchGuard) -> Option<&'a A::TextureView> {
-        self.raw.get(snatch_guard)
+    pub(crate) fn raw<'a>(
+        &'a self,
+        snatch_guard: &'a SnatchGuard,
+    ) -> Option<&'a dyn hal::DynTextureView> {
+        self.raw.get(snatch_guard).map(|it| it.as_ref())
     }
 
     pub(crate) fn try_raw<'a>(
         &'a self,
         guard: &'a SnatchGuard,
-    ) -> Result<&A::TextureView, DestroyedResourceError> {
+    ) -> Result<&'a dyn hal::DynTextureView, DestroyedResourceError> {
         self.raw
             .get(guard)
+            .map(|it| it.as_ref())
             .ok_or_else(|| DestroyedResourceError(self.error_ident()))
     }
 }
@@ -1687,7 +1699,7 @@ pub struct SamplerDescriptor<'a> {
 
 #[derive(Debug)]
 pub struct Sampler<A: HalApi> {
-    pub(crate) raw: ManuallyDrop<A::Sampler>,
+    pub(crate) raw: ManuallyDrop<Box<dyn hal::DynSampler>>,
     pub(crate) device: Arc<Device<A>>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
@@ -1704,15 +1716,14 @@ impl<A: HalApi> Drop for Sampler<A> {
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
         let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
         unsafe {
-            use hal::Device;
             self.device.raw().destroy_sampler(raw);
         }
     }
 }
 
 impl<A: HalApi> Sampler<A> {
-    pub(crate) fn raw(&self) -> &A::Sampler {
-        &self.raw
+    pub(crate) fn raw(&self) -> &dyn hal::DynSampler {
+        self.raw.as_ref()
     }
 }
 
@@ -1783,7 +1794,7 @@ pub type QuerySetDescriptor<'a> = wgt::QuerySetDescriptor<Label<'a>>;
 
 #[derive(Debug)]
 pub struct QuerySet<A: HalApi> {
-    pub(crate) raw: ManuallyDrop<A::QuerySet>,
+    pub(crate) raw: ManuallyDrop<Box<dyn hal::DynQuerySet>>,
     pub(crate) device: Arc<Device<A>>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
@@ -1797,7 +1808,6 @@ impl<A: HalApi> Drop for QuerySet<A> {
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
         let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
         unsafe {
-            use hal::Device;
             self.device.raw().destroy_query_set(raw);
         }
     }
@@ -1810,8 +1820,8 @@ crate::impl_storage_item!(QuerySet);
 crate::impl_trackable!(QuerySet);
 
 impl<A: HalApi> QuerySet<A> {
-    pub(crate) fn raw(&self) -> &A::QuerySet {
-        &self.raw
+    pub(crate) fn raw(&self) -> &dyn hal::DynQuerySet {
+        self.raw.as_ref()
     }
 }
 
diff --git a/wgpu-core/src/track/buffer.rs b/wgpu-core/src/track/buffer.rs
index 0f2bc8cef9..ea670de35a 100644
--- a/wgpu-core/src/track/buffer.rs
+++ b/wgpu-core/src/track/buffer.rs
@@ -284,7 +284,7 @@ impl<A: HalApi> BufferTracker<A> {
     pub fn drain_transitions<'a, 'b: 'a>(
         &'b mut self,
         snatch_guard: &'a SnatchGuard<'a>,
-    ) -> impl Iterator<Item = BufferBarrier<'a, A::Buffer>> {
+    ) -> impl Iterator<Item = BufferBarrier<'a, dyn hal::DynBuffer>> {
         let buffer_barriers = self.temp.drain(..).map(|pending| {
             let buf = unsafe { self.metadata.get_resource_unchecked(pending.id as _) };
             pending.into_hal(buf, snatch_guard)
@@ -557,7 +557,7 @@ impl<A: HalApi> DeviceBufferTracker<A> {
         &'a mut self,
         tracker: &'a BufferTracker<A>,
         snatch_guard: &'b SnatchGuard<'b>,
-    ) -> impl Iterator<Item = BufferBarrier<'a, A::Buffer>> {
+    ) -> impl Iterator<Item = BufferBarrier<'a, dyn hal::DynBuffer>> {
         for index in tracker.metadata.owned_indices() {
             self.tracker_assert_in_bounds(index);
 
diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs
index 4fccb24abe..a75092d8be 100644
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -261,7 +261,7 @@ impl PendingTransition<hal::BufferUses> {
         self,
         buf: &'a resource::Buffer<A>,
         snatch_guard: &'a SnatchGuard<'a>,
-    ) -> hal::BufferBarrier<'a, A::Buffer> {
+    ) -> hal::BufferBarrier<'a, dyn hal::DynBuffer> {
         let buffer = buf.raw(snatch_guard).expect("Buffer is destroyed");
         hal::BufferBarrier {
             buffer,
@@ -272,10 +272,10 @@ impl PendingTransition<hal::BufferUses> {
 
 impl PendingTransition<hal::TextureUses> {
     /// Produce the hal barrier corresponding to the transition.
-    pub fn into_hal<'a, T: hal::DynTexture + ?Sized>(
+    pub fn into_hal(
         self,
-        texture: &'a T,
-    ) -> hal::TextureBarrier<'a, T> {
+        texture: &dyn hal::DynTexture,
+    ) -> hal::TextureBarrier<'_, dyn hal::DynTexture> {
         // These showing up in a barrier is always a bug
         strict_assert_ne!(self.usage.start, hal::TextureUses::UNKNOWN);
         strict_assert_ne!(self.usage.end, hal::TextureUses::UNKNOWN);
diff --git a/wgpu-core/src/track/texture.rs b/wgpu-core/src/track/texture.rs
index f454c3e225..9b11527645 100644
--- a/wgpu-core/src/track/texture.rs
+++ b/wgpu-core/src/track/texture.rs
@@ -438,7 +438,7 @@ impl<A: HalApi> TextureTracker<A> {
     pub fn drain_transitions<'a>(
         &'a mut self,
         snatch_guard: &'a SnatchGuard<'a>,
-    ) -> (PendingTransitionList, Vec<Option<&'a TextureInner<A>>>) {
+    ) -> (PendingTransitionList, Vec<Option<&'a TextureInner>>) {
         let mut textures = Vec::new();
         let transitions = self
             .temp
@@ -754,7 +754,7 @@ impl<A: HalApi> DeviceTextureTracker<A> {
         &'a mut self,
         tracker: &'a TextureTracker<A>,
         snatch_guard: &'b SnatchGuard<'b>,
-    ) -> impl Iterator<Item = TextureBarrier<'a, A::Texture>> {
+    ) -> impl Iterator<Item = TextureBarrier<'a, dyn hal::DynTexture>> {
         for index in tracker.metadata.owned_indices() {
             self.tracker_assert_in_bounds(index);
 
@@ -798,7 +798,7 @@ impl<A: HalApi> DeviceTextureTracker<A> {
         &'a mut self,
         scope: &'a TextureUsageScope<A>,
         snatch_guard: &'b SnatchGuard<'b>,
-    ) -> impl Iterator<Item = TextureBarrier<'a, A::Texture>> {
+    ) -> impl Iterator<Item = TextureBarrier<'a, dyn hal::DynTexture>> {
         for index in scope.metadata.owned_indices() {
             self.tracker_assert_in_bounds(index);
 
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 06632d68dd..b787130ba4 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -143,7 +143,7 @@ impl ContextWgpuCore {
         let descriptor = desc.map_label_and_view_formats(|l| l.map(Borrowed), |v| v.to_vec());
         let (id, error) = unsafe {
             self.0
-                .create_texture_from_hal::<A>(hal_texture, device.id, &descriptor, None)
+                .create_texture_from_hal::<A>(Box::new(hal_texture), device.id, &descriptor, None)
         };
         if let Some(cause) = error {
             self.handle_error(

From 04cadfb36948333159e3b9e1ebb74866c3f79dde Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 4 Aug 2024 21:48:07 +0200
Subject: [PATCH 213/226] Core's Surface, Instance and Adapter now use dynamic
 hal types

---
 wgpu-core/src/device/global.rs   |  12 +-
 wgpu-core/src/global.rs          |  96 ++++----
 wgpu-core/src/hal_api.rs         |  77 +-----
 wgpu-core/src/hub.rs             |   7 +-
 wgpu-core/src/instance.rs        | 392 ++++++++++++++-----------------
 wgpu-core/src/present.rs         |  14 +-
 wgpu-core/src/resource.rs        |   8 +-
 wgpu-hal/src/dynamic/adapter.rs  |  13 +-
 wgpu-hal/src/dynamic/instance.rs |  13 +-
 wgpu/src/backend/wgpu_core.rs    |   9 +-
 10 files changed, 280 insertions(+), 361 deletions(-)

diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index cb9f62ea03..b50dcb9593 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -56,7 +56,7 @@ impl Global {
     ) -> Result<wgt::SurfaceCapabilities, instance::GetSurfaceSupportError> {
         profiling::scope!("Surface::get_capabilities");
         self.fetch_adapter_and_surface::<A, _, _>(surface_id, adapter_id, |adapter, surface| {
-            let mut hal_caps = surface.get_capabilities(adapter)?;
+            let mut hal_caps = surface.get_capabilities::<A>(A::VARIANT, adapter)?;
 
             hal_caps.formats.sort_by_key(|f| !f.is_srgb());
 
@@ -1765,7 +1765,6 @@ impl Global {
         device_id: DeviceId,
         config: &wgt::SurfaceConfiguration<Vec<TextureFormat>>,
     ) -> Option<present::ConfigureSurfaceError> {
-        use hal::Surface as _;
         use present::ConfigureSurfaceError as E;
         profiling::scope!("surface_configure");
 
@@ -1909,7 +1908,7 @@ impl Global {
                     Err(_) => break 'error E::InvalidSurface,
                 };
 
-                let caps = match surface.get_capabilities(&device.adapter) {
+                let caps = match surface.get_capabilities::<A>(A::VARIANT, &device.adapter) {
                     Ok(caps) => caps,
                     Err(_) => break 'error E::UnsupportedQueueFamily,
                 };
@@ -1990,11 +1989,8 @@ impl Global {
                 //
                 // https://github.com/gfx-rs/wgpu/issues/4105
 
-                match unsafe {
-                    A::surface_as_hal(surface)
-                        .unwrap()
-                        .configure(device.raw().as_any().downcast_ref().unwrap(), &hal_config)
-                } {
+                let surface_raw = surface.raw(A::VARIANT).unwrap();
+                match unsafe { surface_raw.configure(device.raw(), &hal_config) } {
                     Ok(()) => (),
                     Err(error) => {
                         break 'error match error {
diff --git a/wgpu-core/src/global.rs b/wgpu-core/src/global.rs
index 54dcc8111c..e4708fd4dc 100644
--- a/wgpu-core/src/global.rs
+++ b/wgpu-core/src/global.rs
@@ -1,3 +1,5 @@
+use std::collections::HashMap;
+
 use wgt::Backend;
 
 use crate::{
@@ -11,14 +13,7 @@ use crate::{
 #[derive(Debug, PartialEq, Eq)]
 pub struct GlobalReport {
     pub surfaces: RegistryReport,
-    #[cfg(vulkan)]
-    pub vulkan: Option<HubReport>,
-    #[cfg(metal)]
-    pub metal: Option<HubReport>,
-    #[cfg(dx12)]
-    pub dx12: Option<HubReport>,
-    #[cfg(gles)]
-    pub gl: Option<HubReport>,
+    pub report_per_backend: HashMap<Backend, HubReport>,
 }
 
 impl GlobalReport {
@@ -26,17 +21,7 @@ impl GlobalReport {
         &self.surfaces
     }
     pub fn hub_report(&self, backend: Backend) -> &HubReport {
-        match backend {
-            #[cfg(vulkan)]
-            Backend::Vulkan => self.vulkan.as_ref().unwrap(),
-            #[cfg(metal)]
-            Backend::Metal => self.metal.as_ref().unwrap(),
-            #[cfg(dx12)]
-            Backend::Dx12 => self.dx12.as_ref().unwrap(),
-            #[cfg(gles)]
-            Backend::Gl => self.gl.as_ref().unwrap(),
-            _ => panic!("HubReport is not supported on this backend"),
-        }
+        self.report_per_backend.get(&backend).unwrap()
     }
 }
 
@@ -61,8 +46,14 @@ impl Global {
     /// Refer to the creation of wgpu-hal Instance for every backend.
     pub unsafe fn from_hal_instance<A: HalApi>(name: &str, hal_instance: A::Instance) -> Self {
         profiling::scope!("Global::new");
+
+        let dyn_instance: Box<dyn hal::DynInstance> = Box::new(hal_instance);
         Self {
-            instance: A::create_instance_from_hal(name, hal_instance),
+            instance: Instance {
+                name: name.to_owned(),
+                instance_per_backend: std::iter::once((A::VARIANT, dyn_instance)).collect(),
+                ..Default::default()
+            },
             surfaces: Registry::without_backend(),
             hubs: Hubs::new(),
         }
@@ -72,7 +63,13 @@ impl Global {
     ///
     /// - The raw instance handle returned must not be manually destroyed.
     pub unsafe fn instance_as_hal<A: HalApi>(&self) -> Option<&A::Instance> {
-        A::instance_as_hal(&self.instance)
+        self.instance.raw(A::VARIANT).map(|instance| {
+            instance
+                .as_any()
+                .downcast_ref()
+                // This should be impossible. It would mean that backend instance and enum type are mismatching.
+                .expect("Stored instance is not of the correct type")
+        })
     }
 
     /// # Safety
@@ -88,32 +85,41 @@ impl Global {
     }
 
     pub fn generate_report(&self) -> GlobalReport {
+        let mut report_per_backend = HashMap::default();
+        let instance_per_backend = &self.instance.instance_per_backend;
+
+        #[cfg(vulkan)]
+        if instance_per_backend
+            .iter()
+            .any(|(backend, _)| backend == &Backend::Vulkan)
+        {
+            report_per_backend.insert(Backend::Vulkan, self.hubs.vulkan.generate_report());
+        };
+        #[cfg(metal)]
+        if instance_per_backend
+            .iter()
+            .any(|(backend, _)| backend == &Backend::Metal)
+        {
+            report_per_backend.insert(Backend::Metal, self.hubs.metal.generate_report());
+        };
+        #[cfg(dx12)]
+        if instance_per_backend
+            .iter()
+            .any(|(backend, _)| backend == &Backend::Dx12)
+        {
+            report_per_backend.insert(Backend::Dx12, self.hubs.dx12.generate_report());
+        };
+        #[cfg(gles)]
+        if instance_per_backend
+            .iter()
+            .any(|(backend, _)| backend == &Backend::Gl)
+        {
+            report_per_backend.insert(Backend::Gl, self.hubs.gl.generate_report());
+        };
+
         GlobalReport {
             surfaces: self.surfaces.generate_report(),
-            #[cfg(vulkan)]
-            vulkan: if self.instance.vulkan.is_some() {
-                Some(self.hubs.vulkan.generate_report())
-            } else {
-                None
-            },
-            #[cfg(metal)]
-            metal: if self.instance.metal.is_some() {
-                Some(self.hubs.metal.generate_report())
-            } else {
-                None
-            },
-            #[cfg(dx12)]
-            dx12: if self.instance.dx12.is_some() {
-                Some(self.hubs.dx12.generate_report())
-            } else {
-                None
-            },
-            #[cfg(gles)]
-            gl: if self.instance.gl.is_some() {
-                Some(self.hubs.gl.generate_report())
-            } else {
-                None
-            },
+            report_per_backend,
         }
     }
 }
diff --git a/wgpu-core/src/hal_api.rs b/wgpu-core/src/hal_api.rs
index f1a40b1cff..ebd09ffc73 100644
--- a/wgpu-core/src/hal_api.rs
+++ b/wgpu-core/src/hal_api.rs
@@ -1,116 +1,53 @@
 use wgt::{Backend, WasmNotSendSync};
 
-use crate::{
-    global::Global,
-    hub::Hub,
-    instance::{Instance, Surface},
-};
+use crate::{global::Global, hub::Hub};
 
 pub trait HalApi: hal::Api + 'static + WasmNotSendSync {
     const VARIANT: Backend;
-    fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance;
-    fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance>;
+
     fn hub(global: &Global) -> &Hub<Self>;
-    fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface>;
 }
 
 impl HalApi for hal::api::Empty {
     const VARIANT: Backend = Backend::Empty;
-    fn create_instance_from_hal(_: &str, _: Self::Instance) -> Instance {
-        unimplemented!("called empty api")
-    }
-    fn instance_as_hal(_: &Instance) -> Option<&Self::Instance> {
-        unimplemented!("called empty api")
-    }
+
     fn hub(_: &Global) -> &Hub<Self> {
         unimplemented!("called empty api")
     }
-    fn surface_as_hal(_: &Surface) -> Option<&Self::Surface> {
-        unimplemented!("called empty api")
-    }
 }
 
 #[cfg(vulkan)]
 impl HalApi for hal::api::Vulkan {
     const VARIANT: Backend = Backend::Vulkan;
-    fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance {
-        Instance {
-            name: name.to_owned(),
-            vulkan: Some(hal_instance),
-            ..Default::default()
-        }
-    }
-    fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance> {
-        instance.vulkan.as_ref()
-    }
+
     fn hub(global: &Global) -> &Hub<Self> {
         &global.hubs.vulkan
     }
-    fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> {
-        surface.vulkan.as_ref()
-    }
 }
 
 #[cfg(metal)]
 impl HalApi for hal::api::Metal {
     const VARIANT: Backend = Backend::Metal;
-    fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance {
-        Instance {
-            name: name.to_owned(),
-            metal: Some(hal_instance),
-            ..Default::default()
-        }
-    }
-    fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance> {
-        instance.metal.as_ref()
-    }
+
     fn hub(global: &Global) -> &Hub<Self> {
         &global.hubs.metal
     }
-    fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> {
-        surface.metal.as_ref()
-    }
 }
 
 #[cfg(dx12)]
 impl HalApi for hal::api::Dx12 {
     const VARIANT: Backend = Backend::Dx12;
-    fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance {
-        Instance {
-            name: name.to_owned(),
-            dx12: Some(hal_instance),
-            ..Default::default()
-        }
-    }
-    fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance> {
-        instance.dx12.as_ref()
-    }
+
     fn hub(global: &Global) -> &Hub<Self> {
         &global.hubs.dx12
     }
-    fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> {
-        surface.dx12.as_ref()
-    }
 }
 
 #[cfg(gles)]
 impl HalApi for hal::api::Gles {
     const VARIANT: Backend = Backend::Gl;
-    fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance {
-        #[allow(clippy::needless_update)]
-        Instance {
-            name: name.to_owned(),
-            gl: Some(hal_instance),
-            ..Default::default()
-        }
-    }
-    fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance> {
-        instance.gl.as_ref()
-    }
+
     fn hub(global: &Global) -> &Hub<Self> {
         &global.hubs.gl
     }
-    fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> {
-        surface.gl.as_ref()
-    }
 }
diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs
index 02049a4c31..3dfbe57adc 100644
--- a/wgpu-core/src/hub.rs
+++ b/wgpu-core/src/hub.rs
@@ -215,8 +215,6 @@ impl<A: HalApi> Hub<A> {
     }
 
     pub(crate) fn clear(&self, surface_guard: &Storage<Surface>) {
-        use hal::Surface;
-
         let mut devices = self.devices.write();
         for element in devices.map.iter() {
             if let Element::Occupied(ref device, _) = *element {
@@ -242,10 +240,9 @@ impl<A: HalApi> Hub<A> {
             if let Element::Occupied(ref surface, _epoch) = *element {
                 if let Some(ref mut present) = surface.presentation.lock().take() {
                     if let Some(device) = present.device.downcast_ref::<A>() {
-                        let suf = A::surface_as_hal(surface);
+                        let suf = surface.raw(A::VARIANT);
                         unsafe {
-                            suf.unwrap()
-                                .unconfigure(device.raw().as_any().downcast_ref().unwrap());
+                            suf.unwrap().unconfigure(device.raw());
                         }
                     }
                 }
diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index 4c1b9960c1..75f8bb4d45 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -15,12 +15,9 @@ use crate::{
 
 use wgt::{Backend, Backends, PowerPreference};
 
-use hal::{Adapter as _, Instance as _, OpenDevice};
 use thiserror::Error;
 
 pub type RequestAdapterOptions = wgt::RequestAdapterOptions<SurfaceId>;
-type HalInstance<A> = <A as hal::Api>::Instance;
-type HalSurface<A> = <A as hal::Api>::Surface;
 
 #[derive(Clone, Debug, Error)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
@@ -58,20 +55,20 @@ fn downlevel_default_limits_less_than_default_limits() {
 pub struct Instance {
     #[allow(dead_code)]
     pub name: String,
-    #[cfg(vulkan)]
-    pub vulkan: Option<HalInstance<hal::api::Vulkan>>,
-    #[cfg(metal)]
-    pub metal: Option<HalInstance<hal::api::Metal>>,
-    #[cfg(dx12)]
-    pub dx12: Option<HalInstance<hal::api::Dx12>>,
-    #[cfg(gles)]
-    pub gl: Option<HalInstance<hal::api::Gles>>,
+    /// List of instances per backend.
+    ///
+    /// The ordering in this list implies prioritization and needs to be preserved.
+    pub instance_per_backend: Vec<(Backend, Box<dyn hal::DynInstance>)>,
     pub flags: wgt::InstanceFlags,
 }
 
 impl Instance {
     pub fn new(name: &str, instance_desc: wgt::InstanceDescriptor) -> Self {
-        fn init<A: HalApi>(_: A, instance_desc: &wgt::InstanceDescriptor) -> Option<A::Instance> {
+        fn init<A: HalApi>(
+            _: A,
+            instance_desc: &wgt::InstanceDescriptor,
+            instance_per_backend: &mut Vec<(Backend, Box<dyn hal::DynInstance>)>,
+        ) {
             if instance_desc.backends.contains(A::VARIANT.into()) {
                 let hal_desc = hal::InstanceDescriptor {
                     name: "wgpu",
@@ -79,10 +76,12 @@ impl Instance {
                     dx12_shader_compiler: instance_desc.dx12_shader_compiler.clone(),
                     gles_minor_version: instance_desc.gles_minor_version,
                 };
-                match unsafe { hal::Instance::init(&hal_desc) } {
+
+                use hal::Instance as _;
+                match unsafe { A::Instance::init(&hal_desc) } {
                     Ok(instance) => {
                         log::debug!("Instance::new: created {:?} backend", A::VARIANT);
-                        Some(instance)
+                        instance_per_backend.push((A::VARIANT, Box::new(instance)));
                     }
                     Err(err) => {
                         log::debug!(
@@ -90,41 +89,43 @@ impl Instance {
                             A::VARIANT,
                             err
                         );
-                        None
                     }
                 }
             } else {
                 log::trace!("Instance::new: backend {:?} not requested", A::VARIANT);
-                None
             }
         }
 
+        let mut instance_per_backend = Vec::new();
+
+        #[cfg(vulkan)]
+        init(hal::api::Vulkan, &instance_desc, &mut instance_per_backend);
+        #[cfg(metal)]
+        init(hal::api::Metal, &instance_desc, &mut instance_per_backend);
+        #[cfg(dx12)]
+        init(hal::api::Dx12, &instance_desc, &mut instance_per_backend);
+        #[cfg(gles)]
+        init(hal::api::Gles, &instance_desc, &mut instance_per_backend);
+
         Self {
             name: name.to_string(),
-            #[cfg(vulkan)]
-            vulkan: init(hal::api::Vulkan, &instance_desc),
-            #[cfg(metal)]
-            metal: init(hal::api::Metal, &instance_desc),
-            #[cfg(dx12)]
-            dx12: init(hal::api::Dx12, &instance_desc),
-            #[cfg(gles)]
-            gl: init(hal::api::Gles, &instance_desc),
+            instance_per_backend,
             flags: instance_desc.flags,
         }
     }
+
+    pub fn raw(&self, backend: Backend) -> Option<&dyn hal::DynInstance> {
+        self.instance_per_backend
+            .iter()
+            .find_map(|(instance_backend, instance)| {
+                (*instance_backend == backend).then(|| instance.as_ref())
+            })
+    }
 }
 
 pub struct Surface {
     pub(crate) presentation: Mutex<Option<Presentation>>,
-
-    #[cfg(vulkan)]
-    pub vulkan: Option<HalSurface<hal::api::Vulkan>>,
-    #[cfg(metal)]
-    pub metal: Option<HalSurface<hal::api::Metal>>,
-    #[cfg(dx12)]
-    pub dx12: Option<HalSurface<hal::api::Dx12>>,
-    #[cfg(gles)]
-    pub gl: Option<HalSurface<hal::api::Gles>>,
+    pub surface_per_backend: HashMap<Backend, Box<dyn hal::DynSurface>>,
 }
 
 impl ResourceType for Surface {
@@ -137,34 +138,41 @@ impl crate::storage::StorageItem for Surface {
 impl Surface {
     pub fn get_capabilities<A: HalApi>(
         &self,
+        backend: Backend,
         adapter: &Adapter<A>,
     ) -> Result<hal::SurfaceCapabilities, GetSurfaceSupportError> {
-        self.get_capabilities_with_raw(&adapter.raw)
+        self.get_capabilities_with_raw(backend, &adapter.raw)
     }
 
-    pub fn get_capabilities_with_raw<A: HalApi>(
+    pub fn get_capabilities_with_raw(
         &self,
-        adapter: &hal::ExposedAdapter<A>,
+        backend: Backend,
+        adapter: &hal::DynExposedAdapter,
     ) -> Result<hal::SurfaceCapabilities, GetSurfaceSupportError> {
-        let suf = A::surface_as_hal(self).ok_or(GetSurfaceSupportError::Unsupported)?;
+        let suf = self
+            .raw(backend)
+            .ok_or(GetSurfaceSupportError::Unsupported)?;
         profiling::scope!("surface_capabilities");
-        let caps = unsafe {
-            adapter
-                .adapter
-                .surface_capabilities(suf)
-                .ok_or(GetSurfaceSupportError::Unsupported)?
-        };
+        let caps = unsafe { adapter.adapter.surface_capabilities(suf) }
+            .ok_or(GetSurfaceSupportError::Unsupported)?;
 
         Ok(caps)
     }
+
+    pub fn raw(&self, backend: Backend) -> Option<&dyn hal::DynSurface> {
+        self.surface_per_backend
+            .get(&backend)
+            .map(|surface| surface.as_ref())
+    }
 }
 
 pub struct Adapter<A: HalApi> {
-    pub(crate) raw: hal::ExposedAdapter<A>,
+    pub(crate) raw: hal::DynExposedAdapter,
+    _marker: std::marker::PhantomData<A>,
 }
 
 impl<A: HalApi> Adapter<A> {
-    fn new(mut raw: hal::ExposedAdapter<A>) -> Self {
+    fn new(mut raw: hal::DynExposedAdapter) -> Self {
         // WebGPU requires this offset alignment as lower bound on all adapters.
         const MIN_BUFFER_OFFSET_ALIGNMENT_LOWER_BOUND: u32 = 32;
 
@@ -177,7 +185,10 @@ impl<A: HalApi> Adapter<A> {
             .min_storage_buffer_offset_alignment
             .max(MIN_BUFFER_OFFSET_ALIGNMENT_LOWER_BOUND);
 
-        Self { raw }
+        Self {
+            raw,
+            _marker: std::marker::PhantomData,
+        }
     }
 
     pub fn is_surface_supported(&self, surface: &Surface) -> bool {
@@ -185,7 +196,7 @@ impl<A: HalApi> Adapter<A> {
         //
         // This could occur if the user is running their app on Wayland but Vulkan does not support
         // VK_KHR_wayland_surface.
-        surface.get_capabilities(self).is_ok()
+        surface.get_capabilities(A::VARIANT, self).is_ok()
     }
 
     pub(crate) fn get_texture_format_features(
@@ -259,7 +270,7 @@ impl<A: HalApi> Adapter<A> {
     #[allow(clippy::type_complexity)]
     fn create_device_and_queue_from_hal(
         self: &Arc<Self>,
-        hal_device: OpenDevice<A>,
+        hal_device: hal::DynOpenDevice,
         desc: &DeviceDescriptor,
         instance_flags: wgt::InstanceFlags,
         trace_path: Option<&std::path::Path>,
@@ -267,15 +278,15 @@ impl<A: HalApi> Adapter<A> {
         api_log!("Adapter::create_device");
 
         if let Ok(device) = Device::new(
-            Box::new(hal_device.device),
-            &hal_device.queue,
+            hal_device.device,
+            hal_device.queue.as_ref(),
             self,
             desc,
             trace_path,
             instance_flags,
         ) {
             let device = Arc::new(device);
-            let queue = Arc::new(Queue::new(device.clone(), Box::new(hal_device.queue)));
+            let queue = Arc::new(Queue::new(device.clone(), hal_device.queue));
             device.set_queue(&queue);
             return Ok((device, queue));
         }
@@ -456,85 +467,42 @@ impl Global {
     ) -> Result<SurfaceId, CreateSurfaceError> {
         profiling::scope!("Instance::create_surface");
 
-        fn init<A: HalApi>(
-            errors: &mut HashMap<Backend, hal::InstanceError>,
-            any_created: &mut bool,
-            backend: Backend,
-            inst: &Option<A::Instance>,
-            display_handle: raw_window_handle::RawDisplayHandle,
-            window_handle: raw_window_handle::RawWindowHandle,
-        ) -> Option<HalSurface<A>> {
-            inst.as_ref().and_then(|inst| {
-                match unsafe { inst.create_surface(display_handle, window_handle) } {
-                    Ok(raw) => {
-                        *any_created = true;
-                        Some(raw)
-                    }
-                    Err(err) => {
-                        log::debug!(
-                            "Instance::create_surface: failed to create surface for {:?}: {:?}",
-                            backend,
-                            err
-                        );
-                        errors.insert(backend, err);
-                        None
-                    }
-                }
-            })
-        }
-
         let mut errors = HashMap::default();
-        let mut any_created = false;
+        let mut surface_per_backend = HashMap::default();
 
-        let surface = Surface {
-            presentation: Mutex::new(rank::SURFACE_PRESENTATION, None),
+        for (backend, instance) in &self.instance.instance_per_backend {
+            match unsafe {
+                instance
+                    .as_ref()
+                    .create_surface(display_handle, window_handle)
+            } {
+                Ok(raw) => {
+                    surface_per_backend.insert(*backend, raw);
+                }
+                Err(err) => {
+                    log::debug!(
+                        "Instance::create_surface: failed to create surface for {:?}: {:?}",
+                        backend,
+                        err
+                    );
+                    errors.insert(*backend, err);
+                }
+            }
+        }
 
-            #[cfg(vulkan)]
-            vulkan: init::<hal::api::Vulkan>(
-                &mut errors,
-                &mut any_created,
-                Backend::Vulkan,
-                &self.instance.vulkan,
-                display_handle,
-                window_handle,
-            ),
-            #[cfg(metal)]
-            metal: init::<hal::api::Metal>(
-                &mut errors,
-                &mut any_created,
-                Backend::Metal,
-                &self.instance.metal,
-                display_handle,
-                window_handle,
-            ),
-            #[cfg(dx12)]
-            dx12: init::<hal::api::Dx12>(
-                &mut errors,
-                &mut any_created,
-                Backend::Dx12,
-                &self.instance.dx12,
-                display_handle,
-                window_handle,
-            ),
-            #[cfg(gles)]
-            gl: init::<hal::api::Gles>(
-                &mut errors,
-                &mut any_created,
-                Backend::Gl,
-                &self.instance.gl,
-                display_handle,
-                window_handle,
-            ),
-        };
+        if surface_per_backend.is_empty() {
+            Err(CreateSurfaceError::FailedToCreateSurfaceForAnyBackend(
+                errors,
+            ))
+        } else {
+            let surface = Surface {
+                presentation: Mutex::new(rank::SURFACE_PRESENTATION, None),
+                surface_per_backend,
+            };
 
-        if any_created {
             #[allow(clippy::arc_with_non_send_sync)]
             let id = self.surfaces.prepare(id_in).assign(Arc::new(surface));
             Ok(id)
-        } else {
-            Err(CreateSurfaceError::FailedToCreateSurfaceForAnyBackend(
-                errors,
-            ))
         }
     }
 
@@ -549,33 +517,31 @@ impl Global {
     ) -> Result<SurfaceId, CreateSurfaceError> {
         profiling::scope!("Instance::create_surface_metal");
 
+        let instance = self
+            .instance
+            .raw(Backend::Metal)
+            .ok_or(CreateSurfaceError::BackendNotEnabled(Backend::Metal))?;
+        let instance_metal: &hal::metal::Instance = instance.as_any().downcast_ref().unwrap();
+
+        let layer = layer.cast();
+        // SAFETY: We do this cast and deref. (rather than using `metal` to get the
+        // object we want) to avoid direct coupling on the `metal` crate.
+        //
+        // To wit, this pointer…
+        //
+        // - …is properly aligned.
+        // - …is dereferenceable to a `MetalLayerRef` as an invariant of the `metal`
+        //   field.
+        // - …points to an _initialized_ `MetalLayerRef`.
+        // - …is only ever aliased via an immutable reference that lives within this
+        //   lexical scope.
+        let layer = unsafe { &*layer };
+        let raw_surface: Box<dyn hal::DynSurface> =
+            Box::new(instance_metal.create_surface_from_layer(layer));
+
         let surface = Surface {
             presentation: Mutex::new(rank::SURFACE_PRESENTATION, None),
-            metal: Some(self.instance.metal.as_ref().map_or(
-                Err(CreateSurfaceError::BackendNotEnabled(Backend::Metal)),
-                |inst| {
-                    let layer = layer.cast();
-                    // SAFETY: We do this cast and deref. (rather than using `metal` to get the
-                    // object we want) to avoid direct coupling on the `metal` crate.
-                    //
-                    // To wit, this pointer…
-                    //
-                    // - …is properly aligned.
-                    // - …is dereferenceable to a `MetalLayerRef` as an invariant of the `metal`
-                    //   field.
-                    // - …points to an _initialized_ `MetalLayerRef`.
-                    // - …is only ever aliased via an immutable reference that lives within this
-                    //   lexical scope.
-                    let layer = unsafe { &*layer };
-                    Ok(inst.create_surface_from_layer(layer))
-                },
-            )?),
-            #[cfg(dx12)]
-            dx12: None,
-            #[cfg(vulkan)]
-            vulkan: None,
-            #[cfg(gles)]
-            gl: None,
+            surface_per_backend: std::iter::once((Backend::Metal, raw_surface)).collect(),
         };
 
         let id = self.surfaces.prepare(id_in).assign(Arc::new(surface));
@@ -586,22 +552,18 @@ impl Global {
     fn instance_create_surface_dx12(
         &self,
         id_in: Option<SurfaceId>,
-        create_surface_func: impl FnOnce(&HalInstance<hal::api::Dx12>) -> HalSurface<hal::api::Dx12>,
+        create_surface_func: impl FnOnce(&hal::dx12::Instance) -> hal::dx12::Surface,
     ) -> Result<SurfaceId, CreateSurfaceError> {
+        let instance = self
+            .instance
+            .raw(Backend::Dx12)
+            .ok_or(CreateSurfaceError::BackendNotEnabled(Backend::Dx12))?;
+        let instance_dx12 = instance.as_any().downcast_ref().unwrap();
+        let surface: Box<dyn hal::DynSurface> = Box::new(create_surface_func(instance_dx12));
+
         let surface = Surface {
             presentation: Mutex::new(rank::SURFACE_PRESENTATION, None),
-            dx12: Some(create_surface_func(
-                self.instance
-                    .dx12
-                    .as_ref()
-                    .ok_or(CreateSurfaceError::BackendNotEnabled(Backend::Dx12))?,
-            )),
-            #[cfg(metal)]
-            metal: None,
-            #[cfg(vulkan)]
-            vulkan: None,
-            #[cfg(gles)]
-            gl: None,
+            surface_per_backend: std::iter::once((Backend::Dx12, surface)).collect(),
         };
 
         let id = self.surfaces.prepare(id_in).assign(Arc::new(surface));
@@ -658,11 +620,10 @@ impl Global {
 
         api_log!("Surface::drop {id:?}");
 
-        fn unconfigure<A: HalApi>(surface: &Option<HalSurface<A>>, present: &Presentation) {
-            if let Some(surface) = surface {
+        fn unconfigure<A: HalApi>(surface: &Surface, present: &Presentation) {
+            if let Some(surface) = surface.raw(A::VARIANT) {
                 if let Some(device) = present.device.downcast_ref::<A>() {
-                    use hal::Surface;
-                    unsafe { surface.unconfigure(device.raw().as_any().downcast_ref().unwrap()) };
+                    unsafe { surface.unconfigure(device.raw()) };
                 }
             }
         }
@@ -672,27 +633,31 @@ impl Global {
             .expect("Surface cannot be destroyed because is still in use");
 
         if let Some(present) = surface.presentation.lock().take() {
+            // TODO(#5124): Becomes a loop once we use Arc<Device>
             #[cfg(vulkan)]
-            unconfigure::<hal::api::Vulkan>(&surface.vulkan, &present);
+            unconfigure::<hal::api::Vulkan>(&surface, &present);
             #[cfg(metal)]
-            unconfigure::<hal::api::Metal>(&surface.metal, &present);
+            unconfigure::<hal::api::Metal>(&surface, &present);
             #[cfg(dx12)]
-            unconfigure::<hal::api::Dx12>(&surface.dx12, &present);
+            unconfigure::<hal::api::Dx12>(&surface, &present);
             #[cfg(gles)]
-            unconfigure::<hal::api::Gles>(&surface.gl, &present);
+            unconfigure::<hal::api::Gles>(&surface, &present);
         }
         drop(surface)
     }
 
     fn enumerate<A: HalApi>(
         &self,
-        _: A,
-        instance: &Option<A::Instance>,
         inputs: &AdapterInputs<markers::Adapter>,
         list: &mut Vec<AdapterId>,
     ) {
-        let inst = match *instance {
-            Some(ref inst) => inst,
+        let inst = match self
+            .instance
+            .instance_per_backend
+            .iter()
+            .find(|(backend, _)| backend == &A::VARIANT)
+        {
+            Some((_, inst)) => inst.as_ref(),
             None => return,
         };
         let id_backend = match inputs.find(A::VARIANT) {
@@ -700,8 +665,8 @@ impl Global {
             None => return,
         };
 
-        profiling::scope!("enumerating", &*format!("{:?}", A::VARIANT));
-        let hub = HalApi::hub(self);
+        profiling::scope!("enumerating", &*format!("{:?}", backend));
+        let hub: &crate::hub::Hub<A> = HalApi::hub(self);
 
         let hal_adapters = unsafe { inst.enumerate_adapters(None) };
         for raw in hal_adapters {
@@ -719,23 +684,13 @@ impl Global {
         let mut adapters = Vec::new();
 
         #[cfg(vulkan)]
-        self.enumerate(
-            hal::api::Vulkan,
-            &self.instance.vulkan,
-            &inputs,
-            &mut adapters,
-        );
+        self.enumerate::<hal::vulkan::Api>(&inputs, &mut adapters);
         #[cfg(metal)]
-        self.enumerate(
-            hal::api::Metal,
-            &self.instance.metal,
-            &inputs,
-            &mut adapters,
-        );
+        self.enumerate::<hal::metal::Api>(&inputs, &mut adapters);
         #[cfg(dx12)]
-        self.enumerate(hal::api::Dx12, &self.instance.dx12, &inputs, &mut adapters);
+        self.enumerate::<hal::dx12::Api>(&inputs, &mut adapters);
         #[cfg(gles)]
-        self.enumerate(hal::api::Gles, &self.instance.gl, &inputs, &mut adapters);
+        self.enumerate::<hal::gles::Api>(&inputs, &mut adapters);
 
         adapters
     }
@@ -744,7 +699,7 @@ impl Global {
         &self,
         selected: &mut usize,
         new_id: Option<AdapterId>,
-        mut list: Vec<hal::ExposedAdapter<A>>,
+        mut list: Vec<hal::DynExposedAdapter>,
     ) -> Option<AdapterId> {
         match selected.checked_sub(list.len()) {
             Some(left) => {
@@ -752,7 +707,7 @@ impl Global {
                 None
             }
             None => {
-                let adapter = Adapter::new(list.swap_remove(*selected));
+                let adapter = Adapter::<A>::new(list.swap_remove(*selected));
                 log::info!("Adapter {:?} {:?}", A::VARIANT, adapter.raw.info);
                 let id = HalApi::hub(self)
                     .adapters
@@ -771,26 +726,27 @@ impl Global {
         profiling::scope!("Instance::request_adapter");
         api_log!("Instance::request_adapter");
 
-        fn gather<A: HalApi>(
-            _: A,
-            instance: Option<&A::Instance>,
+        fn gather(
+            backend: Backend,
+            instance: &Instance,
             inputs: &AdapterInputs<markers::Adapter>,
             compatible_surface: Option<&Surface>,
             force_software: bool,
             device_types: &mut Vec<wgt::DeviceType>,
-        ) -> (Option<Id<markers::Adapter>>, Vec<hal::ExposedAdapter<A>>) {
-            let id = inputs.find(A::VARIANT);
-            match (id, instance) {
+        ) -> (Option<Id<markers::Adapter>>, Vec<hal::DynExposedAdapter>) {
+            let id = inputs.find(backend);
+            match (id, instance.raw(backend)) {
                 (Some(id), Some(inst)) => {
                     let compatible_hal_surface =
-                        compatible_surface.and_then(|surface| A::surface_as_hal(surface));
+                        compatible_surface.and_then(|surface| surface.raw(backend));
                     let mut adapters = unsafe { inst.enumerate_adapters(compatible_hal_surface) };
                     if force_software {
                         adapters.retain(|exposed| exposed.info.device_type == wgt::DeviceType::Cpu);
                     }
                     if let Some(surface) = compatible_surface {
-                        adapters
-                            .retain(|exposed| surface.get_capabilities_with_raw(exposed).is_ok());
+                        adapters.retain(|exposed| {
+                            surface.get_capabilities_with_raw(backend, exposed).is_ok()
+                        });
                     }
                     device_types.extend(adapters.iter().map(|ad| ad.info.device_type));
                     (id, adapters)
@@ -812,8 +768,8 @@ impl Global {
 
         #[cfg(vulkan)]
         let (id_vulkan, adapters_vk) = gather(
-            hal::api::Vulkan,
-            self.instance.vulkan.as_ref(),
+            Backend::Vulkan,
+            &self.instance,
             &inputs,
             compatible_surface,
             desc.force_fallback_adapter,
@@ -821,8 +777,8 @@ impl Global {
         );
         #[cfg(metal)]
         let (id_metal, adapters_metal) = gather(
-            hal::api::Metal,
-            self.instance.metal.as_ref(),
+            Backend::Metal,
+            &self.instance,
             &inputs,
             compatible_surface,
             desc.force_fallback_adapter,
@@ -830,8 +786,8 @@ impl Global {
         );
         #[cfg(dx12)]
         let (id_dx12, adapters_dx12) = gather(
-            hal::api::Dx12,
-            self.instance.dx12.as_ref(),
+            Backend::Dx12,
+            &self.instance,
             &inputs,
             compatible_surface,
             desc.force_fallback_adapter,
@@ -839,8 +795,8 @@ impl Global {
         );
         #[cfg(gles)]
         let (id_gl, adapters_gl) = gather(
-            hal::api::Gles,
-            self.instance.gl.as_ref(),
+            Backend::Gl,
+            &self.instance,
             &inputs,
             compatible_surface,
             desc.force_fallback_adapter,
@@ -899,19 +855,19 @@ impl Global {
 
         let mut selected = preferred_gpu.unwrap_or(0);
         #[cfg(vulkan)]
-        if let Some(id) = self.select(&mut selected, id_vulkan, adapters_vk) {
+        if let Some(id) = self.select::<hal::api::Vulkan>(&mut selected, id_vulkan, adapters_vk) {
             return Ok(id);
         }
         #[cfg(metal)]
-        if let Some(id) = self.select(&mut selected, id_metal, adapters_metal) {
+        if let Some(id) = self.select::<hal::api::Metal>(&mut selected, id_metal, adapters_metal) {
             return Ok(id);
         }
         #[cfg(dx12)]
-        if let Some(id) = self.select(&mut selected, id_dx12, adapters_dx12) {
+        if let Some(id) = self.select::<hal::api::Dx12>(&mut selected, id_dx12, adapters_dx12) {
             return Ok(id);
         }
         #[cfg(gles)]
-        if let Some(id) = self.select(&mut selected, id_gl, adapters_gl) {
+        if let Some(id) = self.select::<hal::api::Gles>(&mut selected, id_gl, adapters_gl) {
             return Ok(id);
         }
         let _ = selected;
@@ -925,7 +881,7 @@ impl Global {
     /// `hal_adapter` must be created from this global internal instance handle.
     pub unsafe fn create_adapter_from_hal<A: HalApi>(
         &self,
-        hal_adapter: hal::ExposedAdapter<A>,
+        hal_adapter: hal::DynExposedAdapter,
         input: Option<AdapterId>,
     ) -> AdapterId {
         profiling::scope!("Instance::create_adapter_from_hal");
@@ -934,13 +890,13 @@ impl Global {
 
         let id = match A::VARIANT {
             #[cfg(vulkan)]
-            Backend::Vulkan => fid.assign(Arc::new(Adapter::new(hal_adapter))),
+            Backend::Vulkan => fid.assign(Arc::new(Adapter::<A>::new(hal_adapter))),
             #[cfg(metal)]
-            Backend::Metal => fid.assign(Arc::new(Adapter::new(hal_adapter))),
+            Backend::Metal => fid.assign(Arc::new(Adapter::<A>::new(hal_adapter))),
             #[cfg(dx12)]
-            Backend::Dx12 => fid.assign(Arc::new(Adapter::new(hal_adapter))),
+            Backend::Dx12 => fid.assign(Arc::new(Adapter::<A>::new(hal_adapter))),
             #[cfg(gles)]
-            Backend::Gl => fid.assign(Arc::new(Adapter::new(hal_adapter))),
+            Backend::Gl => fid.assign(Arc::new(Adapter::<A>::new(hal_adapter))),
             _ => unreachable!(),
         };
         resource_log!("Created Adapter {:?}", id);
@@ -1076,7 +1032,7 @@ impl Global {
     pub unsafe fn create_device_from_hal<A: HalApi>(
         &self,
         adapter_id: AdapterId,
-        hal_device: OpenDevice<A>,
+        hal_device: hal::DynOpenDevice,
         desc: &DeviceDescriptor,
         trace_path: Option<&std::path::Path>,
         device_id_in: Option<DeviceId>,
diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs
index d0f09a97f7..4ac286b497 100644
--- a/wgpu-core/src/present.rs
+++ b/wgpu-core/src/present.rs
@@ -30,7 +30,7 @@ const FRAME_TIMEOUT_MS: u32 = 1000;
 
 #[derive(Debug)]
 pub(crate) struct Presentation {
-    pub(crate) device: AnyDevice,
+    pub(crate) device: AnyDevice, // TODO(#5124): use device: Arc<Device>
     pub(crate) config: wgt::SurfaceConfiguration<Vec<wgt::TextureFormat>>,
     pub(crate) acquired_texture: Option<id::TextureId>,
 }
@@ -153,10 +153,9 @@ impl Global {
 
         let fence = device.fence.read();
 
-        let suf = A::surface_as_hal(surface.as_ref());
+        let suf = surface.raw(A::VARIANT).unwrap();
         let (texture_id, status) = match unsafe {
-            use hal::DynSurface;
-            suf.unwrap().acquire_texture(
+            suf.acquire_texture(
                 Some(std::time::Duration::from_millis(FRAME_TIMEOUT_MS as u64)),
                 fence.as_ref(),
             )
@@ -304,7 +303,7 @@ impl Global {
                     .lock()
                     .textures
                     .remove(texture.tracker_index());
-                let suf = A::surface_as_hal(&surface);
+                let suf = surface.raw(A::VARIANT).unwrap();
                 let exclusive_snatch_guard = device.snatchable_lock.write();
                 match texture.inner.snatch(exclusive_snatch_guard).unwrap() {
                     resource::TextureInner::Surface { raw, parent_id } => {
@@ -312,7 +311,7 @@ impl Global {
                             log::error!("Presented frame is from a different surface");
                             Err(hal::SurfaceError::Lost)
                         } else {
-                            unsafe { queue.raw().present(suf.unwrap(), raw) }
+                            unsafe { queue.raw().present(suf, raw) }
                         }
                     }
                     _ => unreachable!(),
@@ -379,12 +378,11 @@ impl Global {
                     .lock()
                     .textures
                     .remove(texture.tracker_index());
-                let suf = A::surface_as_hal(&surface);
+                let suf = surface.raw(A::VARIANT);
                 let exclusive_snatch_guard = device.snatchable_lock.write();
                 match texture.inner.snatch(exclusive_snatch_guard).unwrap() {
                     resource::TextureInner::Surface { raw, parent_id } => {
                         if surface_id == parent_id {
-                            use hal::DynSurface;
                             unsafe { suf.unwrap().discard_texture(raw) };
                         } else {
                             log::warn!("Surface texture is outdated");
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 6a3c02ece4..e6a33fa0fb 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -1271,7 +1271,10 @@ impl Global {
 
         let hub = A::hub(self);
         let adapter = hub.adapters.get(id).ok();
-        let hal_adapter = adapter.as_ref().map(|adapter| &adapter.raw.adapter);
+        let hal_adapter = adapter
+            .as_ref()
+            .map(|adapter| &adapter.raw.adapter)
+            .and_then(|adapter| adapter.as_any().downcast_ref());
 
         hal_adapter_callback(hal_adapter)
     }
@@ -1328,7 +1331,8 @@ impl Global {
         let surface = self.surfaces.get(id).ok();
         let hal_surface = surface
             .as_ref()
-            .and_then(|surface| A::surface_as_hal(surface));
+            .and_then(|surface| surface.raw(A::VARIANT))
+            .and_then(|surface| surface.as_any().downcast_ref());
 
         hal_surface_callback(hal_surface)
     }
diff --git a/wgpu-hal/src/dynamic/adapter.rs b/wgpu-hal/src/dynamic/adapter.rs
index 7f9b63a838..aebe8ec775 100644
--- a/wgpu-hal/src/dynamic/adapter.rs
+++ b/wgpu-hal/src/dynamic/adapter.rs
@@ -1,4 +1,6 @@
-use crate::{Adapter, DeviceError, SurfaceCapabilities, TextureFormatCapabilities};
+use crate::{
+    Adapter, Api, DeviceError, OpenDevice, SurfaceCapabilities, TextureFormatCapabilities,
+};
 
 use super::{DynDevice, DynQueue, DynResource, DynResourceExt, DynSurface};
 
@@ -7,6 +9,15 @@ pub struct DynOpenDevice {
     pub queue: Box<dyn DynQueue>,
 }
 
+impl<A: Api> From<OpenDevice<A>> for DynOpenDevice {
+    fn from(open_device: OpenDevice<A>) -> Self {
+        Self {
+            device: Box::new(open_device.device),
+            queue: Box::new(open_device.queue),
+        }
+    }
+}
+
 pub trait DynAdapter: DynResource {
     unsafe fn open(
         &self,
diff --git a/wgpu-hal/src/dynamic/instance.rs b/wgpu-hal/src/dynamic/instance.rs
index 80d834544d..4e811eb0cd 100644
--- a/wgpu-hal/src/dynamic/instance.rs
+++ b/wgpu-hal/src/dynamic/instance.rs
@@ -1,7 +1,7 @@
 // Box casts are needed, alternative would be a temporaries which are more verbose and not more expressive.
 #![allow(trivial_casts)]
 
-use crate::{Capabilities, Instance, InstanceError};
+use crate::{Api, Capabilities, ExposedAdapter, Instance, InstanceError};
 
 use super::{DynAdapter, DynResource, DynResourceExt as _, DynSurface};
 
@@ -12,6 +12,17 @@ pub struct DynExposedAdapter {
     pub capabilities: Capabilities,
 }
 
+impl<A: Api> From<ExposedAdapter<A>> for DynExposedAdapter {
+    fn from(exposed_adapter: ExposedAdapter<A>) -> Self {
+        Self {
+            adapter: Box::new(exposed_adapter.adapter),
+            info: exposed_adapter.info,
+            features: exposed_adapter.features,
+            capabilities: exposed_adapter.capabilities,
+        }
+    }
+}
+
 pub trait DynInstance: DynResource {
     unsafe fn create_surface(
         &self,
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index b787130ba4..32ee37183f 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -72,7 +72,10 @@ impl ContextWgpuCore {
         &self,
         hal_adapter: hal::ExposedAdapter<A>,
     ) -> wgc::id::AdapterId {
-        unsafe { self.0.create_adapter_from_hal(hal_adapter, None) }
+        unsafe {
+            self.0
+                .create_adapter_from_hal::<A>(hal_adapter.into(), None)
+        }
     }
 
     pub unsafe fn adapter_as_hal<
@@ -109,9 +112,9 @@ impl ContextWgpuCore {
             log::error!("Feature 'trace' has been removed temporarily, see https://github.com/gfx-rs/wgpu/issues/5974");
         }
         let (device_id, queue_id, error) = unsafe {
-            self.0.create_device_from_hal(
+            self.0.create_device_from_hal::<A>(
                 *adapter,
-                hal_device,
+                hal_device.into(),
                 &desc.map_label(|l| l.map(Borrowed)),
                 None,
                 None,

From 3181251577477b27e3ccb431c9a8ec3c7ad8b8ea Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sun, 4 Aug 2024 23:01:46 +0200
Subject: [PATCH 214/226] Core adapter no longer uses any generics

---
 wgpu-core/src/binding_model.rs   | 12 ++++-----
 wgpu-core/src/command/bundle.rs  |  4 +--
 wgpu-core/src/command/mod.rs     |  4 +--
 wgpu-core/src/device/global.rs   |  6 ++---
 wgpu-core/src/device/queue.rs    |  4 +--
 wgpu-core/src/device/resource.rs |  8 +++---
 wgpu-core/src/hub.rs             |  2 +-
 wgpu-core/src/instance.rs        | 45 ++++++++++++++------------------
 wgpu-core/src/pipeline.rs        | 16 ++++++------
 wgpu-core/src/resource.rs        | 36 ++++++++++++++++---------
 wgpu-core/src/storage.rs         | 12 ++++++++-
 wgpu-hal/src/dynamic/instance.rs |  7 +++++
 12 files changed, 88 insertions(+), 68 deletions(-)

diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs
index 825a96418c..2b45302513 100644
--- a/wgpu-core/src/binding_model.rs
+++ b/wgpu-core/src/binding_model.rs
@@ -530,10 +530,10 @@ impl<A: HalApi> Drop for BindGroupLayout<A> {
     }
 }
 
-crate::impl_resource_type!(BindGroupLayout);
+crate::impl_resource_type_generic!(BindGroupLayout);
 crate::impl_labeled!(BindGroupLayout);
 crate::impl_parent_device!(BindGroupLayout);
-crate::impl_storage_item!(BindGroupLayout);
+crate::impl_storage_item_generic!(BindGroupLayout);
 
 impl<A: HalApi> BindGroupLayout<A> {
     pub(crate) fn raw(&self) -> &dyn hal::DynBindGroupLayout {
@@ -761,10 +761,10 @@ impl<A: HalApi> PipelineLayout<A> {
     }
 }
 
-crate::impl_resource_type!(PipelineLayout);
+crate::impl_resource_type_generic!(PipelineLayout);
 crate::impl_labeled!(PipelineLayout);
 crate::impl_parent_device!(PipelineLayout);
-crate::impl_storage_item!(PipelineLayout);
+crate::impl_storage_item_generic!(PipelineLayout);
 
 #[repr(C)]
 #[derive(Clone, Debug, Hash, Eq, PartialEq)]
@@ -985,10 +985,10 @@ impl<A: HalApi> BindGroup<A> {
     }
 }
 
-crate::impl_resource_type!(BindGroup);
+crate::impl_resource_type_generic!(BindGroup);
 crate::impl_labeled!(BindGroup);
 crate::impl_parent_device!(BindGroup);
-crate::impl_storage_item!(BindGroup);
+crate::impl_storage_item_generic!(BindGroup);
 crate::impl_trackable!(BindGroup);
 
 #[derive(Clone, Debug, Error)]
diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs
index 2f040e615b..a8a3528647 100644
--- a/wgpu-core/src/command/bundle.rs
+++ b/wgpu-core/src/command/bundle.rs
@@ -1146,10 +1146,10 @@ impl<A: HalApi> RenderBundle<A> {
     }
 }
 
-crate::impl_resource_type!(RenderBundle);
+crate::impl_resource_type_generic!(RenderBundle);
 crate::impl_labeled!(RenderBundle);
 crate::impl_parent_device!(RenderBundle);
-crate::impl_storage_item!(RenderBundle);
+crate::impl_storage_item_generic!(RenderBundle);
 crate::impl_trackable!(RenderBundle);
 
 /// A render bundle's current index buffer state.
diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs
index d16e7f6d05..df9360e775 100644
--- a/wgpu-core/src/command/mod.rs
+++ b/wgpu-core/src/command/mod.rs
@@ -526,10 +526,10 @@ impl<A: HalApi> CommandBuffer<A> {
     }
 }
 
-crate::impl_resource_type!(CommandBuffer);
+crate::impl_resource_type_generic!(CommandBuffer);
 crate::impl_labeled!(CommandBuffer);
 crate::impl_parent_device!(CommandBuffer);
-crate::impl_storage_item!(CommandBuffer);
+crate::impl_storage_item_generic!(CommandBuffer);
 
 /// A stream of commands for a render pass or compute pass.
 ///
diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index b50dcb9593..f456a00ca2 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -56,7 +56,7 @@ impl Global {
     ) -> Result<wgt::SurfaceCapabilities, instance::GetSurfaceSupportError> {
         profiling::scope!("Surface::get_capabilities");
         self.fetch_adapter_and_surface::<A, _, _>(surface_id, adapter_id, |adapter, surface| {
-            let mut hal_caps = surface.get_capabilities::<A>(A::VARIANT, adapter)?;
+            let mut hal_caps = surface.get_capabilities(adapter)?;
 
             hal_caps.formats.sort_by_key(|f| !f.is_srgb());
 
@@ -73,7 +73,7 @@ impl Global {
 
     fn fetch_adapter_and_surface<
         A: HalApi,
-        F: FnOnce(&Adapter<A>, &Surface) -> Result<B, instance::GetSurfaceSupportError>,
+        F: FnOnce(&Adapter, &Surface) -> Result<B, instance::GetSurfaceSupportError>,
         B,
     >(
         &self,
@@ -1908,7 +1908,7 @@ impl Global {
                     Err(_) => break 'error E::InvalidSurface,
                 };
 
-                let caps = match surface.get_capabilities::<A>(A::VARIANT, &device.adapter) {
+                let caps = match surface.get_capabilities(&device.adapter) {
                     Ok(caps) => caps,
                     Err(_) => break 'error E::UnsupportedQueueFamily,
                 };
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index fbcbbdcbed..cca59e0b1a 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -55,7 +55,7 @@ impl<A: HalApi> Queue<A> {
     }
 }
 
-crate::impl_resource_type!(Queue);
+crate::impl_resource_type_generic!(Queue);
 // TODO: https://github.com/gfx-rs/wgpu/issues/4014
 impl<A: HalApi> Labeled for Queue<A> {
     fn label(&self) -> &str {
@@ -63,7 +63,7 @@ impl<A: HalApi> Labeled for Queue<A> {
     }
 }
 crate::impl_parent_device!(Queue);
-crate::impl_storage_item!(Queue);
+crate::impl_storage_item_generic!(Queue);
 
 impl<A: HalApi> Drop for Queue<A> {
     fn drop(&mut self) {
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 7801ccd059..a7ce999407 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -79,7 +79,7 @@ use super::{
 /// trackers should be locked only when needed for the shortest time possible
 pub struct Device<A: HalApi> {
     raw: ManuallyDrop<Box<dyn hal::DynDevice>>,
-    pub(crate) adapter: Arc<Adapter<A>>,
+    pub(crate) adapter: Arc<Adapter>,
     pub(crate) queue: OnceCell<Weak<Queue<A>>>,
     queue_to_drop: OnceCell<Box<dyn hal::DynQueue>>,
     pub(crate) zero_buffer: ManuallyDrop<Box<dyn hal::DynBuffer>>,
@@ -222,7 +222,7 @@ impl<A: HalApi> Device<A> {
     pub(crate) fn new(
         raw_device: Box<dyn hal::DynDevice>,
         raw_queue: &dyn hal::DynQueue,
-        adapter: &Arc<Adapter<A>>,
+        adapter: &Arc<Adapter>,
         desc: &DeviceDescriptor,
         trace_path: Option<&std::path::Path>,
         instance_flags: wgt::InstanceFlags,
@@ -3656,6 +3656,6 @@ impl<A: HalApi> Device<A> {
     }
 }
 
-crate::impl_resource_type!(Device);
+crate::impl_resource_type_generic!(Device);
 crate::impl_labeled!(Device);
-crate::impl_storage_item!(Device);
+crate::impl_storage_item_generic!(Device);
diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs
index 3dfbe57adc..a4d04f3f7c 100644
--- a/wgpu-core/src/hub.rs
+++ b/wgpu-core/src/hub.rs
@@ -170,7 +170,7 @@ impl HubReport {
 ///
 /// [`A::hub(global)`]: HalApi::hub
 pub struct Hub<A: HalApi> {
-    pub(crate) adapters: Registry<Adapter<A>>,
+    pub(crate) adapters: Registry<Adapter>,
     pub(crate) devices: Registry<Device<A>>,
     pub(crate) queues: Registry<Queue<A>>,
     pub(crate) pipeline_layouts: Registry<PipelineLayout<A>>,
diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index 75f8bb4d45..9c0a5fd3bb 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -136,21 +136,19 @@ impl crate::storage::StorageItem for Surface {
 }
 
 impl Surface {
-    pub fn get_capabilities<A: HalApi>(
+    pub fn get_capabilities(
         &self,
-        backend: Backend,
-        adapter: &Adapter<A>,
+        adapter: &Adapter,
     ) -> Result<hal::SurfaceCapabilities, GetSurfaceSupportError> {
-        self.get_capabilities_with_raw(backend, &adapter.raw)
+        self.get_capabilities_with_raw(&adapter.raw)
     }
 
     pub fn get_capabilities_with_raw(
         &self,
-        backend: Backend,
         adapter: &hal::DynExposedAdapter,
     ) -> Result<hal::SurfaceCapabilities, GetSurfaceSupportError> {
         let suf = self
-            .raw(backend)
+            .raw(adapter.backend())
             .ok_or(GetSurfaceSupportError::Unsupported)?;
         profiling::scope!("surface_capabilities");
         let caps = unsafe { adapter.adapter.surface_capabilities(suf) }
@@ -166,12 +164,11 @@ impl Surface {
     }
 }
 
-pub struct Adapter<A: HalApi> {
+pub struct Adapter {
     pub(crate) raw: hal::DynExposedAdapter,
-    _marker: std::marker::PhantomData<A>,
 }
 
-impl<A: HalApi> Adapter<A> {
+impl Adapter {
     fn new(mut raw: hal::DynExposedAdapter) -> Self {
         // WebGPU requires this offset alignment as lower bound on all adapters.
         const MIN_BUFFER_OFFSET_ALIGNMENT_LOWER_BOUND: u32 = 32;
@@ -185,10 +182,7 @@ impl<A: HalApi> Adapter<A> {
             .min_storage_buffer_offset_alignment
             .max(MIN_BUFFER_OFFSET_ALIGNMENT_LOWER_BOUND);
 
-        Self {
-            raw,
-            _marker: std::marker::PhantomData,
-        }
+        Self { raw }
     }
 
     pub fn is_surface_supported(&self, surface: &Surface) -> bool {
@@ -196,7 +190,7 @@ impl<A: HalApi> Adapter<A> {
         //
         // This could occur if the user is running their app on Wayland but Vulkan does not support
         // VK_KHR_wayland_surface.
-        surface.get_capabilities(A::VARIANT, self).is_ok()
+        surface.get_capabilities(self).is_ok()
     }
 
     pub(crate) fn get_texture_format_features(
@@ -268,7 +262,7 @@ impl<A: HalApi> Adapter<A> {
     }
 
     #[allow(clippy::type_complexity)]
-    fn create_device_and_queue_from_hal(
+    fn create_device_and_queue_from_hal<A: HalApi>(
         self: &Arc<Self>,
         hal_device: hal::DynOpenDevice,
         desc: &DeviceDescriptor,
@@ -294,7 +288,7 @@ impl<A: HalApi> Adapter<A> {
     }
 
     #[allow(clippy::type_complexity)]
-    fn create_device_and_queue(
+    fn create_device_and_queue<A: HalApi>(
         self: &Arc<Self>,
         desc: &DeviceDescriptor,
         instance_flags: wgt::InstanceFlags,
@@ -707,9 +701,9 @@ impl Global {
                 None
             }
             None => {
-                let adapter = Adapter::<A>::new(list.swap_remove(*selected));
-                log::info!("Adapter {:?} {:?}", A::VARIANT, adapter.raw.info);
-                let id = HalApi::hub(self)
+                let adapter = Adapter::new(list.swap_remove(*selected));
+                log::info!("Adapter {:?}", adapter.raw.info);
+                let id = A::hub(self)
                     .adapters
                     .prepare(new_id)
                     .assign(Arc::new(adapter));
@@ -744,9 +738,8 @@ impl Global {
                         adapters.retain(|exposed| exposed.info.device_type == wgt::DeviceType::Cpu);
                     }
                     if let Some(surface) = compatible_surface {
-                        adapters.retain(|exposed| {
-                            surface.get_capabilities_with_raw(backend, exposed).is_ok()
-                        });
+                        adapters
+                            .retain(|exposed| surface.get_capabilities_with_raw(exposed).is_ok());
                     }
                     device_types.extend(adapters.iter().map(|ad| ad.info.device_type));
                     (id, adapters)
@@ -890,13 +883,13 @@ impl Global {
 
         let id = match A::VARIANT {
             #[cfg(vulkan)]
-            Backend::Vulkan => fid.assign(Arc::new(Adapter::<A>::new(hal_adapter))),
+            Backend::Vulkan => fid.assign(Arc::new(Adapter::new(hal_adapter))),
             #[cfg(metal)]
-            Backend::Metal => fid.assign(Arc::new(Adapter::<A>::new(hal_adapter))),
+            Backend::Metal => fid.assign(Arc::new(Adapter::new(hal_adapter))),
             #[cfg(dx12)]
-            Backend::Dx12 => fid.assign(Arc::new(Adapter::<A>::new(hal_adapter))),
+            Backend::Dx12 => fid.assign(Arc::new(Adapter::new(hal_adapter))),
             #[cfg(gles)]
-            Backend::Gl => fid.assign(Arc::new(Adapter::<A>::new(hal_adapter))),
+            Backend::Gl => fid.assign(Arc::new(Adapter::new(hal_adapter))),
             _ => unreachable!(),
         };
         resource_log!("Created Adapter {:?}", id);
diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs
index 80929c3b87..7e58962dbc 100644
--- a/wgpu-core/src/pipeline.rs
+++ b/wgpu-core/src/pipeline.rs
@@ -65,10 +65,10 @@ impl<A: HalApi> Drop for ShaderModule<A> {
     }
 }
 
-crate::impl_resource_type!(ShaderModule);
+crate::impl_resource_type_generic!(ShaderModule);
 crate::impl_labeled!(ShaderModule);
 crate::impl_parent_device!(ShaderModule);
-crate::impl_storage_item!(ShaderModule);
+crate::impl_storage_item_generic!(ShaderModule);
 
 impl<A: HalApi> ShaderModule<A> {
     pub(crate) fn raw(&self) -> &dyn hal::DynShaderModule {
@@ -262,10 +262,10 @@ impl<A: HalApi> Drop for ComputePipeline<A> {
     }
 }
 
-crate::impl_resource_type!(ComputePipeline);
+crate::impl_resource_type_generic!(ComputePipeline);
 crate::impl_labeled!(ComputePipeline);
 crate::impl_parent_device!(ComputePipeline);
-crate::impl_storage_item!(ComputePipeline);
+crate::impl_storage_item_generic!(ComputePipeline);
 crate::impl_trackable!(ComputePipeline);
 
 impl<A: HalApi> ComputePipeline<A> {
@@ -316,10 +316,10 @@ impl<A: HalApi> Drop for PipelineCache<A> {
     }
 }
 
-crate::impl_resource_type!(PipelineCache);
+crate::impl_resource_type_generic!(PipelineCache);
 crate::impl_labeled!(PipelineCache);
 crate::impl_parent_device!(PipelineCache);
-crate::impl_storage_item!(PipelineCache);
+crate::impl_storage_item_generic!(PipelineCache);
 
 impl<A: HalApi> PipelineCache<A> {
     pub(crate) fn raw(&self) -> &dyn hal::DynPipelineCache {
@@ -615,10 +615,10 @@ impl<A: HalApi> Drop for RenderPipeline<A> {
     }
 }
 
-crate::impl_resource_type!(RenderPipeline);
+crate::impl_resource_type_generic!(RenderPipeline);
 crate::impl_labeled!(RenderPipeline);
 crate::impl_parent_device!(RenderPipeline);
-crate::impl_storage_item!(RenderPipeline);
+crate::impl_storage_item_generic!(RenderPipeline);
 crate::impl_trackable!(RenderPipeline);
 
 impl<A: HalApi> RenderPipeline<A> {
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index e6a33fa0fb..2b06799a24 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -135,8 +135,9 @@ pub(crate) trait ResourceType {
     const TYPE: &'static str;
 }
 
+// TODO(#5124): Remove the typed version.
 #[macro_export]
-macro_rules! impl_resource_type {
+macro_rules! impl_resource_type_generic {
     ($ty:ident) => {
         impl<A: HalApi> $crate::resource::ResourceType for $ty<A> {
             const TYPE: &'static str = stringify!($ty);
@@ -144,6 +145,15 @@ macro_rules! impl_resource_type {
     };
 }
 
+#[macro_export]
+macro_rules! impl_resource_type {
+    ($ty:ident) => {
+        impl $crate::resource::ResourceType for $ty {
+            const TYPE: &'static str = stringify!($ty);
+        }
+    };
+}
+
 pub(crate) trait Labeled: ResourceType {
     /// Returns a string identifying this resource for logging and errors.
     ///
@@ -751,10 +761,10 @@ pub enum CreateBufferError {
     MissingDownlevelFlags(#[from] MissingDownlevelFlags),
 }
 
-crate::impl_resource_type!(Buffer);
+crate::impl_resource_type_generic!(Buffer);
 crate::impl_labeled!(Buffer);
 crate::impl_parent_device!(Buffer);
-crate::impl_storage_item!(Buffer);
+crate::impl_storage_item_generic!(Buffer);
 crate::impl_trackable!(Buffer);
 
 /// A buffer that has been marked as destroyed and is staged for actual deletion soon.
@@ -913,8 +923,8 @@ impl<A: HalApi> StagingBuffer<A> {
     }
 }
 
-crate::impl_resource_type!(StagingBuffer);
-crate::impl_storage_item!(StagingBuffer);
+crate::impl_resource_type_generic!(StagingBuffer);
+crate::impl_storage_item_generic!(StagingBuffer);
 
 #[derive(Debug)]
 pub struct FlushedStagingBuffer<A: HalApi> {
@@ -1498,10 +1508,10 @@ pub enum CreateTextureError {
     MissingDownlevelFlags(#[from] MissingDownlevelFlags),
 }
 
-crate::impl_resource_type!(Texture);
+crate::impl_resource_type_generic!(Texture);
 crate::impl_labeled!(Texture);
 crate::impl_parent_device!(Texture);
-crate::impl_storage_item!(Texture);
+crate::impl_storage_item_generic!(Texture);
 crate::impl_trackable!(Texture);
 
 impl<A: HalApi> Borrow<TextureSelector> for Texture<A> {
@@ -1666,10 +1676,10 @@ pub enum CreateTextureViewError {
 #[non_exhaustive]
 pub enum TextureViewDestroyError {}
 
-crate::impl_resource_type!(TextureView);
+crate::impl_resource_type_generic!(TextureView);
 crate::impl_labeled!(TextureView);
 crate::impl_parent_device!(TextureView);
-crate::impl_storage_item!(TextureView);
+crate::impl_storage_item_generic!(TextureView);
 crate::impl_trackable!(TextureView);
 
 /// Describes a [`Sampler`]
@@ -1775,10 +1785,10 @@ pub enum CreateSamplerError {
     MissingFeatures(#[from] MissingFeatures),
 }
 
-crate::impl_resource_type!(Sampler);
+crate::impl_resource_type_generic!(Sampler);
 crate::impl_labeled!(Sampler);
 crate::impl_parent_device!(Sampler);
-crate::impl_storage_item!(Sampler);
+crate::impl_storage_item_generic!(Sampler);
 crate::impl_trackable!(Sampler);
 
 #[derive(Clone, Debug, Error)]
@@ -1817,10 +1827,10 @@ impl<A: HalApi> Drop for QuerySet<A> {
     }
 }
 
-crate::impl_resource_type!(QuerySet);
+crate::impl_resource_type_generic!(QuerySet);
 crate::impl_labeled!(QuerySet);
 crate::impl_parent_device!(QuerySet);
-crate::impl_storage_item!(QuerySet);
+crate::impl_storage_item_generic!(QuerySet);
 crate::impl_trackable!(QuerySet);
 
 impl<A: HalApi> QuerySet<A> {
diff --git a/wgpu-core/src/storage.rs b/wgpu-core/src/storage.rs
index fda9cbd036..0adcf51abd 100644
--- a/wgpu-core/src/storage.rs
+++ b/wgpu-core/src/storage.rs
@@ -28,8 +28,9 @@ pub(crate) trait StorageItem: ResourceType {
     type Marker: Marker;
 }
 
+// TODO(#5124): Remove the typed version.
 #[macro_export]
-macro_rules! impl_storage_item {
+macro_rules! impl_storage_item_generic {
     ($ty:ident) => {
         impl<A: HalApi> $crate::storage::StorageItem for $ty<A> {
             type Marker = $crate::id::markers::$ty;
@@ -37,6 +38,15 @@ macro_rules! impl_storage_item {
     };
 }
 
+#[macro_export]
+macro_rules! impl_storage_item {
+    ($ty:ident) => {
+        impl $crate::storage::StorageItem for $ty {
+            type Marker = $crate::id::markers::$ty;
+        }
+    };
+}
+
 /// A table of `T` values indexed by the id type `I`.
 ///
 /// `Storage` implements [`std::ops::Index`], accepting `Id` values as
diff --git a/wgpu-hal/src/dynamic/instance.rs b/wgpu-hal/src/dynamic/instance.rs
index 4e811eb0cd..6bac974b17 100644
--- a/wgpu-hal/src/dynamic/instance.rs
+++ b/wgpu-hal/src/dynamic/instance.rs
@@ -12,6 +12,13 @@ pub struct DynExposedAdapter {
     pub capabilities: Capabilities,
 }
 
+impl DynExposedAdapter {
+    /// Returns the backend this adapter is using.
+    pub fn backend(&self) -> wgt::Backend {
+        self.info.backend
+    }
+}
+
 impl<A: Api> From<ExposedAdapter<A>> for DynExposedAdapter {
     fn from(exposed_adapter: ExposedAdapter<A>) -> Self {
         Self {

From 24498f04d48033b093dd0b69a9c41b295610f1af Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Mon, 5 Aug 2024 00:32:03 +0200
Subject: [PATCH 215/226] The second unraveling: hub and all types on it are
 generic-free! gfx_select macros are now empty husks that are waiting to be
 removed

---
 deno_webgpu/lib.rs                        |  20 +-
 deno_webgpu/surface.rs                    |  14 +-
 player/src/lib.rs                         | 111 ++++---
 tests/tests/mem_leaks.rs                  |  32 +-
 wgpu-core/src/binding_model.rs            |  89 +++--
 wgpu-core/src/command/bind.rs             |  59 ++--
 wgpu-core/src/command/bundle.rs           | 149 +++++----
 wgpu-core/src/command/clear.rs            |  21 +-
 wgpu-core/src/command/compute.rs          | 203 ++++++------
 wgpu-core/src/command/compute_command.rs  |  23 +-
 wgpu-core/src/command/dyn_compute_pass.rs |   4 +-
 wgpu-core/src/command/dyn_render_pass.rs  |   4 +-
 wgpu-core/src/command/memory_init.rs      |  52 ++-
 wgpu-core/src/command/mod.rs              |  76 ++---
 wgpu-core/src/command/query.rs            |  53 ++-
 wgpu-core/src/command/render.rs           | 377 +++++++++++-----------
 wgpu-core/src/command/render_command.rs   |  33 +-
 wgpu-core/src/command/timestamp_writes.rs |   6 +-
 wgpu-core/src/command/transfer.rs         |  51 ++-
 wgpu-core/src/device/any_device.rs        | 102 ------
 wgpu-core/src/device/global.rs            | 370 ++++++++++-----------
 wgpu-core/src/device/life.rs              |  40 ++-
 wgpu-core/src/device/mod.rs               |  20 +-
 wgpu-core/src/device/queue.rs             | 122 ++++---
 wgpu-core/src/device/resource.rs          | 125 +++----
 wgpu-core/src/global.rs                   |  79 +----
 wgpu-core/src/hal_api.rs                  |  24 --
 wgpu-core/src/hub.rs                      | 120 +++----
 wgpu-core/src/init_tracker/buffer.rs      |  18 +-
 wgpu-core/src/init_tracker/texture.rs     |  12 +-
 wgpu-core/src/instance.rs                 | 224 ++++++-------
 wgpu-core/src/lib.rs                      |  11 +-
 wgpu-core/src/pipeline.rs                 |  88 +++--
 wgpu-core/src/present.rs                  |  47 +--
 wgpu-core/src/registry.rs                 |  22 +-
 wgpu-core/src/resource.rs                 | 178 +++++-----
 wgpu-core/src/storage.rs                  |  10 -
 wgpu-core/src/track/buffer.rs             |  67 ++--
 wgpu-core/src/track/mod.rs                | 109 ++++---
 wgpu-core/src/track/texture.rs            |  83 +++--
 wgpu/src/api/surface_texture.rs           |   2 -
 wgpu/src/backend/webgpu.rs                |   8 +-
 wgpu/src/backend/wgpu_core.rs             |  31 +-
 wgpu/src/context.rs                       |  22 +-
 44 files changed, 1465 insertions(+), 1846 deletions(-)
 delete mode 100644 wgpu-core/src/device/any_device.rs

diff --git a/deno_webgpu/lib.rs b/deno_webgpu/lib.rs
index c1822ee2bc..86120be713 100644
--- a/deno_webgpu/lib.rs
+++ b/deno_webgpu/lib.rs
@@ -23,6 +23,7 @@ pub const UNSTABLE_FEATURE_NAME: &str = "webgpu";
 
 #[macro_use]
 mod macros {
+    // TODO(#5124): remove this macro.
     macro_rules! gfx_select {
     ($id:expr => $p0:ident.$p1:tt.$method:ident $params:tt) => {
       gfx_select!($id => {$p0.$p1}, $method $params)
@@ -33,24 +34,7 @@ mod macros {
     };
 
     ($id:expr => {$($c:tt)*}, $method:ident $params:tt) => {
-      match $id.backend() {
-        #[cfg(any(
-            all(not(target_arch = "wasm32"), not(target_os = "ios"), not(target_os = "macos")),
-            feature = "vulkan-portability"
-        ))]
-        wgpu_types::Backend::Vulkan => $($c)*.$method::<wgpu_core::api::Vulkan> $params,
-        #[cfg(all(not(target_arch = "wasm32"), any(target_os = "ios", target_os = "macos")))]
-        wgpu_types::Backend::Metal => $($c)*.$method::<wgpu_core::api::Metal> $params,
-        #[cfg(all(not(target_arch = "wasm32"), windows))]
-        wgpu_types::Backend::Dx12 => $($c)*.$method::<wgpu_core::api::Dx12> $params,
-        #[cfg(any(
-            all(unix, not(target_os = "macos"), not(target_os = "ios")),
-            feature = "angle",
-            target_arch = "wasm32"
-        ))]
-        wgpu_types::Backend::Gl => $($c)*.$method::<wgpu_core::api::Gles> $params,
-        other => panic!("Unexpected backend {:?}", other),
-      }
+        $($c)*.$method $params
     };
   }
 
diff --git a/deno_webgpu/surface.rs b/deno_webgpu/surface.rs
index a8b984eefe..9d9ba0d573 100644
--- a/deno_webgpu/surface.rs
+++ b/deno_webgpu/surface.rs
@@ -72,14 +72,10 @@ pub fn op_webgpu_surface_configure(
 #[serde]
 pub fn op_webgpu_surface_get_current_texture(
     state: &mut OpState,
-    #[smi] device_rid: ResourceId,
+    #[smi] _device_rid: ResourceId,
     #[smi] surface_rid: ResourceId,
 ) -> Result<WebGpuResult, AnyError> {
     let instance = state.borrow::<super::Instance>();
-    let device_resource = state
-        .resource_table
-        .get::<super::WebGpuDevice>(device_rid)?;
-    let device = device_resource.1;
     let surface_resource = state.resource_table.get::<WebGpuSurface>(surface_rid)?;
     let surface = surface_resource.1;
 
@@ -102,18 +98,14 @@ pub fn op_webgpu_surface_get_current_texture(
 #[op2(fast)]
 pub fn op_webgpu_surface_present(
     state: &mut OpState,
-    #[smi] device_rid: ResourceId,
+    #[smi] _device_rid: ResourceId,
     #[smi] surface_rid: ResourceId,
 ) -> Result<(), AnyError> {
     let instance = state.borrow::<super::Instance>();
-    let device_resource = state
-        .resource_table
-        .get::<super::WebGpuDevice>(device_rid)?;
-    let device = device_resource.1;
     let surface_resource = state.resource_table.get::<WebGpuSurface>(surface_rid)?;
     let surface = surface_resource.1;
 
-    let _ = gfx_select!(device => instance.surface_present(surface))?;
+    instance.surface_present(surface)?;
 
     Ok(())
 }
diff --git a/player/src/lib.rs b/player/src/lib.rs
index 5efeff1537..8ea4e775bd 100644
--- a/player/src/lib.rs
+++ b/player/src/lib.rs
@@ -8,12 +8,12 @@ use wgc::device::trace;
 use std::{borrow::Cow, fs, path::Path};
 
 pub trait GlobalPlay {
-    fn encode_commands<A: wgc::hal_api::HalApi>(
+    fn encode_commands(
         &self,
         encoder: wgc::id::CommandEncoderId,
         commands: Vec<trace::Command>,
     ) -> wgc::id::CommandBufferId;
-    fn process<A: wgc::hal_api::HalApi>(
+    fn process(
         &self,
         device: wgc::id::DeviceId,
         queue: wgc::id::QueueId,
@@ -24,7 +24,7 @@ pub trait GlobalPlay {
 }
 
 impl GlobalPlay for wgc::global::Global {
-    fn encode_commands<A: wgc::hal_api::HalApi>(
+    fn encode_commands(
         &self,
         encoder: wgc::id::CommandEncoderId,
         commands: Vec<trace::Command>,
@@ -38,33 +38,33 @@ impl GlobalPlay for wgc::global::Global {
                     dst_offset,
                     size,
                 } => self
-                    .command_encoder_copy_buffer_to_buffer::<A>(
+                    .command_encoder_copy_buffer_to_buffer(
                         encoder, src, src_offset, dst, dst_offset, size,
                     )
                     .unwrap(),
                 trace::Command::CopyBufferToTexture { src, dst, size } => self
-                    .command_encoder_copy_buffer_to_texture::<A>(encoder, &src, &dst, &size)
+                    .command_encoder_copy_buffer_to_texture(encoder, &src, &dst, &size)
                     .unwrap(),
                 trace::Command::CopyTextureToBuffer { src, dst, size } => self
-                    .command_encoder_copy_texture_to_buffer::<A>(encoder, &src, &dst, &size)
+                    .command_encoder_copy_texture_to_buffer(encoder, &src, &dst, &size)
                     .unwrap(),
                 trace::Command::CopyTextureToTexture { src, dst, size } => self
-                    .command_encoder_copy_texture_to_texture::<A>(encoder, &src, &dst, &size)
+                    .command_encoder_copy_texture_to_texture(encoder, &src, &dst, &size)
                     .unwrap(),
                 trace::Command::ClearBuffer { dst, offset, size } => self
-                    .command_encoder_clear_buffer::<A>(encoder, dst, offset, size)
+                    .command_encoder_clear_buffer(encoder, dst, offset, size)
                     .unwrap(),
                 trace::Command::ClearTexture {
                     dst,
                     subresource_range,
                 } => self
-                    .command_encoder_clear_texture::<A>(encoder, dst, &subresource_range)
+                    .command_encoder_clear_texture(encoder, dst, &subresource_range)
                     .unwrap(),
                 trace::Command::WriteTimestamp {
                     query_set_id,
                     query_index,
                 } => self
-                    .command_encoder_write_timestamp::<A>(encoder, query_set_id, query_index)
+                    .command_encoder_write_timestamp(encoder, query_set_id, query_index)
                     .unwrap(),
                 trace::Command::ResolveQuerySet {
                     query_set_id,
@@ -73,7 +73,7 @@ impl GlobalPlay for wgc::global::Global {
                     destination,
                     destination_offset,
                 } => self
-                    .command_encoder_resolve_query_set::<A>(
+                    .command_encoder_resolve_query_set(
                         encoder,
                         query_set_id,
                         start_query,
@@ -83,19 +83,19 @@ impl GlobalPlay for wgc::global::Global {
                     )
                     .unwrap(),
                 trace::Command::PushDebugGroup(marker) => self
-                    .command_encoder_push_debug_group::<A>(encoder, &marker)
+                    .command_encoder_push_debug_group(encoder, &marker)
                     .unwrap(),
                 trace::Command::PopDebugGroup => {
-                    self.command_encoder_pop_debug_group::<A>(encoder).unwrap()
+                    self.command_encoder_pop_debug_group(encoder).unwrap()
                 }
                 trace::Command::InsertDebugMarker(marker) => self
-                    .command_encoder_insert_debug_marker::<A>(encoder, &marker)
+                    .command_encoder_insert_debug_marker(encoder, &marker)
                     .unwrap(),
                 trace::Command::RunComputePass {
                     base,
                     timestamp_writes,
                 } => {
-                    self.compute_pass_end_with_unresolved_commands::<A>(
+                    self.compute_pass_end_with_unresolved_commands(
                         encoder,
                         base,
                         timestamp_writes.as_ref(),
@@ -109,7 +109,7 @@ impl GlobalPlay for wgc::global::Global {
                     timestamp_writes,
                     occlusion_query_set_id,
                 } => {
-                    self.render_pass_end_with_unresolved_commands::<A>(
+                    self.render_pass_end_with_unresolved_commands(
                         encoder,
                         base,
                         &target_colors,
@@ -121,15 +121,15 @@ impl GlobalPlay for wgc::global::Global {
                 }
             }
         }
-        let (cmd_buf, error) = self
-            .command_encoder_finish::<A>(encoder, &wgt::CommandBufferDescriptor { label: None });
+        let (cmd_buf, error) =
+            self.command_encoder_finish(encoder, &wgt::CommandBufferDescriptor { label: None });
         if let Some(e) = error {
             panic!("{e}");
         }
         cmd_buf
     }
 
-    fn process<A: wgc::hal_api::HalApi>(
+    fn process(
         &self,
         device: wgc::id::DeviceId,
         queue: wgc::id::QueueId,
@@ -150,83 +150,83 @@ impl GlobalPlay for wgc::global::Global {
                 panic!("Unexpected Surface action: winit feature is not enabled")
             }
             Action::CreateBuffer(id, desc) => {
-                let (_, error) = self.device_create_buffer::<A>(device, &desc, Some(id));
+                let (_, error) = self.device_create_buffer(device, &desc, Some(id));
                 if let Some(e) = error {
                     panic!("{e}");
                 }
             }
             Action::FreeBuffer(id) => {
-                self.buffer_destroy::<A>(id).unwrap();
+                self.buffer_destroy(id).unwrap();
             }
             Action::DestroyBuffer(id) => {
-                self.buffer_drop::<A>(id);
+                self.buffer_drop(id);
             }
             Action::CreateTexture(id, desc) => {
-                let (_, error) = self.device_create_texture::<A>(device, &desc, Some(id));
+                let (_, error) = self.device_create_texture(device, &desc, Some(id));
                 if let Some(e) = error {
                     panic!("{e}");
                 }
             }
             Action::FreeTexture(id) => {
-                self.texture_destroy::<A>(id).unwrap();
+                self.texture_destroy(id).unwrap();
             }
             Action::DestroyTexture(id) => {
-                self.texture_drop::<A>(id);
+                self.texture_drop(id);
             }
             Action::CreateTextureView {
                 id,
                 parent_id,
                 desc,
             } => {
-                let (_, error) = self.texture_create_view::<A>(parent_id, &desc, Some(id));
+                let (_, error) = self.texture_create_view(parent_id, &desc, Some(id));
                 if let Some(e) = error {
                     panic!("{e}");
                 }
             }
             Action::DestroyTextureView(id) => {
-                self.texture_view_drop::<A>(id).unwrap();
+                self.texture_view_drop(id).unwrap();
             }
             Action::CreateSampler(id, desc) => {
-                let (_, error) = self.device_create_sampler::<A>(device, &desc, Some(id));
+                let (_, error) = self.device_create_sampler(device, &desc, Some(id));
                 if let Some(e) = error {
                     panic!("{e}");
                 }
             }
             Action::DestroySampler(id) => {
-                self.sampler_drop::<A>(id);
+                self.sampler_drop(id);
             }
             Action::GetSurfaceTexture { id, parent_id } => {
-                self.surface_get_current_texture::<A>(parent_id, Some(id))
+                self.surface_get_current_texture(parent_id, Some(id))
                     .unwrap()
                     .texture_id
                     .unwrap();
             }
             Action::CreateBindGroupLayout(id, desc) => {
-                let (_, error) = self.device_create_bind_group_layout::<A>(device, &desc, Some(id));
+                let (_, error) = self.device_create_bind_group_layout(device, &desc, Some(id));
                 if let Some(e) = error {
                     panic!("{e}");
                 }
             }
             Action::DestroyBindGroupLayout(id) => {
-                self.bind_group_layout_drop::<A>(id);
+                self.bind_group_layout_drop(id);
             }
             Action::CreatePipelineLayout(id, desc) => {
-                let (_, error) = self.device_create_pipeline_layout::<A>(device, &desc, Some(id));
+                let (_, error) = self.device_create_pipeline_layout(device, &desc, Some(id));
                 if let Some(e) = error {
                     panic!("{e}");
                 }
             }
             Action::DestroyPipelineLayout(id) => {
-                self.pipeline_layout_drop::<A>(id);
+                self.pipeline_layout_drop(id);
             }
             Action::CreateBindGroup(id, desc) => {
-                let (_, error) = self.device_create_bind_group::<A>(device, &desc, Some(id));
+                let (_, error) = self.device_create_bind_group(device, &desc, Some(id));
                 if let Some(e) = error {
                     panic!("{e}");
                 }
             }
             Action::DestroyBindGroup(id) => {
-                self.bind_group_drop::<A>(id);
+                self.bind_group_drop(id);
             }
             Action::CreateShaderModule { id, desc, data } => {
                 log::debug!("Creating shader from {}", data);
@@ -239,14 +239,13 @@ impl GlobalPlay for wgc::global::Global {
                 } else {
                     panic!("Unknown shader {}", data);
                 };
-                let (_, error) =
-                    self.device_create_shader_module::<A>(device, &desc, source, Some(id));
+                let (_, error) = self.device_create_shader_module(device, &desc, source, Some(id));
                 if let Some(e) = error {
                     println!("shader compilation error:\n---{code}\n---\n{e}");
                 }
             }
             Action::DestroyShaderModule(id) => {
-                self.shader_module_drop::<A>(id);
+                self.shader_module_drop(id);
             }
             Action::CreateComputePipeline {
                 id,
@@ -261,13 +260,13 @@ impl GlobalPlay for wgc::global::Global {
                             group_ids: &ic.group_ids,
                         });
                 let (_, error) =
-                    self.device_create_compute_pipeline::<A>(device, &desc, Some(id), implicit_ids);
+                    self.device_create_compute_pipeline(device, &desc, Some(id), implicit_ids);
                 if let Some(e) = error {
                     panic!("{e}");
                 }
             }
             Action::DestroyComputePipeline(id) => {
-                self.compute_pipeline_drop::<A>(id);
+                self.compute_pipeline_drop(id);
             }
             Action::CreateRenderPipeline {
                 id,
@@ -282,24 +281,24 @@ impl GlobalPlay for wgc::global::Global {
                             group_ids: &ic.group_ids,
                         });
                 let (_, error) =
-                    self.device_create_render_pipeline::<A>(device, &desc, Some(id), implicit_ids);
+                    self.device_create_render_pipeline(device, &desc, Some(id), implicit_ids);
                 if let Some(e) = error {
                     panic!("{e}");
                 }
             }
             Action::DestroyRenderPipeline(id) => {
-                self.render_pipeline_drop::<A>(id);
+                self.render_pipeline_drop(id);
             }
             Action::CreatePipelineCache { id, desc } => {
-                let _ = unsafe { self.device_create_pipeline_cache::<A>(device, &desc, Some(id)) };
+                let _ = unsafe { self.device_create_pipeline_cache(device, &desc, Some(id)) };
             }
             Action::DestroyPipelineCache(id) => {
-                self.pipeline_cache_drop::<A>(id);
+                self.pipeline_cache_drop(id);
             }
             Action::CreateRenderBundle { id, desc, base } => {
                 let bundle =
                     wgc::command::RenderBundleEncoder::new(&desc, device, Some(base)).unwrap();
-                let (_, error) = self.render_bundle_encoder_finish::<A>(
+                let (_, error) = self.render_bundle_encoder_finish(
                     bundle,
                     &wgt::RenderBundleDescriptor { label: desc.label },
                     Some(id),
@@ -309,16 +308,16 @@ impl GlobalPlay for wgc::global::Global {
                 }
             }
             Action::DestroyRenderBundle(id) => {
-                self.render_bundle_drop::<A>(id);
+                self.render_bundle_drop(id);
             }
             Action::CreateQuerySet { id, desc } => {
-                let (_, error) = self.device_create_query_set::<A>(device, &desc, Some(id));
+                let (_, error) = self.device_create_query_set(device, &desc, Some(id));
                 if let Some(e) = error {
                     panic!("{e}");
                 }
             }
             Action::DestroyQuerySet(id) => {
-                self.query_set_drop::<A>(id);
+                self.query_set_drop(id);
             }
             Action::WriteBuffer {
                 id,
@@ -329,10 +328,10 @@ impl GlobalPlay for wgc::global::Global {
                 let bin = std::fs::read(dir.join(data)).unwrap();
                 let size = (range.end - range.start) as usize;
                 if queued {
-                    self.queue_write_buffer::<A>(queue, id, range.start, &bin)
+                    self.queue_write_buffer(queue, id, range.start, &bin)
                         .unwrap();
                 } else {
-                    self.device_set_buffer_data::<A>(id, range.start, &bin[..size])
+                    self.device_set_buffer_data(id, range.start, &bin[..size])
                         .unwrap();
                 }
             }
@@ -343,14 +342,14 @@ impl GlobalPlay for wgc::global::Global {
                 size,
             } => {
                 let bin = std::fs::read(dir.join(data)).unwrap();
-                self.queue_write_texture::<A>(queue, &to, &bin, &layout, &size)
+                self.queue_write_texture(queue, &to, &bin, &layout, &size)
                     .unwrap();
             }
             Action::Submit(_index, ref commands) if commands.is_empty() => {
-                self.queue_submit::<A>(queue, &[]).unwrap();
+                self.queue_submit(queue, &[]).unwrap();
             }
             Action::Submit(_index, commands) => {
-                let (encoder, error) = self.device_create_command_encoder::<A>(
+                let (encoder, error) = self.device_create_command_encoder(
                     device,
                     &wgt::CommandEncoderDescriptor { label: None },
                     Some(
@@ -362,8 +361,8 @@ impl GlobalPlay for wgc::global::Global {
                 if let Some(e) = error {
                     panic!("{e}");
                 }
-                let cmdbuf = self.encode_commands::<A>(encoder, commands);
-                self.queue_submit::<A>(queue, &[cmdbuf]).unwrap();
+                let cmdbuf = self.encode_commands(encoder, commands);
+                self.queue_submit(queue, &[cmdbuf]).unwrap();
             }
         }
     }
diff --git a/tests/tests/mem_leaks.rs b/tests/tests/mem_leaks.rs
index c0840f63fb..75de0776e8 100644
--- a/tests/tests/mem_leaks.rs
+++ b/tests/tests/mem_leaks.rs
@@ -13,7 +13,7 @@ async fn draw_test_with_reports(
     use wgpu::util::DeviceExt;
 
     let global_report = ctx.instance.generate_report().unwrap();
-    let report = global_report.hub_report(ctx.adapter_info.backend);
+    let report = global_report.hub_report();
     assert_eq!(report.devices.num_allocated, 1);
     assert_eq!(report.queues.num_allocated, 1);
 
@@ -22,7 +22,7 @@ async fn draw_test_with_reports(
         .create_shader_module(wgpu::include_wgsl!("./vertex_indices/draw.vert.wgsl"));
 
     let global_report = ctx.instance.generate_report().unwrap();
-    let report = global_report.hub_report(ctx.adapter_info.backend);
+    let report = global_report.hub_report();
     assert_eq!(report.shader_modules.num_allocated, 1);
 
     let bgl = ctx
@@ -42,7 +42,7 @@ async fn draw_test_with_reports(
         });
 
     let global_report = ctx.instance.generate_report().unwrap();
-    let report = global_report.hub_report(ctx.adapter_info.backend);
+    let report = global_report.hub_report();
     assert_eq!(report.buffers.num_allocated, 0);
     assert_eq!(report.bind_groups.num_allocated, 0);
     assert_eq!(report.bind_group_layouts.num_allocated, 1);
@@ -55,7 +55,7 @@ async fn draw_test_with_reports(
     });
 
     let global_report = ctx.instance.generate_report().unwrap();
-    let report = global_report.hub_report(ctx.adapter_info.backend);
+    let report = global_report.hub_report();
     assert_eq!(report.buffers.num_allocated, 1);
 
     let bg = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
@@ -68,7 +68,7 @@ async fn draw_test_with_reports(
     });
 
     let global_report = ctx.instance.generate_report().unwrap();
-    let report = global_report.hub_report(ctx.adapter_info.backend);
+    let report = global_report.hub_report();
     assert_eq!(report.buffers.num_allocated, 1);
     assert_eq!(report.bind_groups.num_allocated, 1);
     assert_eq!(report.bind_group_layouts.num_allocated, 1);
@@ -82,7 +82,7 @@ async fn draw_test_with_reports(
         });
 
     let global_report = ctx.instance.generate_report().unwrap();
-    let report = global_report.hub_report(ctx.adapter_info.backend);
+    let report = global_report.hub_report();
     assert_eq!(report.buffers.num_allocated, 1);
     assert_eq!(report.pipeline_layouts.num_allocated, 1);
     assert_eq!(report.render_pipelines.num_allocated, 0);
@@ -117,7 +117,7 @@ async fn draw_test_with_reports(
         });
 
     let global_report = ctx.instance.generate_report().unwrap();
-    let report = global_report.hub_report(ctx.adapter_info.backend);
+    let report = global_report.hub_report();
     assert_eq!(report.buffers.num_allocated, 1);
     assert_eq!(report.bind_groups.num_allocated, 1);
     assert_eq!(report.bind_group_layouts.num_allocated, 1);
@@ -129,7 +129,7 @@ async fn draw_test_with_reports(
     drop(shader);
 
     let global_report = ctx.instance.generate_report().unwrap();
-    let report = global_report.hub_report(ctx.adapter_info.backend);
+    let report = global_report.hub_report();
     assert_eq!(report.shader_modules.num_allocated, 0);
     assert_eq!(report.shader_modules.num_kept_from_user, 0);
     assert_eq!(report.textures.num_allocated, 0);
@@ -157,7 +157,7 @@ async fn draw_test_with_reports(
     let texture_view = texture.create_view(&wgpu::TextureViewDescriptor::default());
 
     let global_report = ctx.instance.generate_report().unwrap();
-    let report = global_report.hub_report(ctx.adapter_info.backend);
+    let report = global_report.hub_report();
     assert_eq!(report.buffers.num_allocated, 1);
     assert_eq!(report.texture_views.num_allocated, 1);
     assert_eq!(report.textures.num_allocated, 1);
@@ -165,7 +165,7 @@ async fn draw_test_with_reports(
     drop(texture);
 
     let global_report = ctx.instance.generate_report().unwrap();
-    let report = global_report.hub_report(ctx.adapter_info.backend);
+    let report = global_report.hub_report();
     assert_eq!(report.buffers.num_allocated, 1);
     assert_eq!(report.texture_views.num_allocated, 1);
     assert_eq!(report.texture_views.num_kept_from_user, 1);
@@ -177,7 +177,7 @@ async fn draw_test_with_reports(
         .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
 
     let global_report = ctx.instance.generate_report().unwrap();
-    let report = global_report.hub_report(ctx.adapter_info.backend);
+    let report = global_report.hub_report();
     assert_eq!(report.command_buffers.num_allocated, 1);
     assert_eq!(report.buffers.num_allocated, 1);
 
@@ -197,7 +197,7 @@ async fn draw_test_with_reports(
     rpass.set_bind_group(0, &bg, &[]);
 
     let global_report = ctx.instance.generate_report().unwrap();
-    let report = global_report.hub_report(ctx.adapter_info.backend);
+    let report = global_report.hub_report();
     assert_eq!(report.buffers.num_allocated, 1);
     assert_eq!(report.bind_groups.num_allocated, 1);
     assert_eq!(report.bind_group_layouts.num_allocated, 1);
@@ -220,7 +220,7 @@ async fn draw_test_with_reports(
     drop(buffer);
 
     let global_report = ctx.instance.generate_report().unwrap();
-    let report = global_report.hub_report(ctx.adapter_info.backend);
+    let report = global_report.hub_report();
     assert_eq!(report.command_buffers.num_kept_from_user, 1);
     assert_eq!(report.render_pipelines.num_kept_from_user, 0);
     assert_eq!(report.pipeline_layouts.num_kept_from_user, 0);
@@ -242,7 +242,7 @@ async fn draw_test_with_reports(
 
     // TODO: fix in https://github.com/gfx-rs/wgpu/pull/5141
     // let global_report = ctx.instance.generate_report().unwrap();
-    // let report = global_report.hub_report(ctx.adapter_info.backend);
+    // let report = global_report.hub_report();
     // assert_eq!(report.command_buffers.num_allocated, 0);
 
     ctx.async_poll(wgpu::Maintain::wait_for(submit_index))
@@ -250,7 +250,7 @@ async fn draw_test_with_reports(
         .panic_on_timeout();
 
     let global_report = ctx.instance.generate_report().unwrap();
-    let report = global_report.hub_report(ctx.adapter_info.backend);
+    let report = global_report.hub_report();
 
     assert_eq!(report.render_pipelines.num_allocated, 0);
     assert_eq!(report.bind_groups.num_allocated, 0);
@@ -265,7 +265,7 @@ async fn draw_test_with_reports(
     drop(ctx.adapter);
 
     let global_report = ctx.instance.generate_report().unwrap();
-    let report = global_report.hub_report(ctx.adapter_info.backend);
+    let report = global_report.hub_report();
 
     assert_eq!(report.queues.num_kept_from_user, 0);
     assert_eq!(report.textures.num_kept_from_user, 0);
diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs
index 2b45302513..d8a8b32d2f 100644
--- a/wgpu-core/src/binding_model.rs
+++ b/wgpu-core/src/binding_model.rs
@@ -2,7 +2,6 @@ use crate::{
     device::{
         bgl, Device, DeviceError, MissingDownlevelFlags, MissingFeatures, SHADER_STAGE_COUNT,
     },
-    hal_api::HalApi,
     id::{BindGroupLayoutId, BufferId, SamplerId, TextureViewId},
     init_tracker::{BufferInitTrackerAction, TextureInitTrackerAction},
     pipeline::{ComputePipeline, RenderPipeline},
@@ -417,12 +416,12 @@ pub struct BindGroupEntry<'a> {
 
 /// Bindable resource and the slot to bind it to.
 #[derive(Clone, Debug)]
-pub struct ResolvedBindGroupEntry<'a, A: HalApi> {
+pub struct ResolvedBindGroupEntry<'a> {
     /// Slot for which binding provides resource. Corresponds to an entry of the same
     /// binding index in the [`BindGroupLayoutDescriptor`].
     pub binding: u32,
     /// Resource to attach to the binding
-    pub resource: ResolvedBindingResource<'a, A>,
+    pub resource: ResolvedBindingResource<'a>,
 }
 
 /// Describes a group of bindings and the resources to be bound.
@@ -441,15 +440,15 @@ pub struct BindGroupDescriptor<'a> {
 
 /// Describes a group of bindings and the resources to be bound.
 #[derive(Clone, Debug)]
-pub struct ResolvedBindGroupDescriptor<'a, A: HalApi> {
+pub struct ResolvedBindGroupDescriptor<'a> {
     /// Debug label of the bind group.
     ///
     /// This will show up in graphics debuggers for easy identification.
     pub label: Label<'a>,
     /// The [`BindGroupLayout`] that corresponds to this bind group.
-    pub layout: Arc<BindGroupLayout<A>>,
+    pub layout: Arc<BindGroupLayout>,
     /// The resources to bind to this bind group.
-    pub entries: Cow<'a, [ResolvedBindGroupEntry<'a, A>]>,
+    pub entries: Cow<'a, [ResolvedBindGroupEntry<'a>]>,
 }
 
 /// Describes a [`BindGroupLayout`].
@@ -468,13 +467,13 @@ pub struct BindGroupLayoutDescriptor<'a> {
 /// used with a specific pipeline. This constraint only happens when
 /// the BGLs have been derived from a pipeline without a layout.
 #[derive(Debug)]
-pub(crate) enum ExclusivePipeline<A: HalApi> {
+pub(crate) enum ExclusivePipeline {
     None,
-    Render(Weak<RenderPipeline<A>>),
-    Compute(Weak<ComputePipeline<A>>),
+    Render(Weak<RenderPipeline>),
+    Compute(Weak<ComputePipeline>),
 }
 
-impl<A: HalApi> std::fmt::Display for ExclusivePipeline<A> {
+impl std::fmt::Display for ExclusivePipeline {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             ExclusivePipeline::None => f.write_str("None"),
@@ -498,9 +497,9 @@ impl<A: HalApi> std::fmt::Display for ExclusivePipeline<A> {
 
 /// Bind group layout.
 #[derive(Debug)]
-pub struct BindGroupLayout<A: HalApi> {
+pub struct BindGroupLayout {
     pub(crate) raw: ManuallyDrop<Box<dyn hal::DynBindGroupLayout>>,
-    pub(crate) device: Arc<Device<A>>,
+    pub(crate) device: Arc<Device>,
     pub(crate) entries: bgl::EntryMap,
     /// It is very important that we know if the bind group comes from the BGL pool.
     ///
@@ -509,14 +508,14 @@ pub struct BindGroupLayout<A: HalApi> {
     /// We cannot unconditionally remove from the pool, as BGLs that don't come from the pool
     /// (derived BGLs) must not be removed.
     pub(crate) origin: bgl::Origin,
-    pub(crate) exclusive_pipeline: OnceCell<ExclusivePipeline<A>>,
+    pub(crate) exclusive_pipeline: OnceCell<ExclusivePipeline>,
     #[allow(unused)]
     pub(crate) binding_count_validator: BindingTypeMaxCountValidator,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
 }
 
-impl<A: HalApi> Drop for BindGroupLayout<A> {
+impl Drop for BindGroupLayout {
     fn drop(&mut self) {
         resource_log!("Destroy raw {}", self.error_ident());
         if matches!(self.origin, bgl::Origin::Pool) {
@@ -530,12 +529,12 @@ impl<A: HalApi> Drop for BindGroupLayout<A> {
     }
 }
 
-crate::impl_resource_type_generic!(BindGroupLayout);
+crate::impl_resource_type!(BindGroupLayout);
 crate::impl_labeled!(BindGroupLayout);
 crate::impl_parent_device!(BindGroupLayout);
-crate::impl_storage_item_generic!(BindGroupLayout);
+crate::impl_storage_item!(BindGroupLayout);
 
-impl<A: HalApi> BindGroupLayout<A> {
+impl BindGroupLayout {
     pub(crate) fn raw(&self) -> &dyn hal::DynBindGroupLayout {
         self.raw.as_ref()
     }
@@ -631,14 +630,14 @@ pub struct PipelineLayoutDescriptor<'a> {
 ///
 /// A `PipelineLayoutDescriptor` can be used to create a pipeline layout.
 #[derive(Debug)]
-pub struct ResolvedPipelineLayoutDescriptor<'a, A: HalApi> {
+pub struct ResolvedPipelineLayoutDescriptor<'a> {
     /// Debug label of the pipeline layout.
     ///
     /// This will show up in graphics debuggers for easy identification.
     pub label: Label<'a>,
     /// Bind groups that this pipeline uses. The first entry will provide all the bindings for
     /// "set = 0", second entry will provide all the bindings for "set = 1" etc.
-    pub bind_group_layouts: Cow<'a, [Arc<BindGroupLayout<A>>]>,
+    pub bind_group_layouts: Cow<'a, [Arc<BindGroupLayout>]>,
     /// Set of push constant ranges this pipeline uses. Each shader stage that
     /// uses push constants must define the range in push constant memory that
     /// corresponds to its single `layout(push_constant)` uniform block.
@@ -650,16 +649,16 @@ pub struct ResolvedPipelineLayoutDescriptor<'a, A: HalApi> {
 }
 
 #[derive(Debug)]
-pub struct PipelineLayout<A: HalApi> {
+pub struct PipelineLayout {
     pub(crate) raw: ManuallyDrop<Box<dyn hal::DynPipelineLayout>>,
-    pub(crate) device: Arc<Device<A>>,
+    pub(crate) device: Arc<Device>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
-    pub(crate) bind_group_layouts: ArrayVec<Arc<BindGroupLayout<A>>, { hal::MAX_BIND_GROUPS }>,
+    pub(crate) bind_group_layouts: ArrayVec<Arc<BindGroupLayout>, { hal::MAX_BIND_GROUPS }>,
     pub(crate) push_constant_ranges: ArrayVec<wgt::PushConstantRange, { SHADER_STAGE_COUNT }>,
 }
 
-impl<A: HalApi> Drop for PipelineLayout<A> {
+impl Drop for PipelineLayout {
     fn drop(&mut self) {
         resource_log!("Destroy raw {}", self.error_ident());
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
@@ -670,7 +669,7 @@ impl<A: HalApi> Drop for PipelineLayout<A> {
     }
 }
 
-impl<A: HalApi> PipelineLayout<A> {
+impl PipelineLayout {
     pub(crate) fn raw(&self) -> &dyn hal::DynPipelineLayout {
         self.raw.as_ref()
     }
@@ -761,10 +760,10 @@ impl<A: HalApi> PipelineLayout<A> {
     }
 }
 
-crate::impl_resource_type_generic!(PipelineLayout);
+crate::impl_resource_type!(PipelineLayout);
 crate::impl_labeled!(PipelineLayout);
 crate::impl_parent_device!(PipelineLayout);
-crate::impl_storage_item_generic!(PipelineLayout);
+crate::impl_storage_item!(PipelineLayout);
 
 #[repr(C)]
 #[derive(Clone, Debug, Hash, Eq, PartialEq)]
@@ -776,8 +775,8 @@ pub struct BufferBinding {
 }
 
 #[derive(Clone, Debug)]
-pub struct ResolvedBufferBinding<A: HalApi> {
-    pub buffer: Arc<Buffer<A>>,
+pub struct ResolvedBufferBinding {
+    pub buffer: Arc<Buffer>,
     pub offset: wgt::BufferAddress,
     pub size: Option<wgt::BufferSize>,
 }
@@ -798,13 +797,13 @@ pub enum BindingResource<'a> {
 // Note: Duplicated in `wgpu-rs` as `BindingResource`
 // They're different enough that it doesn't make sense to share a common type
 #[derive(Debug, Clone)]
-pub enum ResolvedBindingResource<'a, A: HalApi> {
-    Buffer(ResolvedBufferBinding<A>),
-    BufferArray(Cow<'a, [ResolvedBufferBinding<A>]>),
-    Sampler(Arc<Sampler<A>>),
-    SamplerArray(Cow<'a, [Arc<Sampler<A>>]>),
-    TextureView(Arc<TextureView<A>>),
-    TextureViewArray(Cow<'a, [Arc<TextureView<A>>]>),
+pub enum ResolvedBindingResource<'a> {
+    Buffer(ResolvedBufferBinding),
+    BufferArray(Cow<'a, [ResolvedBufferBinding]>),
+    Sampler(Arc<Sampler>),
+    SamplerArray(Cow<'a, [Arc<Sampler>]>),
+    TextureView(Arc<TextureView>),
+    TextureViewArray(Cow<'a, [Arc<TextureView>]>),
 }
 
 #[derive(Clone, Debug, Error)]
@@ -886,23 +885,23 @@ pub(crate) fn buffer_binding_type_alignment(
 }
 
 #[derive(Debug)]
-pub struct BindGroup<A: HalApi> {
+pub struct BindGroup {
     pub(crate) raw: Snatchable<Box<dyn hal::DynBindGroup>>,
-    pub(crate) device: Arc<Device<A>>,
-    pub(crate) layout: Arc<BindGroupLayout<A>>,
+    pub(crate) device: Arc<Device>,
+    pub(crate) layout: Arc<BindGroupLayout>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
     pub(crate) tracking_data: TrackingData,
-    pub(crate) used: BindGroupStates<A>,
-    pub(crate) used_buffer_ranges: Vec<BufferInitTrackerAction<A>>,
-    pub(crate) used_texture_ranges: Vec<TextureInitTrackerAction<A>>,
+    pub(crate) used: BindGroupStates,
+    pub(crate) used_buffer_ranges: Vec<BufferInitTrackerAction>,
+    pub(crate) used_texture_ranges: Vec<TextureInitTrackerAction>,
     pub(crate) dynamic_binding_info: Vec<BindGroupDynamicBindingData>,
     /// Actual binding sizes for buffers that don't have `min_binding_size`
     /// specified in BGL. Listed in the order of iteration of `BGL.entries`.
     pub(crate) late_buffer_binding_sizes: Vec<wgt::BufferSize>,
 }
 
-impl<A: HalApi> Drop for BindGroup<A> {
+impl Drop for BindGroup {
     fn drop(&mut self) {
         if let Some(raw) = self.raw.take() {
             resource_log!("Destroy raw {}", self.error_ident());
@@ -913,7 +912,7 @@ impl<A: HalApi> Drop for BindGroup<A> {
     }
 }
 
-impl<A: HalApi> BindGroup<A> {
+impl BindGroup {
     pub(crate) fn try_raw<'a>(
         &'a self,
         guard: &'a SnatchGuard,
@@ -985,10 +984,10 @@ impl<A: HalApi> BindGroup<A> {
     }
 }
 
-crate::impl_resource_type_generic!(BindGroup);
+crate::impl_resource_type!(BindGroup);
 crate::impl_labeled!(BindGroup);
 crate::impl_parent_device!(BindGroup);
-crate::impl_storage_item_generic!(BindGroup);
+crate::impl_storage_item!(BindGroup);
 crate::impl_trackable!(BindGroup);
 
 #[derive(Clone, Debug, Error)]
diff --git a/wgpu-core/src/command/bind.rs b/wgpu-core/src/command/bind.rs
index 7e3d9ce9cd..620027994f 100644
--- a/wgpu-core/src/command/bind.rs
+++ b/wgpu-core/src/command/bind.rs
@@ -3,7 +3,6 @@ use std::sync::Arc;
 use crate::{
     binding_model::{BindGroup, LateMinBufferBindingSizeMismatch, PipelineLayout},
     device::SHADER_STAGE_COUNT,
-    hal_api::HalApi,
     pipeline::LateSizedBufferGroup,
     resource::{Labeled, ResourceErrorIdent},
 };
@@ -19,7 +18,6 @@ mod compat {
     use crate::{
         binding_model::BindGroupLayout,
         error::MultiError,
-        hal_api::HalApi,
         resource::{Labeled, ParentDevice, ResourceErrorIdent},
     };
     use std::{
@@ -38,12 +36,12 @@ mod compat {
     }
 
     #[derive(Debug, Clone)]
-    struct Entry<A: HalApi> {
-        assigned: Option<Arc<BindGroupLayout<A>>>,
-        expected: Option<Arc<BindGroupLayout<A>>>,
+    struct Entry {
+        assigned: Option<Arc<BindGroupLayout>>,
+        expected: Option<Arc<BindGroupLayout>>,
     }
 
-    impl<A: HalApi> Entry<A> {
+    impl Entry {
         fn empty() -> Self {
             Self {
                 assigned: None,
@@ -192,11 +190,11 @@ mod compat {
     }
 
     #[derive(Debug, Default)]
-    pub(crate) struct BoundBindGroupLayouts<A: HalApi> {
-        entries: ArrayVec<Entry<A>, { hal::MAX_BIND_GROUPS }>,
+    pub(crate) struct BoundBindGroupLayouts {
+        entries: ArrayVec<Entry, { hal::MAX_BIND_GROUPS }>,
     }
 
-    impl<A: HalApi> BoundBindGroupLayouts<A> {
+    impl BoundBindGroupLayouts {
         pub fn new() -> Self {
             Self {
                 entries: (0..hal::MAX_BIND_GROUPS).map(|_| Entry::empty()).collect(),
@@ -214,7 +212,7 @@ mod compat {
 
         pub fn update_expectations(
             &mut self,
-            expectations: &[Arc<BindGroupLayout<A>>],
+            expectations: &[Arc<BindGroupLayout>],
         ) -> Range<usize> {
             let start_index = self
                 .entries
@@ -236,7 +234,7 @@ mod compat {
             self.make_range(start_index)
         }
 
-        pub fn assign(&mut self, index: usize, value: Arc<BindGroupLayout<A>>) -> Range<usize> {
+        pub fn assign(&mut self, index: usize, value: Arc<BindGroupLayout>) -> Range<usize> {
             self.entries[index].assigned = Some(value);
             self.make_range(index)
         }
@@ -283,9 +281,9 @@ struct LateBufferBinding {
     bound_size: wgt::BufferAddress,
 }
 
-#[derive(Debug)]
-pub(super) struct EntryPayload<A: HalApi> {
-    pub(super) group: Option<Arc<BindGroup<A>>>,
+#[derive(Debug, Default)]
+pub(super) struct EntryPayload {
+    pub(super) group: Option<Arc<BindGroup>>,
     pub(super) dynamic_offsets: Vec<wgt::DynamicOffset>,
     late_buffer_bindings: Vec<LateBufferBinding>,
     /// Since `LateBufferBinding` may contain information about the bindings
@@ -293,18 +291,7 @@ pub(super) struct EntryPayload<A: HalApi> {
     pub(super) late_bindings_effective_count: usize,
 }
 
-impl<A: HalApi> Default for EntryPayload<A> {
-    fn default() -> Self {
-        Self {
-            group: None,
-            dynamic_offsets: Default::default(),
-            late_buffer_bindings: Default::default(),
-            late_bindings_effective_count: Default::default(),
-        }
-    }
-}
-
-impl<A: HalApi> EntryPayload<A> {
+impl EntryPayload {
     fn reset(&mut self) {
         self.group = None;
         self.dynamic_offsets.clear();
@@ -314,13 +301,13 @@ impl<A: HalApi> EntryPayload<A> {
 }
 
 #[derive(Debug, Default)]
-pub(super) struct Binder<A: HalApi> {
-    pub(super) pipeline_layout: Option<Arc<PipelineLayout<A>>>,
-    manager: compat::BoundBindGroupLayouts<A>,
-    payloads: [EntryPayload<A>; hal::MAX_BIND_GROUPS],
+pub(super) struct Binder {
+    pub(super) pipeline_layout: Option<Arc<PipelineLayout>>,
+    manager: compat::BoundBindGroupLayouts,
+    payloads: [EntryPayload; hal::MAX_BIND_GROUPS],
 }
 
-impl<A: HalApi> Binder<A> {
+impl Binder {
     pub(super) fn new() -> Self {
         Self {
             pipeline_layout: None,
@@ -338,9 +325,9 @@ impl<A: HalApi> Binder<A> {
 
     pub(super) fn change_pipeline_layout<'a>(
         &'a mut self,
-        new: &Arc<PipelineLayout<A>>,
+        new: &Arc<PipelineLayout>,
         late_sized_buffer_groups: &[LateSizedBufferGroup],
-    ) -> (usize, &'a [EntryPayload<A>]) {
+    ) -> (usize, &'a [EntryPayload]) {
         let old_id_opt = self.pipeline_layout.replace(new.clone());
 
         let mut bind_range = self.manager.update_expectations(&new.bind_group_layouts);
@@ -380,9 +367,9 @@ impl<A: HalApi> Binder<A> {
     pub(super) fn assign_group<'a>(
         &'a mut self,
         index: usize,
-        bind_group: &Arc<BindGroup<A>>,
+        bind_group: &Arc<BindGroup>,
         offsets: &[wgt::DynamicOffset],
-    ) -> &'a [EntryPayload<A>] {
+    ) -> &'a [EntryPayload] {
         let payload = &mut self.payloads[index];
         payload.group = Some(bind_group.clone());
         payload.dynamic_offsets.clear();
@@ -412,7 +399,7 @@ impl<A: HalApi> Binder<A> {
         &self.payloads[bind_range]
     }
 
-    pub(super) fn list_active<'a>(&'a self) -> impl Iterator<Item = &'a Arc<BindGroup<A>>> + '_ {
+    pub(super) fn list_active<'a>(&'a self) -> impl Iterator<Item = &'a Arc<BindGroup>> + '_ {
         let payloads = &self.payloads;
         self.manager
             .list_active()
diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs
index a8a3528647..56f7d551b0 100644
--- a/wgpu-core/src/command/bundle.rs
+++ b/wgpu-core/src/command/bundle.rs
@@ -88,7 +88,6 @@ use crate::{
         AttachmentData, Device, DeviceError, MissingDownlevelFlags, RenderPassContext,
         SHADER_STAGE_COUNT,
     },
-    hal_api::HalApi,
     hub::Hub,
     id,
     init_tracker::{BufferInitTrackerAction, MemoryInitKind, TextureInitTrackerAction},
@@ -110,8 +109,8 @@ use super::{
 };
 
 /// <https://gpuweb.github.io/gpuweb/#dom-gpurendercommandsmixin-draw>
-fn validate_draw<A: HalApi>(
-    vertex: &[Option<VertexState<A>>],
+fn validate_draw(
+    vertex: &[Option<VertexState>],
     step: &[VertexStep],
     first_vertex: u32,
     vertex_count: u32,
@@ -151,10 +150,10 @@ fn validate_draw<A: HalApi>(
 }
 
 // See https://gpuweb.github.io/gpuweb/#dom-gpurendercommandsmixin-drawindexed
-fn validate_indexed_draw<A: HalApi>(
-    vertex: &[Option<VertexState<A>>],
+fn validate_indexed_draw(
+    vertex: &[Option<VertexState>],
     step: &[VertexStep],
-    index_state: &IndexState<A>,
+    index_state: &IndexState,
     first_index: u32,
     index_count: u32,
     first_instance: u32,
@@ -339,12 +338,12 @@ impl RenderBundleEncoder {
     /// and accumulate buffer and texture initialization actions.
     ///
     /// [`ExecuteBundle`]: RenderCommand::ExecuteBundle
-    pub(crate) fn finish<A: HalApi>(
+    pub(crate) fn finish(
         self,
         desc: &RenderBundleDescriptor,
-        device: &Arc<Device<A>>,
-        hub: &Hub<A>,
-    ) -> Result<Arc<RenderBundle<A>>, RenderBundleError> {
+        device: &Arc<Device>,
+        hub: &Hub,
+    ) -> Result<Arc<RenderBundle>, RenderBundleError> {
         let scope = PassErrorScope::Bundle;
 
         device.check_is_valid().map_pass_err(scope)?;
@@ -577,9 +576,9 @@ impl RenderBundleEncoder {
     }
 }
 
-fn set_bind_group<A: HalApi>(
-    state: &mut State<A>,
-    bind_group_guard: &crate::lock::RwLockReadGuard<crate::storage::Storage<BindGroup<A>>>,
+fn set_bind_group(
+    state: &mut State,
+    bind_group_guard: &crate::lock::RwLockReadGuard<crate::storage::Storage<BindGroup>>,
     dynamic_offsets: &[u32],
     index: u32,
     num_dynamic_offsets: usize,
@@ -622,9 +621,9 @@ fn set_bind_group<A: HalApi>(
     Ok(())
 }
 
-fn set_pipeline<A: HalApi>(
-    state: &mut State<A>,
-    pipeline_guard: &crate::lock::RwLockReadGuard<crate::storage::Storage<RenderPipeline<A>>>,
+fn set_pipeline(
+    state: &mut State,
+    pipeline_guard: &crate::lock::RwLockReadGuard<crate::storage::Storage<RenderPipeline>>,
     context: &RenderPassContext,
     is_depth_read_only: bool,
     is_stencil_read_only: bool,
@@ -665,9 +664,9 @@ fn set_pipeline<A: HalApi>(
     Ok(())
 }
 
-fn set_index_buffer<A: HalApi>(
-    state: &mut State<A>,
-    buffer_guard: &crate::lock::RwLockReadGuard<crate::storage::Storage<Buffer<A>>>,
+fn set_index_buffer(
+    state: &mut State,
+    buffer_guard: &crate::lock::RwLockReadGuard<crate::storage::Storage<Buffer>>,
     buffer_id: id::Id<id::markers::Buffer>,
     index_format: wgt::IndexFormat,
     offset: u64,
@@ -700,9 +699,9 @@ fn set_index_buffer<A: HalApi>(
     Ok(())
 }
 
-fn set_vertex_buffer<A: HalApi>(
-    state: &mut State<A>,
-    buffer_guard: &crate::lock::RwLockReadGuard<crate::storage::Storage<Buffer<A>>>,
+fn set_vertex_buffer(
+    state: &mut State,
+    buffer_guard: &crate::lock::RwLockReadGuard<crate::storage::Storage<Buffer>>,
     slot: u32,
     buffer_id: id::Id<id::markers::Buffer>,
     offset: u64,
@@ -744,8 +743,8 @@ fn set_vertex_buffer<A: HalApi>(
     Ok(())
 }
 
-fn set_push_constant<A: HalApi>(
-    state: &mut State<A>,
+fn set_push_constant(
+    state: &mut State,
     stages: wgt::ShaderStages,
     offset: u32,
     size_bytes: u32,
@@ -769,8 +768,8 @@ fn set_push_constant<A: HalApi>(
     Ok(())
 }
 
-fn draw<A: HalApi>(
-    state: &mut State<A>,
+fn draw(
+    state: &mut State,
     dynamic_offsets: &[u32],
     vertex_count: u32,
     instance_count: u32,
@@ -802,8 +801,8 @@ fn draw<A: HalApi>(
     Ok(())
 }
 
-fn draw_indexed<A: HalApi>(
-    state: &mut State<A>,
+fn draw_indexed(
+    state: &mut State,
     dynamic_offsets: &[u32],
     index_count: u32,
     instance_count: u32,
@@ -843,10 +842,10 @@ fn draw_indexed<A: HalApi>(
     Ok(())
 }
 
-fn multi_draw_indirect<A: HalApi>(
-    state: &mut State<A>,
+fn multi_draw_indirect(
+    state: &mut State,
     dynamic_offsets: &[u32],
-    buffer_guard: &crate::lock::RwLockReadGuard<crate::storage::Storage<Buffer<A>>>,
+    buffer_guard: &crate::lock::RwLockReadGuard<crate::storage::Storage<Buffer>>,
     buffer_id: id::Id<id::markers::Buffer>,
     offset: u64,
     indexed: bool,
@@ -923,16 +922,16 @@ pub type RenderBundleDescriptor<'a> = wgt::RenderBundleDescriptor<Label<'a>>;
 // The plan is to back it by an actual Vulkan secondary buffer, D3D12 Bundle,
 // or Metal indirect command buffer.
 #[derive(Debug)]
-pub struct RenderBundle<A: HalApi> {
+pub struct RenderBundle {
     // Normalized command stream. It can be executed verbatim,
     // without re-binding anything on the pipeline change.
-    base: BasePass<ArcRenderCommand<A>>,
+    base: BasePass<ArcRenderCommand>,
     pub(super) is_depth_read_only: bool,
     pub(super) is_stencil_read_only: bool,
-    pub(crate) device: Arc<Device<A>>,
-    pub(crate) used: RenderBundleScope<A>,
-    pub(super) buffer_memory_init_actions: Vec<BufferInitTrackerAction<A>>,
-    pub(super) texture_memory_init_actions: Vec<TextureInitTrackerAction<A>>,
+    pub(crate) device: Arc<Device>,
+    pub(crate) used: RenderBundleScope,
+    pub(super) buffer_memory_init_actions: Vec<BufferInitTrackerAction>,
+    pub(super) texture_memory_init_actions: Vec<TextureInitTrackerAction>,
     pub(super) context: RenderPassContext,
     /// The `label` from the descriptor used to create the resource.
     label: String,
@@ -940,18 +939,18 @@ pub struct RenderBundle<A: HalApi> {
     discard_hal_labels: bool,
 }
 
-impl<A: HalApi> Drop for RenderBundle<A> {
+impl Drop for RenderBundle {
     fn drop(&mut self) {
         resource_log!("Drop {}", self.error_ident());
     }
 }
 
 #[cfg(send_sync)]
-unsafe impl<A: HalApi> Send for RenderBundle<A> {}
+unsafe impl Send for RenderBundle {}
 #[cfg(send_sync)]
-unsafe impl<A: HalApi> Sync for RenderBundle<A> {}
+unsafe impl Sync for RenderBundle {}
 
-impl<A: HalApi> RenderBundle<A> {
+impl RenderBundle {
     /// Actually encode the contents into a native command buffer.
     ///
     /// This is partially duplicating the logic of `render_pass_end`.
@@ -967,7 +966,7 @@ impl<A: HalApi> RenderBundle<A> {
         snatch_guard: &SnatchGuard,
     ) -> Result<(), ExecutionError> {
         let mut offsets = self.base.dynamic_offsets.as_slice();
-        let mut pipeline_layout = None::<Arc<PipelineLayout<A>>>;
+        let mut pipeline_layout = None::<Arc<PipelineLayout>>;
         if !self.discard_hal_labels {
             if let Some(ref label) = self.base.label {
                 unsafe { raw.begin_debug_marker(label) };
@@ -1146,10 +1145,10 @@ impl<A: HalApi> RenderBundle<A> {
     }
 }
 
-crate::impl_resource_type_generic!(RenderBundle);
+crate::impl_resource_type!(RenderBundle);
 crate::impl_labeled!(RenderBundle);
 crate::impl_parent_device!(RenderBundle);
-crate::impl_storage_item_generic!(RenderBundle);
+crate::impl_storage_item!(RenderBundle);
 crate::impl_trackable!(RenderBundle);
 
 /// A render bundle's current index buffer state.
@@ -1158,14 +1157,14 @@ crate::impl_trackable!(RenderBundle);
 /// and calls [`State::flush_index`] before any indexed draw command to produce
 /// a `SetIndexBuffer` command if one is necessary.
 #[derive(Debug)]
-struct IndexState<A: HalApi> {
-    buffer: Arc<Buffer<A>>,
+struct IndexState {
+    buffer: Arc<Buffer>,
     format: wgt::IndexFormat,
     range: Range<wgt::BufferAddress>,
     is_dirty: bool,
 }
 
-impl<A: HalApi> IndexState<A> {
+impl IndexState {
     /// Return the number of entries in the current index buffer.
     ///
     /// Panic if no index buffer has been set.
@@ -1180,7 +1179,7 @@ impl<A: HalApi> IndexState<A> {
 
     /// Generate a `SetIndexBuffer` command to prepare for an indexed draw
     /// command, if needed.
-    fn flush(&mut self) -> Option<ArcRenderCommand<A>> {
+    fn flush(&mut self) -> Option<ArcRenderCommand> {
         if self.is_dirty {
             self.is_dirty = false;
             Some(ArcRenderCommand::SetIndexBuffer {
@@ -1205,14 +1204,14 @@ impl<A: HalApi> IndexState<A> {
 ///
 /// [`flush`]: IndexState::flush
 #[derive(Debug)]
-struct VertexState<A: HalApi> {
-    buffer: Arc<Buffer<A>>,
+struct VertexState {
+    buffer: Arc<Buffer>,
     range: Range<wgt::BufferAddress>,
     is_dirty: bool,
 }
 
-impl<A: HalApi> VertexState<A> {
-    fn new(buffer: Arc<Buffer<A>>, range: Range<wgt::BufferAddress>) -> Self {
+impl VertexState {
+    fn new(buffer: Arc<Buffer>, range: Range<wgt::BufferAddress>) -> Self {
         Self {
             buffer,
             range,
@@ -1223,7 +1222,7 @@ impl<A: HalApi> VertexState<A> {
     /// Generate a `SetVertexBuffer` command for this slot, if necessary.
     ///
     /// `slot` is the index of the vertex buffer slot that `self` tracks.
-    fn flush(&mut self, slot: u32) -> Option<ArcRenderCommand<A>> {
+    fn flush(&mut self, slot: u32) -> Option<ArcRenderCommand> {
         if self.is_dirty {
             self.is_dirty = false;
             Some(ArcRenderCommand::SetVertexBuffer {
@@ -1240,9 +1239,9 @@ impl<A: HalApi> VertexState<A> {
 
 /// A bind group that has been set at a particular index during render bundle encoding.
 #[derive(Debug)]
-struct BindState<A: HalApi> {
+struct BindState {
     /// The id of the bind group set at this index.
-    bind_group: Arc<BindGroup<A>>,
+    bind_group: Arc<BindGroup>,
 
     /// The range of dynamic offsets for this bind group, in the original
     /// command stream's `BassPass::dynamic_offsets` array.
@@ -1254,9 +1253,9 @@ struct BindState<A: HalApi> {
 }
 
 /// The bundle's current pipeline, and some cached information needed for validation.
-struct PipelineState<A: HalApi> {
+struct PipelineState {
     /// The pipeline
-    pipeline: Arc<RenderPipeline<A>>,
+    pipeline: Arc<RenderPipeline>,
 
     /// How this pipeline's vertex shader traverses each vertex buffer, indexed
     /// by vertex buffer slot number.
@@ -1270,8 +1269,8 @@ struct PipelineState<A: HalApi> {
     used_bind_groups: usize,
 }
 
-impl<A: HalApi> PipelineState<A> {
-    fn new(pipeline: &Arc<RenderPipeline<A>>) -> Self {
+impl PipelineState {
+    fn new(pipeline: &Arc<RenderPipeline>) -> Self {
         Self {
             pipeline: pipeline.clone(),
             steps: pipeline.vertex_steps.to_vec(),
@@ -1287,7 +1286,7 @@ impl<A: HalApi> PipelineState<A> {
 
     /// Return a sequence of commands to zero the push constant ranges this
     /// pipeline uses. If no initialization is necessary, return `None`.
-    fn zero_push_constants(&self) -> Option<impl Iterator<Item = ArcRenderCommand<A>>> {
+    fn zero_push_constants(&self) -> Option<impl Iterator<Item = ArcRenderCommand>> {
         if !self.push_constant_ranges.is_empty() {
             let nonoverlapping_ranges =
                 super::bind::compute_nonoverlapping_ranges(&self.push_constant_ranges);
@@ -1318,22 +1317,22 @@ impl<A: HalApi> PipelineState<A> {
 ///
 /// [`SetBindGroup`]: RenderCommand::SetBindGroup
 /// [`SetIndexBuffer`]: RenderCommand::SetIndexBuffer
-struct State<A: HalApi> {
+struct State {
     /// Resources used by this bundle. This will become [`RenderBundle::used`].
-    trackers: RenderBundleScope<A>,
+    trackers: RenderBundleScope,
 
     /// The currently set pipeline, if any.
-    pipeline: Option<PipelineState<A>>,
+    pipeline: Option<PipelineState>,
 
     /// The bind group set at each index, if any.
-    bind: ArrayVec<Option<BindState<A>>, { hal::MAX_BIND_GROUPS }>,
+    bind: ArrayVec<Option<BindState>, { hal::MAX_BIND_GROUPS }>,
 
     /// The state of each vertex buffer slot.
-    vertex: ArrayVec<Option<VertexState<A>>, { hal::MAX_VERTEX_BUFFERS }>,
+    vertex: ArrayVec<Option<VertexState>, { hal::MAX_VERTEX_BUFFERS }>,
 
     /// The current index buffer, if one has been set. We flush this state
     /// before indexed draw commands.
-    index: Option<IndexState<A>>,
+    index: Option<IndexState>,
 
     /// Dynamic offset values used by the cleaned-up command sequence.
     ///
@@ -1343,16 +1342,16 @@ struct State<A: HalApi> {
     /// [`dynamic_offsets`]: BasePass::dynamic_offsets
     flat_dynamic_offsets: Vec<wgt::DynamicOffset>,
 
-    device: Arc<Device<A>>,
-    commands: Vec<ArcRenderCommand<A>>,
-    buffer_memory_init_actions: Vec<BufferInitTrackerAction<A>>,
-    texture_memory_init_actions: Vec<TextureInitTrackerAction<A>>,
+    device: Arc<Device>,
+    commands: Vec<ArcRenderCommand>,
+    buffer_memory_init_actions: Vec<BufferInitTrackerAction>,
+    texture_memory_init_actions: Vec<TextureInitTrackerAction>,
     next_dynamic_offset: usize,
 }
 
-impl<A: HalApi> State<A> {
+impl State {
     /// Return the current pipeline state. Return an error if none is set.
-    fn pipeline(&self) -> Result<&PipelineState<A>, RenderBundleErrorInner> {
+    fn pipeline(&self) -> Result<&PipelineState, RenderBundleErrorInner> {
         self.pipeline
             .as_ref()
             .ok_or(DrawError::MissingPipeline.into())
@@ -1368,7 +1367,7 @@ impl<A: HalApi> State<A> {
     fn set_bind_group(
         &mut self,
         slot: u32,
-        bind_group: &Arc<BindGroup<A>>,
+        bind_group: &Arc<BindGroup>,
         dynamic_offsets: Range<usize>,
     ) {
         // If this call wouldn't actually change this index's state, we can
@@ -1407,7 +1406,7 @@ impl<A: HalApi> State<A> {
     ///
     /// - Changing the push constant ranges at all requires re-establishing
     ///   all bind groups.
-    fn invalidate_bind_groups(&mut self, new: &PipelineState<A>, layout: &PipelineLayout<A>) {
+    fn invalidate_bind_groups(&mut self, new: &PipelineState, layout: &PipelineLayout) {
         match self.pipeline {
             None => {
                 // Establishing entirely new pipeline state.
@@ -1441,7 +1440,7 @@ impl<A: HalApi> State<A> {
     /// Set the bundle's current index buffer and its associated parameters.
     fn set_index_buffer(
         &mut self,
-        buffer: Arc<Buffer<A>>,
+        buffer: Arc<Buffer>,
         format: wgt::IndexFormat,
         range: Range<wgt::BufferAddress>,
     ) {
diff --git a/wgpu-core/src/command/clear.rs b/wgpu-core/src/command/clear.rs
index 487bdf756b..944dd40af4 100644
--- a/wgpu-core/src/command/clear.rs
+++ b/wgpu-core/src/command/clear.rs
@@ -8,7 +8,6 @@ use crate::{
     device::DeviceError,
     get_lowest_common_denom,
     global::Global,
-    hal_api::HalApi,
     id::{BufferId, CommandEncoderId, TextureId},
     init_tracker::{MemoryInitKind, TextureInitRange},
     resource::{
@@ -79,7 +78,7 @@ whereas subesource range specified start {subresource_base_array_layer} and coun
 }
 
 impl Global {
-    pub fn command_encoder_clear_buffer<A: HalApi>(
+    pub fn command_encoder_clear_buffer(
         &self,
         command_encoder_id: CommandEncoderId,
         dst: BufferId,
@@ -89,7 +88,7 @@ impl Global {
         profiling::scope!("CommandEncoder::clear_buffer");
         api_log!("CommandEncoder::clear_buffer {dst:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let cmd_buf = match hub
             .command_buffers
@@ -172,7 +171,7 @@ impl Global {
         Ok(())
     }
 
-    pub fn command_encoder_clear_texture<A: HalApi>(
+    pub fn command_encoder_clear_texture(
         &self,
         command_encoder_id: CommandEncoderId,
         dst: TextureId,
@@ -181,7 +180,7 @@ impl Global {
         profiling::scope!("CommandEncoder::clear_texture");
         api_log!("CommandEncoder::clear_texture {dst:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let cmd_buf = match hub
             .command_buffers
@@ -268,8 +267,8 @@ impl Global {
     }
 }
 
-pub(crate) fn clear_texture<A: HalApi, T: TextureTrackerSetSingle<A>>(
-    dst_texture: &Arc<Texture<A>>,
+pub(crate) fn clear_texture<T: TextureTrackerSetSingle>(
+    dst_texture: &Arc<Texture>,
     range: TextureInitRange,
     encoder: &mut dyn hal::DynCommandEncoder,
     texture_tracker: &mut T,
@@ -440,8 +439,8 @@ fn clear_texture_via_buffer_copies(
     }
 }
 
-fn clear_texture_via_render_passes<A: HalApi>(
-    dst_texture: &Texture<A>,
+fn clear_texture_via_render_passes(
+    dst_texture: &Texture,
     range: TextureInitRange,
     is_color: bool,
     encoder: &mut dyn hal::DynCommandEncoder,
@@ -461,7 +460,7 @@ fn clear_texture_via_render_passes<A: HalApi>(
             let (color_attachments, depth_stencil_attachment) = if is_color {
                 color_attachments_tmp = [Some(hal::ColorAttachment {
                     target: hal::Attachment {
-                        view: Texture::<A>::get_clear_view(
+                        view: Texture::get_clear_view(
                             &dst_texture.clear_mode,
                             &dst_texture.desc,
                             mip_level,
@@ -479,7 +478,7 @@ fn clear_texture_via_render_passes<A: HalApi>(
                     &[][..],
                     Some(hal::DepthStencilAttachment {
                         target: hal::Attachment {
-                            view: Texture::<A>::get_clear_view(
+                            view: Texture::get_clear_view(
                                 &dst_texture.clear_mode,
                                 &dst_texture.desc,
                                 mip_level,
diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs
index a23370527f..93e7c15168 100644
--- a/wgpu-core/src/command/compute.rs
+++ b/wgpu-core/src/command/compute.rs
@@ -13,7 +13,6 @@ use crate::{
     },
     device::{Device, DeviceError, MissingDownlevelFlags, MissingFeatures},
     global::Global,
-    hal_api::HalApi,
     hal_label, id,
     init_tracker::{BufferInitTrackerAction, MemoryInitKind},
     pipeline::ComputePipeline,
@@ -34,28 +33,28 @@ use std::{fmt, mem, str};
 
 use super::{bind::BinderError, memory_init::CommandBufferTextureMemoryActions, DynComputePass};
 
-pub struct ComputePass<A: HalApi> {
+pub struct ComputePass {
     /// All pass data & records is stored here.
     ///
     /// If this is `None`, the pass is in the 'ended' state and can no longer be used.
     /// Any attempt to record more commands will result in a validation error.
-    base: Option<BasePass<ArcComputeCommand<A>>>,
+    base: Option<BasePass<ArcComputeCommand>>,
 
     /// Parent command buffer that this pass records commands into.
     ///
     /// If it is none, this pass is invalid and any operation on it will return an error.
-    parent: Option<Arc<CommandBuffer<A>>>,
+    parent: Option<Arc<CommandBuffer>>,
 
-    timestamp_writes: Option<ArcPassTimestampWrites<A>>,
+    timestamp_writes: Option<ArcPassTimestampWrites>,
 
     // Resource binding dedupe state.
     current_bind_groups: BindGroupStateChange,
     current_pipeline: StateChange<id::ComputePipelineId>,
 }
 
-impl<A: HalApi> ComputePass<A> {
+impl ComputePass {
     /// If the parent command buffer is invalid, the returned pass will be invalid.
-    fn new(parent: Option<Arc<CommandBuffer<A>>>, desc: ArcComputePassDescriptor<A>) -> Self {
+    fn new(parent: Option<Arc<CommandBuffer>>, desc: ArcComputePassDescriptor) -> Self {
         let ArcComputePassDescriptor {
             label,
             timestamp_writes,
@@ -79,7 +78,7 @@ impl<A: HalApi> ComputePass<A> {
     fn base_mut<'a>(
         &'a mut self,
         scope: PassErrorScope,
-    ) -> Result<&'a mut BasePass<ArcComputeCommand<A>>, ComputePassError> {
+    ) -> Result<&'a mut BasePass<ArcComputeCommand>, ComputePassError> {
         self.base
             .as_mut()
             .ok_or(ComputePassErrorInner::PassEnded)
@@ -87,7 +86,7 @@ impl<A: HalApi> ComputePass<A> {
     }
 }
 
-impl<A: HalApi> fmt::Debug for ComputePass<A> {
+impl fmt::Debug for ComputePass {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self.parent {
             Some(ref cmd_buf) => write!(f, "ComputePass {{ parent: {} }}", cmd_buf.error_ident()),
@@ -103,10 +102,10 @@ pub struct ComputePassDescriptor<'a> {
     pub timestamp_writes: Option<&'a PassTimestampWrites>,
 }
 
-struct ArcComputePassDescriptor<'a, A: HalApi> {
+struct ArcComputePassDescriptor<'a> {
     pub label: &'a Label<'a>,
     /// Defines where and when timestamp values will be written for this pass.
-    pub timestamp_writes: Option<ArcPassTimestampWrites<A>>,
+    pub timestamp_writes: Option<ArcPassTimestampWrites>,
 }
 
 #[derive(Clone, Debug, Error)]
@@ -200,36 +199,36 @@ where
     }
 }
 
-struct State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder, A: HalApi> {
-    binder: Binder<A>,
-    pipeline: Option<Arc<ComputePipeline<A>>>,
-    scope: UsageScope<'scope, A>,
+struct State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder> {
+    binder: Binder,
+    pipeline: Option<Arc<ComputePipeline>>,
+    scope: UsageScope<'scope>,
     debug_scope_depth: u32,
 
     snatch_guard: SnatchGuard<'snatch_guard>,
 
-    device: &'cmd_buf Arc<Device<A>>,
+    device: &'cmd_buf Arc<Device>,
 
     raw_encoder: &'raw_encoder mut dyn hal::DynCommandEncoder,
 
-    tracker: &'cmd_buf mut Tracker<A>,
-    buffer_memory_init_actions: &'cmd_buf mut Vec<BufferInitTrackerAction<A>>,
-    texture_memory_actions: &'cmd_buf mut CommandBufferTextureMemoryActions<A>,
+    tracker: &'cmd_buf mut Tracker,
+    buffer_memory_init_actions: &'cmd_buf mut Vec<BufferInitTrackerAction>,
+    texture_memory_actions: &'cmd_buf mut CommandBufferTextureMemoryActions,
 
     temp_offsets: Vec<u32>,
     dynamic_offset_count: usize,
     string_offset: usize,
-    active_query: Option<(Arc<resource::QuerySet<A>>, u32)>,
+    active_query: Option<(Arc<resource::QuerySet>, u32)>,
 
-    intermediate_trackers: Tracker<A>,
+    intermediate_trackers: Tracker,
 
     /// Immediate texture inits required because of prior discards. Need to
     /// be inserted before texture reads.
-    pending_discard_init_fixups: SurfacesInDiscardState<A>,
+    pending_discard_init_fixups: SurfacesInDiscardState,
 }
 
-impl<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder, A: HalApi>
-    State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder, A>
+impl<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder>
+    State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder>
 {
     fn is_ready(&self) -> Result<(), DispatchError> {
         if let Some(pipeline) = self.pipeline.as_ref() {
@@ -285,12 +284,12 @@ impl Global {
     /// Any operation on an invalid pass will return an error.
     ///
     /// If successful, puts the encoder into the [`CommandEncoderStatus::Locked`] state.
-    pub fn command_encoder_create_compute_pass<A: HalApi>(
+    pub fn command_encoder_create_compute_pass(
         &self,
         encoder_id: id::CommandEncoderId,
         desc: &ComputePassDescriptor<'_>,
-    ) -> (ComputePass<A>, Option<CommandEncoderError>) {
-        let hub = A::hub(self);
+    ) -> (ComputePass, Option<CommandEncoderError>) {
+        let hub = &self.hub;
 
         let mut arc_desc = ArcComputePassDescriptor {
             label: &desc.label,
@@ -333,19 +332,16 @@ impl Global {
     ///
     /// If creation fails, an invalid pass is returned.
     /// Any operation on an invalid pass will return an error.
-    pub fn command_encoder_create_compute_pass_dyn<A: HalApi>(
+    pub fn command_encoder_create_compute_pass_dyn(
         &self,
         encoder_id: id::CommandEncoderId,
         desc: &ComputePassDescriptor,
     ) -> (Box<dyn DynComputePass>, Option<CommandEncoderError>) {
-        let (pass, err) = self.command_encoder_create_compute_pass::<A>(encoder_id, desc);
+        let (pass, err) = self.command_encoder_create_compute_pass(encoder_id, desc);
         (Box::new(pass), err)
     }
 
-    pub fn compute_pass_end<A: HalApi>(
-        &self,
-        pass: &mut ComputePass<A>,
-    ) -> Result<(), ComputePassError> {
+    pub fn compute_pass_end(&self, pass: &mut ComputePass) -> Result<(), ComputePassError> {
         let scope = PassErrorScope::Pass;
 
         let cmd_buf = pass
@@ -366,13 +362,13 @@ impl Global {
 
     #[doc(hidden)]
     #[cfg(any(feature = "serde", feature = "replay"))]
-    pub fn compute_pass_end_with_unresolved_commands<A: HalApi>(
+    pub fn compute_pass_end_with_unresolved_commands(
         &self,
         encoder_id: id::CommandEncoderId,
         base: BasePass<super::ComputeCommand>,
         timestamp_writes: Option<&PassTimestampWrites>,
     ) -> Result<(), ComputePassError> {
-        let hub = A::hub(self);
+        let hub = &self.hub;
         let scope = PassErrorScope::Pass;
 
         let cmd_buf = match hub.command_buffers.get(encoder_id.into_command_buffer_id()) {
@@ -400,7 +396,7 @@ impl Global {
         }
 
         let commands =
-            super::ComputeCommand::resolve_compute_command_ids(A::hub(self), &base.commands)?;
+            super::ComputeCommand::resolve_compute_command_ids(&self.hub, &base.commands)?;
 
         let timestamp_writes = if let Some(tw) = timestamp_writes {
             Some(ArcPassTimestampWrites {
@@ -416,7 +412,7 @@ impl Global {
             None
         };
 
-        self.compute_pass_end_impl::<A>(
+        self.compute_pass_end_impl(
             &cmd_buf,
             BasePass {
                 label: base.label,
@@ -429,11 +425,11 @@ impl Global {
         )
     }
 
-    fn compute_pass_end_impl<A: HalApi>(
+    fn compute_pass_end_impl(
         &self,
-        cmd_buf: &CommandBuffer<A>,
-        base: BasePass<ArcComputeCommand<A>>,
-        mut timestamp_writes: Option<ArcPassTimestampWrites<A>>,
+        cmd_buf: &CommandBuffer,
+        base: BasePass<ArcComputeCommand>,
+        mut timestamp_writes: Option<ArcPassTimestampWrites>,
     ) -> Result<(), ComputePassError> {
         profiling::scope!("CommandEncoder::run_compute_pass");
         let pass_scope = PassErrorScope::Pass;
@@ -660,13 +656,13 @@ impl Global {
     }
 }
 
-fn set_bind_group<A: HalApi>(
-    state: &mut State<A>,
-    cmd_buf: &CommandBuffer<A>,
+fn set_bind_group(
+    state: &mut State,
+    cmd_buf: &CommandBuffer,
     dynamic_offsets: &[DynamicOffset],
     index: u32,
     num_dynamic_offsets: usize,
-    bind_group: Arc<BindGroup<A>>,
+    bind_group: Arc<BindGroup>,
 ) -> Result<(), ComputePassErrorInner> {
     bind_group.same_device_as(cmd_buf)?;
 
@@ -727,10 +723,10 @@ fn set_bind_group<A: HalApi>(
     Ok(())
 }
 
-fn set_pipeline<A: HalApi>(
-    state: &mut State<A>,
-    cmd_buf: &CommandBuffer<A>,
-    pipeline: Arc<ComputePipeline<A>>,
+fn set_pipeline(
+    state: &mut State,
+    cmd_buf: &CommandBuffer,
+    pipeline: Arc<ComputePipeline>,
 ) -> Result<(), ComputePassErrorInner> {
     pipeline.same_device_as(cmd_buf)?;
 
@@ -789,8 +785,8 @@ fn set_pipeline<A: HalApi>(
     Ok(())
 }
 
-fn set_push_constant<A: HalApi>(
-    state: &mut State<A>,
+fn set_push_constant(
+    state: &mut State,
     push_constant_data: &[u32],
     offset: u32,
     size_bytes: u32,
@@ -826,10 +822,7 @@ fn set_push_constant<A: HalApi>(
     Ok(())
 }
 
-fn dispatch<A: HalApi>(
-    state: &mut State<A>,
-    groups: [u32; 3],
-) -> Result<(), ComputePassErrorInner> {
+fn dispatch(state: &mut State, groups: [u32; 3]) -> Result<(), ComputePassErrorInner> {
     state.is_ready()?;
 
     state.flush_states(None)?;
@@ -854,10 +847,10 @@ fn dispatch<A: HalApi>(
     Ok(())
 }
 
-fn dispatch_indirect<A: HalApi>(
-    state: &mut State<A>,
-    cmd_buf: &CommandBuffer<A>,
-    buffer: Arc<Buffer<A>>,
+fn dispatch_indirect(
+    state: &mut State,
+    cmd_buf: &CommandBuffer,
+    buffer: Arc<Buffer>,
     offset: u64,
 ) -> Result<(), ComputePassErrorInner> {
     buffer.same_device_as(cmd_buf)?;
@@ -902,7 +895,7 @@ fn dispatch_indirect<A: HalApi>(
     Ok(())
 }
 
-fn push_debug_group<A: HalApi>(state: &mut State<A>, string_data: &[u8], len: usize) {
+fn push_debug_group(state: &mut State, string_data: &[u8], len: usize) {
     state.debug_scope_depth += 1;
     if !state
         .device
@@ -918,7 +911,7 @@ fn push_debug_group<A: HalApi>(state: &mut State<A>, string_data: &[u8], len: us
     state.string_offset += len;
 }
 
-fn pop_debug_group<A: HalApi>(state: &mut State<A>) -> Result<(), ComputePassErrorInner> {
+fn pop_debug_group(state: &mut State) -> Result<(), ComputePassErrorInner> {
     if state.debug_scope_depth == 0 {
         return Err(ComputePassErrorInner::InvalidPopDebugGroup);
     }
@@ -935,7 +928,7 @@ fn pop_debug_group<A: HalApi>(state: &mut State<A>) -> Result<(), ComputePassErr
     Ok(())
 }
 
-fn insert_debug_marker<A: HalApi>(state: &mut State<A>, string_data: &[u8], len: usize) {
+fn insert_debug_marker(state: &mut State, string_data: &[u8], len: usize) {
     if !state
         .device
         .instance_flags
@@ -948,10 +941,10 @@ fn insert_debug_marker<A: HalApi>(state: &mut State<A>, string_data: &[u8], len:
     state.string_offset += len;
 }
 
-fn write_timestamp<A: HalApi>(
-    state: &mut State<A>,
-    cmd_buf: &CommandBuffer<A>,
-    query_set: Arc<resource::QuerySet<A>>,
+fn write_timestamp(
+    state: &mut State,
+    cmd_buf: &CommandBuffer,
+    query_set: Arc<resource::QuerySet>,
     query_index: u32,
 ) -> Result<(), ComputePassErrorInner> {
     query_set.same_device_as(cmd_buf)?;
@@ -968,9 +961,9 @@ fn write_timestamp<A: HalApi>(
 
 // Recording a compute pass.
 impl Global {
-    pub fn compute_pass_set_bind_group<A: HalApi>(
+    pub fn compute_pass_set_bind_group(
         &self,
-        pass: &mut ComputePass<A>,
+        pass: &mut ComputePass,
         index: u32,
         bind_group_id: id::BindGroupId,
         offsets: &[DynamicOffset],
@@ -993,7 +986,7 @@ impl Global {
             return Ok(());
         }
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
         let bind_group = hub
             .bind_groups
             .get(bind_group_id)
@@ -1009,9 +1002,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn compute_pass_set_pipeline<A: HalApi>(
+    pub fn compute_pass_set_pipeline(
         &self,
-        pass: &mut ComputePass<A>,
+        pass: &mut ComputePass,
         pipeline_id: id::ComputePipelineId,
     ) -> Result<(), ComputePassError> {
         let redundant = pass.current_pipeline.set_and_check_redundant(pipeline_id);
@@ -1024,7 +1017,7 @@ impl Global {
             return Ok(());
         }
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
         let pipeline = hub
             .compute_pipelines
             .get(pipeline_id)
@@ -1036,9 +1029,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn compute_pass_set_push_constants<A: HalApi>(
+    pub fn compute_pass_set_push_constants(
         &self,
-        pass: &mut ComputePass<A>,
+        pass: &mut ComputePass,
         offset: u32,
         data: &[u8],
     ) -> Result<(), ComputePassError> {
@@ -1064,7 +1057,7 @@ impl Global {
                 .map(|arr| u32::from_ne_bytes([arr[0], arr[1], arr[2], arr[3]])),
         );
 
-        base.commands.push(ArcComputeCommand::<A>::SetPushConstant {
+        base.commands.push(ArcComputeCommand::SetPushConstant {
             offset,
             size_bytes: data.len() as u32,
             values_offset: value_offset,
@@ -1073,9 +1066,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn compute_pass_dispatch_workgroups<A: HalApi>(
+    pub fn compute_pass_dispatch_workgroups(
         &self,
-        pass: &mut ComputePass<A>,
+        pass: &mut ComputePass,
         groups_x: u32,
         groups_y: u32,
         groups_z: u32,
@@ -1083,20 +1076,19 @@ impl Global {
         let scope = PassErrorScope::Dispatch { indirect: false };
 
         let base = pass.base_mut(scope)?;
-        base.commands.push(ArcComputeCommand::<A>::Dispatch([
-            groups_x, groups_y, groups_z,
-        ]));
+        base.commands
+            .push(ArcComputeCommand::Dispatch([groups_x, groups_y, groups_z]));
 
         Ok(())
     }
 
-    pub fn compute_pass_dispatch_workgroups_indirect<A: HalApi>(
+    pub fn compute_pass_dispatch_workgroups_indirect(
         &self,
-        pass: &mut ComputePass<A>,
+        pass: &mut ComputePass,
         buffer_id: id::BufferId,
         offset: BufferAddress,
     ) -> Result<(), ComputePassError> {
-        let hub = A::hub(self);
+        let hub = &self.hub;
         let scope = PassErrorScope::Dispatch { indirect: true };
         let base = pass.base_mut(scope)?;
 
@@ -1107,14 +1099,14 @@ impl Global {
             .map_pass_err(scope)?;
 
         base.commands
-            .push(ArcComputeCommand::<A>::DispatchIndirect { buffer, offset });
+            .push(ArcComputeCommand::DispatchIndirect { buffer, offset });
 
         Ok(())
     }
 
-    pub fn compute_pass_push_debug_group<A: HalApi>(
+    pub fn compute_pass_push_debug_group(
         &self,
-        pass: &mut ComputePass<A>,
+        pass: &mut ComputePass,
         label: &str,
         color: u32,
     ) -> Result<(), ComputePassError> {
@@ -1123,7 +1115,7 @@ impl Global {
         let bytes = label.as_bytes();
         base.string_data.extend_from_slice(bytes);
 
-        base.commands.push(ArcComputeCommand::<A>::PushDebugGroup {
+        base.commands.push(ArcComputeCommand::PushDebugGroup {
             color,
             len: bytes.len(),
         });
@@ -1131,20 +1123,20 @@ impl Global {
         Ok(())
     }
 
-    pub fn compute_pass_pop_debug_group<A: HalApi>(
+    pub fn compute_pass_pop_debug_group(
         &self,
-        pass: &mut ComputePass<A>,
+        pass: &mut ComputePass,
     ) -> Result<(), ComputePassError> {
         let base = pass.base_mut(PassErrorScope::PopDebugGroup)?;
 
-        base.commands.push(ArcComputeCommand::<A>::PopDebugGroup);
+        base.commands.push(ArcComputeCommand::PopDebugGroup);
 
         Ok(())
     }
 
-    pub fn compute_pass_insert_debug_marker<A: HalApi>(
+    pub fn compute_pass_insert_debug_marker(
         &self,
-        pass: &mut ComputePass<A>,
+        pass: &mut ComputePass,
         label: &str,
         color: u32,
     ) -> Result<(), ComputePassError> {
@@ -1153,25 +1145,24 @@ impl Global {
         let bytes = label.as_bytes();
         base.string_data.extend_from_slice(bytes);
 
-        base.commands
-            .push(ArcComputeCommand::<A>::InsertDebugMarker {
-                color,
-                len: bytes.len(),
-            });
+        base.commands.push(ArcComputeCommand::InsertDebugMarker {
+            color,
+            len: bytes.len(),
+        });
 
         Ok(())
     }
 
-    pub fn compute_pass_write_timestamp<A: HalApi>(
+    pub fn compute_pass_write_timestamp(
         &self,
-        pass: &mut ComputePass<A>,
+        pass: &mut ComputePass,
         query_set_id: id::QuerySetId,
         query_index: u32,
     ) -> Result<(), ComputePassError> {
         let scope = PassErrorScope::WriteTimestamp;
         let base = pass.base_mut(scope)?;
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
         let query_set = hub
             .query_sets
             .get(query_set_id)
@@ -1186,16 +1177,16 @@ impl Global {
         Ok(())
     }
 
-    pub fn compute_pass_begin_pipeline_statistics_query<A: HalApi>(
+    pub fn compute_pass_begin_pipeline_statistics_query(
         &self,
-        pass: &mut ComputePass<A>,
+        pass: &mut ComputePass,
         query_set_id: id::QuerySetId,
         query_index: u32,
     ) -> Result<(), ComputePassError> {
         let scope = PassErrorScope::BeginPipelineStatisticsQuery;
         let base = pass.base_mut(scope)?;
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
         let query_set = hub
             .query_sets
             .get(query_set_id)
@@ -1211,14 +1202,14 @@ impl Global {
         Ok(())
     }
 
-    pub fn compute_pass_end_pipeline_statistics_query<A: HalApi>(
+    pub fn compute_pass_end_pipeline_statistics_query(
         &self,
-        pass: &mut ComputePass<A>,
+        pass: &mut ComputePass,
     ) -> Result<(), ComputePassError> {
         let scope = PassErrorScope::EndPipelineStatisticsQuery;
         let base = pass.base_mut(scope)?;
         base.commands
-            .push(ArcComputeCommand::<A>::EndPipelineStatisticsQuery);
+            .push(ArcComputeCommand::EndPipelineStatisticsQuery);
 
         Ok(())
     }
diff --git a/wgpu-core/src/command/compute_command.rs b/wgpu-core/src/command/compute_command.rs
index 761827b85a..e16487b7ea 100644
--- a/wgpu-core/src/command/compute_command.rs
+++ b/wgpu-core/src/command/compute_command.rs
@@ -2,7 +2,6 @@ use std::sync::Arc;
 
 use crate::{
     binding_model::BindGroup,
-    hal_api::HalApi,
     id,
     pipeline::ComputePipeline,
     resource::{Buffer, QuerySet},
@@ -71,10 +70,10 @@ pub enum ComputeCommand {
 impl ComputeCommand {
     /// Resolves all ids in a list of commands into the corresponding resource Arc.
     #[cfg(any(feature = "serde", feature = "replay"))]
-    pub fn resolve_compute_command_ids<A: HalApi>(
-        hub: &crate::hub::Hub<A>,
+    pub fn resolve_compute_command_ids(
+        hub: &crate::hub::Hub,
         commands: &[ComputeCommand],
-    ) -> Result<Vec<ArcComputeCommand<A>>, super::ComputePassError> {
+    ) -> Result<Vec<ArcComputeCommand>, super::ComputePassError> {
         use super::{ComputePassError, ComputePassErrorInner, PassErrorScope};
 
         let buffers_guard = hub.buffers.read();
@@ -82,9 +81,9 @@ impl ComputeCommand {
         let query_set_guard = hub.query_sets.read();
         let pipelines_guard = hub.compute_pipelines.read();
 
-        let resolved_commands: Vec<ArcComputeCommand<A>> = commands
+        let resolved_commands: Vec<ArcComputeCommand> = commands
             .iter()
-            .map(|c| -> Result<ArcComputeCommand<A>, ComputePassError> {
+            .map(|c| -> Result<ArcComputeCommand, ComputePassError> {
                 Ok(match *c {
                     ComputeCommand::SetBindGroup {
                         index,
@@ -182,14 +181,14 @@ impl ComputeCommand {
 
 /// Equivalent to `ComputeCommand` but the Ids resolved into resource Arcs.
 #[derive(Clone, Debug)]
-pub enum ArcComputeCommand<A: HalApi> {
+pub enum ArcComputeCommand {
     SetBindGroup {
         index: u32,
         num_dynamic_offsets: usize,
-        bind_group: Arc<BindGroup<A>>,
+        bind_group: Arc<BindGroup>,
     },
 
-    SetPipeline(Arc<ComputePipeline<A>>),
+    SetPipeline(Arc<ComputePipeline>),
 
     /// Set a range of push constants to values stored in `push_constant_data`.
     SetPushConstant {
@@ -211,7 +210,7 @@ pub enum ArcComputeCommand<A: HalApi> {
     Dispatch([u32; 3]),
 
     DispatchIndirect {
-        buffer: Arc<Buffer<A>>,
+        buffer: Arc<Buffer>,
         offset: wgt::BufferAddress,
     },
 
@@ -228,12 +227,12 @@ pub enum ArcComputeCommand<A: HalApi> {
     },
 
     WriteTimestamp {
-        query_set: Arc<QuerySet<A>>,
+        query_set: Arc<QuerySet>,
         query_index: u32,
     },
 
     BeginPipelineStatisticsQuery {
-        query_set: Arc<QuerySet<A>>,
+        query_set: Arc<QuerySet>,
         query_index: u32,
     },
 
diff --git a/wgpu-core/src/command/dyn_compute_pass.rs b/wgpu-core/src/command/dyn_compute_pass.rs
index ea15e2667d..273feaddf7 100644
--- a/wgpu-core/src/command/dyn_compute_pass.rs
+++ b/wgpu-core/src/command/dyn_compute_pass.rs
@@ -1,6 +1,6 @@
 use wgt::WasmNotSendSync;
 
-use crate::{global, hal_api::HalApi, id};
+use crate::{global, id};
 
 use super::{ComputePass, ComputePassError};
 
@@ -74,7 +74,7 @@ pub trait DynComputePass: std::fmt::Debug + WasmNotSendSync {
     fn label(&self) -> Option<&str>;
 }
 
-impl<A: HalApi> DynComputePass for ComputePass<A> {
+impl DynComputePass for ComputePass {
     fn set_bind_group(
         &mut self,
         context: &global::Global,
diff --git a/wgpu-core/src/command/dyn_render_pass.rs b/wgpu-core/src/command/dyn_render_pass.rs
index 7ad79262b3..d20ca09780 100644
--- a/wgpu-core/src/command/dyn_render_pass.rs
+++ b/wgpu-core/src/command/dyn_render_pass.rs
@@ -1,6 +1,6 @@
 use wgt::WasmNotSendSync;
 
-use crate::{global, hal_api::HalApi, id};
+use crate::{global, id};
 
 use super::{RenderPass, RenderPassError};
 
@@ -178,7 +178,7 @@ pub trait DynRenderPass: std::fmt::Debug + WasmNotSendSync {
     fn label(&self) -> Option<&str>;
 }
 
-impl<A: HalApi> DynRenderPass for RenderPass<A> {
+impl DynRenderPass for RenderPass {
     fn set_index_buffer(
         &mut self,
         context: &global::Global,
diff --git a/wgpu-core/src/command/memory_init.rs b/wgpu-core/src/command/memory_init.rs
index 7e672393f1..a4711998b2 100644
--- a/wgpu-core/src/command/memory_init.rs
+++ b/wgpu-core/src/command/memory_init.rs
@@ -2,7 +2,6 @@ use std::{collections::hash_map::Entry, ops::Range, sync::Arc, vec::Drain};
 
 use crate::{
     device::Device,
-    hal_api::HalApi,
     init_tracker::*,
     resource::{DestroyedResourceError, ParentDevice, Texture, Trackable},
     snatch::SnatchGuard,
@@ -15,39 +14,31 @@ use super::{clear::clear_texture, BakedCommands, ClearError};
 /// Surface that was discarded by `StoreOp::Discard` of a preceding renderpass.
 /// Any read access to this surface needs to be preceded by a texture initialization.
 #[derive(Clone)]
-pub(crate) struct TextureSurfaceDiscard<A: HalApi> {
-    pub texture: Arc<Texture<A>>,
+pub(crate) struct TextureSurfaceDiscard {
+    pub texture: Arc<Texture>,
     pub mip_level: u32,
     pub layer: u32,
 }
 
-pub(crate) type SurfacesInDiscardState<A> = Vec<TextureSurfaceDiscard<A>>;
+pub(crate) type SurfacesInDiscardState = Vec<TextureSurfaceDiscard>;
 
-pub(crate) struct CommandBufferTextureMemoryActions<A: HalApi> {
+#[derive(Default)]
+pub(crate) struct CommandBufferTextureMemoryActions {
     /// The tracker actions that we need to be executed before the command
     /// buffer is executed.
-    init_actions: Vec<TextureInitTrackerAction<A>>,
+    init_actions: Vec<TextureInitTrackerAction>,
     /// All the discards that haven't been followed by init again within the
     /// command buffer i.e. everything in this list resets the texture init
     /// state *after* the command buffer execution
-    discards: Vec<TextureSurfaceDiscard<A>>,
+    discards: Vec<TextureSurfaceDiscard>,
 }
 
-impl<A: HalApi> Default for CommandBufferTextureMemoryActions<A> {
-    fn default() -> Self {
-        Self {
-            init_actions: Default::default(),
-            discards: Default::default(),
-        }
-    }
-}
-
-impl<A: HalApi> CommandBufferTextureMemoryActions<A> {
-    pub(crate) fn drain_init_actions(&mut self) -> Drain<TextureInitTrackerAction<A>> {
+impl CommandBufferTextureMemoryActions {
+    pub(crate) fn drain_init_actions(&mut self) -> Drain<TextureInitTrackerAction> {
         self.init_actions.drain(..)
     }
 
-    pub(crate) fn discard(&mut self, discard: TextureSurfaceDiscard<A>) {
+    pub(crate) fn discard(&mut self, discard: TextureSurfaceDiscard) {
         self.discards.push(discard);
     }
 
@@ -57,8 +48,8 @@ impl<A: HalApi> CommandBufferTextureMemoryActions<A> {
     #[must_use]
     pub(crate) fn register_init_action(
         &mut self,
-        action: &TextureInitTrackerAction<A>,
-    ) -> SurfacesInDiscardState<A> {
+        action: &TextureInitTrackerAction,
+    ) -> SurfacesInDiscardState {
         let mut immediately_necessary_clears = SurfacesInDiscardState::new();
 
         // Note that within a command buffer we may stack arbitrary memory init
@@ -117,7 +108,7 @@ impl<A: HalApi> CommandBufferTextureMemoryActions<A> {
     // implicit init, not requiring any immediate resource init.
     pub(crate) fn register_implicit_init(
         &mut self,
-        texture: &Arc<Texture<A>>,
+        texture: &Arc<Texture>,
         range: TextureInitRange,
     ) {
         let must_be_empty = self.register_init_action(&TextureInitTrackerAction {
@@ -133,14 +124,11 @@ impl<A: HalApi> CommandBufferTextureMemoryActions<A> {
 // register_init_action and initializes them on the spot.
 //
 // Takes care of barriers as well!
-pub(crate) fn fixup_discarded_surfaces<
-    A: HalApi,
-    InitIter: Iterator<Item = TextureSurfaceDiscard<A>>,
->(
+pub(crate) fn fixup_discarded_surfaces<InitIter: Iterator<Item = TextureSurfaceDiscard>>(
     inits: InitIter,
     encoder: &mut dyn hal::DynCommandEncoder,
-    texture_tracker: &mut TextureTracker<A>,
-    device: &Device<A>,
+    texture_tracker: &mut TextureTracker,
+    device: &Device,
     snatch_guard: &SnatchGuard<'_>,
 ) {
     for init in inits {
@@ -160,12 +148,12 @@ pub(crate) fn fixup_discarded_surfaces<
     }
 }
 
-impl<A: HalApi> BakedCommands<A> {
+impl BakedCommands {
     // inserts all buffer initializations that are going to be needed for
     // executing the commands and updates resource init states accordingly
     pub(crate) fn initialize_buffer_memory(
         &mut self,
-        device_tracker: &mut DeviceTracker<A>,
+        device_tracker: &mut DeviceTracker,
         snatch_guard: &SnatchGuard<'_>,
     ) -> Result<(), DestroyedResourceError> {
         profiling::scope!("initialize_buffer_memory");
@@ -265,8 +253,8 @@ impl<A: HalApi> BakedCommands<A> {
     // uninitialized
     pub(crate) fn initialize_texture_memory(
         &mut self,
-        device_tracker: &mut DeviceTracker<A>,
-        device: &Device<A>,
+        device_tracker: &mut DeviceTracker,
+        device: &Device,
         snatch_guard: &SnatchGuard<'_>,
     ) -> Result<(), DestroyedResourceError> {
         profiling::scope!("initialize_texture_memory");
diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs
index df9360e775..d2714087df 100644
--- a/wgpu-core/src/command/mod.rs
+++ b/wgpu-core/src/command/mod.rs
@@ -37,7 +37,7 @@ use crate::init_tracker::BufferInitTrackerAction;
 use crate::resource::Labeled;
 use crate::track::{DeviceTracker, Tracker, UsageScope};
 use crate::LabelHelpers;
-use crate::{api_log, global::Global, hal_api::HalApi, id, resource_log, Label};
+use crate::{api_log, global::Global, id, resource_log, Label};
 
 use thiserror::Error;
 
@@ -240,16 +240,16 @@ impl CommandEncoder {
     }
 }
 
-pub(crate) struct BakedCommands<A: HalApi> {
+pub(crate) struct BakedCommands {
     pub(crate) encoder: Box<dyn hal::DynCommandEncoder>,
     pub(crate) list: Vec<Box<dyn hal::DynCommandBuffer>>,
-    pub(crate) trackers: Tracker<A>,
-    buffer_memory_init_actions: Vec<BufferInitTrackerAction<A>>,
-    texture_memory_actions: CommandBufferTextureMemoryActions<A>,
+    pub(crate) trackers: Tracker,
+    buffer_memory_init_actions: Vec<BufferInitTrackerAction>,
+    texture_memory_actions: CommandBufferTextureMemoryActions,
 }
 
 /// The mutable state of a [`CommandBuffer`].
-pub struct CommandBufferMutable<A: HalApi> {
+pub struct CommandBufferMutable {
     /// The [`wgpu_hal::Api::CommandBuffer`]s we've built so far, and the encoder
     /// they belong to.
     ///
@@ -260,7 +260,7 @@ pub struct CommandBufferMutable<A: HalApi> {
     status: CommandEncoderStatus,
 
     /// All the resources that the commands recorded so far have referred to.
-    pub(crate) trackers: Tracker<A>,
+    pub(crate) trackers: Tracker,
 
     /// The regions of buffers and textures these commands will read and write.
     ///
@@ -268,18 +268,18 @@ pub struct CommandBufferMutable<A: HalApi> {
     /// buffers/textures we actually need to initialize. If we're
     /// definitely going to write to something before we read from it,
     /// we don't need to clear its contents.
-    buffer_memory_init_actions: Vec<BufferInitTrackerAction<A>>,
-    texture_memory_actions: CommandBufferTextureMemoryActions<A>,
+    buffer_memory_init_actions: Vec<BufferInitTrackerAction>,
+    texture_memory_actions: CommandBufferTextureMemoryActions,
 
-    pub(crate) pending_query_resets: QueryResetMap<A>,
+    pub(crate) pending_query_resets: QueryResetMap,
     #[cfg(feature = "trace")]
     pub(crate) commands: Option<Vec<TraceCommand>>,
 }
 
-impl<A: HalApi> CommandBufferMutable<A> {
+impl CommandBufferMutable {
     pub(crate) fn open_encoder_and_tracker(
         &mut self,
-    ) -> Result<(&mut dyn hal::DynCommandEncoder, &mut Tracker<A>), DeviceError> {
+    ) -> Result<(&mut dyn hal::DynCommandEncoder, &mut Tracker), DeviceError> {
         let encoder = self.encoder.open()?;
         let tracker = &mut self.trackers;
 
@@ -305,8 +305,8 @@ impl<A: HalApi> CommandBufferMutable<A> {
 /// - Once a command buffer is submitted to the queue, it is removed from the id
 ///   registry, and its contents are taken to construct a [`BakedCommands`],
 ///   whose contents eventually become the property of the submission queue.
-pub struct CommandBuffer<A: HalApi> {
-    pub(crate) device: Arc<Device<A>>,
+pub struct CommandBuffer {
+    pub(crate) device: Arc<Device>,
     support_clear_texture: bool,
     /// The `label` from the descriptor used to create the resource.
     label: String,
@@ -317,10 +317,10 @@ pub struct CommandBuffer<A: HalApi> {
     /// When this is submitted, dropped, or destroyed, its contents are
     /// extracted into a [`BakedCommands`] by
     /// [`CommandBuffer::extract_baked_commands`].
-    pub(crate) data: Mutex<Option<CommandBufferMutable<A>>>,
+    pub(crate) data: Mutex<Option<CommandBufferMutable>>,
 }
 
-impl<A: HalApi> Drop for CommandBuffer<A> {
+impl Drop for CommandBuffer {
     fn drop(&mut self) {
         resource_log!("Drop {}", self.error_ident());
         if self.data.lock().is_none() {
@@ -336,10 +336,10 @@ impl<A: HalApi> Drop for CommandBuffer<A> {
     }
 }
 
-impl<A: HalApi> CommandBuffer<A> {
+impl CommandBuffer {
     pub(crate) fn new(
         encoder: Box<dyn hal::DynCommandEncoder>,
-        device: &Arc<Device<A>>,
+        device: &Arc<Device>,
         label: &Label,
     ) -> Self {
         CommandBuffer {
@@ -373,8 +373,8 @@ impl<A: HalApi> CommandBuffer<A> {
 
     pub(crate) fn insert_barriers_from_tracker(
         raw: &mut dyn hal::DynCommandEncoder,
-        base: &mut Tracker<A>,
-        head: &Tracker<A>,
+        base: &mut Tracker,
+        head: &Tracker,
         snatch_guard: &SnatchGuard,
     ) {
         profiling::scope!("insert_barriers");
@@ -387,8 +387,8 @@ impl<A: HalApi> CommandBuffer<A> {
 
     pub(crate) fn insert_barriers_from_scope(
         raw: &mut dyn hal::DynCommandEncoder,
-        base: &mut Tracker<A>,
-        head: &UsageScope<A>,
+        base: &mut Tracker,
+        head: &UsageScope,
         snatch_guard: &SnatchGuard,
     ) {
         profiling::scope!("insert_barriers");
@@ -401,7 +401,7 @@ impl<A: HalApi> CommandBuffer<A> {
 
     pub(crate) fn drain_barriers(
         raw: &mut dyn hal::DynCommandEncoder,
-        base: &mut Tracker<A>,
+        base: &mut Tracker,
         snatch_guard: &SnatchGuard,
     ) {
         profiling::scope!("drain_barriers");
@@ -425,8 +425,8 @@ impl<A: HalApi> CommandBuffer<A> {
 
     pub(crate) fn insert_barriers_from_device_tracker(
         raw: &mut dyn hal::DynCommandEncoder,
-        base: &mut DeviceTracker<A>,
-        head: &Tracker<A>,
+        base: &mut DeviceTracker,
+        head: &Tracker,
         snatch_guard: &SnatchGuard,
     ) {
         profiling::scope!("insert_barriers_from_device_tracker");
@@ -448,7 +448,7 @@ impl<A: HalApi> CommandBuffer<A> {
     }
 }
 
-impl<A: HalApi> CommandBuffer<A> {
+impl CommandBuffer {
     fn lock_encoder_impl(&self, lock: bool) -> Result<(), CommandEncoderError> {
         let mut cmd_buf_data_guard = self.data.lock();
         let cmd_buf_data = cmd_buf_data_guard.as_mut().unwrap();
@@ -508,7 +508,7 @@ impl<A: HalApi> CommandBuffer<A> {
         }
     }
 
-    pub(crate) fn extract_baked_commands(&mut self) -> BakedCommands<A> {
+    pub(crate) fn extract_baked_commands(&mut self) -> BakedCommands {
         let data = self.data.lock().take().unwrap();
         BakedCommands {
             encoder: data.encoder.raw,
@@ -519,17 +519,17 @@ impl<A: HalApi> CommandBuffer<A> {
         }
     }
 
-    pub(crate) fn from_arc_into_baked(self: Arc<Self>) -> BakedCommands<A> {
+    pub(crate) fn from_arc_into_baked(self: Arc<Self>) -> BakedCommands {
         let mut command_buffer = Arc::into_inner(self)
             .expect("CommandBuffer cannot be destroyed because is still in use");
         command_buffer.extract_baked_commands()
     }
 }
 
-crate::impl_resource_type_generic!(CommandBuffer);
+crate::impl_resource_type!(CommandBuffer);
 crate::impl_labeled!(CommandBuffer);
 crate::impl_parent_device!(CommandBuffer);
-crate::impl_storage_item_generic!(CommandBuffer);
+crate::impl_storage_item!(CommandBuffer);
 
 /// A stream of commands for a render pass or compute pass.
 ///
@@ -609,14 +609,14 @@ pub enum CommandEncoderError {
 }
 
 impl Global {
-    pub fn command_encoder_finish<A: HalApi>(
+    pub fn command_encoder_finish(
         &self,
         encoder_id: id::CommandEncoderId,
         _desc: &wgt::CommandBufferDescriptor<Label>,
     ) -> (id::CommandBufferId, Option<CommandEncoderError>) {
         profiling::scope!("CommandEncoder::finish");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let error = match hub.command_buffers.get(encoder_id.into_command_buffer_id()) {
             Ok(cmd_buf) => {
@@ -651,7 +651,7 @@ impl Global {
         (encoder_id.into_command_buffer_id(), error)
     }
 
-    pub fn command_encoder_push_debug_group<A: HalApi>(
+    pub fn command_encoder_push_debug_group(
         &self,
         encoder_id: id::CommandEncoderId,
         label: &str,
@@ -659,7 +659,7 @@ impl Global {
         profiling::scope!("CommandEncoder::push_debug_group");
         api_log!("CommandEncoder::push_debug_group {label}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let cmd_buf = match hub.command_buffers.get(encoder_id.into_command_buffer_id()) {
             Ok(cmd_buf) => cmd_buf,
@@ -687,7 +687,7 @@ impl Global {
         Ok(())
     }
 
-    pub fn command_encoder_insert_debug_marker<A: HalApi>(
+    pub fn command_encoder_insert_debug_marker(
         &self,
         encoder_id: id::CommandEncoderId,
         label: &str,
@@ -695,7 +695,7 @@ impl Global {
         profiling::scope!("CommandEncoder::insert_debug_marker");
         api_log!("CommandEncoder::insert_debug_marker {label}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let cmd_buf = match hub.command_buffers.get(encoder_id.into_command_buffer_id()) {
             Ok(cmd_buf) => cmd_buf,
@@ -724,14 +724,14 @@ impl Global {
         Ok(())
     }
 
-    pub fn command_encoder_pop_debug_group<A: HalApi>(
+    pub fn command_encoder_pop_debug_group(
         &self,
         encoder_id: id::CommandEncoderId,
     ) -> Result<(), CommandEncoderError> {
         profiling::scope!("CommandEncoder::pop_debug_marker");
         api_log!("CommandEncoder::pop_debug_group");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let cmd_buf = match hub.command_buffers.get(encoder_id.into_command_buffer_id()) {
             Ok(cmd_buf) => cmd_buf,
diff --git a/wgpu-core/src/command/query.rs b/wgpu-core/src/command/query.rs
index 26997ebd8b..de5103ac88 100644
--- a/wgpu-core/src/command/query.rs
+++ b/wgpu-core/src/command/query.rs
@@ -4,7 +4,6 @@ use crate::{
     command::{CommandBuffer, CommandEncoderError},
     device::{DeviceError, MissingFeatures},
     global::Global,
-    hal_api::HalApi,
     id,
     init_tracker::MemoryInitKind,
     resource::{
@@ -18,17 +17,17 @@ use thiserror::Error;
 use wgt::BufferAddress;
 
 #[derive(Debug)]
-pub(crate) struct QueryResetMap<A: HalApi> {
-    map: FastHashMap<TrackerIndex, (Vec<bool>, Arc<QuerySet<A>>)>,
+pub(crate) struct QueryResetMap {
+    map: FastHashMap<TrackerIndex, (Vec<bool>, Arc<QuerySet>)>,
 }
-impl<A: HalApi> QueryResetMap<A> {
+impl QueryResetMap {
     pub fn new() -> Self {
         Self {
             map: FastHashMap::default(),
         }
     }
 
-    pub fn use_query_set(&mut self, query_set: &Arc<QuerySet<A>>, query: u32) -> bool {
+    pub fn use_query_set(&mut self, query_set: &Arc<QuerySet>, query: u32) -> bool {
         let vec_pair = self
             .map
             .entry(query_set.tracker_index())
@@ -161,12 +160,12 @@ pub enum ResolveError {
     },
 }
 
-impl<A: HalApi> QuerySet<A> {
+impl QuerySet {
     fn validate_query(
         self: &Arc<Self>,
         query_type: SimplifiedQueryType,
         query_index: u32,
-        reset_state: Option<&mut QueryResetMap<A>>,
+        reset_state: Option<&mut QueryResetMap>,
     ) -> Result<(), QueryUseError> {
         // We need to defer our resets because we are in a renderpass,
         // add the usage to the reset map.
@@ -199,7 +198,7 @@ impl<A: HalApi> QuerySet<A> {
         self: &Arc<Self>,
         raw_encoder: &mut dyn hal::DynCommandEncoder,
         query_index: u32,
-        reset_state: Option<&mut QueryResetMap<A>>,
+        reset_state: Option<&mut QueryResetMap>,
     ) -> Result<(), QueryUseError> {
         let needs_reset = reset_state.is_none();
         self.validate_query(SimplifiedQueryType::Timestamp, query_index, reset_state)?;
@@ -216,13 +215,13 @@ impl<A: HalApi> QuerySet<A> {
     }
 }
 
-pub(super) fn validate_and_begin_occlusion_query<A: HalApi>(
-    query_set: Arc<QuerySet<A>>,
+pub(super) fn validate_and_begin_occlusion_query(
+    query_set: Arc<QuerySet>,
     raw_encoder: &mut dyn hal::DynCommandEncoder,
-    tracker: &mut StatelessTracker<QuerySet<A>>,
+    tracker: &mut StatelessTracker<QuerySet>,
     query_index: u32,
-    reset_state: Option<&mut QueryResetMap<A>>,
-    active_query: &mut Option<(Arc<QuerySet<A>>, u32)>,
+    reset_state: Option<&mut QueryResetMap>,
+    active_query: &mut Option<(Arc<QuerySet>, u32)>,
 ) -> Result<(), QueryUseError> {
     let needs_reset = reset_state.is_none();
     query_set.validate_query(SimplifiedQueryType::Occlusion, query_index, reset_state)?;
@@ -248,9 +247,9 @@ pub(super) fn validate_and_begin_occlusion_query<A: HalApi>(
     Ok(())
 }
 
-pub(super) fn end_occlusion_query<A: HalApi>(
+pub(super) fn end_occlusion_query(
     raw_encoder: &mut dyn hal::DynCommandEncoder,
-    active_query: &mut Option<(Arc<QuerySet<A>>, u32)>,
+    active_query: &mut Option<(Arc<QuerySet>, u32)>,
 ) -> Result<(), QueryUseError> {
     if let Some((query_set, query_index)) = active_query.take() {
         unsafe { raw_encoder.end_query(query_set.raw(), query_index) };
@@ -260,14 +259,14 @@ pub(super) fn end_occlusion_query<A: HalApi>(
     }
 }
 
-pub(super) fn validate_and_begin_pipeline_statistics_query<A: HalApi>(
-    query_set: Arc<QuerySet<A>>,
+pub(super) fn validate_and_begin_pipeline_statistics_query(
+    query_set: Arc<QuerySet>,
     raw_encoder: &mut dyn hal::DynCommandEncoder,
-    tracker: &mut StatelessTracker<QuerySet<A>>,
-    cmd_buf: &CommandBuffer<A>,
+    tracker: &mut StatelessTracker<QuerySet>,
+    cmd_buf: &CommandBuffer,
     query_index: u32,
-    reset_state: Option<&mut QueryResetMap<A>>,
-    active_query: &mut Option<(Arc<QuerySet<A>>, u32)>,
+    reset_state: Option<&mut QueryResetMap>,
+    active_query: &mut Option<(Arc<QuerySet>, u32)>,
 ) -> Result<(), QueryUseError> {
     query_set.same_device_as(cmd_buf)?;
 
@@ -299,9 +298,9 @@ pub(super) fn validate_and_begin_pipeline_statistics_query<A: HalApi>(
     Ok(())
 }
 
-pub(super) fn end_pipeline_statistics_query<A: HalApi>(
+pub(super) fn end_pipeline_statistics_query(
     raw_encoder: &mut dyn hal::DynCommandEncoder,
-    active_query: &mut Option<(Arc<QuerySet<A>>, u32)>,
+    active_query: &mut Option<(Arc<QuerySet>, u32)>,
 ) -> Result<(), QueryUseError> {
     if let Some((query_set, query_index)) = active_query.take() {
         unsafe { raw_encoder.end_query(query_set.raw(), query_index) };
@@ -312,13 +311,13 @@ pub(super) fn end_pipeline_statistics_query<A: HalApi>(
 }
 
 impl Global {
-    pub fn command_encoder_write_timestamp<A: HalApi>(
+    pub fn command_encoder_write_timestamp(
         &self,
         command_encoder_id: id::CommandEncoderId,
         query_set_id: id::QuerySetId,
         query_index: u32,
     ) -> Result<(), QueryError> {
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let cmd_buf = match hub
             .command_buffers
@@ -361,7 +360,7 @@ impl Global {
         Ok(())
     }
 
-    pub fn command_encoder_resolve_query_set<A: HalApi>(
+    pub fn command_encoder_resolve_query_set(
         &self,
         command_encoder_id: id::CommandEncoderId,
         query_set_id: id::QuerySetId,
@@ -370,7 +369,7 @@ impl Global {
         destination: id::BufferId,
         destination_offset: BufferAddress,
     ) -> Result<(), QueryError> {
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let cmd_buf = match hub
             .command_buffers
diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index 7e7f9a1af8..1128e60a54 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -21,7 +21,6 @@ use crate::{
         RenderPassCompatibilityError, RenderPassContext,
     },
     global::Global,
-    hal_api::HalApi,
     hal_label, id,
     init_tracker::{MemoryInitKind, TextureInitRange, TextureInitTrackerAction},
     pipeline::{self, PipelineFlags},
@@ -133,11 +132,11 @@ pub struct RenderPassColorAttachment {
 
 /// Describes a color attachment to a render pass.
 #[derive(Debug)]
-struct ArcRenderPassColorAttachment<A: HalApi> {
+struct ArcRenderPassColorAttachment {
     /// The view to use as an attachment.
-    pub view: Arc<TextureView<A>>,
+    pub view: Arc<TextureView>,
     /// The view that will receive the resolved output if multisampling is used.
-    pub resolve_target: Option<Arc<TextureView<A>>>,
+    pub resolve_target: Option<Arc<TextureView>>,
     /// What operations will be performed on this color attachment.
     pub channel: PassChannel<Color>,
 }
@@ -156,16 +155,16 @@ pub struct RenderPassDepthStencilAttachment {
 }
 /// Describes a depth/stencil attachment to a render pass.
 #[derive(Debug)]
-pub struct ArcRenderPassDepthStencilAttachment<A: HalApi> {
+pub struct ArcRenderPassDepthStencilAttachment {
     /// The view to use as an attachment.
-    pub view: Arc<TextureView<A>>,
+    pub view: Arc<TextureView>,
     /// What operations will be performed on the depth part of the attachment.
     pub depth: PassChannel<f32>,
     /// What operations will be performed on the stencil part of the attachment.
     pub stencil: PassChannel<u32>,
 }
 
-impl<A: HalApi> ArcRenderPassDepthStencilAttachment<A> {
+impl ArcRenderPassDepthStencilAttachment {
     /// Validate the given aspects' read-only flags against their load
     /// and store ops.
     ///
@@ -218,45 +217,45 @@ pub struct RenderPassDescriptor<'a> {
 }
 
 /// Describes the attachments of a render pass.
-struct ArcRenderPassDescriptor<'a, A: HalApi> {
+struct ArcRenderPassDescriptor<'a> {
     pub label: &'a Label<'a>,
     /// The color attachments of the render pass.
     pub color_attachments:
-        ArrayVec<Option<ArcRenderPassColorAttachment<A>>, { hal::MAX_COLOR_ATTACHMENTS }>,
+        ArrayVec<Option<ArcRenderPassColorAttachment>, { hal::MAX_COLOR_ATTACHMENTS }>,
     /// The depth and stencil attachment of the render pass, if any.
-    pub depth_stencil_attachment: Option<ArcRenderPassDepthStencilAttachment<A>>,
+    pub depth_stencil_attachment: Option<ArcRenderPassDepthStencilAttachment>,
     /// Defines where and when timestamp values will be written for this pass.
-    pub timestamp_writes: Option<ArcPassTimestampWrites<A>>,
+    pub timestamp_writes: Option<ArcPassTimestampWrites>,
     /// Defines where the occlusion query results will be stored for this pass.
-    pub occlusion_query_set: Option<Arc<QuerySet<A>>>,
+    pub occlusion_query_set: Option<Arc<QuerySet>>,
 }
 
-pub struct RenderPass<A: HalApi> {
+pub struct RenderPass {
     /// All pass data & records is stored here.
     ///
     /// If this is `None`, the pass is in the 'ended' state and can no longer be used.
     /// Any attempt to record more commands will result in a validation error.
-    base: Option<BasePass<ArcRenderCommand<A>>>,
+    base: Option<BasePass<ArcRenderCommand>>,
 
     /// Parent command buffer that this pass records commands into.
     ///
     /// If it is none, this pass is invalid and any operation on it will return an error.
-    parent: Option<Arc<CommandBuffer<A>>>,
+    parent: Option<Arc<CommandBuffer>>,
 
     color_attachments:
-        ArrayVec<Option<ArcRenderPassColorAttachment<A>>, { hal::MAX_COLOR_ATTACHMENTS }>,
-    depth_stencil_attachment: Option<ArcRenderPassDepthStencilAttachment<A>>,
-    timestamp_writes: Option<ArcPassTimestampWrites<A>>,
-    occlusion_query_set: Option<Arc<QuerySet<A>>>,
+        ArrayVec<Option<ArcRenderPassColorAttachment>, { hal::MAX_COLOR_ATTACHMENTS }>,
+    depth_stencil_attachment: Option<ArcRenderPassDepthStencilAttachment>,
+    timestamp_writes: Option<ArcPassTimestampWrites>,
+    occlusion_query_set: Option<Arc<QuerySet>>,
 
     // Resource binding dedupe state.
     current_bind_groups: BindGroupStateChange,
     current_pipeline: StateChange<id::RenderPipelineId>,
 }
 
-impl<A: HalApi> RenderPass<A> {
+impl RenderPass {
     /// If the parent command buffer is invalid, the returned pass will be invalid.
-    fn new(parent: Option<Arc<CommandBuffer<A>>>, desc: ArcRenderPassDescriptor<A>) -> Self {
+    fn new(parent: Option<Arc<CommandBuffer>>, desc: ArcRenderPassDescriptor) -> Self {
         let ArcRenderPassDescriptor {
             label,
             timestamp_writes,
@@ -286,7 +285,7 @@ impl<A: HalApi> RenderPass<A> {
     fn base_mut<'a>(
         &'a mut self,
         scope: PassErrorScope,
-    ) -> Result<&'a mut BasePass<ArcRenderCommand<A>>, RenderPassError> {
+    ) -> Result<&'a mut BasePass<ArcRenderCommand>, RenderPassError> {
         self.base
             .as_mut()
             .ok_or(RenderPassErrorInner::PassEnded)
@@ -294,7 +293,7 @@ impl<A: HalApi> RenderPass<A> {
     }
 }
 
-impl<A: HalApi> fmt::Debug for RenderPass<A> {
+impl fmt::Debug for RenderPass {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_struct("RenderPass")
             .field("label", &self.label())
@@ -444,38 +443,38 @@ impl VertexState {
     }
 }
 
-struct State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder, A: HalApi> {
+struct State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder> {
     pipeline_flags: PipelineFlags,
-    binder: Binder<A>,
+    binder: Binder,
     blend_constant: OptionalState,
     stencil_reference: u32,
-    pipeline: Option<Arc<RenderPipeline<A>>>,
+    pipeline: Option<Arc<RenderPipeline>>,
     index: IndexState,
     vertex: VertexState,
     debug_scope_depth: u32,
 
-    info: RenderPassInfo<'scope, A>,
+    info: RenderPassInfo<'scope>,
 
     snatch_guard: &'snatch_guard SnatchGuard<'snatch_guard>,
 
-    device: &'cmd_buf Arc<Device<A>>,
+    device: &'cmd_buf Arc<Device>,
 
     raw_encoder: &'raw_encoder mut dyn hal::DynCommandEncoder,
 
-    tracker: &'cmd_buf mut Tracker<A>,
-    buffer_memory_init_actions: &'cmd_buf mut Vec<BufferInitTrackerAction<A>>,
-    texture_memory_actions: &'cmd_buf mut CommandBufferTextureMemoryActions<A>,
+    tracker: &'cmd_buf mut Tracker,
+    buffer_memory_init_actions: &'cmd_buf mut Vec<BufferInitTrackerAction>,
+    texture_memory_actions: &'cmd_buf mut CommandBufferTextureMemoryActions,
 
     temp_offsets: Vec<u32>,
     dynamic_offset_count: usize,
     string_offset: usize,
 
-    active_occlusion_query: Option<(Arc<QuerySet<A>>, u32)>,
-    active_pipeline_statistics_query: Option<(Arc<QuerySet<A>>, u32)>,
+    active_occlusion_query: Option<(Arc<QuerySet>, u32)>,
+    active_pipeline_statistics_query: Option<(Arc<QuerySet>, u32)>,
 }
 
-impl<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder, A: HalApi>
-    State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder, A>
+impl<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder>
+    State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder>
 {
     fn is_ready(&self, indexed: bool) -> Result<(), DrawError> {
         if let Some(pipeline) = self.pipeline.as_ref() {
@@ -747,14 +746,14 @@ where
     }
 }
 
-struct RenderAttachment<A: HalApi> {
-    texture: Arc<Texture<A>>,
+struct RenderAttachment {
+    texture: Arc<Texture>,
     selector: TextureSelector,
     usage: hal::TextureUses,
 }
 
-impl<A: HalApi> TextureView<A> {
-    fn to_render_attachment(&self, usage: hal::TextureUses) -> RenderAttachment<A> {
+impl TextureView {
+    fn to_render_attachment(&self, usage: hal::TextureUses) -> RenderAttachment {
         RenderAttachment {
             texture: self.parent.clone(),
             selector: self.selector.clone(),
@@ -766,26 +765,26 @@ impl<A: HalApi> TextureView<A> {
 const MAX_TOTAL_ATTACHMENTS: usize = hal::MAX_COLOR_ATTACHMENTS + hal::MAX_COLOR_ATTACHMENTS + 1;
 type AttachmentDataVec<T> = ArrayVec<T, MAX_TOTAL_ATTACHMENTS>;
 
-struct RenderPassInfo<'d, A: HalApi> {
+struct RenderPassInfo<'d> {
     context: RenderPassContext,
-    usage_scope: UsageScope<'d, A>,
+    usage_scope: UsageScope<'d>,
     /// All render attachments, including depth/stencil
-    render_attachments: AttachmentDataVec<RenderAttachment<A>>,
+    render_attachments: AttachmentDataVec<RenderAttachment>,
     is_depth_read_only: bool,
     is_stencil_read_only: bool,
     extent: wgt::Extent3d,
 
-    pending_discard_init_fixups: SurfacesInDiscardState<A>,
-    divergent_discarded_depth_stencil_aspect: Option<(wgt::TextureAspect, Arc<TextureView<A>>)>,
+    pending_discard_init_fixups: SurfacesInDiscardState,
+    divergent_discarded_depth_stencil_aspect: Option<(wgt::TextureAspect, Arc<TextureView>)>,
     multiview: Option<NonZeroU32>,
 }
 
-impl<'d, A: HalApi> RenderPassInfo<'d, A> {
+impl<'d> RenderPassInfo<'d> {
     fn add_pass_texture_init_actions<V>(
         channel: &PassChannel<V>,
-        texture_memory_actions: &mut CommandBufferTextureMemoryActions<A>,
-        view: &TextureView<A>,
-        pending_discard_init_fixups: &mut SurfacesInDiscardState<A>,
+        texture_memory_actions: &mut CommandBufferTextureMemoryActions,
+        view: &TextureView,
+        pending_discard_init_fixups: &mut SurfacesInDiscardState,
     ) {
         if channel.load_op == LoadOp::Load {
             pending_discard_init_fixups.extend(texture_memory_actions.register_init_action(
@@ -816,19 +815,19 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
     }
 
     fn start(
-        device: &'d Arc<Device<A>>,
+        device: &'d Arc<Device>,
         hal_label: Option<&str>,
         color_attachments: ArrayVec<
-            Option<ArcRenderPassColorAttachment<A>>,
+            Option<ArcRenderPassColorAttachment>,
             { hal::MAX_COLOR_ATTACHMENTS },
         >,
-        mut depth_stencil_attachment: Option<ArcRenderPassDepthStencilAttachment<A>>,
-        mut timestamp_writes: Option<ArcPassTimestampWrites<A>>,
-        mut occlusion_query_set: Option<Arc<QuerySet<A>>>,
+        mut depth_stencil_attachment: Option<ArcRenderPassDepthStencilAttachment>,
+        mut timestamp_writes: Option<ArcPassTimestampWrites>,
+        mut occlusion_query_set: Option<Arc<QuerySet>>,
         encoder: &mut CommandEncoder,
-        trackers: &mut Tracker<A>,
-        texture_memory_actions: &mut CommandBufferTextureMemoryActions<A>,
-        pending_query_resets: &mut QueryResetMap<A>,
+        trackers: &mut Tracker,
+        texture_memory_actions: &mut CommandBufferTextureMemoryActions,
+        pending_query_resets: &mut QueryResetMap,
         snatch_guard: &SnatchGuard<'_>,
     ) -> Result<Self, RenderPassErrorInner> {
         profiling::scope!("RenderPassInfo::start");
@@ -839,7 +838,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
         let mut is_depth_read_only = false;
         let mut is_stencil_read_only = false;
 
-        let mut render_attachments = AttachmentDataVec::<RenderAttachment<A>>::new();
+        let mut render_attachments = AttachmentDataVec::<RenderAttachment>::new();
         let mut discarded_surfaces = AttachmentDataVec::new();
         let mut pending_discard_init_fixups = SurfacesInDiscardState::new();
         let mut divergent_discarded_depth_stencil_aspect = None;
@@ -853,7 +852,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
 
         let mut detected_multiview: Option<Option<NonZeroU32>> = None;
 
-        let mut check_multiview = |view: &TextureView<A>| {
+        let mut check_multiview = |view: &TextureView| {
             // Get the multiview configuration for this texture view
             let layers = view.selector.layers.end - view.selector.layers.start;
             let this_multiview = if layers >= 2 {
@@ -884,7 +883,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
 
             Ok(())
         };
-        let mut add_view = |view: &TextureView<A>, location| {
+        let mut add_view = |view: &TextureView, location| {
             let render_extent = view.render_extent.map_err(|reason| {
                 RenderPassErrorInner::TextureViewIsNotRenderable { location, reason }
             })?;
@@ -1048,7 +1047,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
                 color_attachments_hal.push(None);
                 continue;
             };
-            let color_view: &TextureView<A> = &at.view;
+            let color_view: &TextureView = &at.view;
             color_view.same_device(device)?;
             check_multiview(color_view)?;
             add_view(
@@ -1256,7 +1255,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> {
         mut self,
         raw: &mut dyn hal::DynCommandEncoder,
         snatch_guard: &SnatchGuard,
-    ) -> Result<(UsageScope<'d, A>, SurfacesInDiscardState<A>), RenderPassErrorInner> {
+    ) -> Result<(UsageScope<'d>, SurfacesInDiscardState), RenderPassErrorInner> {
         profiling::scope!("RenderPassInfo::finish");
         unsafe {
             raw.end_render_pass();
@@ -1332,16 +1331,16 @@ impl Global {
     /// Any operation on an invalid pass will return an error.
     ///
     /// If successful, puts the encoder into the [`CommandEncoderStatus::Locked`] state.
-    pub fn command_encoder_create_render_pass<A: HalApi>(
+    pub fn command_encoder_create_render_pass(
         &self,
         encoder_id: id::CommandEncoderId,
         desc: &RenderPassDescriptor<'_>,
-    ) -> (RenderPass<A>, Option<CommandEncoderError>) {
-        fn fill_arc_desc<A: HalApi>(
-            hub: &crate::hub::Hub<A>,
+    ) -> (RenderPass, Option<CommandEncoderError>) {
+        fn fill_arc_desc(
+            hub: &crate::hub::Hub,
             desc: &RenderPassDescriptor<'_>,
-            arc_desc: &mut ArcRenderPassDescriptor<A>,
-            device: &Device<A>,
+            arc_desc: &mut ArcRenderPassDescriptor,
+            device: &Device,
         ) -> Result<(), CommandEncoderError> {
             let query_sets = hub.query_sets.read();
             let texture_views = hub.texture_views.read();
@@ -1436,7 +1435,7 @@ impl Global {
             Ok(())
         }
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
         let mut arc_desc = ArcRenderPassDescriptor {
             label: &desc.label,
             timestamp_writes: None,
@@ -1466,18 +1465,18 @@ impl Global {
     ///
     /// If creation fails, an invalid pass is returned.
     /// Any operation on an invalid pass will return an error.
-    pub fn command_encoder_create_render_pass_dyn<A: HalApi>(
+    pub fn command_encoder_create_render_pass_dyn(
         &self,
         encoder_id: id::CommandEncoderId,
         desc: &RenderPassDescriptor<'_>,
     ) -> (Box<dyn DynRenderPass>, Option<CommandEncoderError>) {
-        let (pass, err) = self.command_encoder_create_render_pass::<A>(encoder_id, desc);
+        let (pass, err) = self.command_encoder_create_render_pass(encoder_id, desc);
         (Box::new(pass), err)
     }
 
     #[doc(hidden)]
     #[cfg(any(feature = "serde", feature = "replay"))]
-    pub fn render_pass_end_with_unresolved_commands<A: HalApi>(
+    pub fn render_pass_end_with_unresolved_commands(
         &self,
         encoder_id: id::CommandEncoderId,
         base: BasePass<super::RenderCommand>,
@@ -1490,7 +1489,7 @@ impl Global {
 
         #[cfg(feature = "trace")]
         {
-            let hub = A::hub(self);
+            let hub = &self.hub;
 
             let cmd_buf = match hub.command_buffers.get(encoder_id.into_command_buffer_id()) {
                 Ok(cmd_buf) => cmd_buf,
@@ -1525,7 +1524,7 @@ impl Global {
             push_constant_data,
         } = base;
 
-        let (mut render_pass, encoder_error) = self.command_encoder_create_render_pass::<A>(
+        let (mut render_pass, encoder_error) = self.command_encoder_create_render_pass(
             encoder_id,
             &RenderPassDescriptor {
                 label: label.as_deref().map(Cow::Borrowed),
@@ -1542,7 +1541,7 @@ impl Global {
             });
         };
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
         render_pass.base = Some(BasePass {
             label,
             commands: super::RenderCommand::resolve_render_command_ids(hub, &commands)?,
@@ -1562,10 +1561,7 @@ impl Global {
     }
 
     #[doc(hidden)]
-    pub fn render_pass_end<A: HalApi>(
-        &self,
-        pass: &mut RenderPass<A>,
-    ) -> Result<(), RenderPassError> {
+    pub fn render_pass_end(&self, pass: &mut RenderPass) -> Result<(), RenderPassError> {
         let pass_scope = PassErrorScope::Pass;
 
         let base = pass
@@ -1945,13 +1941,13 @@ impl Global {
     }
 }
 
-fn set_bind_group<A: HalApi>(
-    state: &mut State<A>,
-    cmd_buf: &Arc<CommandBuffer<A>>,
+fn set_bind_group(
+    state: &mut State,
+    cmd_buf: &Arc<CommandBuffer>,
     dynamic_offsets: &[DynamicOffset],
     index: u32,
     num_dynamic_offsets: usize,
-    bind_group: Arc<BindGroup<A>>,
+    bind_group: Arc<BindGroup>,
 ) -> Result<(), RenderPassErrorInner> {
     api_log!(
         "RenderPass::set_bind_group {index} {}",
@@ -2026,10 +2022,10 @@ fn set_bind_group<A: HalApi>(
     Ok(())
 }
 
-fn set_pipeline<A: HalApi>(
-    state: &mut State<A>,
-    cmd_buf: &Arc<CommandBuffer<A>>,
-    pipeline: Arc<RenderPipeline<A>>,
+fn set_pipeline(
+    state: &mut State,
+    cmd_buf: &Arc<CommandBuffer>,
+    pipeline: Arc<RenderPipeline>,
 ) -> Result<(), RenderPassErrorInner> {
     api_log!("RenderPass::set_pipeline {}", pipeline.error_ident());
 
@@ -2135,10 +2131,10 @@ fn set_pipeline<A: HalApi>(
     Ok(())
 }
 
-fn set_index_buffer<A: HalApi>(
-    state: &mut State<A>,
-    cmd_buf: &Arc<CommandBuffer<A>>,
-    buffer: Arc<crate::resource::Buffer<A>>,
+fn set_index_buffer(
+    state: &mut State,
+    cmd_buf: &Arc<CommandBuffer>,
+    buffer: Arc<crate::resource::Buffer>,
     index_format: IndexFormat,
     offset: u64,
     size: Option<BufferSize>,
@@ -2181,11 +2177,11 @@ fn set_index_buffer<A: HalApi>(
     Ok(())
 }
 
-fn set_vertex_buffer<A: HalApi>(
-    state: &mut State<A>,
-    cmd_buf: &Arc<CommandBuffer<A>>,
+fn set_vertex_buffer(
+    state: &mut State,
+    cmd_buf: &Arc<CommandBuffer>,
     slot: u32,
-    buffer: Arc<crate::resource::Buffer<A>>,
+    buffer: Arc<crate::resource::Buffer>,
     offset: u64,
     size: Option<BufferSize>,
 ) -> Result<(), RenderPassErrorInner> {
@@ -2247,7 +2243,7 @@ fn set_vertex_buffer<A: HalApi>(
     Ok(())
 }
 
-fn set_blend_constant<A: HalApi>(state: &mut State<A>, color: &Color) {
+fn set_blend_constant(state: &mut State, color: &Color) {
     api_log!("RenderPass::set_blend_constant");
 
     state.blend_constant = OptionalState::Set;
@@ -2262,7 +2258,7 @@ fn set_blend_constant<A: HalApi>(state: &mut State<A>, color: &Color) {
     }
 }
 
-fn set_stencil_reference<A: HalApi>(state: &mut State<A>, value: u32) {
+fn set_stencil_reference(state: &mut State, value: u32) {
     api_log!("RenderPass::set_stencil_reference {value}");
 
     state.stencil_reference = value;
@@ -2276,8 +2272,8 @@ fn set_stencil_reference<A: HalApi>(state: &mut State<A>, value: u32) {
     }
 }
 
-fn set_viewport<A: HalApi>(
-    state: &mut State<A>,
+fn set_viewport(
+    state: &mut State,
     rect: Rect<f32>,
     depth_min: f32,
     depth_max: f32,
@@ -2307,8 +2303,8 @@ fn set_viewport<A: HalApi>(
     Ok(())
 }
 
-fn set_push_constant<A: HalApi>(
-    state: &mut State<A>,
+fn set_push_constant(
+    state: &mut State,
     push_constant_data: &[u32],
     stages: ShaderStages,
     offset: u32,
@@ -2341,10 +2337,7 @@ fn set_push_constant<A: HalApi>(
     Ok(())
 }
 
-fn set_scissor<A: HalApi>(
-    state: &mut State<A>,
-    rect: Rect<u32>,
-) -> Result<(), RenderPassErrorInner> {
+fn set_scissor(state: &mut State, rect: Rect<u32>) -> Result<(), RenderPassErrorInner> {
     api_log!("RenderPass::set_scissor_rect {rect:?}");
 
     if rect.x + rect.w > state.info.extent.width || rect.y + rect.h > state.info.extent.height {
@@ -2362,8 +2355,8 @@ fn set_scissor<A: HalApi>(
     Ok(())
 }
 
-fn draw<A: HalApi>(
-    state: &mut State<A>,
+fn draw(
+    state: &mut State,
     vertex_count: u32,
     instance_count: u32,
     first_vertex: u32,
@@ -2402,8 +2395,8 @@ fn draw<A: HalApi>(
     Ok(())
 }
 
-fn draw_indexed<A: HalApi>(
-    state: &mut State<A>,
+fn draw_indexed(
+    state: &mut State,
     index_count: u32,
     instance_count: u32,
     first_index: u32,
@@ -2446,10 +2439,10 @@ fn draw_indexed<A: HalApi>(
     Ok(())
 }
 
-fn multi_draw_indirect<A: HalApi>(
-    state: &mut State<A>,
-    cmd_buf: &Arc<CommandBuffer<A>>,
-    indirect_buffer: Arc<crate::resource::Buffer<A>>,
+fn multi_draw_indirect(
+    state: &mut State,
+    cmd_buf: &Arc<CommandBuffer>,
+    indirect_buffer: Arc<crate::resource::Buffer>,
     offset: u64,
     count: Option<NonZeroU32>,
     indexed: bool,
@@ -2521,12 +2514,12 @@ fn multi_draw_indirect<A: HalApi>(
     Ok(())
 }
 
-fn multi_draw_indirect_count<A: HalApi>(
-    state: &mut State<A>,
-    cmd_buf: &Arc<CommandBuffer<A>>,
-    indirect_buffer: Arc<crate::resource::Buffer<A>>,
+fn multi_draw_indirect_count(
+    state: &mut State,
+    cmd_buf: &Arc<CommandBuffer>,
+    indirect_buffer: Arc<crate::resource::Buffer>,
     offset: u64,
-    count_buffer: Arc<crate::resource::Buffer<A>>,
+    count_buffer: Arc<crate::resource::Buffer>,
     count_buffer_offset: u64,
     max_count: u32,
     indexed: bool,
@@ -2629,7 +2622,7 @@ fn multi_draw_indirect_count<A: HalApi>(
     Ok(())
 }
 
-fn push_debug_group<A: HalApi>(state: &mut State<A>, string_data: &[u8], len: usize) {
+fn push_debug_group(state: &mut State, string_data: &[u8], len: usize) {
     state.debug_scope_depth += 1;
     if !state
         .device
@@ -2647,7 +2640,7 @@ fn push_debug_group<A: HalApi>(state: &mut State<A>, string_data: &[u8], len: us
     state.string_offset += len;
 }
 
-fn pop_debug_group<A: HalApi>(state: &mut State<A>) -> Result<(), RenderPassErrorInner> {
+fn pop_debug_group(state: &mut State) -> Result<(), RenderPassErrorInner> {
     api_log!("RenderPass::pop_debug_group");
 
     if state.debug_scope_depth == 0 {
@@ -2666,7 +2659,7 @@ fn pop_debug_group<A: HalApi>(state: &mut State<A>) -> Result<(), RenderPassErro
     Ok(())
 }
 
-fn insert_debug_marker<A: HalApi>(state: &mut State<A>, string_data: &[u8], len: usize) {
+fn insert_debug_marker(state: &mut State, string_data: &[u8], len: usize) {
     if !state
         .device
         .instance_flags
@@ -2682,11 +2675,11 @@ fn insert_debug_marker<A: HalApi>(state: &mut State<A>, string_data: &[u8], len:
     state.string_offset += len;
 }
 
-fn write_timestamp<A: HalApi>(
-    state: &mut State<A>,
-    cmd_buf: &CommandBuffer<A>,
-    pending_query_resets: &mut QueryResetMap<A>,
-    query_set: Arc<QuerySet<A>>,
+fn write_timestamp(
+    state: &mut State,
+    cmd_buf: &CommandBuffer,
+    pending_query_resets: &mut QueryResetMap,
+    query_set: Arc<QuerySet>,
     query_index: u32,
 ) -> Result<(), RenderPassErrorInner> {
     api_log!(
@@ -2710,10 +2703,10 @@ fn write_timestamp<A: HalApi>(
     Ok(())
 }
 
-fn execute_bundle<A: HalApi>(
-    state: &mut State<A>,
-    cmd_buf: &Arc<CommandBuffer<A>>,
-    bundle: Arc<super::RenderBundle<A>>,
+fn execute_bundle(
+    state: &mut State,
+    cmd_buf: &Arc<CommandBuffer>,
+    bundle: Arc<super::RenderBundle>,
 ) -> Result<(), RenderPassErrorInner> {
     api_log!("RenderPass::execute_bundle {}", bundle.error_ident());
 
@@ -2774,12 +2767,12 @@ fn execute_bundle<A: HalApi>(
 }
 
 impl Global {
-    fn resolve_render_pass_buffer_id<A: HalApi>(
+    fn resolve_render_pass_buffer_id(
         &self,
         scope: PassErrorScope,
         buffer_id: id::Id<id::markers::Buffer>,
-    ) -> Result<Arc<crate::resource::Buffer<A>>, RenderPassError> {
-        let hub = A::hub(self);
+    ) -> Result<Arc<crate::resource::Buffer>, RenderPassError> {
+        let hub = &self.hub;
         let buffer = hub
             .buffers
             .get(buffer_id)
@@ -2789,12 +2782,12 @@ impl Global {
         Ok(buffer)
     }
 
-    fn resolve_render_pass_query_set<A: HalApi>(
+    fn resolve_render_pass_query_set(
         &self,
         scope: PassErrorScope,
         query_set_id: id::Id<id::markers::QuerySet>,
-    ) -> Result<Arc<QuerySet<A>>, RenderPassError> {
-        let hub = A::hub(self);
+    ) -> Result<Arc<QuerySet>, RenderPassError> {
+        let hub = &self.hub;
         let query_set = hub
             .query_sets
             .get(query_set_id)
@@ -2804,9 +2797,9 @@ impl Global {
         Ok(query_set)
     }
 
-    pub fn render_pass_set_bind_group<A: HalApi>(
+    pub fn render_pass_set_bind_group(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         index: u32,
         bind_group_id: id::BindGroupId,
         offsets: &[DynamicOffset],
@@ -2828,7 +2821,7 @@ impl Global {
             return Ok(());
         }
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
         let bind_group = hub
             .bind_groups
             .get(bind_group_id)
@@ -2844,9 +2837,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_set_pipeline<A: HalApi>(
+    pub fn render_pass_set_pipeline(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         pipeline_id: id::RenderPipelineId,
     ) -> Result<(), RenderPassError> {
         let scope = PassErrorScope::SetPipelineRender;
@@ -2859,7 +2852,7 @@ impl Global {
             return Ok(());
         }
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
         let pipeline = hub
             .render_pipelines
             .get(pipeline_id)
@@ -2871,9 +2864,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_set_index_buffer<A: HalApi>(
+    pub fn render_pass_set_index_buffer(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         buffer_id: id::BufferId,
         index_format: IndexFormat,
         offset: BufferAddress,
@@ -2892,9 +2885,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_set_vertex_buffer<A: HalApi>(
+    pub fn render_pass_set_vertex_buffer(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         slot: u32,
         buffer_id: id::BufferId,
         offset: BufferAddress,
@@ -2913,9 +2906,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_set_blend_constant<A: HalApi>(
+    pub fn render_pass_set_blend_constant(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         color: Color,
     ) -> Result<(), RenderPassError> {
         let scope = PassErrorScope::SetBlendConstant;
@@ -2927,9 +2920,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_set_stencil_reference<A: HalApi>(
+    pub fn render_pass_set_stencil_reference(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         value: u32,
     ) -> Result<(), RenderPassError> {
         let scope = PassErrorScope::SetStencilReference;
@@ -2941,9 +2934,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_set_viewport<A: HalApi>(
+    pub fn render_pass_set_viewport(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         x: f32,
         y: f32,
         w: f32,
@@ -2963,9 +2956,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_set_scissor_rect<A: HalApi>(
+    pub fn render_pass_set_scissor_rect(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         x: u32,
         y: u32,
         w: u32,
@@ -2980,9 +2973,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_set_push_constants<A: HalApi>(
+    pub fn render_pass_set_push_constants(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         stages: ShaderStages,
         offset: u32,
         data: &[u8],
@@ -3019,9 +3012,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_draw<A: HalApi>(
+    pub fn render_pass_draw(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         vertex_count: u32,
         instance_count: u32,
         first_vertex: u32,
@@ -3043,9 +3036,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_draw_indexed<A: HalApi>(
+    pub fn render_pass_draw_indexed(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         index_count: u32,
         instance_count: u32,
         first_index: u32,
@@ -3069,9 +3062,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_draw_indirect<A: HalApi>(
+    pub fn render_pass_draw_indirect(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         buffer_id: id::BufferId,
         offset: BufferAddress,
     ) -> Result<(), RenderPassError> {
@@ -3091,9 +3084,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_draw_indexed_indirect<A: HalApi>(
+    pub fn render_pass_draw_indexed_indirect(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         buffer_id: id::BufferId,
         offset: BufferAddress,
     ) -> Result<(), RenderPassError> {
@@ -3113,9 +3106,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_multi_draw_indirect<A: HalApi>(
+    pub fn render_pass_multi_draw_indirect(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         buffer_id: id::BufferId,
         offset: BufferAddress,
         count: u32,
@@ -3136,9 +3129,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_multi_draw_indexed_indirect<A: HalApi>(
+    pub fn render_pass_multi_draw_indexed_indirect(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         buffer_id: id::BufferId,
         offset: BufferAddress,
         count: u32,
@@ -3159,9 +3152,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_multi_draw_indirect_count<A: HalApi>(
+    pub fn render_pass_multi_draw_indirect_count(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         buffer_id: id::BufferId,
         offset: BufferAddress,
         count_buffer_id: id::BufferId,
@@ -3175,7 +3168,7 @@ impl Global {
         let base = pass.base_mut(scope)?;
 
         // Don't use resolve_render_pass_buffer_id here, because we don't want to take the read-lock twice.
-        let hub = A::hub(self);
+        let hub = &self.hub;
         let buffers = hub.buffers.read();
         let buffer = buffers
             .get_owned(buffer_id)
@@ -3199,9 +3192,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_multi_draw_indexed_indirect_count<A: HalApi>(
+    pub fn render_pass_multi_draw_indexed_indirect_count(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         buffer_id: id::BufferId,
         offset: BufferAddress,
         count_buffer_id: id::BufferId,
@@ -3215,7 +3208,7 @@ impl Global {
         let base = pass.base_mut(scope)?;
 
         // Don't use resolve_render_pass_buffer_id here, because we don't want to take the read-lock twice.
-        let hub = A::hub(self);
+        let hub = &self.hub;
         let buffers = hub.buffers.read();
         let buffer = buffers
             .get_owned(buffer_id)
@@ -3240,9 +3233,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_push_debug_group<A: HalApi>(
+    pub fn render_pass_push_debug_group(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         label: &str,
         color: u32,
     ) -> Result<(), RenderPassError> {
@@ -3259,9 +3252,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_pop_debug_group<A: HalApi>(
+    pub fn render_pass_pop_debug_group(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
     ) -> Result<(), RenderPassError> {
         let base = pass.base_mut(PassErrorScope::PopDebugGroup)?;
 
@@ -3270,9 +3263,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_insert_debug_marker<A: HalApi>(
+    pub fn render_pass_insert_debug_marker(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         label: &str,
         color: u32,
     ) -> Result<(), RenderPassError> {
@@ -3289,9 +3282,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_write_timestamp<A: HalApi>(
+    pub fn render_pass_write_timestamp(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         query_set_id: id::QuerySetId,
         query_index: u32,
     ) -> Result<(), RenderPassError> {
@@ -3306,9 +3299,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_begin_occlusion_query<A: HalApi>(
+    pub fn render_pass_begin_occlusion_query(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         query_index: u32,
     ) -> Result<(), RenderPassError> {
         let scope = PassErrorScope::BeginOcclusionQuery;
@@ -3320,9 +3313,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_end_occlusion_query<A: HalApi>(
+    pub fn render_pass_end_occlusion_query(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
     ) -> Result<(), RenderPassError> {
         let scope = PassErrorScope::EndOcclusionQuery;
         let base = pass.base_mut(scope)?;
@@ -3332,9 +3325,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_begin_pipeline_statistics_query<A: HalApi>(
+    pub fn render_pass_begin_pipeline_statistics_query(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         query_set_id: id::QuerySetId,
         query_index: u32,
     ) -> Result<(), RenderPassError> {
@@ -3350,9 +3343,9 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_end_pipeline_statistics_query<A: HalApi>(
+    pub fn render_pass_end_pipeline_statistics_query(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
     ) -> Result<(), RenderPassError> {
         let scope = PassErrorScope::EndPipelineStatisticsQuery;
         let base = pass.base_mut(scope)?;
@@ -3363,15 +3356,15 @@ impl Global {
         Ok(())
     }
 
-    pub fn render_pass_execute_bundles<A: HalApi>(
+    pub fn render_pass_execute_bundles(
         &self,
-        pass: &mut RenderPass<A>,
+        pass: &mut RenderPass,
         render_bundle_ids: &[id::RenderBundleId],
     ) -> Result<(), RenderPassError> {
         let scope = PassErrorScope::ExecuteBundle;
         let base = pass.base_mut(scope)?;
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
         let bundles = hub.render_bundles.read();
 
         for &bundle_id in render_bundle_ids {
diff --git a/wgpu-core/src/command/render_command.rs b/wgpu-core/src/command/render_command.rs
index 287aa888f1..891ee3cfbc 100644
--- a/wgpu-core/src/command/render_command.rs
+++ b/wgpu-core/src/command/render_command.rs
@@ -1,6 +1,5 @@
 use crate::{
     binding_model::BindGroup,
-    hal_api::HalApi,
     id,
     pipeline::RenderPipeline,
     resource::{Buffer, QuerySet},
@@ -126,10 +125,10 @@ pub enum RenderCommand {
 impl RenderCommand {
     /// Resolves all ids in a list of commands into the corresponding resource Arc.
     #[cfg(any(feature = "serde", feature = "replay"))]
-    pub fn resolve_render_command_ids<A: HalApi>(
-        hub: &crate::hub::Hub<A>,
+    pub fn resolve_render_command_ids(
+        hub: &crate::hub::Hub,
         commands: &[RenderCommand],
-    ) -> Result<Vec<ArcRenderCommand<A>>, super::RenderPassError> {
+    ) -> Result<Vec<ArcRenderCommand>, super::RenderPassError> {
         use super::{
             DrawKind, PassErrorScope, RenderCommandError, RenderPassError, RenderPassErrorInner,
         };
@@ -140,9 +139,9 @@ impl RenderCommand {
         let pipelines_guard = hub.render_pipelines.read();
         let render_bundles_guard = hub.render_bundles.read();
 
-        let resolved_commands: Vec<ArcRenderCommand<A>> = commands
+        let resolved_commands: Vec<ArcRenderCommand> = commands
             .iter()
-            .map(|c| -> Result<ArcRenderCommand<A>, RenderPassError> {
+            .map(|c| -> Result<ArcRenderCommand, RenderPassError> {
                 Ok(match *c {
                     RenderCommand::SetBindGroup {
                         index,
@@ -381,22 +380,22 @@ impl RenderCommand {
 /// Equivalent to `RenderCommand` with the Ids resolved into resource Arcs.
 #[doc(hidden)]
 #[derive(Clone, Debug)]
-pub enum ArcRenderCommand<A: HalApi> {
+pub enum ArcRenderCommand {
     SetBindGroup {
         index: u32,
         num_dynamic_offsets: usize,
-        bind_group: Arc<BindGroup<A>>,
+        bind_group: Arc<BindGroup>,
     },
-    SetPipeline(Arc<RenderPipeline<A>>),
+    SetPipeline(Arc<RenderPipeline>),
     SetIndexBuffer {
-        buffer: Arc<Buffer<A>>,
+        buffer: Arc<Buffer>,
         index_format: wgt::IndexFormat,
         offset: BufferAddress,
         size: Option<BufferSize>,
     },
     SetVertexBuffer {
         slot: u32,
-        buffer: Arc<Buffer<A>>,
+        buffer: Arc<Buffer>,
         offset: BufferAddress,
         size: Option<BufferSize>,
     },
@@ -450,16 +449,16 @@ pub enum ArcRenderCommand<A: HalApi> {
         first_instance: u32,
     },
     MultiDrawIndirect {
-        buffer: Arc<Buffer<A>>,
+        buffer: Arc<Buffer>,
         offset: BufferAddress,
         /// Count of `None` represents a non-multi call.
         count: Option<NonZeroU32>,
         indexed: bool,
     },
     MultiDrawIndirectCount {
-        buffer: Arc<Buffer<A>>,
+        buffer: Arc<Buffer>,
         offset: BufferAddress,
-        count_buffer: Arc<Buffer<A>>,
+        count_buffer: Arc<Buffer>,
         count_buffer_offset: BufferAddress,
         max_count: u32,
         indexed: bool,
@@ -474,7 +473,7 @@ pub enum ArcRenderCommand<A: HalApi> {
         len: usize,
     },
     WriteTimestamp {
-        query_set: Arc<QuerySet<A>>,
+        query_set: Arc<QuerySet>,
         query_index: u32,
     },
     BeginOcclusionQuery {
@@ -482,9 +481,9 @@ pub enum ArcRenderCommand<A: HalApi> {
     },
     EndOcclusionQuery,
     BeginPipelineStatisticsQuery {
-        query_set: Arc<QuerySet<A>>,
+        query_set: Arc<QuerySet>,
         query_index: u32,
     },
     EndPipelineStatisticsQuery,
-    ExecuteBundle(Arc<RenderBundle<A>>),
+    ExecuteBundle(Arc<RenderBundle>),
 }
diff --git a/wgpu-core/src/command/timestamp_writes.rs b/wgpu-core/src/command/timestamp_writes.rs
index 82ab13c6dd..e91b48534d 100644
--- a/wgpu-core/src/command/timestamp_writes.rs
+++ b/wgpu-core/src/command/timestamp_writes.rs
@@ -1,6 +1,6 @@
 use std::sync::Arc;
 
-use crate::{hal_api::HalApi, id};
+use crate::id;
 
 /// Describes the writing of timestamp values in a render or compute pass.
 #[derive(Clone, Debug, PartialEq, Eq)]
@@ -15,9 +15,9 @@ pub struct PassTimestampWrites {
 }
 
 /// Describes the writing of timestamp values in a render or compute pass with the query set resolved.
-pub struct ArcPassTimestampWrites<A: HalApi> {
+pub struct ArcPassTimestampWrites {
     /// The query set to write the timestamps to.
-    pub query_set: Arc<crate::resource::QuerySet<A>>,
+    pub query_set: Arc<crate::resource::QuerySet>,
     /// The index of the query set at which a start timestamp of this pass is written, if any.
     pub beginning_of_pass_write_index: Option<u32>,
     /// The index of the query set at which an end timestamp of this pass is written, if any.
diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs
index 4ccc762720..de5ef9ed84 100644
--- a/wgpu-core/src/command/transfer.rs
+++ b/wgpu-core/src/command/transfer.rs
@@ -6,7 +6,6 @@ use crate::{
     conv,
     device::{Device, DeviceError, MissingDownlevelFlags},
     global::Global,
-    hal_api::HalApi,
     id::{BufferId, CommandEncoderId, TextureId},
     init_tracker::{
         has_copy_partial_init_tracker_coverage, MemoryInitKind, TextureInitRange,
@@ -159,10 +158,10 @@ impl From<DeviceError> for CopyError {
     }
 }
 
-pub(crate) fn extract_texture_selector<A: HalApi>(
+pub(crate) fn extract_texture_selector(
     copy_texture: &ImageCopyTexture,
     copy_size: &Extent3d,
-    texture: &Texture<A>,
+    texture: &Texture,
 ) -> Result<(TextureSelector, hal::TextureCopyBase), TransferError> {
     let format = texture.desc.format;
     let copy_aspect = hal::FormatAspects::new(format, copy_texture.aspect);
@@ -407,15 +406,15 @@ pub(crate) fn validate_texture_copy_range(
     Ok((copy_extent, array_layer_count))
 }
 
-fn handle_texture_init<A: HalApi>(
+fn handle_texture_init(
     init_kind: MemoryInitKind,
     encoder: &mut CommandEncoder,
-    trackers: &mut Tracker<A>,
-    texture_memory_actions: &mut CommandBufferTextureMemoryActions<A>,
-    device: &Device<A>,
+    trackers: &mut Tracker,
+    texture_memory_actions: &mut CommandBufferTextureMemoryActions,
+    device: &Device,
     copy_texture: &ImageCopyTexture,
     copy_size: &Extent3d,
-    texture: &Arc<Texture<A>>,
+    texture: &Arc<Texture>,
     snatch_guard: &SnatchGuard<'_>,
 ) -> Result<(), ClearError> {
     let init_action = TextureInitTrackerAction {
@@ -457,14 +456,14 @@ fn handle_texture_init<A: HalApi>(
 ///
 /// Ensure the source texture of a transfer is in the right initialization
 /// state, and record the state for after the transfer operation.
-fn handle_src_texture_init<A: HalApi>(
+fn handle_src_texture_init(
     encoder: &mut CommandEncoder,
-    trackers: &mut Tracker<A>,
-    texture_memory_actions: &mut CommandBufferTextureMemoryActions<A>,
-    device: &Device<A>,
+    trackers: &mut Tracker,
+    texture_memory_actions: &mut CommandBufferTextureMemoryActions,
+    device: &Device,
     source: &ImageCopyTexture,
     copy_size: &Extent3d,
-    texture: &Arc<Texture<A>>,
+    texture: &Arc<Texture>,
     snatch_guard: &SnatchGuard<'_>,
 ) -> Result<(), TransferError> {
     handle_texture_init(
@@ -485,14 +484,14 @@ fn handle_src_texture_init<A: HalApi>(
 ///
 /// Ensure the destination texture of a transfer is in the right initialization
 /// state, and record the state for after the transfer operation.
-fn handle_dst_texture_init<A: HalApi>(
+fn handle_dst_texture_init(
     encoder: &mut CommandEncoder,
-    trackers: &mut Tracker<A>,
-    texture_memory_actions: &mut CommandBufferTextureMemoryActions<A>,
-    device: &Device<A>,
+    trackers: &mut Tracker,
+    texture_memory_actions: &mut CommandBufferTextureMemoryActions,
+    device: &Device,
     destination: &ImageCopyTexture,
     copy_size: &Extent3d,
-    texture: &Arc<Texture<A>>,
+    texture: &Arc<Texture>,
     snatch_guard: &SnatchGuard<'_>,
 ) -> Result<(), TransferError> {
     // Attention: If we don't write full texture subresources, we need to a full
@@ -524,7 +523,7 @@ fn handle_dst_texture_init<A: HalApi>(
 }
 
 impl Global {
-    pub fn command_encoder_copy_buffer_to_buffer<A: HalApi>(
+    pub fn command_encoder_copy_buffer_to_buffer(
         &self,
         command_encoder_id: CommandEncoderId,
         source: BufferId,
@@ -541,7 +540,7 @@ impl Global {
         if source == destination {
             return Err(TransferError::SameSourceDestinationBuffer.into());
         }
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let cmd_buf = match hub
             .command_buffers
@@ -697,7 +696,7 @@ impl Global {
         Ok(())
     }
 
-    pub fn command_encoder_copy_buffer_to_texture<A: HalApi>(
+    pub fn command_encoder_copy_buffer_to_texture(
         &self,
         command_encoder_id: CommandEncoderId,
         source: &ImageCopyBuffer,
@@ -711,7 +710,7 @@ impl Global {
             destination.texture
         );
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let cmd_buf = match hub
             .command_buffers
@@ -865,7 +864,7 @@ impl Global {
         Ok(())
     }
 
-    pub fn command_encoder_copy_texture_to_buffer<A: HalApi>(
+    pub fn command_encoder_copy_texture_to_buffer(
         &self,
         command_encoder_id: CommandEncoderId,
         source: &ImageCopyTexture,
@@ -879,7 +878,7 @@ impl Global {
             destination.buffer
         );
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let cmd_buf = match hub
             .command_buffers
@@ -1045,7 +1044,7 @@ impl Global {
         Ok(())
     }
 
-    pub fn command_encoder_copy_texture_to_texture<A: HalApi>(
+    pub fn command_encoder_copy_texture_to_texture(
         &self,
         command_encoder_id: CommandEncoderId,
         source: &ImageCopyTexture,
@@ -1059,7 +1058,7 @@ impl Global {
             destination.texture
         );
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let cmd_buf = match hub
             .command_buffers
diff --git a/wgpu-core/src/device/any_device.rs b/wgpu-core/src/device/any_device.rs
deleted file mode 100644
index e796bf0574..0000000000
--- a/wgpu-core/src/device/any_device.rs
+++ /dev/null
@@ -1,102 +0,0 @@
-use wgt::Backend;
-
-use super::Device;
-/// The `AnyDevice` type: a pointer to a `Device<A>` for any backend `A`.
-use crate::hal_api::HalApi;
-
-use std::fmt;
-use std::mem::ManuallyDrop;
-use std::ptr::NonNull;
-use std::sync::Arc;
-
-struct AnyDeviceVtable {
-    // We oppurtunistically store the backend here, since we now it will be used
-    // with backend selection and it can be stored in static memory.
-    backend: Backend,
-    // Drop glue which knows how to drop the stored data.
-    drop: unsafe fn(*mut ()),
-}
-
-/// A pointer to a `Device<A>`, for any backend `A`.
-///
-/// Any `AnyDevice` is just like an `Arc<Device<A>>`, except that the `A` type
-/// parameter is erased. To access the `Device`, you must downcast to a
-/// particular backend with the \[`downcast_ref`\] or \[`downcast_clone`\]
-/// methods.
-pub struct AnyDevice {
-    data: NonNull<()>,
-    vtable: &'static AnyDeviceVtable,
-}
-
-impl AnyDevice {
-    /// Return an `AnyDevice` that holds an owning `Arc` pointer to `device`.
-    pub fn new<A: HalApi>(device: Arc<Device<A>>) -> AnyDevice {
-        unsafe fn drop_glue<A: HalApi>(ptr: *mut ()) {
-            // Drop the arc this instance is holding.
-            unsafe {
-                _ = Arc::from_raw(ptr.cast::<Device<A>>());
-            }
-        }
-
-        // SAFETY: The pointer returned by Arc::into_raw is guaranteed to be
-        // non-null.
-        let data = unsafe { NonNull::new_unchecked(Arc::into_raw(device).cast_mut()) };
-
-        AnyDevice {
-            data: data.cast(),
-            vtable: &AnyDeviceVtable {
-                backend: A::VARIANT,
-                drop: drop_glue::<A>,
-            },
-        }
-    }
-
-    /// If `self` is an `Arc<Device<A>>`, return a reference to the
-    /// device.
-    pub fn downcast_ref<A: HalApi>(&self) -> Option<&Device<A>> {
-        if self.vtable.backend != A::VARIANT {
-            return None;
-        }
-
-        // SAFETY: We just checked the instance above implicitly by the backend
-        // that it was statically constructed through.
-        Some(unsafe { &*(self.data.as_ptr().cast::<Device<A>>()) })
-    }
-
-    /// If `self` is an `Arc<Device<A>>`, return a clone of that.
-    pub fn downcast_clone<A: HalApi>(&self) -> Option<Arc<Device<A>>> {
-        if self.vtable.backend != A::VARIANT {
-            return None;
-        }
-
-        // We need to prevent the destructor of the arc from running, since it
-        // refers to the instance held by this object. Dropping it would
-        // invalidate this object.
-        //
-        // SAFETY: We just checked the instance above implicitly by the backend
-        // that it was statically constructed through.
-        let this =
-            ManuallyDrop::new(unsafe { Arc::from_raw(self.data.as_ptr().cast::<Device<A>>()) });
-
-        // Cloning it increases the reference count, and we return a new arc
-        // instance.
-        Some((*this).clone())
-    }
-}
-
-impl Drop for AnyDevice {
-    fn drop(&mut self) {
-        unsafe { (self.vtable.drop)(self.data.as_ptr()) }
-    }
-}
-
-impl fmt::Debug for AnyDevice {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "AnyDevice<{}>", self.vtable.backend)
-    }
-}
-
-#[cfg(send_sync)]
-unsafe impl Send for AnyDevice {}
-#[cfg(send_sync)]
-unsafe impl Sync for AnyDevice {}
diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index f456a00ca2..d9f983d1a8 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -31,12 +31,12 @@ use std::{borrow::Cow, ptr::NonNull, sync::atomic::Ordering};
 use super::{ImplicitPipelineIds, UserClosures};
 
 impl Global {
-    pub fn adapter_is_surface_supported<A: HalApi>(
+    pub fn adapter_is_surface_supported(
         &self,
         adapter_id: AdapterId,
         surface_id: SurfaceId,
     ) -> Result<bool, instance::IsSurfaceSupportedError> {
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let surface_guard = self.surfaces.read();
         let adapter_guard = hub.adapters.read();
@@ -49,13 +49,13 @@ impl Global {
         Ok(adapter.is_surface_supported(surface))
     }
 
-    pub fn surface_get_capabilities<A: HalApi>(
+    pub fn surface_get_capabilities(
         &self,
         surface_id: SurfaceId,
         adapter_id: AdapterId,
     ) -> Result<wgt::SurfaceCapabilities, instance::GetSurfaceSupportError> {
         profiling::scope!("Surface::get_capabilities");
-        self.fetch_adapter_and_surface::<A, _, _>(surface_id, adapter_id, |adapter, surface| {
+        self.fetch_adapter_and_surface::<_, _>(surface_id, adapter_id, |adapter, surface| {
             let mut hal_caps = surface.get_capabilities(adapter)?;
 
             hal_caps.formats.sort_by_key(|f| !f.is_srgb());
@@ -72,7 +72,6 @@ impl Global {
     }
 
     fn fetch_adapter_and_surface<
-        A: HalApi,
         F: FnOnce(&Adapter, &Surface) -> Result<B, instance::GetSurfaceSupportError>,
         B,
     >(
@@ -81,7 +80,7 @@ impl Global {
         adapter_id: AdapterId,
         get_supported_callback: F,
     ) -> Result<B, instance::GetSurfaceSupportError> {
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let surface_guard = self.surfaces.read();
         let adapter_guard = hub.adapters.read();
@@ -95,11 +94,8 @@ impl Global {
         get_supported_callback(adapter, surface)
     }
 
-    pub fn device_features<A: HalApi>(
-        &self,
-        device_id: DeviceId,
-    ) -> Result<wgt::Features, DeviceError> {
-        let hub = A::hub(self);
+    pub fn device_features(&self, device_id: DeviceId) -> Result<wgt::Features, DeviceError> {
+        let hub = &self.hub;
 
         let device = hub
             .devices
@@ -109,11 +105,8 @@ impl Global {
         Ok(device.features)
     }
 
-    pub fn device_limits<A: HalApi>(
-        &self,
-        device_id: DeviceId,
-    ) -> Result<wgt::Limits, DeviceError> {
-        let hub = A::hub(self);
+    pub fn device_limits(&self, device_id: DeviceId) -> Result<wgt::Limits, DeviceError> {
+        let hub = &self.hub;
 
         let device = hub
             .devices
@@ -123,11 +116,11 @@ impl Global {
         Ok(device.limits.clone())
     }
 
-    pub fn device_downlevel_properties<A: HalApi>(
+    pub fn device_downlevel_properties(
         &self,
         device_id: DeviceId,
     ) -> Result<wgt::DownlevelCapabilities, DeviceError> {
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let device = hub
             .devices
@@ -137,7 +130,7 @@ impl Global {
         Ok(device.downlevel.clone())
     }
 
-    pub fn device_create_buffer<A: HalApi>(
+    pub fn device_create_buffer(
         &self,
         device_id: DeviceId,
         desc: &resource::BufferDescriptor,
@@ -145,8 +138,8 @@ impl Global {
     ) -> (id::BufferId, Option<CreateBufferError>) {
         profiling::scope!("Device::create_buffer");
 
-        let hub = A::hub(self);
-        let fid = hub.buffers.prepare(id_in);
+        let hub = &self.hub;
+        let fid = hub.buffers.prepare(device_id.backend(), id_in);
 
         let error = 'error: {
             let device = match hub.devices.get(device_id) {
@@ -220,16 +213,20 @@ impl Global {
     /// [`device_create_buffer`]: Global::device_create_buffer
     /// [`usage`]: https://www.w3.org/TR/webgpu/#dom-gputexturedescriptor-usage
     /// [`wgpu_types::BufferUsages`]: wgt::BufferUsages
-    pub fn create_buffer_error<A: HalApi>(&self, id_in: Option<id::BufferId>) {
-        let hub = A::hub(self);
-        let fid = hub.buffers.prepare(id_in);
+    pub fn create_buffer_error(&self, backend: wgt::Backend, id_in: Option<id::BufferId>) {
+        let hub = &self.hub;
+        let fid = hub.buffers.prepare(backend, id_in);
 
         fid.assign_error();
     }
 
-    pub fn create_render_bundle_error<A: HalApi>(&self, id_in: Option<id::RenderBundleId>) {
-        let hub = A::hub(self);
-        let fid = hub.render_bundles.prepare(id_in);
+    pub fn create_render_bundle_error(
+        &self,
+        backend: wgt::Backend,
+        id_in: Option<id::RenderBundleId>,
+    ) {
+        let hub = &self.hub;
+        let fid = hub.render_bundles.prepare(backend, id_in);
 
         fid.assign_error();
     }
@@ -237,21 +234,21 @@ impl Global {
     /// Assign `id_in` an error with the given `label`.
     ///
     /// See `create_buffer_error` for more context and explanation.
-    pub fn create_texture_error<A: HalApi>(&self, id_in: Option<id::TextureId>) {
-        let hub = A::hub(self);
-        let fid = hub.textures.prepare(id_in);
+    pub fn create_texture_error(&self, backend: wgt::Backend, id_in: Option<id::TextureId>) {
+        let hub = &self.hub;
+        let fid = hub.textures.prepare(backend, id_in);
 
         fid.assign_error();
     }
 
     #[cfg(feature = "replay")]
-    pub fn device_set_buffer_data<A: HalApi>(
+    pub fn device_set_buffer_data(
         &self,
         buffer_id: id::BufferId,
         offset: BufferAddress,
         data: &[u8],
     ) -> BufferAccessResult {
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let buffer = hub
             .buffers
@@ -291,14 +288,11 @@ impl Global {
         Ok(())
     }
 
-    pub fn buffer_destroy<A: HalApi>(
-        &self,
-        buffer_id: id::BufferId,
-    ) -> Result<(), resource::DestroyError> {
+    pub fn buffer_destroy(&self, buffer_id: id::BufferId) -> Result<(), resource::DestroyError> {
         profiling::scope!("Buffer::destroy");
         api_log!("Buffer::destroy {buffer_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let buffer = hub
             .buffers
@@ -318,11 +312,11 @@ impl Global {
         buffer.destroy()
     }
 
-    pub fn buffer_drop<A: HalApi>(&self, buffer_id: id::BufferId) {
+    pub fn buffer_drop(&self, buffer_id: id::BufferId) {
         profiling::scope!("Buffer::drop");
         api_log!("Buffer::drop {buffer_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let buffer = match hub.buffers.unregister(buffer_id) {
             Some(buffer) => buffer,
@@ -342,7 +336,7 @@ impl Global {
         );
     }
 
-    pub fn device_create_texture<A: HalApi>(
+    pub fn device_create_texture(
         &self,
         device_id: DeviceId,
         desc: &resource::TextureDescriptor,
@@ -350,9 +344,9 @@ impl Global {
     ) -> (id::TextureId, Option<resource::CreateTextureError>) {
         profiling::scope!("Device::create_texture");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
-        let fid = hub.textures.prepare(id_in);
+        let fid = hub.textures.prepare(device_id.backend(), id_in);
 
         let error = 'error: {
             let device = match hub.devices.get(device_id) {
@@ -387,7 +381,7 @@ impl Global {
     /// - `hal_texture` must be created from `device_id` corresponding raw handle.
     /// - `hal_texture` must be created respecting `desc`
     /// - `hal_texture` must be initialized
-    pub unsafe fn create_texture_from_hal<A: HalApi>(
+    pub unsafe fn create_texture_from_hal(
         &self,
         hal_texture: Box<dyn hal::DynTexture>,
         device_id: DeviceId,
@@ -396,9 +390,9 @@ impl Global {
     ) -> (id::TextureId, Option<resource::CreateTextureError>) {
         profiling::scope!("Device::create_texture_from_hal");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
-        let fid = hub.textures.prepare(id_in);
+        let fid = hub.textures.prepare(device_id.backend(), id_in);
 
         let error = 'error: {
             let device = match hub.devices.get(device_id) {
@@ -444,8 +438,8 @@ impl Global {
     ) -> (id::BufferId, Option<CreateBufferError>) {
         profiling::scope!("Device::create_buffer");
 
-        let hub = A::hub(self);
-        let fid = hub.buffers.prepare(id_in);
+        let hub = &self.hub;
+        let fid = hub.buffers.prepare(A::VARIANT, id_in);
 
         let error = 'error: {
             let device = match hub.devices.get(device_id) {
@@ -460,7 +454,7 @@ impl Global {
                 trace.add(trace::Action::CreateBuffer(fid.id(), desc.clone()));
             }
 
-            let buffer = device.create_buffer_from_hal(hal_buffer, desc);
+            let buffer = device.create_buffer_from_hal(Box::new(hal_buffer), desc);
 
             let id = fid.assign(buffer);
             api_log!("Device::create_buffer -> {id:?}");
@@ -474,14 +468,11 @@ impl Global {
         (id, Some(error))
     }
 
-    pub fn texture_destroy<A: HalApi>(
-        &self,
-        texture_id: id::TextureId,
-    ) -> Result<(), resource::DestroyError> {
+    pub fn texture_destroy(&self, texture_id: id::TextureId) -> Result<(), resource::DestroyError> {
         profiling::scope!("Texture::destroy");
         api_log!("Texture::destroy {texture_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let texture = hub
             .textures
@@ -496,11 +487,11 @@ impl Global {
         texture.destroy()
     }
 
-    pub fn texture_drop<A: HalApi>(&self, texture_id: id::TextureId) {
+    pub fn texture_drop(&self, texture_id: id::TextureId) {
         profiling::scope!("Texture::drop");
         api_log!("Texture::drop {texture_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Some(_texture) = hub.textures.unregister(texture_id) {
             #[cfg(feature = "trace")]
@@ -510,7 +501,7 @@ impl Global {
         }
     }
 
-    pub fn texture_create_view<A: HalApi>(
+    pub fn texture_create_view(
         &self,
         texture_id: id::TextureId,
         desc: &resource::TextureViewDescriptor,
@@ -518,9 +509,9 @@ impl Global {
     ) -> (id::TextureViewId, Option<resource::CreateTextureViewError>) {
         profiling::scope!("Texture::create_view");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
-        let fid = hub.texture_views.prepare(id_in);
+        let fid = hub.texture_views.prepare(texture_id.backend(), id_in);
 
         let error = 'error: {
             let texture = match hub.textures.get(texture_id) {
@@ -557,14 +548,14 @@ impl Global {
         (id, Some(error))
     }
 
-    pub fn texture_view_drop<A: HalApi>(
+    pub fn texture_view_drop(
         &self,
         texture_view_id: id::TextureViewId,
     ) -> Result<(), resource::TextureViewDestroyError> {
         profiling::scope!("TextureView::drop");
         api_log!("TextureView::drop {texture_view_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Some(_view) = hub.texture_views.unregister(texture_view_id) {
             #[cfg(feature = "trace")]
@@ -575,7 +566,7 @@ impl Global {
         Ok(())
     }
 
-    pub fn device_create_sampler<A: HalApi>(
+    pub fn device_create_sampler(
         &self,
         device_id: DeviceId,
         desc: &resource::SamplerDescriptor,
@@ -583,8 +574,8 @@ impl Global {
     ) -> (id::SamplerId, Option<resource::CreateSamplerError>) {
         profiling::scope!("Device::create_sampler");
 
-        let hub = A::hub(self);
-        let fid = hub.samplers.prepare(id_in);
+        let hub = &self.hub;
+        let fid = hub.samplers.prepare(device_id.backend(), id_in);
 
         let error = 'error: {
             let device = match hub.devices.get(device_id) {
@@ -612,11 +603,11 @@ impl Global {
         (id, Some(error))
     }
 
-    pub fn sampler_drop<A: HalApi>(&self, sampler_id: id::SamplerId) {
+    pub fn sampler_drop(&self, sampler_id: id::SamplerId) {
         profiling::scope!("Sampler::drop");
         api_log!("Sampler::drop {sampler_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Some(_sampler) = hub.samplers.unregister(sampler_id) {
             #[cfg(feature = "trace")]
@@ -626,7 +617,7 @@ impl Global {
         }
     }
 
-    pub fn device_create_bind_group_layout<A: HalApi>(
+    pub fn device_create_bind_group_layout(
         &self,
         device_id: DeviceId,
         desc: &binding_model::BindGroupLayoutDescriptor,
@@ -637,8 +628,8 @@ impl Global {
     ) {
         profiling::scope!("Device::create_bind_group_layout");
 
-        let hub = A::hub(self);
-        let fid = hub.bind_group_layouts.prepare(id_in);
+        let hub = &self.hub;
+        let fid = hub.bind_group_layouts.prepare(device_id.backend(), id_in);
 
         let error = 'error: {
             let device = match hub.devices.get(device_id) {
@@ -681,16 +672,16 @@ impl Global {
             return (id, None);
         };
 
-        let fid = hub.bind_group_layouts.prepare(id_in);
+        let fid = hub.bind_group_layouts.prepare(device_id.backend(), id_in);
         let id = fid.assign_error();
         (id, Some(error))
     }
 
-    pub fn bind_group_layout_drop<A: HalApi>(&self, bind_group_layout_id: id::BindGroupLayoutId) {
+    pub fn bind_group_layout_drop(&self, bind_group_layout_id: id::BindGroupLayoutId) {
         profiling::scope!("BindGroupLayout::drop");
         api_log!("BindGroupLayout::drop {bind_group_layout_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Some(_layout) = hub.bind_group_layouts.unregister(bind_group_layout_id) {
             #[cfg(feature = "trace")]
@@ -700,7 +691,7 @@ impl Global {
         }
     }
 
-    pub fn device_create_pipeline_layout<A: HalApi>(
+    pub fn device_create_pipeline_layout(
         &self,
         device_id: DeviceId,
         desc: &binding_model::PipelineLayoutDescriptor,
@@ -711,8 +702,8 @@ impl Global {
     ) {
         profiling::scope!("Device::create_pipeline_layout");
 
-        let hub = A::hub(self);
-        let fid = hub.pipeline_layouts.prepare(id_in);
+        let hub = &self.hub;
+        let fid = hub.pipeline_layouts.prepare(device_id.backend(), id_in);
 
         let error = 'error: {
             let device = match hub.devices.get(device_id) {
@@ -764,11 +755,11 @@ impl Global {
         (id, Some(error))
     }
 
-    pub fn pipeline_layout_drop<A: HalApi>(&self, pipeline_layout_id: id::PipelineLayoutId) {
+    pub fn pipeline_layout_drop(&self, pipeline_layout_id: id::PipelineLayoutId) {
         profiling::scope!("PipelineLayout::drop");
         api_log!("PipelineLayout::drop {pipeline_layout_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
         if let Some(_layout) = hub.pipeline_layouts.unregister(pipeline_layout_id) {
             #[cfg(feature = "trace")]
             if let Some(t) = _layout.device.trace.lock().as_mut() {
@@ -777,7 +768,7 @@ impl Global {
         }
     }
 
-    pub fn device_create_bind_group<A: HalApi>(
+    pub fn device_create_bind_group(
         &self,
         device_id: DeviceId,
         desc: &binding_model::BindGroupDescriptor,
@@ -785,8 +776,8 @@ impl Global {
     ) -> (id::BindGroupId, Option<binding_model::CreateBindGroupError>) {
         profiling::scope!("Device::create_bind_group");
 
-        let hub = A::hub(self);
-        let fid = hub.bind_groups.prepare(id_in);
+        let hub = &self.hub;
+        let fid = hub.bind_groups.prepare(device_id.backend(), id_in);
 
         let error = 'error: {
             let device = match hub.devices.get(device_id) {
@@ -804,12 +795,12 @@ impl Global {
                 Err(..) => break 'error binding_model::CreateBindGroupError::InvalidLayout,
             };
 
-            fn map_entry<'a, A: HalApi>(
+            fn map_entry<'a>(
                 e: &BindGroupEntry<'a>,
-                buffer_storage: &Storage<resource::Buffer<A>>,
-                sampler_storage: &Storage<resource::Sampler<A>>,
-                texture_view_storage: &Storage<resource::TextureView<A>>,
-            ) -> Result<ResolvedBindGroupEntry<'a, A>, binding_model::CreateBindGroupError>
+                buffer_storage: &Storage<resource::Buffer>,
+                sampler_storage: &Storage<resource::Sampler>,
+                texture_view_storage: &Storage<resource::TextureView>,
+            ) -> Result<ResolvedBindGroupEntry<'a>, binding_model::CreateBindGroupError>
             {
                 let map_buffer = |bb: &BufferBinding| {
                     buffer_storage
@@ -904,11 +895,11 @@ impl Global {
         (id, Some(error))
     }
 
-    pub fn bind_group_drop<A: HalApi>(&self, bind_group_id: id::BindGroupId) {
+    pub fn bind_group_drop(&self, bind_group_id: id::BindGroupId) {
         profiling::scope!("BindGroup::drop");
         api_log!("BindGroup::drop {bind_group_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Some(_bind_group) = hub.bind_groups.unregister(bind_group_id) {
             #[cfg(feature = "trace")]
@@ -932,7 +923,7 @@ impl Global {
     /// input.
     ///
     /// </div>
-    pub fn device_create_shader_module<A: HalApi>(
+    pub fn device_create_shader_module(
         &self,
         device_id: DeviceId,
         desc: &pipeline::ShaderModuleDescriptor,
@@ -944,8 +935,8 @@ impl Global {
     ) {
         profiling::scope!("Device::create_shader_module");
 
-        let hub = A::hub(self);
-        let fid = hub.shader_modules.prepare(id_in);
+        let hub = &self.hub;
+        let fid = hub.shader_modules.prepare(device_id.backend(), id_in);
 
         let error = 'error: {
             let device = match hub.devices.get(device_id) {
@@ -1007,7 +998,7 @@ impl Global {
     ///
     /// This function passes SPIR-V binary to the backend as-is and can potentially result in a
     /// driver crash.
-    pub unsafe fn device_create_shader_module_spirv<A: HalApi>(
+    pub unsafe fn device_create_shader_module_spirv(
         &self,
         device_id: DeviceId,
         desc: &pipeline::ShaderModuleDescriptor,
@@ -1019,8 +1010,8 @@ impl Global {
     ) {
         profiling::scope!("Device::create_shader_module");
 
-        let hub = A::hub(self);
-        let fid = hub.shader_modules.prepare(id_in);
+        let hub = &self.hub;
+        let fid = hub.shader_modules.prepare(device_id.backend(), id_in);
 
         let error = 'error: {
             let device = match hub.devices.get(device_id) {
@@ -1055,11 +1046,11 @@ impl Global {
         (id, Some(error))
     }
 
-    pub fn shader_module_drop<A: HalApi>(&self, shader_module_id: id::ShaderModuleId) {
+    pub fn shader_module_drop(&self, shader_module_id: id::ShaderModuleId) {
         profiling::scope!("ShaderModule::drop");
         api_log!("ShaderModule::drop {shader_module_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Some(shader_module) = hub.shader_modules.unregister(shader_module_id) {
             #[cfg(feature = "trace")]
@@ -1070,7 +1061,7 @@ impl Global {
         }
     }
 
-    pub fn device_create_command_encoder<A: HalApi>(
+    pub fn device_create_command_encoder(
         &self,
         device_id: DeviceId,
         desc: &wgt::CommandEncoderDescriptor<Label>,
@@ -1078,10 +1069,11 @@ impl Global {
     ) -> (id::CommandEncoderId, Option<DeviceError>) {
         profiling::scope!("Device::create_command_encoder");
 
-        let hub = A::hub(self);
-        let fid = hub
-            .command_buffers
-            .prepare(id_in.map(|id| id.into_command_buffer_id()));
+        let hub = &self.hub;
+        let fid = hub.command_buffers.prepare(
+            device_id.backend(),
+            id_in.map(|id| id.into_command_buffer_id()),
+        );
 
         let error = 'error: {
             let device = match hub.devices.get(device_id) {
@@ -1103,11 +1095,11 @@ impl Global {
         (id.into_command_encoder_id(), Some(error))
     }
 
-    pub fn command_encoder_drop<A: HalApi>(&self, command_encoder_id: id::CommandEncoderId) {
+    pub fn command_encoder_drop(&self, command_encoder_id: id::CommandEncoderId) {
         profiling::scope!("CommandEncoder::drop");
         api_log!("CommandEncoder::drop {command_encoder_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Some(cmd_buf) = hub
             .command_buffers
@@ -1117,10 +1109,10 @@ impl Global {
         }
     }
 
-    pub fn command_buffer_drop<A: HalApi>(&self, command_buffer_id: id::CommandBufferId) {
+    pub fn command_buffer_drop(&self, command_buffer_id: id::CommandBufferId) {
         profiling::scope!("CommandBuffer::drop");
         api_log!("CommandBuffer::drop {command_buffer_id:?}");
-        self.command_encoder_drop::<A>(command_buffer_id.into_command_encoder_id())
+        self.command_encoder_drop(command_buffer_id.into_command_encoder_id())
     }
 
     pub fn device_create_render_bundle_encoder(
@@ -1140,7 +1132,7 @@ impl Global {
         (Box::into_raw(Box::new(encoder)), error)
     }
 
-    pub fn render_bundle_encoder_finish<A: HalApi>(
+    pub fn render_bundle_encoder_finish(
         &self,
         bundle_encoder: command::RenderBundleEncoder,
         desc: &command::RenderBundleDescriptor,
@@ -1148,9 +1140,11 @@ impl Global {
     ) -> (id::RenderBundleId, Option<command::RenderBundleError>) {
         profiling::scope!("RenderBundleEncoder::finish");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
-        let fid = hub.render_bundles.prepare(id_in);
+        let fid = hub
+            .render_bundles
+            .prepare(bundle_encoder.parent().backend(), id_in);
 
         let error = 'error: {
             let device = match hub.devices.get(bundle_encoder.parent()) {
@@ -1191,11 +1185,11 @@ impl Global {
         (id, Some(error))
     }
 
-    pub fn render_bundle_drop<A: HalApi>(&self, render_bundle_id: id::RenderBundleId) {
+    pub fn render_bundle_drop(&self, render_bundle_id: id::RenderBundleId) {
         profiling::scope!("RenderBundle::drop");
         api_log!("RenderBundle::drop {render_bundle_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Some(_bundle) = hub.render_bundles.unregister(render_bundle_id) {
             #[cfg(feature = "trace")]
@@ -1205,7 +1199,7 @@ impl Global {
         }
     }
 
-    pub fn device_create_query_set<A: HalApi>(
+    pub fn device_create_query_set(
         &self,
         device_id: DeviceId,
         desc: &resource::QuerySetDescriptor,
@@ -1213,8 +1207,8 @@ impl Global {
     ) -> (id::QuerySetId, Option<resource::CreateQuerySetError>) {
         profiling::scope!("Device::create_query_set");
 
-        let hub = A::hub(self);
-        let fid = hub.query_sets.prepare(id_in);
+        let hub = &self.hub;
+        let fid = hub.query_sets.prepare(device_id.backend(), id_in);
 
         let error = 'error: {
             let device = match hub.devices.get(device_id) {
@@ -1245,11 +1239,11 @@ impl Global {
         (id, Some(error))
     }
 
-    pub fn query_set_drop<A: HalApi>(&self, query_set_id: id::QuerySetId) {
+    pub fn query_set_drop(&self, query_set_id: id::QuerySetId) {
         profiling::scope!("QuerySet::drop");
         api_log!("QuerySet::drop {query_set_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Some(_query_set) = hub.query_sets.unregister(query_set_id) {
             #[cfg(feature = "trace")]
@@ -1259,7 +1253,7 @@ impl Global {
         }
     }
 
-    pub fn device_create_render_pipeline<A: HalApi>(
+    pub fn device_create_render_pipeline(
         &self,
         device_id: DeviceId,
         desc: &pipeline::RenderPipelineDescriptor,
@@ -1271,12 +1265,12 @@ impl Global {
     ) {
         profiling::scope!("Device::create_render_pipeline");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let missing_implicit_pipeline_ids =
             desc.layout.is_none() && id_in.is_some() && implicit_pipeline_ids.is_none();
 
-        let fid = hub.render_pipelines.prepare(id_in);
+        let fid = hub.render_pipelines.prepare(device_id.backend(), id_in);
         let implicit_context = implicit_pipeline_ids.map(|ipi| ipi.prepare(hub));
 
         let error = 'error: {
@@ -1457,7 +1451,7 @@ impl Global {
 
     /// Get an ID of one of the bind group layouts. The ID adds a refcount,
     /// which needs to be released by calling `bind_group_layout_drop`.
-    pub fn render_pipeline_get_bind_group_layout<A: HalApi>(
+    pub fn render_pipeline_get_bind_group_layout(
         &self,
         pipeline_id: id::RenderPipelineId,
         index: u32,
@@ -1466,7 +1460,7 @@ impl Global {
         id::BindGroupLayoutId,
         Option<binding_model::GetBindGroupLayoutError>,
     ) {
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let error = 'error: {
             let pipeline = match hub.render_pipelines.get(pipeline_id) {
@@ -1474,7 +1468,10 @@ impl Global {
                 Err(_) => break 'error binding_model::GetBindGroupLayoutError::InvalidPipeline,
             };
             let id = match pipeline.layout.bind_group_layouts.get(index as usize) {
-                Some(bg) => hub.bind_group_layouts.prepare(id_in).assign(bg.clone()),
+                Some(bg) => hub
+                    .bind_group_layouts
+                    .prepare(pipeline_id.backend(), id_in)
+                    .assign(bg.clone()),
                 None => {
                     break 'error binding_model::GetBindGroupLayoutError::InvalidGroupIndex(index)
                 }
@@ -1482,15 +1479,18 @@ impl Global {
             return (id, None);
         };
 
-        let id = hub.bind_group_layouts.prepare(id_in).assign_error();
+        let id = hub
+            .bind_group_layouts
+            .prepare(pipeline_id.backend(), id_in)
+            .assign_error();
         (id, Some(error))
     }
 
-    pub fn render_pipeline_drop<A: HalApi>(&self, render_pipeline_id: id::RenderPipelineId) {
+    pub fn render_pipeline_drop(&self, render_pipeline_id: id::RenderPipelineId) {
         profiling::scope!("RenderPipeline::drop");
         api_log!("RenderPipeline::drop {render_pipeline_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Some(_pipeline) = hub.render_pipelines.unregister(render_pipeline_id) {
             #[cfg(feature = "trace")]
@@ -1500,7 +1500,7 @@ impl Global {
         }
     }
 
-    pub fn device_create_compute_pipeline<A: HalApi>(
+    pub fn device_create_compute_pipeline(
         &self,
         device_id: DeviceId,
         desc: &pipeline::ComputePipelineDescriptor,
@@ -1512,12 +1512,12 @@ impl Global {
     ) {
         profiling::scope!("Device::create_compute_pipeline");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let missing_implicit_pipeline_ids =
             desc.layout.is_none() && id_in.is_some() && implicit_pipeline_ids.is_none();
 
-        let fid = hub.compute_pipelines.prepare(id_in);
+        let fid = hub.compute_pipelines.prepare(device_id.backend(), id_in);
         let implicit_context = implicit_pipeline_ids.map(|ipi| ipi.prepare(hub));
 
         let error = 'error: {
@@ -1651,7 +1651,7 @@ impl Global {
 
     /// Get an ID of one of the bind group layouts. The ID adds a refcount,
     /// which needs to be released by calling `bind_group_layout_drop`.
-    pub fn compute_pipeline_get_bind_group_layout<A: HalApi>(
+    pub fn compute_pipeline_get_bind_group_layout(
         &self,
         pipeline_id: id::ComputePipelineId,
         index: u32,
@@ -1660,7 +1660,7 @@ impl Global {
         id::BindGroupLayoutId,
         Option<binding_model::GetBindGroupLayoutError>,
     ) {
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let error = 'error: {
             let pipeline = match hub.compute_pipelines.get(pipeline_id) {
@@ -1669,7 +1669,10 @@ impl Global {
             };
 
             let id = match pipeline.layout.bind_group_layouts.get(index as usize) {
-                Some(bg) => hub.bind_group_layouts.prepare(id_in).assign(bg.clone()),
+                Some(bg) => hub
+                    .bind_group_layouts
+                    .prepare(pipeline_id.backend(), id_in)
+                    .assign(bg.clone()),
                 None => {
                     break 'error binding_model::GetBindGroupLayoutError::InvalidGroupIndex(index)
                 }
@@ -1678,15 +1681,18 @@ impl Global {
             return (id, None);
         };
 
-        let id = hub.bind_group_layouts.prepare(id_in).assign_error();
+        let id = hub
+            .bind_group_layouts
+            .prepare(pipeline_id.backend(), id_in)
+            .assign_error();
         (id, Some(error))
     }
 
-    pub fn compute_pipeline_drop<A: HalApi>(&self, compute_pipeline_id: id::ComputePipelineId) {
+    pub fn compute_pipeline_drop(&self, compute_pipeline_id: id::ComputePipelineId) {
         profiling::scope!("ComputePipeline::drop");
         api_log!("ComputePipeline::drop {compute_pipeline_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Some(_pipeline) = hub.compute_pipelines.unregister(compute_pipeline_id) {
             #[cfg(feature = "trace")]
@@ -1699,7 +1705,7 @@ impl Global {
     /// # Safety
     /// The `data` argument of `desc` must have been returned by
     /// [Self::pipeline_cache_get_data] for the same adapter
-    pub unsafe fn device_create_pipeline_cache<A: HalApi>(
+    pub unsafe fn device_create_pipeline_cache(
         &self,
         device_id: DeviceId,
         desc: &pipeline::PipelineCacheDescriptor<'_>,
@@ -1710,9 +1716,9 @@ impl Global {
     ) {
         profiling::scope!("Device::create_pipeline_cache");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
-        let fid = hub.pipeline_caches.prepare(id_in);
+        let fid = hub.pipeline_caches.prepare(device_id.backend(), id_in);
         let error: pipeline::CreatePipelineCacheError = 'error: {
             let device = match hub.devices.get(device_id) {
                 Ok(device) => device,
@@ -1744,11 +1750,11 @@ impl Global {
         (id, Some(error))
     }
 
-    pub fn pipeline_cache_drop<A: HalApi>(&self, pipeline_cache_id: id::PipelineCacheId) {
+    pub fn pipeline_cache_drop(&self, pipeline_cache_id: id::PipelineCacheId) {
         profiling::scope!("PipelineCache::drop");
         api_log!("PipelineCache::drop {pipeline_cache_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Some(cache) = hub.pipeline_caches.unregister(pipeline_cache_id) {
             #[cfg(feature = "trace")]
@@ -1759,7 +1765,7 @@ impl Global {
         }
     }
 
-    pub fn surface_configure<A: HalApi>(
+    pub fn surface_configure(
         &self,
         surface_id: SurfaceId,
         device_id: DeviceId,
@@ -1886,7 +1892,7 @@ impl Global {
             // User callbacks must not be called while we are holding locks.
             let user_callbacks;
             {
-                let hub = A::hub(self);
+                let hub = &self.hub;
                 let surface_guard = self.surfaces.read();
 
                 let device = match hub.devices.get(device_id) {
@@ -1989,7 +1995,7 @@ impl Global {
                 //
                 // https://github.com/gfx-rs/wgpu/issues/4105
 
-                let surface_raw = surface.raw(A::VARIANT).unwrap();
+                let surface_raw = surface.raw(device_id.backend()).unwrap();
                 match unsafe { surface_raw.configure(device.raw(), &hal_config) } {
                     Ok(()) => (),
                     Err(error) => {
@@ -2008,7 +2014,7 @@ impl Global {
 
                 let mut presentation = surface.presentation.lock();
                 *presentation = Some(present::Presentation {
-                    device: super::any_device::AnyDevice::new(device),
+                    device,
                     config: config.clone(),
                     acquired_texture: None,
                 });
@@ -2024,14 +2030,14 @@ impl Global {
     /// Check `device_id` for freeable resources and completed buffer mappings.
     ///
     /// Return `queue_empty` indicating whether there are more queue submissions still in flight.
-    pub fn device_poll<A: HalApi>(
+    pub fn device_poll(
         &self,
         device_id: DeviceId,
         maintain: wgt::Maintain<crate::SubmissionIndex>,
     ) -> Result<bool, WaitIdleError> {
         api_log!("Device::poll {maintain:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
         let device = hub
             .devices
             .get(device_id)
@@ -2047,8 +2053,8 @@ impl Global {
         Ok(queue_empty)
     }
 
-    fn poll_single_device<A: HalApi>(
-        device: &crate::device::Device<A>,
+    fn poll_single_device(
+        device: &crate::device::Device,
         maintain: wgt::Maintain<crate::SubmissionIndex>,
     ) -> Result<DevicePoll, WaitIdleError> {
         let snatch_guard = device.snatchable_lock.read();
@@ -2065,25 +2071,26 @@ impl Global {
         })
     }
 
-    /// Poll all devices belonging to the backend `A`.
+    /// Poll all devices belonging to the specified backend.
     ///
     /// If `force_wait` is true, block until all buffer mappings are done.
     ///
     /// Return `all_queue_empty` indicating whether there are more queue
     /// submissions still in flight.
-    fn poll_all_devices_of_api<A: HalApi>(
+    fn poll_all_devices_of_api(
         &self,
+        backend: wgt::Backend,
         force_wait: bool,
         closures: &mut UserClosures,
     ) -> Result<bool, WaitIdleError> {
         profiling::scope!("poll_device");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
         let mut all_queue_empty = true;
         {
             let device_guard = hub.devices.read();
 
-            for (_id, device) in device_guard.iter(A::VARIANT) {
+            for (_id, device) in device_guard.iter(backend) {
                 let maintain = if force_wait {
                     wgt::Maintain::Wait
                 } else {
@@ -2118,22 +2125,22 @@ impl Global {
         #[cfg(vulkan)]
         {
             all_queue_empty &=
-                self.poll_all_devices_of_api::<hal::api::Vulkan>(force_wait, &mut closures)?;
+                self.poll_all_devices_of_api(wgt::Backend::Vulkan, force_wait, &mut closures)?;
         }
         #[cfg(metal)]
         {
             all_queue_empty &=
-                self.poll_all_devices_of_api::<hal::api::Metal>(force_wait, &mut closures)?;
+                self.poll_all_devices_of_api(wgt::Backend::Metal, force_wait, &mut closures)?;
         }
         #[cfg(dx12)]
         {
             all_queue_empty &=
-                self.poll_all_devices_of_api::<hal::api::Dx12>(force_wait, &mut closures)?;
+                self.poll_all_devices_of_api(wgt::Backend::Dx12, force_wait, &mut closures)?;
         }
         #[cfg(gles)]
         {
             all_queue_empty &=
-                self.poll_all_devices_of_api::<hal::api::Gles>(force_wait, &mut closures)?;
+                self.poll_all_devices_of_api(wgt::Backend::Gl, force_wait, &mut closures)?;
         }
 
         closures.fire();
@@ -2141,10 +2148,10 @@ impl Global {
         Ok(all_queue_empty)
     }
 
-    pub fn device_start_capture<A: HalApi>(&self, id: DeviceId) {
+    pub fn device_start_capture(&self, id: DeviceId) {
         api_log!("Device::start_capture");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Ok(device) = hub.devices.get(id) {
             if !device.is_valid() {
@@ -2154,10 +2161,10 @@ impl Global {
         }
     }
 
-    pub fn device_stop_capture<A: HalApi>(&self, id: DeviceId) {
+    pub fn device_stop_capture(&self, id: DeviceId) {
         api_log!("Device::stop_capture");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Ok(device) = hub.devices.get(id) {
             if !device.is_valid() {
@@ -2170,15 +2177,15 @@ impl Global {
     // This is a test-only function to force the device into an
     // invalid state by inserting an error value in its place in
     // the registry.
-    pub fn device_make_invalid<A: HalApi>(&self, device_id: DeviceId) {
-        let hub = A::hub(self);
+    pub fn device_make_invalid(&self, device_id: DeviceId) {
+        let hub = &self.hub;
         hub.devices.force_replace_with_error(device_id);
     }
 
-    pub fn pipeline_cache_get_data<A: HalApi>(&self, id: id::PipelineCacheId) -> Option<Vec<u8>> {
+    pub fn pipeline_cache_get_data(&self, id: id::PipelineCacheId) -> Option<Vec<u8>> {
         use crate::pipeline_cache;
         api_log!("PipelineCache::get_data");
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Ok(cache) = hub.pipeline_caches.get(id) {
             // TODO: Is this check needed?
@@ -2204,11 +2211,11 @@ impl Global {
         None
     }
 
-    pub fn device_drop<A: HalApi>(&self, device_id: DeviceId) {
+    pub fn device_drop(&self, device_id: DeviceId) {
         profiling::scope!("Device::drop");
         api_log!("Device::drop {device_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
         if let Some(device) = hub.devices.unregister(device_id) {
             let device_lost_closure = device.lock_life().device_lost_closure.take();
             if let Some(closure) = device_lost_closure {
@@ -2228,12 +2235,12 @@ impl Global {
 
     // This closure will be called exactly once during "lose the device",
     // or when it is replaced.
-    pub fn device_set_device_lost_closure<A: HalApi>(
+    pub fn device_set_device_lost_closure(
         &self,
         device_id: DeviceId,
         device_lost_closure: DeviceLostClosure,
     ) {
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Ok(device) = hub.devices.get(device_id) {
             let mut life_tracker = device.lock_life();
@@ -2253,10 +2260,10 @@ impl Global {
         }
     }
 
-    pub fn device_destroy<A: HalApi>(&self, device_id: DeviceId) {
+    pub fn device_destroy(&self, device_id: DeviceId) {
         api_log!("Device::destroy {device_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Ok(device) = hub.devices.get(device_id) {
             // Follow the steps at
@@ -2279,21 +2286,18 @@ impl Global {
         }
     }
 
-    pub fn device_mark_lost<A: HalApi>(&self, device_id: DeviceId, message: &str) {
+    pub fn device_mark_lost(&self, device_id: DeviceId, message: &str) {
         api_log!("Device::mark_lost {device_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Ok(device) = hub.devices.get(device_id) {
             device.lose(message);
         }
     }
 
-    pub fn device_get_internal_counters<A: HalApi>(
-        &self,
-        device_id: DeviceId,
-    ) -> wgt::InternalCounters {
-        let hub = A::hub(self);
+    pub fn device_get_internal_counters(&self, device_id: DeviceId) -> wgt::InternalCounters {
+        let hub = &self.hub;
         if let Ok(device) = hub.devices.get(device_id) {
             wgt::InternalCounters {
                 hal: device.get_hal_counters(),
@@ -2304,28 +2308,28 @@ impl Global {
         }
     }
 
-    pub fn device_generate_allocator_report<A: HalApi>(
+    pub fn device_generate_allocator_report(
         &self,
         device_id: DeviceId,
     ) -> Option<wgt::AllocatorReport> {
-        let hub = A::hub(self);
+        let hub = &self.hub;
         hub.devices
             .get(device_id)
             .ok()
             .and_then(|device| device.generate_allocator_report())
     }
 
-    pub fn queue_drop<A: HalApi>(&self, queue_id: QueueId) {
+    pub fn queue_drop(&self, queue_id: QueueId) {
         profiling::scope!("Queue::drop");
         api_log!("Queue::drop {queue_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
         if let Some(queue) = hub.queues.unregister(queue_id) {
             drop(queue);
         }
     }
 
-    pub fn buffer_map_async<A: HalApi>(
+    pub fn buffer_map_async(
         &self,
         buffer_id: id::BufferId,
         offset: BufferAddress,
@@ -2335,7 +2339,7 @@ impl Global {
         profiling::scope!("Buffer::map_async");
         api_log!("Buffer::map_async {buffer_id:?} offset {offset:?} size {size:?} op: {op:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let op_and_err = 'error: {
             let buffer = match hub.buffers.get(buffer_id) {
@@ -2360,7 +2364,7 @@ impl Global {
         Ok(())
     }
 
-    pub fn buffer_get_mapped_range<A: HalApi>(
+    pub fn buffer_get_mapped_range(
         &self,
         buffer_id: id::BufferId,
         offset: BufferAddress,
@@ -2369,7 +2373,7 @@ impl Global {
         profiling::scope!("Buffer::get_mapped_range");
         api_log!("Buffer::get_mapped_range {buffer_id:?} offset {offset:?} size {size:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let buffer = hub
             .buffers
@@ -2441,11 +2445,11 @@ impl Global {
             }
         }
     }
-    pub fn buffer_unmap<A: HalApi>(&self, buffer_id: id::BufferId) -> BufferAccessResult {
+    pub fn buffer_unmap(&self, buffer_id: id::BufferId) -> BufferAccessResult {
         profiling::scope!("unmap", "Buffer");
         api_log!("Buffer::unmap {buffer_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let buffer = hub
             .buffers
diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index 1ee84be933..e6aed78a08 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -3,7 +3,6 @@ use crate::{
         queue::{EncoderInFlight, SubmittedWorkDoneClosure, TempResource},
         DeviceError, DeviceLostClosure,
     },
-    hal_api::HalApi,
     resource::{self, Buffer, Texture, Trackable},
     snatch::SnatchGuard,
     SubmissionIndex,
@@ -22,7 +21,7 @@ use thiserror::Error;
 ///
 /// [`wgpu_hal`]: hal
 /// [`ResourceInfo::submission_index`]: crate::resource::ResourceInfo
-struct ActiveSubmission<A: HalApi> {
+struct ActiveSubmission {
     /// The index of the submission we track.
     ///
     /// When `Device::fence`'s value is greater than or equal to this, our queue
@@ -30,10 +29,10 @@ struct ActiveSubmission<A: HalApi> {
     index: SubmissionIndex,
 
     /// Temporary resources to be freed once this queue submission has completed.
-    temp_resources: Vec<TempResource<A>>,
+    temp_resources: Vec<TempResource>,
 
     /// Buffers to be mapped once this submission has completed.
-    mapped: Vec<Arc<Buffer<A>>>,
+    mapped: Vec<Arc<Buffer>>,
 
     /// Command buffers used by this submission, and the encoder that owns them.
     ///
@@ -47,18 +46,18 @@ struct ActiveSubmission<A: HalApi> {
     /// the command encoder is recycled.
     ///
     /// [`wgpu_hal::Queue::submit`]: hal::Queue::submit
-    encoders: Vec<EncoderInFlight<A>>,
+    encoders: Vec<EncoderInFlight>,
 
     /// List of queue "on_submitted_work_done" closures to be called once this
     /// submission has completed.
     work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>,
 }
 
-impl<A: HalApi> ActiveSubmission<A> {
+impl ActiveSubmission {
     /// Returns true if this submission contains the given buffer.
     ///
     /// This only uses constant-time operations.
-    pub fn contains_buffer(&self, buffer: &Buffer<A>) -> bool {
+    pub fn contains_buffer(&self, buffer: &Buffer) -> bool {
         for encoder in &self.encoders {
             // The ownership location of buffers depends on where the command encoder
             // came from. If it is the staging command encoder on the queue, it is
@@ -83,7 +82,7 @@ impl<A: HalApi> ActiveSubmission<A> {
     /// Returns true if this submission contains the given texture.
     ///
     /// This only uses constant-time operations.
-    pub fn contains_texture(&self, texture: &Texture<A>) -> bool {
+    pub fn contains_texture(&self, texture: &Texture) -> bool {
         for encoder in &self.encoders {
             // The ownership location of textures depends on where the command encoder
             // came from. If it is the staging command encoder on the queue, it is
@@ -150,11 +149,11 @@ pub enum WaitIdleError {
 ///
 /// Only calling `Global::buffer_map_async` clones a new `Arc` for the
 /// buffer. This new `Arc` is only dropped by `handle_mapping`.
-pub(crate) struct LifetimeTracker<A: HalApi> {
+pub(crate) struct LifetimeTracker {
     /// Buffers for which a call to [`Buffer::map_async`] has succeeded, but
     /// which haven't been examined by `triage_mapped` yet to decide when they
     /// can be mapped.
-    mapped: Vec<Arc<Buffer<A>>>,
+    mapped: Vec<Arc<Buffer>>,
 
     /// Resources used by queue submissions still in flight. One entry per
     /// submission, with older submissions appearing before younger.
@@ -162,11 +161,11 @@ pub(crate) struct LifetimeTracker<A: HalApi> {
     /// Entries are added by `track_submission` and drained by
     /// `LifetimeTracker::triage_submissions`. Lots of methods contribute data
     /// to particular entries.
-    active: Vec<ActiveSubmission<A>>,
+    active: Vec<ActiveSubmission>,
 
     /// Buffers the user has asked us to map, and which are not used by any
     /// queue submission still in flight.
-    ready_to_map: Vec<Arc<Buffer<A>>>,
+    ready_to_map: Vec<Arc<Buffer>>,
 
     /// Queue "on_submitted_work_done" closures that were initiated for while there is no
     /// currently pending submissions. These cannot be immediately invoked as they
@@ -180,7 +179,7 @@ pub(crate) struct LifetimeTracker<A: HalApi> {
     pub device_lost_closure: Option<DeviceLostClosure>,
 }
 
-impl<A: HalApi> LifetimeTracker<A> {
+impl LifetimeTracker {
     pub fn new() -> Self {
         Self {
             mapped: Vec::new(),
@@ -200,8 +199,8 @@ impl<A: HalApi> LifetimeTracker<A> {
     pub fn track_submission(
         &mut self,
         index: SubmissionIndex,
-        temp_resources: impl Iterator<Item = TempResource<A>>,
-        encoders: Vec<EncoderInFlight<A>>,
+        temp_resources: impl Iterator<Item = TempResource>,
+        encoders: Vec<EncoderInFlight>,
     ) {
         self.active.push(ActiveSubmission {
             index,
@@ -212,16 +211,13 @@ impl<A: HalApi> LifetimeTracker<A> {
         });
     }
 
-    pub(crate) fn map(&mut self, value: &Arc<Buffer<A>>) {
+    pub(crate) fn map(&mut self, value: &Arc<Buffer>) {
         self.mapped.push(value.clone());
     }
 
     /// Returns the submission index of the most recent submission that uses the
     /// given buffer.
-    pub fn get_buffer_latest_submission_index(
-        &self,
-        buffer: &Buffer<A>,
-    ) -> Option<SubmissionIndex> {
+    pub fn get_buffer_latest_submission_index(&self, buffer: &Buffer) -> Option<SubmissionIndex> {
         // We iterate in reverse order, so that we can bail out early as soon
         // as we find a hit.
         self.active.iter().rev().find_map(|submission| {
@@ -237,7 +233,7 @@ impl<A: HalApi> LifetimeTracker<A> {
     /// given texture.
     pub fn get_texture_latest_submission_index(
         &self,
-        texture: &Texture<A>,
+        texture: &Texture,
     ) -> Option<SubmissionIndex> {
         // We iterate in reverse order, so that we can bail out early as soon
         // as we find a hit.
@@ -295,7 +291,7 @@ impl<A: HalApi> LifetimeTracker<A> {
 
     pub fn schedule_resource_destruction(
         &mut self,
-        temp_resource: TempResource<A>,
+        temp_resource: TempResource,
         last_submit_index: SubmissionIndex,
     ) {
         let resources = self
diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index c6f88b2634..ac35ec7530 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -1,6 +1,5 @@
 use crate::{
     binding_model,
-    hal_api::HalApi,
     hub::Hub,
     id::{BindGroupLayoutId, PipelineLayoutId},
     resource::{
@@ -19,7 +18,6 @@ use wgt::{BufferAddress, DeviceLostReason, TextureFormat};
 
 use std::num::NonZeroU32;
 
-pub mod any_device;
 pub(crate) mod bgl;
 pub mod global;
 mod life;
@@ -299,9 +297,9 @@ impl DeviceLostClosure {
     }
 }
 
-fn map_buffer<A: HalApi>(
+fn map_buffer(
     raw: &dyn hal::DynDevice,
-    buffer: &Buffer<A>,
+    buffer: &Buffer,
     offset: BufferAddress,
     size: BufferAddress,
     kind: HostMap,
@@ -434,13 +432,21 @@ pub struct ImplicitPipelineIds<'a> {
 }
 
 impl ImplicitPipelineIds<'_> {
-    fn prepare<A: HalApi>(self, hub: &Hub<A>) -> ImplicitPipelineContext {
+    fn prepare(self, hub: &Hub) -> ImplicitPipelineContext {
+        let backend = self.root_id.backend();
         ImplicitPipelineContext {
-            root_id: hub.pipeline_layouts.prepare(Some(self.root_id)).into_id(),
+            root_id: hub
+                .pipeline_layouts
+                .prepare(backend, Some(self.root_id))
+                .into_id(),
             group_ids: self
                 .group_ids
                 .iter()
-                .map(|id_in| hub.bind_group_layouts.prepare(Some(*id_in)).into_id())
+                .map(|id_in| {
+                    hub.bind_group_layouts
+                        .prepare(backend, Some(*id_in))
+                        .into_id()
+                })
                 .collect(),
         }
     }
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index cca59e0b1a..deab6bff21 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -10,7 +10,6 @@ use crate::{
     device::{DeviceError, WaitIdleError},
     get_lowest_common_denom,
     global::Global,
-    hal_api::HalApi,
     hal_label,
     id::{self, QueueId},
     init_tracker::{has_copy_partial_init_tracker_coverage, TextureInitRange},
@@ -37,13 +36,13 @@ use thiserror::Error;
 
 use super::Device;
 
-pub struct Queue<A: HalApi> {
+pub struct Queue {
     raw: ManuallyDrop<Box<dyn hal::DynQueue>>,
-    pub(crate) device: Arc<Device<A>>,
+    pub(crate) device: Arc<Device>,
 }
 
-impl<A: HalApi> Queue<A> {
-    pub(crate) fn new(device: Arc<Device<A>>, raw: Box<dyn hal::DynQueue>) -> Self {
+impl Queue {
+    pub(crate) fn new(device: Arc<Device>, raw: Box<dyn hal::DynQueue>) -> Self {
         Queue {
             raw: ManuallyDrop::new(raw),
             device,
@@ -55,17 +54,17 @@ impl<A: HalApi> Queue<A> {
     }
 }
 
-crate::impl_resource_type_generic!(Queue);
+crate::impl_resource_type!(Queue);
 // TODO: https://github.com/gfx-rs/wgpu/issues/4014
-impl<A: HalApi> Labeled for Queue<A> {
+impl Labeled for Queue {
     fn label(&self) -> &str {
         ""
     }
 }
 crate::impl_parent_device!(Queue);
-crate::impl_storage_item_generic!(Queue);
+crate::impl_storage_item!(Queue);
 
-impl<A: HalApi> Drop for Queue<A> {
+impl Drop for Queue {
     fn drop(&mut self) {
         resource_log!("Drop {}", self.error_ident());
         // SAFETY: we never access `self.raw` beyond this point.
@@ -141,10 +140,10 @@ impl SubmittedWorkDoneClosure {
 /// - `ActiveSubmission::temp_resources`: temporary resources used by a queue
 ///   submission, to be freed when it completes
 #[derive(Debug)]
-pub enum TempResource<A: HalApi> {
-    StagingBuffer(FlushedStagingBuffer<A>),
-    DestroyedBuffer(DestroyedBuffer<A>),
-    DestroyedTexture(DestroyedTexture<A>),
+pub enum TempResource {
+    StagingBuffer(FlushedStagingBuffer),
+    DestroyedBuffer(DestroyedBuffer),
+    DestroyedTexture(DestroyedTexture),
 }
 
 /// A series of raw [`CommandBuffer`]s that have been submitted to a
@@ -152,18 +151,18 @@ pub enum TempResource<A: HalApi> {
 ///
 /// [`CommandBuffer`]: hal::Api::CommandBuffer
 /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder
-pub(crate) struct EncoderInFlight<A: HalApi> {
+pub(crate) struct EncoderInFlight {
     raw: Box<dyn hal::DynCommandEncoder>,
     cmd_buffers: Vec<Box<dyn hal::DynCommandBuffer>>,
-    pub(crate) trackers: Tracker<A>,
+    pub(crate) trackers: Tracker,
 
     /// These are the buffers that have been tracked by `PendingWrites`.
-    pub(crate) pending_buffers: FastHashMap<TrackerIndex, Arc<Buffer<A>>>,
+    pub(crate) pending_buffers: FastHashMap<TrackerIndex, Arc<Buffer>>,
     /// These are the textures that have been tracked by `PendingWrites`.
-    pub(crate) pending_textures: FastHashMap<TrackerIndex, Arc<Texture<A>>>,
+    pub(crate) pending_textures: FastHashMap<TrackerIndex, Arc<Texture>>,
 }
 
-impl<A: HalApi> EncoderInFlight<A> {
+impl EncoderInFlight {
     /// Free all of our command buffers.
     ///
     /// Return the command encoder, fully reset and ready to be
@@ -203,7 +202,7 @@ impl<A: HalApi> EncoderInFlight<A> {
 ///
 /// All uses of [`StagingBuffer`]s end up here.
 #[derive(Debug)]
-pub(crate) struct PendingWrites<A: HalApi> {
+pub(crate) struct PendingWrites {
     pub command_encoder: Box<dyn hal::DynCommandEncoder>,
 
     /// True if `command_encoder` is in the "recording" state, as
@@ -213,12 +212,12 @@ pub(crate) struct PendingWrites<A: HalApi> {
     /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder
     pub is_recording: bool,
 
-    temp_resources: Vec<TempResource<A>>,
-    dst_buffers: FastHashMap<TrackerIndex, Arc<Buffer<A>>>,
-    dst_textures: FastHashMap<TrackerIndex, Arc<Texture<A>>>,
+    temp_resources: Vec<TempResource>,
+    dst_buffers: FastHashMap<TrackerIndex, Arc<Buffer>>,
+    dst_textures: FastHashMap<TrackerIndex, Arc<Texture>>,
 }
 
-impl<A: HalApi> PendingWrites<A> {
+impl PendingWrites {
     pub fn new(command_encoder: Box<dyn hal::DynCommandEncoder>) -> Self {
         Self {
             command_encoder,
@@ -240,29 +239,29 @@ impl<A: HalApi> PendingWrites<A> {
         self.temp_resources.clear();
     }
 
-    pub fn insert_buffer(&mut self, buffer: &Arc<Buffer<A>>) {
+    pub fn insert_buffer(&mut self, buffer: &Arc<Buffer>) {
         self.dst_buffers
             .insert(buffer.tracker_index(), buffer.clone());
     }
 
-    pub fn insert_texture(&mut self, texture: &Arc<Texture<A>>) {
+    pub fn insert_texture(&mut self, texture: &Arc<Texture>) {
         self.dst_textures
             .insert(texture.tracker_index(), texture.clone());
     }
 
-    pub fn contains_buffer(&self, buffer: &Arc<Buffer<A>>) -> bool {
+    pub fn contains_buffer(&self, buffer: &Arc<Buffer>) -> bool {
         self.dst_buffers.contains_key(&buffer.tracker_index())
     }
 
-    pub fn contains_texture(&self, texture: &Arc<Texture<A>>) -> bool {
+    pub fn contains_texture(&self, texture: &Arc<Texture>) -> bool {
         self.dst_textures.contains_key(&texture.tracker_index())
     }
 
-    pub fn consume_temp(&mut self, resource: TempResource<A>) {
+    pub fn consume_temp(&mut self, resource: TempResource) {
         self.temp_resources.push(resource);
     }
 
-    pub fn consume(&mut self, buffer: FlushedStagingBuffer<A>) {
+    pub fn consume(&mut self, buffer: FlushedStagingBuffer) {
         self.temp_resources
             .push(TempResource::StagingBuffer(buffer));
     }
@@ -272,7 +271,7 @@ impl<A: HalApi> PendingWrites<A> {
         command_allocator: &CommandAllocator,
         device: &dyn hal::DynDevice,
         queue: &dyn hal::DynQueue,
-    ) -> Result<Option<EncoderInFlight<A>>, DeviceError> {
+    ) -> Result<Option<EncoderInFlight>, DeviceError> {
         if self.is_recording {
             let pending_buffers = mem::take(&mut self.dst_buffers);
             let pending_textures = mem::take(&mut self.dst_textures);
@@ -362,7 +361,7 @@ pub enum QueueSubmitError {
 //TODO: move out common parts of write_xxx.
 
 impl Global {
-    pub fn queue_write_buffer<A: HalApi>(
+    pub fn queue_write_buffer(
         &self,
         queue_id: QueueId,
         buffer_id: id::BufferId,
@@ -372,7 +371,7 @@ impl Global {
         profiling::scope!("Queue::write_buffer");
         api_log!("Queue::write_buffer {buffer_id:?} {}bytes", data.len());
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let buffer = hub
             .buffers
@@ -433,14 +432,14 @@ impl Global {
         result
     }
 
-    pub fn queue_create_staging_buffer<A: HalApi>(
+    pub fn queue_create_staging_buffer(
         &self,
         queue_id: QueueId,
         buffer_size: wgt::BufferSize,
         id_in: Option<id::StagingBufferId>,
     ) -> Result<(id::StagingBufferId, NonNull<u8>), QueueWriteError> {
         profiling::scope!("Queue::create_staging_buffer");
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let queue = hub
             .queues
@@ -452,14 +451,14 @@ impl Global {
         let staging_buffer = StagingBuffer::new(device, buffer_size)?;
         let ptr = unsafe { staging_buffer.ptr() };
 
-        let fid = hub.staging_buffers.prepare(id_in);
+        let fid = hub.staging_buffers.prepare(queue_id.backend(), id_in);
         let id = fid.assign(Arc::new(staging_buffer));
         resource_log!("Queue::create_staging_buffer {id:?}");
 
         Ok((id, ptr))
     }
 
-    pub fn queue_write_staging_buffer<A: HalApi>(
+    pub fn queue_write_staging_buffer(
         &self,
         queue_id: QueueId,
         buffer_id: id::BufferId,
@@ -467,7 +466,7 @@ impl Global {
         staging_buffer_id: id::StagingBufferId,
     ) -> Result<(), QueueWriteError> {
         profiling::scope!("Queue::write_staging_buffer");
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let queue = hub
             .queues
@@ -503,7 +502,7 @@ impl Global {
         result
     }
 
-    pub fn queue_validate_write_buffer<A: HalApi>(
+    pub fn queue_validate_write_buffer(
         &self,
         _queue_id: QueueId,
         buffer_id: id::BufferId,
@@ -511,7 +510,7 @@ impl Global {
         buffer_size: wgt::BufferSize,
     ) -> Result<(), QueueWriteError> {
         profiling::scope!("Queue::validate_write_buffer");
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let buffer = hub
             .buffers
@@ -523,9 +522,9 @@ impl Global {
         Ok(())
     }
 
-    fn queue_validate_write_buffer_impl<A: HalApi>(
+    fn queue_validate_write_buffer_impl(
         &self,
-        buffer: &Buffer<A>,
+        buffer: &Buffer,
         buffer_offset: u64,
         buffer_size: wgt::BufferSize,
     ) -> Result<(), TransferError> {
@@ -548,16 +547,16 @@ impl Global {
         Ok(())
     }
 
-    fn queue_write_staging_buffer_impl<A: HalApi>(
+    fn queue_write_staging_buffer_impl(
         &self,
-        queue: &Arc<Queue<A>>,
-        device: &Arc<Device<A>>,
-        pending_writes: &mut PendingWrites<A>,
-        staging_buffer: &FlushedStagingBuffer<A>,
+        queue: &Arc<Queue>,
+        device: &Arc<Device>,
+        pending_writes: &mut PendingWrites,
+        staging_buffer: &FlushedStagingBuffer,
         buffer_id: id::BufferId,
         buffer_offset: u64,
     ) -> Result<(), QueueWriteError> {
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let dst = hub
             .buffers
@@ -606,7 +605,7 @@ impl Global {
         Ok(())
     }
 
-    pub fn queue_write_texture<A: HalApi>(
+    pub fn queue_write_texture(
         &self,
         queue_id: QueueId,
         destination: &ImageCopyTexture,
@@ -617,7 +616,7 @@ impl Global {
         profiling::scope!("Queue::write_texture");
         api_log!("Queue::write_texture {:?} {size:?}", destination.texture);
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let queue = hub
             .queues
@@ -849,7 +848,7 @@ impl Global {
     }
 
     #[cfg(webgl)]
-    pub fn queue_copy_external_image_to_texture<A: HalApi>(
+    pub fn queue_copy_external_image_to_texture(
         &self,
         queue_id: QueueId,
         source: &wgt::ImageCopyExternalImage,
@@ -858,7 +857,7 @@ impl Global {
     ) -> Result<(), QueueWriteError> {
         profiling::scope!("Queue::copy_external_image_to_texture");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let queue = hub
             .queues
@@ -1039,7 +1038,7 @@ impl Global {
         Ok(())
     }
 
-    pub fn queue_submit<A: HalApi>(
+    pub fn queue_submit(
         &self,
         queue_id: QueueId,
         command_buffer_ids: &[id::CommandBufferId],
@@ -1048,7 +1047,7 @@ impl Global {
         api_log!("Queue::submit {queue_id:?}");
 
         let (submit_index, callbacks) = {
-            let hub = A::hub(self);
+            let hub = &self.hub;
 
             let queue = hub
                 .queues
@@ -1186,13 +1185,13 @@ impl Global {
 
                         //Note: locking the trackers has to be done after the storages
                         let mut trackers = device.trackers.lock();
-                        baked.initialize_buffer_memory(&mut *trackers, &snatch_guard)?;
-                        baked.initialize_texture_memory(&mut *trackers, device, &snatch_guard)?;
+                        baked.initialize_buffer_memory(&mut trackers, &snatch_guard)?;
+                        baked.initialize_texture_memory(&mut trackers, device, &snatch_guard)?;
                         //Note: stateless trackers are not merged:
                         // device already knows these resources exist.
                         CommandBuffer::insert_barriers_from_device_tracker(
                             baked.encoder.as_mut(),
-                            &mut *trackers,
+                            &mut trackers,
                             &baked.trackers,
                             &snatch_guard,
                         );
@@ -1353,18 +1352,15 @@ impl Global {
         Ok(submit_index)
     }
 
-    pub fn queue_get_timestamp_period<A: HalApi>(
-        &self,
-        queue_id: QueueId,
-    ) -> Result<f32, InvalidQueue> {
-        let hub = A::hub(self);
+    pub fn queue_get_timestamp_period(&self, queue_id: QueueId) -> Result<f32, InvalidQueue> {
+        let hub = &self.hub;
         match hub.queues.get(queue_id) {
             Ok(queue) => Ok(unsafe { queue.raw().get_timestamp_period() }),
             Err(_) => Err(InvalidQueue),
         }
     }
 
-    pub fn queue_on_submitted_work_done<A: HalApi>(
+    pub fn queue_on_submitted_work_done(
         &self,
         queue_id: QueueId,
         closure: SubmittedWorkDoneClosure,
@@ -1372,7 +1368,7 @@ impl Global {
         api_log!("Queue::on_submitted_work_done {queue_id:?}");
 
         //TODO: flush pending writes
-        let hub = A::hub(self);
+        let hub = &self.hub;
         match hub.queues.get(queue_id) {
             Ok(queue) => queue.device.lock_life().add_work_done_closure(closure),
             Err(_) => return Err(InvalidQueue),
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index a7ce999407..5f50d38c8b 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -11,7 +11,6 @@ use crate::{
         AttachmentData, DeviceLostInvocation, HostMap, MissingDownlevelFlags, MissingFeatures,
         RenderPassContext, CLEANUP_WAIT_MS,
     },
-    hal_api::HalApi,
     hal_label,
     init_tracker::{
         BufferInitTracker, BufferInitTrackerAction, MemoryInitKind, TextureInitRange,
@@ -77,10 +76,10 @@ use super::{
 /// Important:
 /// When locking pending_writes please check that trackers is not locked
 /// trackers should be locked only when needed for the shortest time possible
-pub struct Device<A: HalApi> {
+pub struct Device {
     raw: ManuallyDrop<Box<dyn hal::DynDevice>>,
     pub(crate) adapter: Arc<Adapter>,
-    pub(crate) queue: OnceCell<Weak<Queue<A>>>,
+    pub(crate) queue: OnceCell<Weak<Queue>>,
     queue_to_drop: OnceCell<Box<dyn hal::DynQueue>>,
     pub(crate) zero_buffer: ManuallyDrop<Box<dyn hal::DynBuffer>>,
     /// The `label` from the descriptor used to create the resource.
@@ -130,30 +129,30 @@ pub struct Device<A: HalApi> {
     ///
     /// Has to be locked temporarily only (locked last)
     /// and never before pending_writes
-    pub(crate) trackers: Mutex<DeviceTracker<A>>,
+    pub(crate) trackers: Mutex<DeviceTracker>,
     pub(crate) tracker_indices: TrackerIndexAllocators,
     // Life tracker should be locked right after the device and before anything else.
-    life_tracker: Mutex<LifetimeTracker<A>>,
+    life_tracker: Mutex<LifetimeTracker>,
     /// Pool of bind group layouts, allowing deduplication.
-    pub(crate) bgl_pool: ResourcePool<bgl::EntryMap, BindGroupLayout<A>>,
+    pub(crate) bgl_pool: ResourcePool<bgl::EntryMap, BindGroupLayout>,
     pub(crate) alignments: hal::Alignments,
     pub(crate) limits: wgt::Limits,
     pub(crate) features: wgt::Features,
     pub(crate) downlevel: wgt::DownlevelCapabilities,
     pub(crate) instance_flags: wgt::InstanceFlags,
-    pub(crate) pending_writes: Mutex<ManuallyDrop<PendingWrites<A>>>,
-    pub(crate) deferred_destroy: Mutex<Vec<DeferredDestroy<A>>>,
+    pub(crate) pending_writes: Mutex<ManuallyDrop<PendingWrites>>,
+    pub(crate) deferred_destroy: Mutex<Vec<DeferredDestroy>>,
     #[cfg(feature = "trace")]
     pub(crate) trace: Mutex<Option<trace::Trace>>,
-    pub(crate) usage_scopes: UsageScopePool<A>,
+    pub(crate) usage_scopes: UsageScopePool,
 }
 
-pub(crate) enum DeferredDestroy<A: HalApi> {
-    TextureView(Weak<TextureView<A>>),
-    BindGroup(Weak<BindGroup<A>>),
+pub(crate) enum DeferredDestroy {
+    TextureView(Weak<TextureView>),
+    BindGroup(Weak<BindGroup>),
 }
 
-impl<A: HalApi> std::fmt::Debug for Device<A> {
+impl std::fmt::Debug for Device {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         f.debug_struct("Device")
             .field("label", &self.label())
@@ -164,7 +163,7 @@ impl<A: HalApi> std::fmt::Debug for Device<A> {
     }
 }
 
-impl<A: HalApi> Drop for Device<A> {
+impl Drop for Device {
     fn drop(&mut self) {
         resource_log!("Drop {}", self.error_ident());
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
@@ -194,7 +193,7 @@ pub enum CreateDeviceError {
     FailedToCreateZeroBuffer(#[from] DeviceError),
 }
 
-impl<A: HalApi> Device<A> {
+impl Device {
     pub(crate) fn raw(&self) -> &dyn hal::DynDevice {
         self.raw.as_ref()
     }
@@ -218,7 +217,7 @@ impl<A: HalApi> Device<A> {
     }
 }
 
-impl<A: HalApi> Device<A> {
+impl Device {
     pub(crate) fn new(
         raw_device: Box<dyn hal::DynDevice>,
         raw_queue: &dyn hal::DynQueue,
@@ -238,7 +237,7 @@ impl<A: HalApi> Device<A> {
         let pending_encoder = command_allocator
             .acquire_encoder(raw_device.as_ref(), raw_queue)
             .map_err(|_| CreateDeviceError::OutOfMemory)?;
-        let mut pending_writes = PendingWrites::<A>::new(pending_encoder);
+        let mut pending_writes = PendingWrites::new(pending_encoder);
 
         // Create zeroed buffer used for texture clears.
         let zero_buffer = unsafe {
@@ -297,7 +296,7 @@ impl<A: HalApi> Device<A> {
                     Ok(mut trace) => {
                         trace.add(trace::Action::Init {
                             desc: desc.clone(),
-                            backend: A::VARIANT,
+                            backend: adapter.raw.backend(),
                         });
                         Some(trace)
                     }
@@ -321,6 +320,11 @@ impl<A: HalApi> Device<A> {
         })
     }
 
+    /// Returns the backend this device is using.
+    pub fn backend(&self) -> wgt::Backend {
+        self.adapter.raw.backend()
+    }
+
     pub fn is_valid(&self) -> bool {
         self.valid.load(Ordering::Acquire)
     }
@@ -337,7 +341,7 @@ impl<A: HalApi> Device<A> {
         assert!(self.queue_to_drop.set(queue).is_ok());
     }
 
-    pub(crate) fn lock_life<'a>(&'a self) -> MutexGuard<'a, LifetimeTracker<A>> {
+    pub(crate) fn lock_life<'a>(&'a self) -> MutexGuard<'a, LifetimeTracker> {
         self.life_tracker.lock()
     }
 
@@ -384,11 +388,11 @@ impl<A: HalApi> Device<A> {
         }
     }
 
-    pub fn get_queue(&self) -> Option<Arc<Queue<A>>> {
+    pub fn get_queue(&self) -> Option<Arc<Queue>> {
         self.queue.get().as_ref()?.upgrade()
     }
 
-    pub fn set_queue(&self, queue: &Arc<Queue<A>>) {
+    pub fn set_queue(&self, queue: &Arc<Queue>) {
         assert!(self.queue.set(Arc::downgrade(queue)).is_ok());
     }
 
@@ -504,7 +508,7 @@ impl<A: HalApi> Device<A> {
     pub(crate) fn create_buffer(
         self: &Arc<Self>,
         desc: &resource::BufferDescriptor,
-    ) -> Result<Arc<Buffer<A>>, resource::CreateBufferError> {
+    ) -> Result<Arc<Buffer>, resource::CreateBufferError> {
         self.check_is_valid()?;
 
         if desc.size > self.limits.max_buffer_size {
@@ -652,7 +656,7 @@ impl<A: HalApi> Device<A> {
         self: &Arc<Self>,
         hal_texture: Box<dyn hal::DynTexture>,
         desc: &resource::TextureDescriptor,
-    ) -> Result<Arc<Texture<A>>, resource::CreateTextureError> {
+    ) -> Result<Arc<Texture>, resource::CreateTextureError> {
         let format_features = self
             .describe_format_features(desc.format)
             .map_err(|error| resource::CreateTextureError::MissingFeatures(desc.format, error))?;
@@ -679,11 +683,11 @@ impl<A: HalApi> Device<A> {
 
     pub fn create_buffer_from_hal(
         self: &Arc<Self>,
-        hal_buffer: A::Buffer,
+        hal_buffer: Box<dyn hal::DynBuffer>,
         desc: &resource::BufferDescriptor,
-    ) -> Arc<Buffer<A>> {
+    ) -> Arc<Buffer> {
         let buffer = Buffer {
-            raw: Snatchable::new(Box::new(hal_buffer)),
+            raw: Snatchable::new(hal_buffer),
             device: self.clone(),
             usage: desc.usage,
             size: desc.size,
@@ -710,7 +714,7 @@ impl<A: HalApi> Device<A> {
     pub(crate) fn create_texture(
         self: &Arc<Self>,
         desc: &resource::TextureDescriptor,
-    ) -> Result<Arc<Texture<A>>, resource::CreateTextureError> {
+    ) -> Result<Arc<Texture>, resource::CreateTextureError> {
         use resource::{CreateTextureError, TextureDimensionError};
 
         self.check_is_valid()?;
@@ -1017,9 +1021,9 @@ impl<A: HalApi> Device<A> {
 
     pub(crate) fn create_texture_view(
         self: &Arc<Self>,
-        texture: &Arc<Texture<A>>,
+        texture: &Arc<Texture>,
         desc: &resource::TextureViewDescriptor,
-    ) -> Result<Arc<TextureView<A>>, resource::CreateTextureViewError> {
+    ) -> Result<Arc<TextureView>, resource::CreateTextureViewError> {
         self.check_is_valid()?;
 
         let snatch_guard = texture.device.snatchable_lock.read();
@@ -1323,7 +1327,7 @@ impl<A: HalApi> Device<A> {
     pub(crate) fn create_sampler(
         self: &Arc<Self>,
         desc: &resource::SamplerDescriptor,
-    ) -> Result<Arc<Sampler<A>>, resource::CreateSamplerError> {
+    ) -> Result<Arc<Sampler>, resource::CreateSamplerError> {
         self.check_is_valid()?;
 
         if desc
@@ -1438,7 +1442,7 @@ impl<A: HalApi> Device<A> {
         self: &Arc<Self>,
         desc: &pipeline::ShaderModuleDescriptor<'a>,
         source: pipeline::ShaderModuleSource<'a>,
-    ) -> Result<Arc<pipeline::ShaderModule<A>>, pipeline::CreateShaderModuleError> {
+    ) -> Result<Arc<pipeline::ShaderModule>, pipeline::CreateShaderModuleError> {
         self.check_is_valid()?;
 
         let (module, source) = match source {
@@ -1567,7 +1571,7 @@ impl<A: HalApi> Device<A> {
         self: &Arc<Self>,
         desc: &pipeline::ShaderModuleDescriptor<'a>,
         source: &'a [u32],
-    ) -> Result<Arc<pipeline::ShaderModule<A>>, pipeline::CreateShaderModuleError> {
+    ) -> Result<Arc<pipeline::ShaderModule>, pipeline::CreateShaderModuleError> {
         self.check_is_valid()?;
 
         self.require_features(wgt::Features::SPIRV_SHADER_PASSTHROUGH)?;
@@ -1606,7 +1610,7 @@ impl<A: HalApi> Device<A> {
     pub(crate) fn create_command_encoder(
         self: &Arc<Self>,
         label: &crate::Label,
-    ) -> Result<Arc<command::CommandBuffer<A>>, DeviceError> {
+    ) -> Result<Arc<command::CommandBuffer>, DeviceError> {
         self.check_is_valid()?;
 
         let queue = self.get_queue().unwrap();
@@ -1626,7 +1630,7 @@ impl<A: HalApi> Device<A> {
     //TODO: should this be combined with `get_introspection_bind_group_layouts` in some way?
     pub(crate) fn make_late_sized_buffer_groups(
         shader_binding_sizes: &FastHashMap<naga::ResourceBinding, wgt::BufferSize>,
-        layout: &binding_model::PipelineLayout<A>,
+        layout: &binding_model::PipelineLayout,
     ) -> ArrayVec<pipeline::LateSizedBufferGroup, { hal::MAX_BIND_GROUPS }> {
         // Given the shader-required binding sizes and the pipeline layout,
         // return the filtered list of them in the layout order,
@@ -1664,7 +1668,7 @@ impl<A: HalApi> Device<A> {
         label: &crate::Label,
         entry_map: bgl::EntryMap,
         origin: bgl::Origin,
-    ) -> Result<Arc<BindGroupLayout<A>>, binding_model::CreateBindGroupLayoutError> {
+    ) -> Result<Arc<BindGroupLayout>, binding_model::CreateBindGroupLayoutError> {
         #[derive(PartialEq)]
         enum WritableStorage {
             Yes,
@@ -1878,13 +1882,13 @@ impl<A: HalApi> Device<A> {
 
     pub(crate) fn create_buffer_binding<'a>(
         self: &Arc<Self>,
-        bb: &'a binding_model::ResolvedBufferBinding<A>,
+        bb: &'a binding_model::ResolvedBufferBinding,
         binding: u32,
         decl: &wgt::BindGroupLayoutEntry,
-        used_buffer_ranges: &mut Vec<BufferInitTrackerAction<A>>,
+        used_buffer_ranges: &mut Vec<BufferInitTrackerAction>,
         dynamic_binding_info: &mut Vec<binding_model::BindGroupDynamicBindingData>,
         late_buffer_binding_sizes: &mut FastHashMap<u32, wgt::BufferSize>,
-        used: &mut BindGroupStates<A>,
+        used: &mut BindGroupStates,
         limits: &wgt::Limits,
         snatch_guard: &'a SnatchGuard<'a>,
     ) -> Result<hal::BufferBinding<'a, dyn hal::DynBuffer>, binding_model::CreateBindGroupError>
@@ -2016,10 +2020,10 @@ impl<A: HalApi> Device<A> {
 
     fn create_sampler_binding<'a>(
         self: &Arc<Self>,
-        used: &mut BindGroupStates<A>,
+        used: &mut BindGroupStates,
         binding: u32,
         decl: &wgt::BindGroupLayoutEntry,
-        sampler: &'a Arc<Sampler<A>>,
+        sampler: &'a Arc<Sampler>,
     ) -> Result<&'a dyn hal::DynSampler, binding_model::CreateBindGroupError> {
         use crate::binding_model::CreateBindGroupError as Error;
 
@@ -2067,9 +2071,9 @@ impl<A: HalApi> Device<A> {
         self: &Arc<Self>,
         binding: u32,
         decl: &wgt::BindGroupLayoutEntry,
-        view: &'a Arc<TextureView<A>>,
-        used: &mut BindGroupStates<A>,
-        used_texture_ranges: &mut Vec<TextureInitTrackerAction<A>>,
+        view: &'a Arc<TextureView>,
+        used: &mut BindGroupStates,
+        used_texture_ranges: &mut Vec<TextureInitTrackerAction>,
         snatch_guard: &'a SnatchGuard<'a>,
     ) -> Result<hal::TextureBinding<'a, dyn hal::DynTextureView>, binding_model::CreateBindGroupError>
     {
@@ -2109,8 +2113,8 @@ impl<A: HalApi> Device<A> {
     // (not passing a duplicate) beforehand.
     pub(crate) fn create_bind_group(
         self: &Arc<Self>,
-        desc: binding_model::ResolvedBindGroupDescriptor<A>,
-    ) -> Result<Arc<BindGroup<A>>, binding_model::CreateBindGroupError> {
+        desc: binding_model::ResolvedBindGroupDescriptor,
+    ) -> Result<Arc<BindGroup>, binding_model::CreateBindGroupError> {
         use crate::binding_model::{CreateBindGroupError as Error, ResolvedBindingResource as Br};
 
         let layout = desc.layout;
@@ -2357,7 +2361,7 @@ impl<A: HalApi> Device<A> {
         self: &Arc<Self>,
         binding: u32,
         decl: &wgt::BindGroupLayoutEntry,
-        view: &TextureView<A>,
+        view: &TextureView,
         expected: &'static str,
     ) -> Result<(wgt::TextureUsages, hal::TextureUses), binding_model::CreateBindGroupError> {
         use crate::binding_model::CreateBindGroupError as Error;
@@ -2486,9 +2490,8 @@ impl<A: HalApi> Device<A> {
 
     pub(crate) fn create_pipeline_layout(
         self: &Arc<Self>,
-        desc: &binding_model::ResolvedPipelineLayoutDescriptor<A>,
-    ) -> Result<Arc<binding_model::PipelineLayout<A>>, binding_model::CreatePipelineLayoutError>
-    {
+        desc: &binding_model::ResolvedPipelineLayoutDescriptor,
+    ) -> Result<Arc<binding_model::PipelineLayout>, binding_model::CreatePipelineLayoutError> {
         use crate::binding_model::CreatePipelineLayoutError as Error;
 
         self.check_is_valid()?;
@@ -2594,7 +2597,7 @@ impl<A: HalApi> Device<A> {
     pub(crate) fn derive_pipeline_layout(
         self: &Arc<Self>,
         mut derived_group_layouts: ArrayVec<bgl::EntryMap, { hal::MAX_BIND_GROUPS }>,
-    ) -> Result<Arc<binding_model::PipelineLayout<A>>, pipeline::ImplicitLayoutError> {
+    ) -> Result<Arc<binding_model::PipelineLayout>, pipeline::ImplicitLayoutError> {
         while derived_group_layouts
             .last()
             .map_or(false, |map| map.is_empty())
@@ -2639,8 +2642,8 @@ impl<A: HalApi> Device<A> {
 
     pub(crate) fn create_compute_pipeline(
         self: &Arc<Self>,
-        desc: pipeline::ResolvedComputePipelineDescriptor<A>,
-    ) -> Result<Arc<pipeline::ComputePipeline<A>>, pipeline::CreateComputePipelineError> {
+        desc: pipeline::ResolvedComputePipelineDescriptor,
+    ) -> Result<Arc<pipeline::ComputePipeline>, pipeline::CreateComputePipelineError> {
         self.check_is_valid()?;
 
         self.require_downlevel_flags(wgt::DownlevelFlags::COMPUTE_SHADERS)?;
@@ -2772,8 +2775,8 @@ impl<A: HalApi> Device<A> {
 
     pub(crate) fn create_render_pipeline(
         self: &Arc<Self>,
-        desc: pipeline::ResolvedRenderPipelineDescriptor<A>,
-    ) -> Result<Arc<pipeline::RenderPipeline<A>>, pipeline::CreateRenderPipelineError> {
+        desc: pipeline::ResolvedRenderPipelineDescriptor,
+    ) -> Result<Arc<pipeline::RenderPipeline>, pipeline::CreateRenderPipelineError> {
         use wgt::TextureFormatFeatureFlags as Tfff;
 
         self.check_is_valid()?;
@@ -3400,7 +3403,7 @@ impl<A: HalApi> Device<A> {
     pub unsafe fn create_pipeline_cache(
         self: &Arc<Self>,
         desc: &pipeline::PipelineCacheDescriptor,
-    ) -> Result<Arc<pipeline::PipelineCache<A>>, pipeline::CreatePipelineCacheError> {
+    ) -> Result<Arc<pipeline::PipelineCache>, pipeline::CreatePipelineCacheError> {
         use crate::pipeline_cache;
 
         self.check_is_valid()?;
@@ -3509,7 +3512,7 @@ impl<A: HalApi> Device<A> {
     pub(crate) fn create_query_set(
         self: &Arc<Self>,
         desc: &resource::QuerySetDescriptor,
-    ) -> Result<Arc<QuerySet<A>>, resource::CreateQuerySetError> {
+    ) -> Result<Arc<QuerySet>, resource::CreateQuerySetError> {
         use resource::CreateQuerySetError as Error;
 
         self.check_is_valid()?;
@@ -3605,7 +3608,7 @@ impl<A: HalApi> Device<A> {
         }
     }
 
-    pub(crate) fn new_usage_scope(&self) -> UsageScope<'_, A> {
+    pub(crate) fn new_usage_scope(&self) -> UsageScope<'_> {
         UsageScope::new_pooled(&self.usage_scopes, &self.tracker_indices)
     }
 
@@ -3618,8 +3621,8 @@ impl<A: HalApi> Device<A> {
     }
 }
 
-impl<A: HalApi> Device<A> {
-    pub(crate) fn destroy_command_buffer(&self, mut cmd_buf: command::CommandBuffer<A>) {
+impl Device {
+    pub(crate) fn destroy_command_buffer(&self, mut cmd_buf: command::CommandBuffer) {
         let mut baked = cmd_buf.extract_baked_commands();
         unsafe {
             baked.encoder.reset_all(baked.list);
@@ -3656,6 +3659,6 @@ impl<A: HalApi> Device<A> {
     }
 }
 
-crate::impl_resource_type_generic!(Device);
+crate::impl_resource_type!(Device);
 crate::impl_labeled!(Device);
-crate::impl_storage_item_generic!(Device);
+crate::impl_storage_item!(Device);
diff --git a/wgpu-core/src/global.rs b/wgpu-core/src/global.rs
index e4708fd4dc..4d79a81e3b 100644
--- a/wgpu-core/src/global.rs
+++ b/wgpu-core/src/global.rs
@@ -1,10 +1,6 @@
-use std::collections::HashMap;
-
-use wgt::Backend;
-
 use crate::{
     hal_api::HalApi,
-    hub::{HubReport, Hubs},
+    hub::{Hub, HubReport},
     instance::{Instance, Surface},
     registry::{Registry, RegistryReport},
     resource_log,
@@ -13,22 +9,22 @@ use crate::{
 #[derive(Debug, PartialEq, Eq)]
 pub struct GlobalReport {
     pub surfaces: RegistryReport,
-    pub report_per_backend: HashMap<Backend, HubReport>,
+    pub hub: HubReport,
 }
 
 impl GlobalReport {
     pub fn surfaces(&self) -> &RegistryReport {
         &self.surfaces
     }
-    pub fn hub_report(&self, backend: Backend) -> &HubReport {
-        self.report_per_backend.get(&backend).unwrap()
+    pub fn hub_report(&self) -> &HubReport {
+        &self.hub
     }
 }
 
 pub struct Global {
     pub instance: Instance,
     pub(crate) surfaces: Registry<Surface>,
-    pub(crate) hubs: Hubs,
+    pub(crate) hub: Hub,
 }
 
 impl Global {
@@ -36,8 +32,8 @@ impl Global {
         profiling::scope!("Global::new");
         Self {
             instance: Instance::new(name, instance_desc),
-            surfaces: Registry::without_backend(),
-            hubs: Hubs::new(),
+            surfaces: Registry::new(),
+            hub: Hub::new(),
         }
     }
 
@@ -54,8 +50,8 @@ impl Global {
                 instance_per_backend: std::iter::once((A::VARIANT, dyn_instance)).collect(),
                 ..Default::default()
             },
-            surfaces: Registry::without_backend(),
-            hubs: Hubs::new(),
+            surfaces: Registry::new(),
+            hub: Hub::new(),
         }
     }
 
@@ -79,47 +75,15 @@ impl Global {
         profiling::scope!("Global::new");
         Self {
             instance,
-            surfaces: Registry::without_backend(),
-            hubs: Hubs::new(),
+            surfaces: Registry::new(),
+            hub: Hub::new(),
         }
     }
 
     pub fn generate_report(&self) -> GlobalReport {
-        let mut report_per_backend = HashMap::default();
-        let instance_per_backend = &self.instance.instance_per_backend;
-
-        #[cfg(vulkan)]
-        if instance_per_backend
-            .iter()
-            .any(|(backend, _)| backend == &Backend::Vulkan)
-        {
-            report_per_backend.insert(Backend::Vulkan, self.hubs.vulkan.generate_report());
-        };
-        #[cfg(metal)]
-        if instance_per_backend
-            .iter()
-            .any(|(backend, _)| backend == &Backend::Metal)
-        {
-            report_per_backend.insert(Backend::Metal, self.hubs.metal.generate_report());
-        };
-        #[cfg(dx12)]
-        if instance_per_backend
-            .iter()
-            .any(|(backend, _)| backend == &Backend::Dx12)
-        {
-            report_per_backend.insert(Backend::Dx12, self.hubs.dx12.generate_report());
-        };
-        #[cfg(gles)]
-        if instance_per_backend
-            .iter()
-            .any(|(backend, _)| backend == &Backend::Gl)
-        {
-            report_per_backend.insert(Backend::Gl, self.hubs.gl.generate_report());
-        };
-
         GlobalReport {
             surfaces: self.surfaces.generate_report(),
-            report_per_backend,
+            hub: self.hub.generate_report(),
         }
     }
 }
@@ -130,23 +94,8 @@ impl Drop for Global {
         resource_log!("Global::drop");
         let mut surfaces_locked = self.surfaces.write();
 
-        // destroy hubs before the instance gets dropped
-        #[cfg(vulkan)]
-        {
-            self.hubs.vulkan.clear(&surfaces_locked);
-        }
-        #[cfg(metal)]
-        {
-            self.hubs.metal.clear(&surfaces_locked);
-        }
-        #[cfg(dx12)]
-        {
-            self.hubs.dx12.clear(&surfaces_locked);
-        }
-        #[cfg(gles)]
-        {
-            self.hubs.gl.clear(&surfaces_locked);
-        }
+        // destroy hub before the instance gets dropped
+        self.hub.clear(&surfaces_locked);
 
         surfaces_locked.map.clear();
     }
diff --git a/wgpu-core/src/hal_api.rs b/wgpu-core/src/hal_api.rs
index ebd09ffc73..b41847b8d5 100644
--- a/wgpu-core/src/hal_api.rs
+++ b/wgpu-core/src/hal_api.rs
@@ -1,53 +1,29 @@
 use wgt::{Backend, WasmNotSendSync};
 
-use crate::{global::Global, hub::Hub};
-
 pub trait HalApi: hal::Api + 'static + WasmNotSendSync {
     const VARIANT: Backend;
-
-    fn hub(global: &Global) -> &Hub<Self>;
 }
 
 impl HalApi for hal::api::Empty {
     const VARIANT: Backend = Backend::Empty;
-
-    fn hub(_: &Global) -> &Hub<Self> {
-        unimplemented!("called empty api")
-    }
 }
 
 #[cfg(vulkan)]
 impl HalApi for hal::api::Vulkan {
     const VARIANT: Backend = Backend::Vulkan;
-
-    fn hub(global: &Global) -> &Hub<Self> {
-        &global.hubs.vulkan
-    }
 }
 
 #[cfg(metal)]
 impl HalApi for hal::api::Metal {
     const VARIANT: Backend = Backend::Metal;
-
-    fn hub(global: &Global) -> &Hub<Self> {
-        &global.hubs.metal
-    }
 }
 
 #[cfg(dx12)]
 impl HalApi for hal::api::Dx12 {
     const VARIANT: Backend = Backend::Dx12;
-
-    fn hub(global: &Global) -> &Hub<Self> {
-        &global.hubs.dx12
-    }
 }
 
 #[cfg(gles)]
 impl HalApi for hal::api::Gles {
     const VARIANT: Backend = Backend::Gl;
-
-    fn hub(global: &Global) -> &Hub<Self> {
-        &global.hubs.gl
-    }
 }
diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs
index a4d04f3f7c..cfdca16832 100644
--- a/wgpu-core/src/hub.rs
+++ b/wgpu-core/src/hub.rs
@@ -108,7 +108,6 @@ use crate::{
     binding_model::{BindGroup, BindGroupLayout, PipelineLayout},
     command::{CommandBuffer, RenderBundle},
     device::{queue::Queue, Device},
-    hal_api::HalApi,
     instance::{Adapter, Surface},
     pipeline::{ComputePipeline, PipelineCache, RenderPipeline, ShaderModule},
     registry::{Registry, RegistryReport},
@@ -145,10 +144,7 @@ impl HubReport {
 }
 
 #[allow(rustdoc::private_intra_doc_links)]
-/// All the resources for a particular backend in a [`crate::global::Global`].
-///
-/// To obtain `global`'s `Hub` for some [`HalApi`] backend type `A`,
-/// call [`A::hub(global)`].
+/// All the resources tracked by a [`crate::global::Global`].
 ///
 /// ## Locking
 ///
@@ -169,48 +165,48 @@ impl HubReport {
 ///
 ///
 /// [`A::hub(global)`]: HalApi::hub
-pub struct Hub<A: HalApi> {
+pub struct Hub {
     pub(crate) adapters: Registry<Adapter>,
-    pub(crate) devices: Registry<Device<A>>,
-    pub(crate) queues: Registry<Queue<A>>,
-    pub(crate) pipeline_layouts: Registry<PipelineLayout<A>>,
-    pub(crate) shader_modules: Registry<ShaderModule<A>>,
-    pub(crate) bind_group_layouts: Registry<BindGroupLayout<A>>,
-    pub(crate) bind_groups: Registry<BindGroup<A>>,
-    pub(crate) command_buffers: Registry<CommandBuffer<A>>,
-    pub(crate) render_bundles: Registry<RenderBundle<A>>,
-    pub(crate) render_pipelines: Registry<RenderPipeline<A>>,
-    pub(crate) compute_pipelines: Registry<ComputePipeline<A>>,
-    pub(crate) pipeline_caches: Registry<PipelineCache<A>>,
-    pub(crate) query_sets: Registry<QuerySet<A>>,
-    pub(crate) buffers: Registry<Buffer<A>>,
-    pub(crate) staging_buffers: Registry<StagingBuffer<A>>,
-    pub(crate) textures: Registry<Texture<A>>,
-    pub(crate) texture_views: Registry<TextureView<A>>,
-    pub(crate) samplers: Registry<Sampler<A>>,
+    pub(crate) devices: Registry<Device>,
+    pub(crate) queues: Registry<Queue>,
+    pub(crate) pipeline_layouts: Registry<PipelineLayout>,
+    pub(crate) shader_modules: Registry<ShaderModule>,
+    pub(crate) bind_group_layouts: Registry<BindGroupLayout>,
+    pub(crate) bind_groups: Registry<BindGroup>,
+    pub(crate) command_buffers: Registry<CommandBuffer>,
+    pub(crate) render_bundles: Registry<RenderBundle>,
+    pub(crate) render_pipelines: Registry<RenderPipeline>,
+    pub(crate) compute_pipelines: Registry<ComputePipeline>,
+    pub(crate) pipeline_caches: Registry<PipelineCache>,
+    pub(crate) query_sets: Registry<QuerySet>,
+    pub(crate) buffers: Registry<Buffer>,
+    pub(crate) staging_buffers: Registry<StagingBuffer>,
+    pub(crate) textures: Registry<Texture>,
+    pub(crate) texture_views: Registry<TextureView>,
+    pub(crate) samplers: Registry<Sampler>,
 }
 
-impl<A: HalApi> Hub<A> {
-    fn new() -> Self {
+impl Hub {
+    pub(crate) fn new() -> Self {
         Self {
-            adapters: Registry::new(A::VARIANT),
-            devices: Registry::new(A::VARIANT),
-            queues: Registry::new(A::VARIANT),
-            pipeline_layouts: Registry::new(A::VARIANT),
-            shader_modules: Registry::new(A::VARIANT),
-            bind_group_layouts: Registry::new(A::VARIANT),
-            bind_groups: Registry::new(A::VARIANT),
-            command_buffers: Registry::new(A::VARIANT),
-            render_bundles: Registry::new(A::VARIANT),
-            render_pipelines: Registry::new(A::VARIANT),
-            compute_pipelines: Registry::new(A::VARIANT),
-            pipeline_caches: Registry::new(A::VARIANT),
-            query_sets: Registry::new(A::VARIANT),
-            buffers: Registry::new(A::VARIANT),
-            staging_buffers: Registry::new(A::VARIANT),
-            textures: Registry::new(A::VARIANT),
-            texture_views: Registry::new(A::VARIANT),
-            samplers: Registry::new(A::VARIANT),
+            adapters: Registry::new(),
+            devices: Registry::new(),
+            queues: Registry::new(),
+            pipeline_layouts: Registry::new(),
+            shader_modules: Registry::new(),
+            bind_group_layouts: Registry::new(),
+            bind_groups: Registry::new(),
+            command_buffers: Registry::new(),
+            render_bundles: Registry::new(),
+            render_pipelines: Registry::new(),
+            compute_pipelines: Registry::new(),
+            pipeline_caches: Registry::new(),
+            query_sets: Registry::new(),
+            buffers: Registry::new(),
+            staging_buffers: Registry::new(),
+            textures: Registry::new(),
+            texture_views: Registry::new(),
+            samplers: Registry::new(),
         }
     }
 
@@ -239,11 +235,9 @@ impl<A: HalApi> Hub<A> {
         for element in surface_guard.map.iter() {
             if let Element::Occupied(ref surface, _epoch) = *element {
                 if let Some(ref mut present) = surface.presentation.lock().take() {
-                    if let Some(device) = present.device.downcast_ref::<A>() {
-                        let suf = surface.raw(A::VARIANT);
-                        unsafe {
-                            suf.unwrap().unconfigure(device.raw());
-                        }
+                    let suf = surface.raw(present.device.backend());
+                    unsafe {
+                        suf.unwrap().unconfigure(present.device.raw());
                     }
                 }
             }
@@ -278,33 +272,3 @@ impl<A: HalApi> Hub<A> {
         }
     }
 }
-
-pub struct Hubs {
-    #[cfg(vulkan)]
-    pub(crate) vulkan: Hub<hal::api::Vulkan>,
-    #[cfg(metal)]
-    pub(crate) metal: Hub<hal::api::Metal>,
-    #[cfg(dx12)]
-    pub(crate) dx12: Hub<hal::api::Dx12>,
-    #[cfg(gles)]
-    pub(crate) gl: Hub<hal::api::Gles>,
-    #[cfg(all(not(vulkan), not(metal), not(dx12), not(gles)))]
-    pub(crate) empty: Hub<hal::api::Empty>,
-}
-
-impl Hubs {
-    pub(crate) fn new() -> Self {
-        Self {
-            #[cfg(vulkan)]
-            vulkan: Hub::new(),
-            #[cfg(metal)]
-            metal: Hub::new(),
-            #[cfg(dx12)]
-            dx12: Hub::new(),
-            #[cfg(gles)]
-            gl: Hub::new(),
-            #[cfg(all(not(vulkan), not(metal), not(dx12), not(gles)))]
-            empty: Hub::new(),
-        }
-    }
-}
diff --git a/wgpu-core/src/init_tracker/buffer.rs b/wgpu-core/src/init_tracker/buffer.rs
index 2c0fa8d372..ee8e99aa22 100644
--- a/wgpu-core/src/init_tracker/buffer.rs
+++ b/wgpu-core/src/init_tracker/buffer.rs
@@ -1,10 +1,10 @@
 use super::{InitTracker, MemoryInitKind};
-use crate::{hal_api::HalApi, resource::Buffer};
+use crate::resource::Buffer;
 use std::{ops::Range, sync::Arc};
 
 #[derive(Debug, Clone)]
-pub(crate) struct BufferInitTrackerAction<A: HalApi> {
-    pub buffer: Arc<Buffer<A>>,
+pub(crate) struct BufferInitTrackerAction {
+    pub buffer: Arc<Buffer>,
     pub range: Range<wgt::BufferAddress>,
     pub kind: MemoryInitKind,
 }
@@ -14,21 +14,21 @@ pub(crate) type BufferInitTracker = InitTracker<wgt::BufferAddress>;
 impl BufferInitTracker {
     /// Checks if an action has/requires any effect on the initialization status
     /// and shrinks its range if possible.
-    pub(crate) fn check_action<A: HalApi>(
+    pub(crate) fn check_action(
         &self,
-        action: &BufferInitTrackerAction<A>,
-    ) -> Option<BufferInitTrackerAction<A>> {
+        action: &BufferInitTrackerAction,
+    ) -> Option<BufferInitTrackerAction> {
         self.create_action(&action.buffer, action.range.clone(), action.kind)
     }
 
     /// Creates an action if it would have any effect on the initialization
     /// status and shrinks the range if possible.
-    pub(crate) fn create_action<A: HalApi>(
+    pub(crate) fn create_action(
         &self,
-        buffer: &Arc<Buffer<A>>,
+        buffer: &Arc<Buffer>,
         query_range: Range<wgt::BufferAddress>,
         kind: MemoryInitKind,
-    ) -> Option<BufferInitTrackerAction<A>> {
+    ) -> Option<BufferInitTrackerAction> {
         self.check(query_range)
             .map(|range| BufferInitTrackerAction {
                 buffer: buffer.clone(),
diff --git a/wgpu-core/src/init_tracker/texture.rs b/wgpu-core/src/init_tracker/texture.rs
index 4785b52229..4bf7278f21 100644
--- a/wgpu-core/src/init_tracker/texture.rs
+++ b/wgpu-core/src/init_tracker/texture.rs
@@ -1,5 +1,5 @@
 use super::{InitTracker, MemoryInitKind};
-use crate::{hal_api::HalApi, resource::Texture, track::TextureSelector};
+use crate::{resource::Texture, track::TextureSelector};
 use arrayvec::ArrayVec;
 use std::{ops::Range, sync::Arc};
 
@@ -35,8 +35,8 @@ impl From<TextureSelector> for TextureInitRange {
 }
 
 #[derive(Debug, Clone)]
-pub(crate) struct TextureInitTrackerAction<A: HalApi> {
-    pub(crate) texture: Arc<Texture<A>>,
+pub(crate) struct TextureInitTrackerAction {
+    pub(crate) texture: Arc<Texture>,
     pub(crate) range: TextureInitRange,
     pub(crate) kind: MemoryInitKind,
 }
@@ -57,10 +57,10 @@ impl TextureInitTracker {
         }
     }
 
-    pub(crate) fn check_action<A: HalApi>(
+    pub(crate) fn check_action(
         &self,
-        action: &TextureInitTrackerAction<A>,
-    ) -> Option<TextureInitTrackerAction<A>> {
+        action: &TextureInitTrackerAction,
+    ) -> Option<TextureInitTrackerAction> {
         let mut mip_range_start = usize::MAX;
         let mut mip_range_end = usize::MIN;
         let mut layer_range_start = u32::MAX;
diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index 9c0a5fd3bb..8c7585be99 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -1,6 +1,7 @@
 use std::sync::Arc;
 use std::{borrow::Cow, collections::HashMap};
 
+use crate::hub::Hub;
 use crate::{
     api_log,
     device::{queue::Queue, resource::Device, DeviceDescriptor},
@@ -262,13 +263,13 @@ impl Adapter {
     }
 
     #[allow(clippy::type_complexity)]
-    fn create_device_and_queue_from_hal<A: HalApi>(
+    fn create_device_and_queue_from_hal(
         self: &Arc<Self>,
         hal_device: hal::DynOpenDevice,
         desc: &DeviceDescriptor,
         instance_flags: wgt::InstanceFlags,
         trace_path: Option<&std::path::Path>,
-    ) -> Result<(Arc<Device<A>>, Arc<Queue<A>>), RequestDeviceError> {
+    ) -> Result<(Arc<Device>, Arc<Queue>), RequestDeviceError> {
         api_log!("Adapter::create_device");
 
         if let Ok(device) = Device::new(
@@ -288,12 +289,12 @@ impl Adapter {
     }
 
     #[allow(clippy::type_complexity)]
-    fn create_device_and_queue<A: HalApi>(
+    fn create_device_and_queue(
         self: &Arc<Self>,
         desc: &DeviceDescriptor,
         instance_flags: wgt::InstanceFlags,
         trace_path: Option<&std::path::Path>,
-    ) -> Result<(Arc<Device<A>>, Arc<Queue<A>>), RequestDeviceError> {
+    ) -> Result<(Arc<Device>, Arc<Queue>), RequestDeviceError> {
         // Verify all features were exposed by the adapter
         if !self.raw.features.contains(desc.required_features) {
             return Err(RequestDeviceError::UnsupportedFeature(
@@ -302,7 +303,7 @@ impl Adapter {
         }
 
         let caps = &self.raw.capabilities;
-        if Backends::PRIMARY.contains(Backends::from(A::VARIANT))
+        if Backends::PRIMARY.contains(Backends::from(self.raw.backend()))
             && !caps.downlevel.is_webgpu_compliant()
         {
             let missing_flags = wgt::DownlevelFlags::compliant() - caps.downlevel.flags;
@@ -495,7 +496,10 @@ impl Global {
             };
 
             #[allow(clippy::arc_with_non_send_sync)]
-            let id = self.surfaces.prepare(id_in).assign(Arc::new(surface));
+            let id = self
+                .surfaces
+                .prepare(wgt::Backend::Empty, id_in) // No specific backend for Surface, since it's not specific.
+                .assign(Arc::new(surface));
             Ok(id)
         }
     }
@@ -538,7 +542,10 @@ impl Global {
             surface_per_backend: std::iter::once((Backend::Metal, raw_surface)).collect(),
         };
 
-        let id = self.surfaces.prepare(id_in).assign(Arc::new(surface));
+        let id = self
+            .surfaces
+            .prepare(Backend::Metal, id_in)
+            .assign(Arc::new(surface));
         Ok(id)
     }
 
@@ -560,7 +567,10 @@ impl Global {
             surface_per_backend: std::iter::once((Backend::Dx12, surface)).collect(),
         };
 
-        let id = self.surfaces.prepare(id_in).assign(Arc::new(surface));
+        let id = self
+            .surfaces
+            .prepare(Backend::Dx12, id_in)
+            .assign(Arc::new(surface));
         Ok(id)
     }
 
@@ -614,83 +624,65 @@ impl Global {
 
         api_log!("Surface::drop {id:?}");
 
-        fn unconfigure<A: HalApi>(surface: &Surface, present: &Presentation) {
-            if let Some(surface) = surface.raw(A::VARIANT) {
-                if let Some(device) = present.device.downcast_ref::<A>() {
-                    unsafe { surface.unconfigure(device.raw()) };
-                }
-            }
-        }
-
         let surface = self.surfaces.unregister(id);
         let surface = Arc::into_inner(surface.unwrap())
             .expect("Surface cannot be destroyed because is still in use");
 
         if let Some(present) = surface.presentation.lock().take() {
-            // TODO(#5124): Becomes a loop once we use Arc<Device>
-            #[cfg(vulkan)]
-            unconfigure::<hal::api::Vulkan>(&surface, &present);
-            #[cfg(metal)]
-            unconfigure::<hal::api::Metal>(&surface, &present);
-            #[cfg(dx12)]
-            unconfigure::<hal::api::Dx12>(&surface, &present);
-            #[cfg(gles)]
-            unconfigure::<hal::api::Gles>(&surface, &present);
+            for (&backend, surface) in &surface.surface_per_backend {
+                if backend == present.device.backend() {
+                    unsafe { surface.unconfigure(present.device.raw()) };
+                }
+            }
         }
         drop(surface)
     }
 
-    fn enumerate<A: HalApi>(
-        &self,
-        inputs: &AdapterInputs<markers::Adapter>,
-        list: &mut Vec<AdapterId>,
-    ) {
-        let inst = match self
-            .instance
-            .instance_per_backend
-            .iter()
-            .find(|(backend, _)| backend == &A::VARIANT)
-        {
-            Some((_, inst)) => inst.as_ref(),
-            None => return,
-        };
-        let id_backend = match inputs.find(A::VARIANT) {
-            Some(id) => id,
-            None => return,
-        };
-
-        profiling::scope!("enumerating", &*format!("{:?}", backend));
-        let hub: &crate::hub::Hub<A> = HalApi::hub(self);
-
-        let hal_adapters = unsafe { inst.enumerate_adapters(None) };
-        for raw in hal_adapters {
-            let adapter = Adapter::new(raw);
-            log::info!("Adapter {:?} {:?}", A::VARIANT, adapter.raw.info);
-            let id = hub.adapters.prepare(id_backend).assign(Arc::new(adapter));
-            list.push(id);
-        }
-    }
-
     pub fn enumerate_adapters(&self, inputs: AdapterInputs<markers::Adapter>) -> Vec<AdapterId> {
         profiling::scope!("Instance::enumerate_adapters");
         api_log!("Instance::enumerate_adapters");
 
-        let mut adapters = Vec::new();
+        fn enumerate(
+            hub: &Hub,
+            backend: Backend,
+            instance: &dyn hal::DynInstance,
+            inputs: &AdapterInputs<markers::Adapter>,
+            list: &mut Vec<AdapterId>,
+        ) {
+            let Some(id_backend) = inputs.find(backend) else {
+                return;
+            };
 
-        #[cfg(vulkan)]
-        self.enumerate::<hal::vulkan::Api>(&inputs, &mut adapters);
-        #[cfg(metal)]
-        self.enumerate::<hal::metal::Api>(&inputs, &mut adapters);
-        #[cfg(dx12)]
-        self.enumerate::<hal::dx12::Api>(&inputs, &mut adapters);
-        #[cfg(gles)]
-        self.enumerate::<hal::gles::Api>(&inputs, &mut adapters);
+            profiling::scope!("enumerating", &*format!("{:?}", backend));
+
+            let hal_adapters = unsafe { instance.enumerate_adapters(None) };
+            for raw in hal_adapters {
+                let adapter = Adapter::new(raw);
+                log::info!("Adapter {:?}", adapter.raw.info);
+                let id = hub
+                    .adapters
+                    .prepare(backend, id_backend)
+                    .assign(Arc::new(adapter));
+                list.push(id);
+            }
+        }
 
+        let mut adapters = Vec::new();
+        for (backend, instance) in &self.instance.instance_per_backend {
+            enumerate(
+                &self.hub,
+                *backend,
+                instance.as_ref(),
+                &inputs,
+                &mut adapters,
+            );
+        }
         adapters
     }
 
-    fn select<A: HalApi>(
+    fn select(
         &self,
+        backend: Backend,
         selected: &mut usize,
         new_id: Option<AdapterId>,
         mut list: Vec<hal::DynExposedAdapter>,
@@ -703,9 +695,10 @@ impl Global {
             None => {
                 let adapter = Adapter::new(list.swap_remove(*selected));
                 log::info!("Adapter {:?}", adapter.raw.info);
-                let id = A::hub(self)
+                let id = self
+                    .hub
                     .adapters
-                    .prepare(new_id)
+                    .prepare(backend, new_id)
                     .assign(Arc::new(adapter));
                 Some(id)
             }
@@ -848,19 +841,19 @@ impl Global {
 
         let mut selected = preferred_gpu.unwrap_or(0);
         #[cfg(vulkan)]
-        if let Some(id) = self.select::<hal::api::Vulkan>(&mut selected, id_vulkan, adapters_vk) {
+        if let Some(id) = self.select(Backend::Vulkan, &mut selected, id_vulkan, adapters_vk) {
             return Ok(id);
         }
         #[cfg(metal)]
-        if let Some(id) = self.select::<hal::api::Metal>(&mut selected, id_metal, adapters_metal) {
+        if let Some(id) = self.select(Backend::Metal, &mut selected, id_metal, adapters_metal) {
             return Ok(id);
         }
         #[cfg(dx12)]
-        if let Some(id) = self.select::<hal::api::Dx12>(&mut selected, id_dx12, adapters_dx12) {
+        if let Some(id) = self.select(Backend::Dx12, &mut selected, id_dx12, adapters_dx12) {
             return Ok(id);
         }
         #[cfg(gles)]
-        if let Some(id) = self.select::<hal::api::Gles>(&mut selected, id_gl, adapters_gl) {
+        if let Some(id) = self.select(Backend::Gl, &mut selected, id_gl, adapters_gl) {
             return Ok(id);
         }
         let _ = selected;
@@ -872,113 +865,92 @@ impl Global {
     /// # Safety
     ///
     /// `hal_adapter` must be created from this global internal instance handle.
-    pub unsafe fn create_adapter_from_hal<A: HalApi>(
+    pub unsafe fn create_adapter_from_hal(
         &self,
         hal_adapter: hal::DynExposedAdapter,
         input: Option<AdapterId>,
     ) -> AdapterId {
         profiling::scope!("Instance::create_adapter_from_hal");
 
-        let fid = A::hub(self).adapters.prepare(input);
-
-        let id = match A::VARIANT {
-            #[cfg(vulkan)]
-            Backend::Vulkan => fid.assign(Arc::new(Adapter::new(hal_adapter))),
-            #[cfg(metal)]
-            Backend::Metal => fid.assign(Arc::new(Adapter::new(hal_adapter))),
-            #[cfg(dx12)]
-            Backend::Dx12 => fid.assign(Arc::new(Adapter::new(hal_adapter))),
-            #[cfg(gles)]
-            Backend::Gl => fid.assign(Arc::new(Adapter::new(hal_adapter))),
-            _ => unreachable!(),
-        };
+        let fid = self.hub.adapters.prepare(hal_adapter.backend(), input);
+        let id = fid.assign(Arc::new(Adapter::new(hal_adapter)));
+
         resource_log!("Created Adapter {:?}", id);
         id
     }
 
-    pub fn adapter_get_info<A: HalApi>(
+    pub fn adapter_get_info(
         &self,
         adapter_id: AdapterId,
     ) -> Result<wgt::AdapterInfo, InvalidAdapter> {
-        let hub = A::hub(self);
-
-        hub.adapters
+        self.hub
+            .adapters
             .get(adapter_id)
             .map(|adapter| adapter.raw.info.clone())
             .map_err(|_| InvalidAdapter)
     }
 
-    pub fn adapter_get_texture_format_features<A: HalApi>(
+    pub fn adapter_get_texture_format_features(
         &self,
         adapter_id: AdapterId,
         format: wgt::TextureFormat,
     ) -> Result<wgt::TextureFormatFeatures, InvalidAdapter> {
-        let hub = A::hub(self);
-
-        hub.adapters
+        self.hub
+            .adapters
             .get(adapter_id)
             .map(|adapter| adapter.get_texture_format_features(format))
             .map_err(|_| InvalidAdapter)
     }
 
-    pub fn adapter_features<A: HalApi>(
-        &self,
-        adapter_id: AdapterId,
-    ) -> Result<wgt::Features, InvalidAdapter> {
-        let hub = A::hub(self);
-
-        hub.adapters
+    pub fn adapter_features(&self, adapter_id: AdapterId) -> Result<wgt::Features, InvalidAdapter> {
+        self.hub
+            .adapters
             .get(adapter_id)
             .map(|adapter| adapter.raw.features)
             .map_err(|_| InvalidAdapter)
     }
 
-    pub fn adapter_limits<A: HalApi>(
-        &self,
-        adapter_id: AdapterId,
-    ) -> Result<wgt::Limits, InvalidAdapter> {
-        let hub = A::hub(self);
-
-        hub.adapters
+    pub fn adapter_limits(&self, adapter_id: AdapterId) -> Result<wgt::Limits, InvalidAdapter> {
+        self.hub
+            .adapters
             .get(adapter_id)
             .map(|adapter| adapter.raw.capabilities.limits.clone())
             .map_err(|_| InvalidAdapter)
     }
 
-    pub fn adapter_downlevel_capabilities<A: HalApi>(
+    pub fn adapter_downlevel_capabilities(
         &self,
         adapter_id: AdapterId,
     ) -> Result<wgt::DownlevelCapabilities, InvalidAdapter> {
-        let hub = A::hub(self);
-
-        hub.adapters
+        self.hub
+            .adapters
             .get(adapter_id)
             .map(|adapter| adapter.raw.capabilities.downlevel.clone())
             .map_err(|_| InvalidAdapter)
     }
 
-    pub fn adapter_get_presentation_timestamp<A: HalApi>(
+    pub fn adapter_get_presentation_timestamp(
         &self,
         adapter_id: AdapterId,
     ) -> Result<wgt::PresentationTimestamp, InvalidAdapter> {
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let adapter = hub.adapters.get(adapter_id).map_err(|_| InvalidAdapter)?;
 
         Ok(unsafe { adapter.raw.adapter.get_presentation_timestamp() })
     }
 
-    pub fn adapter_drop<A: HalApi>(&self, adapter_id: AdapterId) {
+    pub fn adapter_drop(&self, adapter_id: AdapterId) {
         profiling::scope!("Adapter::drop");
         api_log!("Adapter::drop {adapter_id:?}");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
         hub.adapters.unregister(adapter_id);
     }
 }
 
 impl Global {
-    pub fn adapter_request_device<A: HalApi>(
+    pub fn adapter_request_device(
         &self,
         adapter_id: AdapterId,
         desc: &DeviceDescriptor,
@@ -989,12 +961,12 @@ impl Global {
         profiling::scope!("Adapter::request_device");
         api_log!("Adapter::request_device");
 
-        let hub = A::hub(self);
-        let device_fid = hub.devices.prepare(device_id_in);
-        let queue_fid = hub.queues.prepare(queue_id_in);
+        let backend = adapter_id.backend();
+        let device_fid = self.hub.devices.prepare(backend, device_id_in);
+        let queue_fid = self.hub.queues.prepare(backend, queue_id_in);
 
         let error = 'error: {
-            let adapter = match hub.adapters.get(adapter_id) {
+            let adapter = match self.hub.adapters.get(adapter_id) {
                 Ok(adapter) => adapter,
                 Err(_) => break 'error RequestDeviceError::InvalidAdapter,
             };
@@ -1022,7 +994,7 @@ impl Global {
     ///
     /// - `hal_device` must be created from `adapter_id` or its internal handle.
     /// - `desc` must be a subset of `hal_device` features and limits.
-    pub unsafe fn create_device_from_hal<A: HalApi>(
+    pub unsafe fn create_device_from_hal(
         &self,
         adapter_id: AdapterId,
         hal_device: hal::DynOpenDevice,
@@ -1033,12 +1005,12 @@ impl Global {
     ) -> (DeviceId, QueueId, Option<RequestDeviceError>) {
         profiling::scope!("Global::create_device_from_hal");
 
-        let hub = A::hub(self);
-        let devices_fid = hub.devices.prepare(device_id_in);
-        let queues_fid = hub.queues.prepare(queue_id_in);
+        let backend = adapter_id.backend();
+        let devices_fid = self.hub.devices.prepare(backend, device_id_in);
+        let queues_fid = self.hub.queues.prepare(backend, queue_id_in);
 
         let error = 'error: {
-            let adapter = match hub.adapters.get(adapter_id) {
+            let adapter = match self.hub.adapters.get(adapter_id) {
                 Ok(adapter) => adapter,
                 Err(_) => break 'error RequestDeviceError::InvalidAdapter,
             };
diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs
index 351916002f..179664490c 100644
--- a/wgpu-core/src/lib.rs
+++ b/wgpu-core/src/lib.rs
@@ -290,6 +290,8 @@ define_backend_caller! { gfx_if_empty, gfx_if_empty_hidden, "empty" if all(
 /// [`wgpu_types::Backend`]: wgt::Backend
 /// [`wgpu_core::global::Global`]: crate::global::Global
 /// [`Id`]: id::Id
+//
+// TODO(#5124): Remove this altogether.
 #[macro_export]
 macro_rules! gfx_select {
     // Simple two-component expression, like `self.0.method(..)`.
@@ -303,14 +305,7 @@ macro_rules! gfx_select {
     };
 
     ($id:expr => {$($c:tt)*}, $method:ident $params:tt) => {
-        match $id.backend() {
-            wgt::Backend::Vulkan => $crate::gfx_if_vulkan!($($c)*.$method::<$crate::api::Vulkan> $params),
-            wgt::Backend::Metal => $crate::gfx_if_metal!($($c)*.$method::<$crate::api::Metal> $params),
-            wgt::Backend::Dx12 => $crate::gfx_if_dx12!($($c)*.$method::<$crate::api::Dx12> $params),
-            wgt::Backend::Gl => $crate::gfx_if_gles!($($c)*.$method::<$crate::api::Gles> $params),
-            wgt::Backend::Empty => $crate::gfx_if_empty!($($c)*.$method::<$crate::api::Empty> $params),
-            other => panic!("Unexpected backend {:?}", other),
-        }
+        $($c)*.$method $params
     };
 }
 
diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs
index 7e58962dbc..db1c1ba76a 100644
--- a/wgpu-core/src/pipeline.rs
+++ b/wgpu-core/src/pipeline.rs
@@ -3,7 +3,6 @@ use crate::{
     binding_model::{CreateBindGroupLayoutError, CreatePipelineLayoutError, PipelineLayout},
     command::ColorAttachmentError,
     device::{Device, DeviceError, MissingDownlevelFlags, MissingFeatures, RenderPassContext},
-    hal_api::HalApi,
     id::{PipelineCacheId, PipelineLayoutId, ShaderModuleId},
     resource::{Labeled, TrackingData},
     resource_log, validation, Label,
@@ -46,15 +45,15 @@ pub struct ShaderModuleDescriptor<'a> {
 }
 
 #[derive(Debug)]
-pub struct ShaderModule<A: HalApi> {
+pub struct ShaderModule {
     pub(crate) raw: ManuallyDrop<Box<dyn hal::DynShaderModule>>,
-    pub(crate) device: Arc<Device<A>>,
+    pub(crate) device: Arc<Device>,
     pub(crate) interface: Option<validation::Interface>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
 }
 
-impl<A: HalApi> Drop for ShaderModule<A> {
+impl Drop for ShaderModule {
     fn drop(&mut self) {
         resource_log!("Destroy raw {}", self.error_ident());
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
@@ -65,12 +64,12 @@ impl<A: HalApi> Drop for ShaderModule<A> {
     }
 }
 
-crate::impl_resource_type_generic!(ShaderModule);
+crate::impl_resource_type!(ShaderModule);
 crate::impl_labeled!(ShaderModule);
 crate::impl_parent_device!(ShaderModule);
-crate::impl_storage_item_generic!(ShaderModule);
+crate::impl_storage_item!(ShaderModule);
 
-impl<A: HalApi> ShaderModule<A> {
+impl ShaderModule {
     pub(crate) fn raw(&self) -> &dyn hal::DynShaderModule {
         self.raw.as_ref()
     }
@@ -150,9 +149,9 @@ pub struct ProgrammableStageDescriptor<'a> {
 
 /// Describes a programmable pipeline stage.
 #[derive(Clone, Debug)]
-pub struct ResolvedProgrammableStageDescriptor<'a, A: HalApi> {
+pub struct ResolvedProgrammableStageDescriptor<'a> {
     /// The compiled shader module for this stage.
-    pub module: Arc<ShaderModule<A>>,
+    pub module: Arc<ShaderModule>,
     /// The name of the entry point in the compiled shader. The name is selected using the
     /// following logic:
     ///
@@ -208,14 +207,14 @@ pub struct ComputePipelineDescriptor<'a> {
 
 /// Describes a compute pipeline.
 #[derive(Clone, Debug)]
-pub struct ResolvedComputePipelineDescriptor<'a, A: HalApi> {
+pub struct ResolvedComputePipelineDescriptor<'a> {
     pub label: Label<'a>,
     /// The layout of bind groups for this pipeline.
-    pub layout: Option<Arc<PipelineLayout<A>>>,
+    pub layout: Option<Arc<PipelineLayout>>,
     /// The compiled compute stage and its entry point.
-    pub stage: ResolvedProgrammableStageDescriptor<'a, A>,
+    pub stage: ResolvedProgrammableStageDescriptor<'a>,
     /// The pipeline cache to use when creating this pipeline.
-    pub cache: Option<Arc<PipelineCache<A>>>,
+    pub cache: Option<Arc<PipelineCache>>,
 }
 
 #[derive(Clone, Debug, Error)]
@@ -240,18 +239,18 @@ pub enum CreateComputePipelineError {
 }
 
 #[derive(Debug)]
-pub struct ComputePipeline<A: HalApi> {
+pub struct ComputePipeline {
     pub(crate) raw: ManuallyDrop<Box<dyn hal::DynComputePipeline>>,
-    pub(crate) layout: Arc<PipelineLayout<A>>,
-    pub(crate) device: Arc<Device<A>>,
-    pub(crate) _shader_module: Arc<ShaderModule<A>>,
+    pub(crate) layout: Arc<PipelineLayout>,
+    pub(crate) device: Arc<Device>,
+    pub(crate) _shader_module: Arc<ShaderModule>,
     pub(crate) late_sized_buffer_groups: ArrayVec<LateSizedBufferGroup, { hal::MAX_BIND_GROUPS }>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
     pub(crate) tracking_data: TrackingData,
 }
 
-impl<A: HalApi> Drop for ComputePipeline<A> {
+impl Drop for ComputePipeline {
     fn drop(&mut self) {
         resource_log!("Destroy raw {}", self.error_ident());
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
@@ -262,13 +261,13 @@ impl<A: HalApi> Drop for ComputePipeline<A> {
     }
 }
 
-crate::impl_resource_type_generic!(ComputePipeline);
+crate::impl_resource_type!(ComputePipeline);
 crate::impl_labeled!(ComputePipeline);
 crate::impl_parent_device!(ComputePipeline);
-crate::impl_storage_item_generic!(ComputePipeline);
+crate::impl_storage_item!(ComputePipeline);
 crate::impl_trackable!(ComputePipeline);
 
-impl<A: HalApi> ComputePipeline<A> {
+impl ComputePipeline {
     pub(crate) fn raw(&self) -> &dyn hal::DynComputePipeline {
         self.raw.as_ref()
     }
@@ -298,14 +297,14 @@ impl From<hal::PipelineCacheError> for CreatePipelineCacheError {
 }
 
 #[derive(Debug)]
-pub struct PipelineCache<A: HalApi> {
+pub struct PipelineCache {
     pub(crate) raw: ManuallyDrop<Box<dyn hal::DynPipelineCache>>,
-    pub(crate) device: Arc<Device<A>>,
+    pub(crate) device: Arc<Device>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
 }
 
-impl<A: HalApi> Drop for PipelineCache<A> {
+impl Drop for PipelineCache {
     fn drop(&mut self) {
         resource_log!("Destroy raw {}", self.error_ident());
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
@@ -316,12 +315,12 @@ impl<A: HalApi> Drop for PipelineCache<A> {
     }
 }
 
-crate::impl_resource_type_generic!(PipelineCache);
+crate::impl_resource_type!(PipelineCache);
 crate::impl_labeled!(PipelineCache);
 crate::impl_parent_device!(PipelineCache);
-crate::impl_storage_item_generic!(PipelineCache);
+crate::impl_storage_item!(PipelineCache);
 
-impl<A: HalApi> PipelineCache<A> {
+impl PipelineCache {
     pub(crate) fn raw(&self) -> &dyn hal::DynPipelineCache {
         self.raw.as_ref()
     }
@@ -352,9 +351,9 @@ pub struct VertexState<'a> {
 
 /// Describes the vertex process in a render pipeline.
 #[derive(Clone, Debug)]
-pub struct ResolvedVertexState<'a, A: HalApi> {
+pub struct ResolvedVertexState<'a> {
     /// The compiled vertex stage and its entry point.
-    pub stage: ResolvedProgrammableStageDescriptor<'a, A>,
+    pub stage: ResolvedProgrammableStageDescriptor<'a>,
     /// The format of any vertex buffers used with this pipeline.
     pub buffers: Cow<'a, [VertexBufferLayout<'a>]>,
 }
@@ -371,9 +370,9 @@ pub struct FragmentState<'a> {
 
 /// Describes fragment processing in a render pipeline.
 #[derive(Clone, Debug)]
-pub struct ResolvedFragmentState<'a, A: HalApi> {
+pub struct ResolvedFragmentState<'a> {
     /// The compiled fragment stage and its entry point.
-    pub stage: ResolvedProgrammableStageDescriptor<'a, A>,
+    pub stage: ResolvedProgrammableStageDescriptor<'a>,
     /// The effect of draw calls on the color aspect of the output target.
     pub targets: Cow<'a, [Option<wgt::ColorTargetState>]>,
 }
@@ -407,12 +406,12 @@ pub struct RenderPipelineDescriptor<'a> {
 
 /// Describes a render (graphics) pipeline.
 #[derive(Clone, Debug)]
-pub struct ResolvedRenderPipelineDescriptor<'a, A: HalApi> {
+pub struct ResolvedRenderPipelineDescriptor<'a> {
     pub label: Label<'a>,
     /// The layout of bind groups for this pipeline.
-    pub layout: Option<Arc<PipelineLayout<A>>>,
+    pub layout: Option<Arc<PipelineLayout>>,
     /// The vertex processing state for this pipeline.
-    pub vertex: ResolvedVertexState<'a, A>,
+    pub vertex: ResolvedVertexState<'a>,
     /// The properties of the pipeline at the primitive assembly and rasterization level.
     pub primitive: wgt::PrimitiveState,
     /// The effect of draw calls on the depth and stencil aspects of the output target, if any.
@@ -420,12 +419,12 @@ pub struct ResolvedRenderPipelineDescriptor<'a, A: HalApi> {
     /// The multi-sampling properties of the pipeline.
     pub multisample: wgt::MultisampleState,
     /// The fragment processing state for this pipeline.
-    pub fragment: Option<ResolvedFragmentState<'a, A>>,
+    pub fragment: Option<ResolvedFragmentState<'a>>,
     /// If the pipeline will be used with a multiview render pass, this indicates how many array
     /// layers the attachments will have.
     pub multiview: Option<NonZeroU32>,
     /// The pipeline cache to use when creating this pipeline.
-    pub cache: Option<Arc<PipelineCache<A>>>,
+    pub cache: Option<Arc<PipelineCache>>,
 }
 
 #[derive(Clone, Debug)]
@@ -588,12 +587,11 @@ impl Default for VertexStep {
 }
 
 #[derive(Debug)]
-pub struct RenderPipeline<A: HalApi> {
+pub struct RenderPipeline {
     pub(crate) raw: ManuallyDrop<Box<dyn hal::DynRenderPipeline>>,
-    pub(crate) device: Arc<Device<A>>,
-    pub(crate) layout: Arc<PipelineLayout<A>>,
-    pub(crate) _shader_modules:
-        ArrayVec<Arc<ShaderModule<A>>, { hal::MAX_CONCURRENT_SHADER_STAGES }>,
+    pub(crate) device: Arc<Device>,
+    pub(crate) layout: Arc<PipelineLayout>,
+    pub(crate) _shader_modules: ArrayVec<Arc<ShaderModule>, { hal::MAX_CONCURRENT_SHADER_STAGES }>,
     pub(crate) pass_context: RenderPassContext,
     pub(crate) flags: PipelineFlags,
     pub(crate) strip_index_format: Option<wgt::IndexFormat>,
@@ -604,7 +602,7 @@ pub struct RenderPipeline<A: HalApi> {
     pub(crate) tracking_data: TrackingData,
 }
 
-impl<A: HalApi> Drop for RenderPipeline<A> {
+impl Drop for RenderPipeline {
     fn drop(&mut self) {
         resource_log!("Destroy raw {}", self.error_ident());
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
@@ -615,13 +613,13 @@ impl<A: HalApi> Drop for RenderPipeline<A> {
     }
 }
 
-crate::impl_resource_type_generic!(RenderPipeline);
+crate::impl_resource_type!(RenderPipeline);
 crate::impl_labeled!(RenderPipeline);
 crate::impl_parent_device!(RenderPipeline);
-crate::impl_storage_item_generic!(RenderPipeline);
+crate::impl_storage_item!(RenderPipeline);
 crate::impl_trackable!(RenderPipeline);
 
-impl<A: HalApi> RenderPipeline<A> {
+impl RenderPipeline {
     pub(crate) fn raw(&self) -> &dyn hal::DynRenderPipeline {
         self.raw.as_ref()
     }
diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs
index 4ac286b497..697156b35f 100644
--- a/wgpu-core/src/present.rs
+++ b/wgpu-core/src/present.rs
@@ -15,10 +15,8 @@ use std::{mem::ManuallyDrop, sync::Arc};
 use crate::device::trace::Action;
 use crate::{
     conv,
-    device::any_device::AnyDevice,
-    device::{DeviceError, MissingDownlevelFlags, WaitIdleError},
+    device::{Device, DeviceError, MissingDownlevelFlags, WaitIdleError},
     global::Global,
-    hal_api::HalApi,
     hal_label, id,
     resource::{self, Trackable},
 };
@@ -30,7 +28,7 @@ const FRAME_TIMEOUT_MS: u32 = 1000;
 
 #[derive(Debug)]
 pub(crate) struct Presentation {
-    pub(crate) device: AnyDevice, // TODO(#5124): use device: Arc<Device>
+    pub(crate) device: Arc<Device>,
     pub(crate) config: wgt::SurfaceConfiguration<Vec<wgt::TextureFormat>>,
     pub(crate) acquired_texture: Option<id::TextureId>,
 }
@@ -115,16 +113,14 @@ pub struct SurfaceOutput {
 }
 
 impl Global {
-    pub fn surface_get_current_texture<A: HalApi>(
+    pub fn surface_get_current_texture(
         &self,
         surface_id: id::SurfaceId,
         texture_id_in: Option<id::TextureId>,
     ) -> Result<SurfaceOutput, SurfaceError> {
         profiling::scope!("SwapChain::get_next_texture");
 
-        let hub = A::hub(self);
-
-        let fid = hub.textures.prepare(texture_id_in);
+        let hub = &self.hub;
 
         let surface = self
             .surfaces
@@ -132,17 +128,14 @@ impl Global {
             .map_err(|_| SurfaceError::Invalid)?;
 
         let (device, config) = if let Some(ref present) = *surface.presentation.lock() {
-            match present.device.downcast_clone::<A>() {
-                Some(device) => {
-                    device.check_is_valid()?;
-                    (device, present.config.clone())
-                }
-                None => return Err(SurfaceError::NotConfigured),
-            }
+            present.device.check_is_valid()?;
+            (present.device.clone(), present.config.clone())
         } else {
             return Err(SurfaceError::NotConfigured);
         };
 
+        let fid = hub.textures.prepare(device.backend(), texture_id_in);
+
         #[cfg(feature = "trace")]
         if let Some(ref mut trace) = *device.trace.lock() {
             trace.add(Action::GetSurfaceTexture {
@@ -153,7 +146,7 @@ impl Global {
 
         let fence = device.fence.read();
 
-        let suf = surface.raw(A::VARIANT).unwrap();
+        let suf = surface.raw(device.backend()).unwrap();
         let (texture_id, status) = match unsafe {
             suf.acquire_texture(
                 Some(std::time::Duration::from_millis(FRAME_TIMEOUT_MS as u64)),
@@ -259,13 +252,10 @@ impl Global {
         Ok(SurfaceOutput { status, texture_id })
     }
 
-    pub fn surface_present<A: HalApi>(
-        &self,
-        surface_id: id::SurfaceId,
-    ) -> Result<Status, SurfaceError> {
+    pub fn surface_present(&self, surface_id: id::SurfaceId) -> Result<Status, SurfaceError> {
         profiling::scope!("SwapChain::present");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let surface = self
             .surfaces
@@ -278,7 +268,7 @@ impl Global {
             None => return Err(SurfaceError::NotConfigured),
         };
 
-        let device = present.device.downcast_ref::<A>().unwrap();
+        let device = &present.device;
 
         #[cfg(feature = "trace")]
         if let Some(ref mut trace) = *device.trace.lock() {
@@ -303,7 +293,7 @@ impl Global {
                     .lock()
                     .textures
                     .remove(texture.tracker_index());
-                let suf = surface.raw(A::VARIANT).unwrap();
+                let suf = surface.raw(device.backend()).unwrap();
                 let exclusive_snatch_guard = device.snatchable_lock.write();
                 match texture.inner.snatch(exclusive_snatch_guard).unwrap() {
                     resource::TextureInner::Surface { raw, parent_id } => {
@@ -335,13 +325,10 @@ impl Global {
         }
     }
 
-    pub fn surface_texture_discard<A: HalApi>(
-        &self,
-        surface_id: id::SurfaceId,
-    ) -> Result<(), SurfaceError> {
+    pub fn surface_texture_discard(&self, surface_id: id::SurfaceId) -> Result<(), SurfaceError> {
         profiling::scope!("SwapChain::discard");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let surface = self
             .surfaces
@@ -353,7 +340,7 @@ impl Global {
             None => return Err(SurfaceError::NotConfigured),
         };
 
-        let device = present.device.downcast_ref::<A>().unwrap();
+        let device = &present.device;
 
         #[cfg(feature = "trace")]
         if let Some(ref mut trace) = *device.trace.lock() {
@@ -378,7 +365,7 @@ impl Global {
                     .lock()
                     .textures
                     .remove(texture.tracker_index());
-                let suf = surface.raw(A::VARIANT);
+                let suf = surface.raw(device.backend());
                 let exclusive_snatch_guard = device.snatchable_lock.write();
                 match texture.inner.snatch(exclusive_snatch_guard).unwrap() {
                     resource::TextureInner::Surface { raw, parent_id } => {
diff --git a/wgpu-core/src/registry.rs b/wgpu-core/src/registry.rs
index 9183cc83bb..fa7e0def6c 100644
--- a/wgpu-core/src/registry.rs
+++ b/wgpu-core/src/registry.rs
@@ -1,7 +1,5 @@
 use std::sync::Arc;
 
-use wgt::Backend;
-
 use crate::{
     id::Id,
     identity::IdentityManager,
@@ -40,21 +38,15 @@ pub(crate) struct Registry<T: StorageItem> {
     // Must only contain an id which has either never been used or has been released from `storage`
     identity: Arc<IdentityManager<T::Marker>>,
     storage: RwLock<Storage<T>>,
-    backend: Backend,
 }
 
 impl<T: StorageItem> Registry<T> {
-    pub(crate) fn new(backend: Backend) -> Self {
+    pub(crate) fn new() -> Self {
         Self {
             identity: Arc::new(IdentityManager::new()),
             storage: RwLock::new(rank::REGISTRY_STORAGE, Storage::new()),
-            backend,
         }
     }
-
-    pub(crate) fn without_backend() -> Self {
-        Self::new(Backend::Empty)
-    }
 }
 
 #[must_use]
@@ -89,14 +81,18 @@ impl<T: StorageItem> FutureId<'_, T> {
 }
 
 impl<T: StorageItem> Registry<T> {
-    pub(crate) fn prepare(&self, id_in: Option<Id<T::Marker>>) -> FutureId<T> {
+    pub(crate) fn prepare(
+        &self,
+        backend: wgt::Backend,
+        id_in: Option<Id<T::Marker>>,
+    ) -> FutureId<T> {
         FutureId {
             id: match id_in {
                 Some(id_in) => {
                     self.identity.mark_as_used(id_in);
                     id_in
                 }
-                None => self.identity.process(self.backend),
+                None => self.identity.process(backend),
             },
             data: &self.storage,
         }
@@ -164,13 +160,13 @@ mod tests {
 
     #[test]
     fn simultaneous_registration() {
-        let registry = Registry::without_backend();
+        let registry = Registry::new();
         std::thread::scope(|s| {
             for _ in 0..5 {
                 s.spawn(|| {
                     for _ in 0..1000 {
                         let value = Arc::new(TestData);
-                        let new_id = registry.prepare(None);
+                        let new_id = registry.prepare(wgt::Backend::Empty, None);
                         let id = new_id.assign(value);
                         registry.unregister(id);
                     }
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 2b06799a24..184851fc2a 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -86,14 +86,14 @@ impl std::fmt::Display for ResourceErrorIdent {
     }
 }
 
-pub(crate) trait ParentDevice<A: HalApi>: Labeled {
-    fn device(&self) -> &Arc<Device<A>>;
+pub(crate) trait ParentDevice: Labeled {
+    fn device(&self) -> &Arc<Device>;
 
     fn is_equal(self: &Arc<Self>, other: &Arc<Self>) -> bool {
         Arc::ptr_eq(self, other)
     }
 
-    fn same_device_as<O: ParentDevice<A>>(&self, other: &O) -> Result<(), DeviceError> {
+    fn same_device_as<O: ParentDevice>(&self, other: &O) -> Result<(), DeviceError> {
         if Arc::ptr_eq(self.device(), other.device()) {
             Ok(())
         } else {
@@ -106,7 +106,7 @@ pub(crate) trait ParentDevice<A: HalApi>: Labeled {
         }
     }
 
-    fn same_device(&self, device: &Arc<Device<A>>) -> Result<(), DeviceError> {
+    fn same_device(&self, device: &Arc<Device>) -> Result<(), DeviceError> {
         if Arc::ptr_eq(self.device(), device) {
             Ok(())
         } else {
@@ -123,8 +123,8 @@ pub(crate) trait ParentDevice<A: HalApi>: Labeled {
 #[macro_export]
 macro_rules! impl_parent_device {
     ($ty:ident) => {
-        impl<A: HalApi> $crate::resource::ParentDevice<A> for $ty<A> {
-            fn device(&self) -> &Arc<Device<A>> {
+        impl $crate::resource::ParentDevice for $ty {
+            fn device(&self) -> &Arc<Device> {
                 &self.device
             }
         }
@@ -135,16 +135,6 @@ pub(crate) trait ResourceType {
     const TYPE: &'static str;
 }
 
-// TODO(#5124): Remove the typed version.
-#[macro_export]
-macro_rules! impl_resource_type_generic {
-    ($ty:ident) => {
-        impl<A: HalApi> $crate::resource::ResourceType for $ty<A> {
-            const TYPE: &'static str = stringify!($ty);
-        }
-    };
-}
-
 #[macro_export]
 macro_rules! impl_resource_type {
     ($ty:ident) => {
@@ -173,7 +163,7 @@ pub(crate) trait Labeled: ResourceType {
 #[macro_export]
 macro_rules! impl_labeled {
     ($ty:ident) => {
-        impl<A: HalApi> $crate::resource::Labeled for $ty<A> {
+        impl $crate::resource::Labeled for $ty {
             fn label(&self) -> &str {
                 &self.label
             }
@@ -188,7 +178,7 @@ pub(crate) trait Trackable {
 #[macro_export]
 macro_rules! impl_trackable {
     ($ty:ident) => {
-        impl<A: HalApi> $crate::resource::Trackable for $ty<A> {
+        impl $crate::resource::Trackable for $ty {
             fn tracker_index(&self) -> $crate::track::TrackerIndex {
                 self.tracking_data.tracker_index()
             }
@@ -230,11 +220,11 @@ pub enum BufferMapAsyncStatus {
 }
 
 #[derive(Debug)]
-pub(crate) enum BufferMapState<A: HalApi> {
+pub(crate) enum BufferMapState {
     /// Mapped at creation.
-    Init { staging_buffer: StagingBuffer<A> },
+    Init { staging_buffer: StagingBuffer },
     /// Waiting for GPU to be done before mapping
-    Waiting(BufferPendingMapping<A>),
+    Waiting(BufferPendingMapping),
     /// Mapped
     Active {
         mapping: hal::BufferMapping,
@@ -246,9 +236,9 @@ pub(crate) enum BufferMapState<A: HalApi> {
 }
 
 #[cfg(send_sync)]
-unsafe impl<A: HalApi> Send for BufferMapState<A> {}
+unsafe impl Send for BufferMapState {}
 #[cfg(send_sync)]
-unsafe impl<A: HalApi> Sync for BufferMapState<A> {}
+unsafe impl Sync for BufferMapState {}
 
 #[repr(C)]
 pub struct BufferMapCallbackC {
@@ -423,30 +413,30 @@ pub struct DestroyedResourceError(pub ResourceErrorIdent);
 pub type BufferAccessResult = Result<(), BufferAccessError>;
 
 #[derive(Debug)]
-pub(crate) struct BufferPendingMapping<A: HalApi> {
+pub(crate) struct BufferPendingMapping {
     pub(crate) range: Range<wgt::BufferAddress>,
     pub(crate) op: BufferMapOperation,
     // hold the parent alive while the mapping is active
-    pub(crate) _parent_buffer: Arc<Buffer<A>>,
+    pub(crate) _parent_buffer: Arc<Buffer>,
 }
 
 pub type BufferDescriptor<'a> = wgt::BufferDescriptor<Label<'a>>;
 
 #[derive(Debug)]
-pub struct Buffer<A: HalApi> {
+pub struct Buffer {
     pub(crate) raw: Snatchable<Box<dyn hal::DynBuffer>>,
-    pub(crate) device: Arc<Device<A>>,
+    pub(crate) device: Arc<Device>,
     pub(crate) usage: wgt::BufferUsages,
     pub(crate) size: wgt::BufferAddress,
     pub(crate) initialization_status: RwLock<BufferInitTracker>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
     pub(crate) tracking_data: TrackingData,
-    pub(crate) map_state: Mutex<BufferMapState<A>>,
-    pub(crate) bind_groups: Mutex<Vec<Weak<BindGroup<A>>>>,
+    pub(crate) map_state: Mutex<BufferMapState>,
+    pub(crate) bind_groups: Mutex<Vec<Weak<BindGroup>>>,
 }
 
-impl<A: HalApi> Drop for Buffer<A> {
+impl Drop for Buffer {
     fn drop(&mut self) {
         if let Some(raw) = self.raw.take() {
             resource_log!("Destroy raw {}", self.error_ident());
@@ -457,7 +447,7 @@ impl<A: HalApi> Drop for Buffer<A> {
     }
 }
 
-impl<A: HalApi> Buffer<A> {
+impl Buffer {
     pub(crate) fn raw<'a>(&'a self, guard: &'a SnatchGuard) -> Option<&'a dyn hal::DynBuffer> {
         self.raw.get(guard).map(|b| b.as_ref())
     }
@@ -761,28 +751,28 @@ pub enum CreateBufferError {
     MissingDownlevelFlags(#[from] MissingDownlevelFlags),
 }
 
-crate::impl_resource_type_generic!(Buffer);
+crate::impl_resource_type!(Buffer);
 crate::impl_labeled!(Buffer);
 crate::impl_parent_device!(Buffer);
-crate::impl_storage_item_generic!(Buffer);
+crate::impl_storage_item!(Buffer);
 crate::impl_trackable!(Buffer);
 
 /// A buffer that has been marked as destroyed and is staged for actual deletion soon.
 #[derive(Debug)]
-pub struct DestroyedBuffer<A: HalApi> {
+pub struct DestroyedBuffer {
     raw: ManuallyDrop<Box<dyn hal::DynBuffer>>,
-    device: Arc<Device<A>>,
+    device: Arc<Device>,
     label: String,
-    bind_groups: Vec<Weak<BindGroup<A>>>,
+    bind_groups: Vec<Weak<BindGroup>>,
 }
 
-impl<A: HalApi> DestroyedBuffer<A> {
+impl DestroyedBuffer {
     pub fn label(&self) -> &dyn Debug {
         &self.label
     }
 }
 
-impl<A: HalApi> Drop for DestroyedBuffer<A> {
+impl Drop for DestroyedBuffer {
     fn drop(&mut self) {
         let mut deferred = self.device.deferred_destroy.lock();
         for bind_group in self.bind_groups.drain(..) {
@@ -800,9 +790,9 @@ impl<A: HalApi> Drop for DestroyedBuffer<A> {
 }
 
 #[cfg(send_sync)]
-unsafe impl<A: HalApi> Send for StagingBuffer<A> {}
+unsafe impl Send for StagingBuffer {}
 #[cfg(send_sync)]
-unsafe impl<A: HalApi> Sync for StagingBuffer<A> {}
+unsafe impl Sync for StagingBuffer {}
 
 /// A temporary buffer, consumed by the command that uses it.
 ///
@@ -824,16 +814,16 @@ unsafe impl<A: HalApi> Sync for StagingBuffer<A> {}
 /// [`queue_write_texture`]: Global::queue_write_texture
 /// [`Device::pending_writes`]: crate::device::Device
 #[derive(Debug)]
-pub struct StagingBuffer<A: HalApi> {
+pub struct StagingBuffer {
     raw: Box<dyn hal::DynBuffer>,
-    device: Arc<Device<A>>,
+    device: Arc<Device>,
     pub(crate) size: wgt::BufferSize,
     is_coherent: bool,
     ptr: NonNull<u8>,
 }
 
-impl<A: HalApi> StagingBuffer<A> {
-    pub(crate) fn new(device: &Arc<Device<A>>, size: wgt::BufferSize) -> Result<Self, DeviceError> {
+impl StagingBuffer {
+    pub(crate) fn new(device: &Arc<Device>, size: wgt::BufferSize) -> Result<Self, DeviceError> {
         profiling::scope!("StagingBuffer::new");
         let stage_desc = hal::BufferDescriptor {
             label: crate::hal_label(Some("(wgpu internal) Staging"), device.instance_flags),
@@ -901,7 +891,7 @@ impl<A: HalApi> StagingBuffer<A> {
         }
     }
 
-    pub(crate) fn flush(self) -> FlushedStagingBuffer<A> {
+    pub(crate) fn flush(self) -> FlushedStagingBuffer {
         let device = self.device.raw();
         if !self.is_coherent {
             #[allow(clippy::single_range_in_vec_init)]
@@ -923,23 +913,23 @@ impl<A: HalApi> StagingBuffer<A> {
     }
 }
 
-crate::impl_resource_type_generic!(StagingBuffer);
-crate::impl_storage_item_generic!(StagingBuffer);
+crate::impl_resource_type!(StagingBuffer);
+crate::impl_storage_item!(StagingBuffer);
 
 #[derive(Debug)]
-pub struct FlushedStagingBuffer<A: HalApi> {
+pub struct FlushedStagingBuffer {
     raw: ManuallyDrop<Box<dyn hal::DynBuffer>>,
-    device: Arc<Device<A>>,
+    device: Arc<Device>,
     pub(crate) size: wgt::BufferSize,
 }
 
-impl<A: HalApi> FlushedStagingBuffer<A> {
+impl FlushedStagingBuffer {
     pub(crate) fn raw(&self) -> &dyn hal::DynBuffer {
         self.raw.as_ref()
     }
 }
 
-impl<A: HalApi> Drop for FlushedStagingBuffer<A> {
+impl Drop for FlushedStagingBuffer {
     fn drop(&mut self) {
         resource_log!("Destroy raw StagingBuffer");
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
@@ -987,9 +977,9 @@ pub enum TextureClearMode {
 }
 
 #[derive(Debug)]
-pub struct Texture<A: HalApi> {
+pub struct Texture {
     pub(crate) inner: Snatchable<TextureInner>,
-    pub(crate) device: Arc<Device<A>>,
+    pub(crate) device: Arc<Device>,
     pub(crate) desc: wgt::TextureDescriptor<(), Vec<wgt::TextureFormat>>,
     pub(crate) hal_usage: hal::TextureUses,
     pub(crate) format_features: wgt::TextureFormatFeatures,
@@ -999,13 +989,13 @@ pub struct Texture<A: HalApi> {
     pub(crate) label: String,
     pub(crate) tracking_data: TrackingData,
     pub(crate) clear_mode: TextureClearMode,
-    pub(crate) views: Mutex<Vec<Weak<TextureView<A>>>>,
-    pub(crate) bind_groups: Mutex<Vec<Weak<BindGroup<A>>>>,
+    pub(crate) views: Mutex<Vec<Weak<TextureView>>>,
+    pub(crate) bind_groups: Mutex<Vec<Weak<BindGroup>>>,
 }
 
-impl<A: HalApi> Texture<A> {
+impl Texture {
     pub(crate) fn new(
-        device: &Arc<Device<A>>,
+        device: &Arc<Device>,
         inner: TextureInner,
         hal_usage: hal::TextureUses,
         desc: &TextureDescriptor,
@@ -1056,7 +1046,7 @@ impl<A: HalApi> Texture<A> {
     }
 }
 
-impl<A: HalApi> Drop for Texture<A> {
+impl Drop for Texture {
     fn drop(&mut self) {
         match self.clear_mode {
             TextureClearMode::Surface {
@@ -1092,7 +1082,7 @@ impl<A: HalApi> Drop for Texture<A> {
     }
 }
 
-impl<A: HalApi> Texture<A> {
+impl Texture {
     pub(crate) fn try_inner<'a>(
         &'a self,
         guard: &'a SnatchGuard,
@@ -1208,7 +1198,7 @@ impl Global {
     ) -> R {
         profiling::scope!("Buffer::as_hal");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Ok(buffer) = hub.buffers.get(id) {
             let snatch_guard = buffer.device.snatchable_lock.read();
@@ -1231,7 +1221,7 @@ impl Global {
     ) -> R {
         profiling::scope!("Texture::as_hal");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Ok(texture) = hub.textures.get(id) {
             let snatch_guard = texture.device.snatchable_lock.read();
@@ -1255,7 +1245,7 @@ impl Global {
     ) -> R {
         profiling::scope!("TextureView::as_hal");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Ok(texture_view) = hub.texture_views.get(id) {
             let snatch_guard = texture_view.device.snatchable_lock.read();
@@ -1279,7 +1269,7 @@ impl Global {
     ) -> R {
         profiling::scope!("Adapter::as_hal");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
         let adapter = hub.adapters.get(id).ok();
         let hal_adapter = adapter
             .as_ref()
@@ -1299,7 +1289,7 @@ impl Global {
     ) -> R {
         profiling::scope!("Device::as_hal");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
         let device = hub.devices.get(id).ok();
         let hal_device = device
             .as_ref()
@@ -1319,7 +1309,7 @@ impl Global {
     ) -> R {
         profiling::scope!("Device::fence_as_hal");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Ok(device) = hub.devices.get(id) {
             let fence = device.fence.read();
@@ -1361,7 +1351,7 @@ impl Global {
     ) -> R {
         profiling::scope!("CommandEncoder::as_hal");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         if let Ok(cmd_buf) = hub.command_buffers.get(id.into_command_buffer_id()) {
             let mut cmd_buf_data = cmd_buf.data.lock();
@@ -1380,21 +1370,21 @@ impl Global {
 
 /// A texture that has been marked as destroyed and is staged for actual deletion soon.
 #[derive(Debug)]
-pub struct DestroyedTexture<A: HalApi> {
+pub struct DestroyedTexture {
     raw: ManuallyDrop<Box<dyn hal::DynTexture>>,
-    views: Vec<Weak<TextureView<A>>>,
-    bind_groups: Vec<Weak<BindGroup<A>>>,
-    device: Arc<Device<A>>,
+    views: Vec<Weak<TextureView>>,
+    bind_groups: Vec<Weak<BindGroup>>,
+    device: Arc<Device>,
     label: String,
 }
 
-impl<A: HalApi> DestroyedTexture<A> {
+impl DestroyedTexture {
     pub fn label(&self) -> &dyn Debug {
         &self.label
     }
 }
 
-impl<A: HalApi> Drop for DestroyedTexture<A> {
+impl Drop for DestroyedTexture {
     fn drop(&mut self) {
         let device = &self.device;
 
@@ -1508,13 +1498,13 @@ pub enum CreateTextureError {
     MissingDownlevelFlags(#[from] MissingDownlevelFlags),
 }
 
-crate::impl_resource_type_generic!(Texture);
+crate::impl_resource_type!(Texture);
 crate::impl_labeled!(Texture);
 crate::impl_parent_device!(Texture);
-crate::impl_storage_item_generic!(Texture);
+crate::impl_storage_item!(Texture);
 crate::impl_trackable!(Texture);
 
-impl<A: HalApi> Borrow<TextureSelector> for Texture<A> {
+impl Borrow<TextureSelector> for Texture {
     fn borrow(&self) -> &TextureSelector {
         &self.full_range
     }
@@ -1575,11 +1565,11 @@ pub enum TextureViewNotRenderableReason {
 }
 
 #[derive(Debug)]
-pub struct TextureView<A: HalApi> {
+pub struct TextureView {
     pub(crate) raw: Snatchable<Box<dyn hal::DynTextureView>>,
     // if it's a surface texture - it's none
-    pub(crate) parent: Arc<Texture<A>>,
-    pub(crate) device: Arc<Device<A>>,
+    pub(crate) parent: Arc<Texture>,
+    pub(crate) device: Arc<Device>,
     pub(crate) desc: HalTextureViewDescriptor,
     pub(crate) format_features: wgt::TextureFormatFeatures,
     /// This is `Err` only if the texture view is not renderable
@@ -1591,7 +1581,7 @@ pub struct TextureView<A: HalApi> {
     pub(crate) tracking_data: TrackingData,
 }
 
-impl<A: HalApi> Drop for TextureView<A> {
+impl Drop for TextureView {
     fn drop(&mut self) {
         if let Some(raw) = self.raw.take() {
             resource_log!("Destroy raw {}", self.error_ident());
@@ -1602,7 +1592,7 @@ impl<A: HalApi> Drop for TextureView<A> {
     }
 }
 
-impl<A: HalApi> TextureView<A> {
+impl TextureView {
     pub(crate) fn raw<'a>(
         &'a self,
         snatch_guard: &'a SnatchGuard,
@@ -1676,10 +1666,10 @@ pub enum CreateTextureViewError {
 #[non_exhaustive]
 pub enum TextureViewDestroyError {}
 
-crate::impl_resource_type_generic!(TextureView);
+crate::impl_resource_type!(TextureView);
 crate::impl_labeled!(TextureView);
 crate::impl_parent_device!(TextureView);
-crate::impl_storage_item_generic!(TextureView);
+crate::impl_storage_item!(TextureView);
 crate::impl_trackable!(TextureView);
 
 /// Describes a [`Sampler`]
@@ -1712,9 +1702,9 @@ pub struct SamplerDescriptor<'a> {
 }
 
 #[derive(Debug)]
-pub struct Sampler<A: HalApi> {
+pub struct Sampler {
     pub(crate) raw: ManuallyDrop<Box<dyn hal::DynSampler>>,
-    pub(crate) device: Arc<Device<A>>,
+    pub(crate) device: Arc<Device>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
     pub(crate) tracking_data: TrackingData,
@@ -1724,7 +1714,7 @@ pub struct Sampler<A: HalApi> {
     pub(crate) filtering: bool,
 }
 
-impl<A: HalApi> Drop for Sampler<A> {
+impl Drop for Sampler {
     fn drop(&mut self) {
         resource_log!("Destroy raw {}", self.error_ident());
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
@@ -1735,7 +1725,7 @@ impl<A: HalApi> Drop for Sampler<A> {
     }
 }
 
-impl<A: HalApi> Sampler<A> {
+impl Sampler {
     pub(crate) fn raw(&self) -> &dyn hal::DynSampler {
         self.raw.as_ref()
     }
@@ -1785,10 +1775,10 @@ pub enum CreateSamplerError {
     MissingFeatures(#[from] MissingFeatures),
 }
 
-crate::impl_resource_type_generic!(Sampler);
+crate::impl_resource_type!(Sampler);
 crate::impl_labeled!(Sampler);
 crate::impl_parent_device!(Sampler);
-crate::impl_storage_item_generic!(Sampler);
+crate::impl_storage_item!(Sampler);
 crate::impl_trackable!(Sampler);
 
 #[derive(Clone, Debug, Error)]
@@ -1807,16 +1797,16 @@ pub enum CreateQuerySetError {
 pub type QuerySetDescriptor<'a> = wgt::QuerySetDescriptor<Label<'a>>;
 
 #[derive(Debug)]
-pub struct QuerySet<A: HalApi> {
+pub struct QuerySet {
     pub(crate) raw: ManuallyDrop<Box<dyn hal::DynQuerySet>>,
-    pub(crate) device: Arc<Device<A>>,
+    pub(crate) device: Arc<Device>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
     pub(crate) tracking_data: TrackingData,
     pub(crate) desc: wgt::QuerySetDescriptor<()>,
 }
 
-impl<A: HalApi> Drop for QuerySet<A> {
+impl Drop for QuerySet {
     fn drop(&mut self) {
         resource_log!("Destroy raw {}", self.error_ident());
         // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
@@ -1827,13 +1817,13 @@ impl<A: HalApi> Drop for QuerySet<A> {
     }
 }
 
-crate::impl_resource_type_generic!(QuerySet);
+crate::impl_resource_type!(QuerySet);
 crate::impl_labeled!(QuerySet);
 crate::impl_parent_device!(QuerySet);
-crate::impl_storage_item_generic!(QuerySet);
+crate::impl_storage_item!(QuerySet);
 crate::impl_trackable!(QuerySet);
 
-impl<A: HalApi> QuerySet<A> {
+impl QuerySet {
     pub(crate) fn raw(&self) -> &dyn hal::DynQuerySet {
         self.raw.as_ref()
     }
diff --git a/wgpu-core/src/storage.rs b/wgpu-core/src/storage.rs
index 0adcf51abd..c5e91eedd4 100644
--- a/wgpu-core/src/storage.rs
+++ b/wgpu-core/src/storage.rs
@@ -28,16 +28,6 @@ pub(crate) trait StorageItem: ResourceType {
     type Marker: Marker;
 }
 
-// TODO(#5124): Remove the typed version.
-#[macro_export]
-macro_rules! impl_storage_item_generic {
-    ($ty:ident) => {
-        impl<A: HalApi> $crate::storage::StorageItem for $ty<A> {
-            type Marker = $crate::id::markers::$ty;
-        }
-    };
-}
-
 #[macro_export]
 macro_rules! impl_storage_item {
     ($ty:ident) => {
diff --git a/wgpu-core/src/track/buffer.rs b/wgpu-core/src/track/buffer.rs
index ea670de35a..13629dfbc9 100644
--- a/wgpu-core/src/track/buffer.rs
+++ b/wgpu-core/src/track/buffer.rs
@@ -8,7 +8,6 @@ use std::sync::{Arc, Weak};
 
 use super::{PendingTransition, TrackerIndex};
 use crate::{
-    hal_api::HalApi,
     resource::{Buffer, Trackable},
     snatch::SnatchGuard,
     track::{
@@ -39,10 +38,10 @@ impl ResourceUses for BufferUses {
 
 /// Stores a bind group's buffers + their usages (within the bind group).
 #[derive(Debug)]
-pub(crate) struct BufferBindGroupState<A: HalApi> {
-    buffers: Vec<(Arc<Buffer<A>>, BufferUses)>,
+pub(crate) struct BufferBindGroupState {
+    buffers: Vec<(Arc<Buffer>, BufferUses)>,
 }
-impl<A: HalApi> BufferBindGroupState<A> {
+impl BufferBindGroupState {
     pub fn new() -> Self {
         Self {
             buffers: Vec::new(),
@@ -68,19 +67,19 @@ impl<A: HalApi> BufferBindGroupState<A> {
     }
 
     /// Adds the given resource with the given state.
-    pub fn insert_single(&mut self, buffer: Arc<Buffer<A>>, state: BufferUses) {
+    pub fn insert_single(&mut self, buffer: Arc<Buffer>, state: BufferUses) {
         self.buffers.push((buffer, state));
     }
 }
 
 /// Stores all buffer state within a single usage scope.
 #[derive(Debug)]
-pub(crate) struct BufferUsageScope<A: HalApi> {
+pub(crate) struct BufferUsageScope {
     state: Vec<BufferUses>,
-    metadata: ResourceMetadata<Arc<Buffer<A>>>,
+    metadata: ResourceMetadata<Arc<Buffer>>,
 }
 
-impl<A: HalApi> Default for BufferUsageScope<A> {
+impl Default for BufferUsageScope {
     fn default() -> Self {
         Self {
             state: Vec::new(),
@@ -89,7 +88,7 @@ impl<A: HalApi> Default for BufferUsageScope<A> {
     }
 }
 
-impl<A: HalApi> BufferUsageScope<A> {
+impl BufferUsageScope {
     fn tracker_assert_in_bounds(&self, index: usize) {
         strict_assert!(index < self.state.len());
         self.metadata.tracker_assert_in_bounds(index);
@@ -129,7 +128,7 @@ impl<A: HalApi> BufferUsageScope<A> {
     /// method is called.
     pub unsafe fn merge_bind_group(
         &mut self,
-        bind_group: &BufferBindGroupState<A>,
+        bind_group: &BufferBindGroupState,
     ) -> Result<(), ResourceUsageCompatibilityError> {
         for &(ref resource, state) in bind_group.buffers.iter() {
             let index = resource.tracker_index().as_usize();
@@ -199,7 +198,7 @@ impl<A: HalApi> BufferUsageScope<A> {
     /// the vectors will be extended. A call to set_size is not needed.
     pub fn merge_single(
         &mut self,
-        buffer: &Arc<Buffer<A>>,
+        buffer: &Arc<Buffer>,
         new_state: BufferUses,
     ) -> Result<(), ResourceUsageCompatibilityError> {
         let index = buffer.tracker_index().as_usize();
@@ -225,16 +224,16 @@ impl<A: HalApi> BufferUsageScope<A> {
 }
 
 /// Stores all buffer state within a command buffer.
-pub(crate) struct BufferTracker<A: HalApi> {
+pub(crate) struct BufferTracker {
     start: Vec<BufferUses>,
     end: Vec<BufferUses>,
 
-    metadata: ResourceMetadata<Arc<Buffer<A>>>,
+    metadata: ResourceMetadata<Arc<Buffer>>,
 
     temp: Vec<PendingTransition<BufferUses>>,
 }
 
-impl<A: HalApi> BufferTracker<A> {
+impl BufferTracker {
     pub fn new() -> Self {
         Self {
             start: Vec::new(),
@@ -271,12 +270,12 @@ impl<A: HalApi> BufferTracker<A> {
     }
 
     /// Returns true if the given buffer is tracked.
-    pub fn contains(&self, buffer: &Buffer<A>) -> bool {
+    pub fn contains(&self, buffer: &Buffer) -> bool {
         self.metadata.contains(buffer.tracker_index().as_usize())
     }
 
     /// Returns a list of all buffers tracked.
-    pub fn used_resources(&self) -> impl Iterator<Item = Arc<Buffer<A>>> + '_ {
+    pub fn used_resources(&self) -> impl Iterator<Item = Arc<Buffer>> + '_ {
         self.metadata.owned_resources()
     }
 
@@ -301,7 +300,7 @@ impl<A: HalApi> BufferTracker<A> {
     /// the vectors will be extended. A call to set_size is not needed.
     pub fn set_single(
         &mut self,
-        buffer: &Arc<Buffer<A>>,
+        buffer: &Arc<Buffer>,
         state: BufferUses,
     ) -> Option<PendingTransition<BufferUses>> {
         let index: usize = buffer.tracker_index().as_usize();
@@ -374,7 +373,7 @@ impl<A: HalApi> BufferTracker<A> {
     ///
     /// If the ID is higher than the length of internal vectors,
     /// the vectors will be extended. A call to set_size is not needed.
-    pub fn set_from_usage_scope(&mut self, scope: &BufferUsageScope<A>) {
+    pub fn set_from_usage_scope(&mut self, scope: &BufferUsageScope) {
         let incoming_size = scope.state.len();
         if incoming_size > self.start.len() {
             self.set_size(incoming_size);
@@ -422,7 +421,7 @@ impl<A: HalApi> BufferTracker<A> {
     /// method is called.
     pub unsafe fn set_and_remove_from_usage_scope_sparse(
         &mut self,
-        scope: &mut BufferUsageScope<A>,
+        scope: &mut BufferUsageScope,
         index_source: impl IntoIterator<Item = TrackerIndex>,
     ) {
         let incoming_size = scope.state.len();
@@ -461,13 +460,13 @@ impl<A: HalApi> BufferTracker<A> {
 }
 
 /// Stores all buffer state within a device.
-pub(crate) struct DeviceBufferTracker<A: HalApi> {
+pub(crate) struct DeviceBufferTracker {
     current_states: Vec<BufferUses>,
-    metadata: ResourceMetadata<Weak<Buffer<A>>>,
+    metadata: ResourceMetadata<Weak<Buffer>>,
     temp: Vec<PendingTransition<BufferUses>>,
 }
 
-impl<A: HalApi> DeviceBufferTracker<A> {
+impl DeviceBufferTracker {
     pub fn new() -> Self {
         Self {
             current_states: Vec::new(),
@@ -490,14 +489,14 @@ impl<A: HalApi> DeviceBufferTracker<A> {
     }
 
     /// Returns a list of all buffers tracked.
-    pub fn used_resources(&self) -> impl Iterator<Item = Weak<Buffer<A>>> + '_ {
+    pub fn used_resources(&self) -> impl Iterator<Item = Weak<Buffer>> + '_ {
         self.metadata.owned_resources()
     }
 
     /// Inserts a single buffer and its state into the resource tracker.
     ///
     /// If the resource already exists in the tracker, it will be overwritten.
-    pub fn insert_single(&mut self, buffer: &Arc<Buffer<A>>, state: BufferUses) {
+    pub fn insert_single(&mut self, buffer: &Arc<Buffer>, state: BufferUses) {
         let index = buffer.tracker_index().as_usize();
 
         self.allow_index(index);
@@ -525,7 +524,7 @@ impl<A: HalApi> DeviceBufferTracker<A> {
     /// is returned. No more than one transition is needed.
     pub fn set_single(
         &mut self,
-        buffer: &Arc<Buffer<A>>,
+        buffer: &Arc<Buffer>,
         state: BufferUses,
     ) -> Option<PendingTransition<BufferUses>> {
         let index: usize = buffer.tracker_index().as_usize();
@@ -555,7 +554,7 @@ impl<A: HalApi> DeviceBufferTracker<A> {
     /// those transitions are returned.
     pub fn set_from_tracker_and_drain_transitions<'a, 'b: 'a>(
         &'a mut self,
-        tracker: &'a BufferTracker<A>,
+        tracker: &'a BufferTracker,
         snatch_guard: &'b SnatchGuard<'b>,
     ) -> impl Iterator<Item = BufferBarrier<'a, dyn hal::DynBuffer>> {
         for index in tracker.metadata.owned_indices() {
@@ -621,14 +620,14 @@ impl BufferStateProvider<'_> {
 /// Indexes must be valid indexes into all arrays passed in
 /// to this function, either directly or via metadata or provider structs.
 #[inline(always)]
-unsafe fn insert_or_merge<A: HalApi>(
+unsafe fn insert_or_merge(
     start_states: Option<&mut [BufferUses]>,
     current_states: &mut [BufferUses],
-    resource_metadata: &mut ResourceMetadata<Arc<Buffer<A>>>,
+    resource_metadata: &mut ResourceMetadata<Arc<Buffer>>,
     index32: u32,
     index: usize,
     state_provider: BufferStateProvider<'_>,
-    metadata_provider: ResourceMetadataProvider<'_, Arc<Buffer<A>>>,
+    metadata_provider: ResourceMetadataProvider<'_, Arc<Buffer>>,
 ) -> Result<(), ResourceUsageCompatibilityError> {
     let currently_owned = unsafe { resource_metadata.contains_unchecked(index) };
 
@@ -677,14 +676,14 @@ unsafe fn insert_or_merge<A: HalApi>(
 /// Indexes must be valid indexes into all arrays passed in
 /// to this function, either directly or via metadata or provider structs.
 #[inline(always)]
-unsafe fn insert_or_barrier_update<A: HalApi>(
+unsafe fn insert_or_barrier_update(
     start_states: Option<&mut [BufferUses]>,
     current_states: &mut [BufferUses],
-    resource_metadata: &mut ResourceMetadata<Arc<Buffer<A>>>,
+    resource_metadata: &mut ResourceMetadata<Arc<Buffer>>,
     index: usize,
     start_state_provider: BufferStateProvider<'_>,
     end_state_provider: Option<BufferStateProvider<'_>>,
-    metadata_provider: ResourceMetadataProvider<'_, Arc<Buffer<A>>>,
+    metadata_provider: ResourceMetadataProvider<'_, Arc<Buffer>>,
     barriers: &mut Vec<PendingTransition<BufferUses>>,
 ) {
     let currently_owned = unsafe { resource_metadata.contains_unchecked(index) };
@@ -741,12 +740,12 @@ unsafe fn insert<T: Clone>(
 }
 
 #[inline(always)]
-unsafe fn merge<A: HalApi>(
+unsafe fn merge(
     current_states: &mut [BufferUses],
     _index32: u32,
     index: usize,
     state_provider: BufferStateProvider<'_>,
-    metadata_provider: ResourceMetadataProvider<'_, Arc<Buffer<A>>>,
+    metadata_provider: ResourceMetadataProvider<'_, Arc<Buffer>>,
 ) -> Result<(), ResourceUsageCompatibilityError> {
     let current_state = unsafe { current_states.get_unchecked_mut(index) };
     let new_state = unsafe { state_provider.get_state(index) };
diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs
index a75092d8be..1c2718981b 100644
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -1,7 +1,7 @@
 /*! Resource State and Lifetime Trackers
 
 These structures are responsible for keeping track of resource state,
-generating barriers where needed, and making sure resources are kept
+generating barriers where needed, and making sure resources are kept
 alive until the trackers die.
 
 ## General Architecture
@@ -35,7 +35,7 @@ Stateless trackers only store metadata and own the given resource.
 ## Use Case
 
 Within each type of tracker, the trackers are further split into 3 different
-use cases, Bind Group, Usage Scope, and a full Tracker.
+use cases, Bind Group, Usage Scope, and a full Tracker.
 
 Bind Group trackers are just a list of different resources, their refcount,
 and how they are used. Textures are used via a selector and a usage type.
@@ -60,7 +60,7 @@ not always contain every resource. Some resources (or even most resources) go
 unused in any given command buffer. So to help speed up the process of iterating
 through possibly thousands of resources, we use a bit vector to represent if
 a resource is in the buffer or not. This allows us extremely efficient memory
-utilization, as well as being able to bail out of whole blocks of 32-64 resources
+utilization, as well as being able to bail out of whole blocks of 32-64 resources
 with a single usize comparison with zero. In practice this means that merging
 partially resident buffers is extremely quick.
 
@@ -103,7 +103,6 @@ mod texture;
 
 use crate::{
     binding_model, command,
-    hal_api::HalApi,
     lock::{rank, Mutex},
     pipeline,
     resource::{self, Labeled, ResourceErrorIdent},
@@ -257,9 +256,9 @@ pub(crate) type PendingTransitionList = Vec<PendingTransition<hal::TextureUses>>
 
 impl PendingTransition<hal::BufferUses> {
     /// Produce the hal barrier corresponding to the transition.
-    pub fn into_hal<'a, A: HalApi>(
+    pub fn into_hal<'a>(
         self,
-        buf: &'a resource::Buffer<A>,
+        buf: &'a resource::Buffer,
         snatch_guard: &'a SnatchGuard<'a>,
     ) -> hal::BufferBarrier<'a, dyn hal::DynBuffer> {
         let buffer = buf.raw(snatch_guard).expect("Buffer is destroyed");
@@ -352,8 +351,8 @@ pub enum ResourceUsageCompatibilityError {
 }
 
 impl ResourceUsageCompatibilityError {
-    fn from_buffer<A: HalApi>(
-        buffer: &resource::Buffer<A>,
+    fn from_buffer(
+        buffer: &resource::Buffer,
         current_state: hal::BufferUses,
         new_state: hal::BufferUses,
     ) -> Self {
@@ -366,8 +365,8 @@ impl ResourceUsageCompatibilityError {
         }
     }
 
-    fn from_texture<A: HalApi>(
-        texture: &resource::Texture<A>,
+    fn from_texture(
+        texture: &resource::Texture,
         selector: TextureSelector,
         current_state: hal::TextureUses,
         new_state: hal::TextureUses,
@@ -417,13 +416,13 @@ impl<T: ResourceUses> fmt::Display for InvalidUse<T> {
 /// All bind group states are sorted by their ID so that when adding to a tracker,
 /// they are added in the most efficient order possible (ascending order).
 #[derive(Debug)]
-pub(crate) struct BindGroupStates<A: HalApi> {
-    pub buffers: BufferBindGroupState<A>,
-    pub views: TextureViewBindGroupState<A>,
-    pub samplers: StatelessTracker<resource::Sampler<A>>,
+pub(crate) struct BindGroupStates {
+    pub buffers: BufferBindGroupState,
+    pub views: TextureViewBindGroupState,
+    pub samplers: StatelessTracker<resource::Sampler>,
 }
 
-impl<A: HalApi> BindGroupStates<A> {
+impl BindGroupStates {
     pub fn new() -> Self {
         Self {
             buffers: BufferBindGroupState::new(),
@@ -450,15 +449,15 @@ impl<A: HalApi> BindGroupStates<A> {
 /// that are not normally included in a usage scope, but are used by render bundles
 /// and need to be owned by the render bundles.
 #[derive(Debug)]
-pub(crate) struct RenderBundleScope<A: HalApi> {
-    pub buffers: BufferUsageScope<A>,
-    pub textures: TextureUsageScope<A>,
+pub(crate) struct RenderBundleScope {
+    pub buffers: BufferUsageScope,
+    pub textures: TextureUsageScope,
     // Don't need to track views and samplers, they are never used directly, only by bind groups.
-    pub bind_groups: StatelessTracker<binding_model::BindGroup<A>>,
-    pub render_pipelines: StatelessTracker<pipeline::RenderPipeline<A>>,
+    pub bind_groups: StatelessTracker<binding_model::BindGroup>,
+    pub render_pipelines: StatelessTracker<pipeline::RenderPipeline>,
 }
 
-impl<A: HalApi> RenderBundleScope<A> {
+impl RenderBundleScope {
     /// Create the render bundle scope and pull the maximum IDs from the hubs.
     pub fn new() -> Self {
         Self {
@@ -471,7 +470,7 @@ impl<A: HalApi> RenderBundleScope<A> {
 
     /// Merge the inner contents of a bind group into the render bundle tracker.
     ///
-    /// Only stateful things are merged in here, all other resources are owned
+    /// Only stateful things are merged in here, all other resources are owned
     /// indirectly by the bind group.
     ///
     /// # Safety
@@ -480,7 +479,7 @@ impl<A: HalApi> RenderBundleScope<A> {
     /// length of the storage given at the call to `new`.
     pub unsafe fn merge_bind_group(
         &mut self,
-        bind_group: &BindGroupStates<A>,
+        bind_group: &BindGroupStates,
     ) -> Result<(), ResourceUsageCompatibilityError> {
         unsafe { self.buffers.merge_bind_group(&bind_group.buffers)? };
         unsafe { self.textures.merge_bind_group(&bind_group.views)? };
@@ -492,18 +491,18 @@ impl<A: HalApi> RenderBundleScope<A> {
 /// A pool for storing the memory used by [`UsageScope`]s. We take and store this memory when the
 /// scope is dropped to avoid reallocating. The memory required only grows and allocation cost is
 /// significant when a large number of resources have been used.
-pub(crate) type UsageScopePool<A> = Mutex<Vec<(BufferUsageScope<A>, TextureUsageScope<A>)>>;
+pub(crate) type UsageScopePool = Mutex<Vec<(BufferUsageScope, TextureUsageScope)>>;
 
 /// A usage scope tracker. Only needs to store stateful resources as stateless
 /// resources cannot possibly have a usage conflict.
 #[derive(Debug)]
-pub(crate) struct UsageScope<'a, A: HalApi> {
-    pub pool: &'a UsageScopePool<A>,
-    pub buffers: BufferUsageScope<A>,
-    pub textures: TextureUsageScope<A>,
+pub(crate) struct UsageScope<'a> {
+    pub pool: &'a UsageScopePool,
+    pub buffers: BufferUsageScope,
+    pub textures: TextureUsageScope,
 }
 
-impl<'a, A: HalApi> Drop for UsageScope<'a, A> {
+impl<'a> Drop for UsageScope<'a> {
     fn drop(&mut self) {
         // clear vecs and push into pool
         self.buffers.clear();
@@ -515,14 +514,14 @@ impl<'a, A: HalApi> Drop for UsageScope<'a, A> {
     }
 }
 
-impl<A: HalApi> UsageScope<'static, A> {
+impl UsageScope<'static> {
     pub fn new_pooled<'d>(
-        pool: &'d UsageScopePool<A>,
+        pool: &'d UsageScopePool,
         tracker_indices: &TrackerIndexAllocators,
-    ) -> UsageScope<'d, A> {
+    ) -> UsageScope<'d> {
         let pooled = pool.lock().pop().unwrap_or_default();
 
-        let mut scope = UsageScope::<'d, A> {
+        let mut scope = UsageScope::<'d> {
             pool,
             buffers: pooled.0,
             textures: pooled.1,
@@ -534,10 +533,10 @@ impl<A: HalApi> UsageScope<'static, A> {
     }
 }
 
-impl<'a, A: HalApi> UsageScope<'a, A> {
+impl<'a> UsageScope<'a> {
     /// Merge the inner contents of a bind group into the usage scope.
     ///
-    /// Only stateful things are merged in here, all other resources are owned
+    /// Only stateful things are merged in here, all other resources are owned
     /// indirectly by the bind group.
     ///
     /// # Safety
@@ -546,7 +545,7 @@ impl<'a, A: HalApi> UsageScope<'a, A> {
     /// length of the storage given at the call to `new`.
     pub unsafe fn merge_bind_group(
         &mut self,
-        bind_group: &BindGroupStates<A>,
+        bind_group: &BindGroupStates,
     ) -> Result<(), ResourceUsageCompatibilityError> {
         unsafe {
             self.buffers.merge_bind_group(&bind_group.buffers)?;
@@ -558,7 +557,7 @@ impl<'a, A: HalApi> UsageScope<'a, A> {
 
     /// Merge the inner contents of a bind group into the usage scope.
     ///
-    /// Only stateful things are merged in here, all other resources are owned
+    /// Only stateful things are merged in here, all other resources are owned
     /// indirectly by a bind group or are merged directly into the command buffer tracker.
     ///
     /// # Safety
@@ -567,7 +566,7 @@ impl<'a, A: HalApi> UsageScope<'a, A> {
     /// length of the storage given at the call to `new`.
     pub unsafe fn merge_render_bundle(
         &mut self,
-        render_bundle: &RenderBundleScope<A>,
+        render_bundle: &RenderBundleScope,
     ) -> Result<(), ResourceUsageCompatibilityError> {
         self.buffers.merge_usage_scope(&render_bundle.buffers)?;
         self.textures.merge_usage_scope(&render_bundle.textures)?;
@@ -577,12 +576,12 @@ impl<'a, A: HalApi> UsageScope<'a, A> {
 }
 
 /// A tracker used by Device.
-pub(crate) struct DeviceTracker<A: HalApi> {
-    pub buffers: DeviceBufferTracker<A>,
-    pub textures: DeviceTextureTracker<A>,
+pub(crate) struct DeviceTracker {
+    pub buffers: DeviceBufferTracker,
+    pub textures: DeviceTextureTracker,
 }
 
-impl<A: HalApi> DeviceTracker<A> {
+impl DeviceTracker {
     pub fn new() -> Self {
         Self {
             buffers: DeviceBufferTracker::new(),
@@ -592,18 +591,18 @@ impl<A: HalApi> DeviceTracker<A> {
 }
 
 /// A full double sided tracker used by CommandBuffers.
-pub(crate) struct Tracker<A: HalApi> {
-    pub buffers: BufferTracker<A>,
-    pub textures: TextureTracker<A>,
-    pub views: StatelessTracker<resource::TextureView<A>>,
-    pub bind_groups: StatelessTracker<binding_model::BindGroup<A>>,
-    pub compute_pipelines: StatelessTracker<pipeline::ComputePipeline<A>>,
-    pub render_pipelines: StatelessTracker<pipeline::RenderPipeline<A>>,
-    pub bundles: StatelessTracker<command::RenderBundle<A>>,
-    pub query_sets: StatelessTracker<resource::QuerySet<A>>,
+pub(crate) struct Tracker {
+    pub buffers: BufferTracker,
+    pub textures: TextureTracker,
+    pub views: StatelessTracker<resource::TextureView>,
+    pub bind_groups: StatelessTracker<binding_model::BindGroup>,
+    pub compute_pipelines: StatelessTracker<pipeline::ComputePipeline>,
+    pub render_pipelines: StatelessTracker<pipeline::RenderPipeline>,
+    pub bundles: StatelessTracker<command::RenderBundle>,
+    pub query_sets: StatelessTracker<resource::QuerySet>,
 }
 
-impl<A: HalApi> Tracker<A> {
+impl Tracker {
     pub fn new() -> Self {
         Self {
             buffers: BufferTracker::new(),
@@ -632,7 +631,7 @@ impl<A: HalApi> Tracker<A> {
     /// bind group as a source of which IDs to look at. The bind groups
     /// must have first been added to the usage scope.
     ///
-    /// Only stateful things are merged in here, all other resources are owned
+    /// Only stateful things are merged in here, all other resources are owned
     /// indirectly by the bind group.
     ///
     /// # Safety
@@ -641,8 +640,8 @@ impl<A: HalApi> Tracker<A> {
     /// value given to `set_size`
     pub unsafe fn set_and_remove_from_usage_scope_sparse(
         &mut self,
-        scope: &mut UsageScope<A>,
-        bind_group: &BindGroupStates<A>,
+        scope: &mut UsageScope,
+        bind_group: &BindGroupStates,
     ) {
         unsafe {
             self.buffers.set_and_remove_from_usage_scope_sparse(
diff --git a/wgpu-core/src/track/texture.rs b/wgpu-core/src/track/texture.rs
index 9b11527645..1c74bffd97 100644
--- a/wgpu-core/src/track/texture.rs
+++ b/wgpu-core/src/track/texture.rs
@@ -20,7 +20,6 @@
 
 use super::{range::RangedStates, PendingTransition, PendingTransitionList, TrackerIndex};
 use crate::{
-    hal_api::HalApi,
     resource::{Texture, TextureInner, TextureView, Trackable},
     snatch::SnatchGuard,
     track::{
@@ -152,10 +151,10 @@ impl ComplexTextureState {
 
 /// Stores a bind group's texture views + their usages (within the bind group).
 #[derive(Debug)]
-pub(crate) struct TextureViewBindGroupState<A: HalApi> {
-    views: Vec<(Arc<TextureView<A>>, TextureUses)>,
+pub(crate) struct TextureViewBindGroupState {
+    views: Vec<(Arc<TextureView>, TextureUses)>,
 }
-impl<A: HalApi> TextureViewBindGroupState<A> {
+impl TextureViewBindGroupState {
     pub fn new() -> Self {
         Self { views: Vec::new() }
     }
@@ -170,7 +169,7 @@ impl<A: HalApi> TextureViewBindGroupState<A> {
     }
 
     /// Adds the given resource with the given state.
-    pub fn insert_single(&mut self, view: Arc<TextureView<A>>, usage: TextureUses) {
+    pub fn insert_single(&mut self, view: Arc<TextureView>, usage: TextureUses) {
         self.views.push((view, usage));
     }
 }
@@ -202,12 +201,12 @@ impl TextureStateSet {
 
 /// Stores all texture state within a single usage scope.
 #[derive(Debug)]
-pub(crate) struct TextureUsageScope<A: HalApi> {
+pub(crate) struct TextureUsageScope {
     set: TextureStateSet,
-    metadata: ResourceMetadata<Arc<Texture<A>>>,
+    metadata: ResourceMetadata<Arc<Texture>>,
 }
 
-impl<A: HalApi> Default for TextureUsageScope<A> {
+impl Default for TextureUsageScope {
     fn default() -> Self {
         Self {
             set: TextureStateSet::new(),
@@ -216,7 +215,7 @@ impl<A: HalApi> Default for TextureUsageScope<A> {
     }
 }
 
-impl<A: HalApi> TextureUsageScope<A> {
+impl TextureUsageScope {
     fn tracker_assert_in_bounds(&self, index: usize) {
         self.metadata.tracker_assert_in_bounds(index);
 
@@ -305,7 +304,7 @@ impl<A: HalApi> TextureUsageScope<A> {
     /// method is called.
     pub unsafe fn merge_bind_group(
         &mut self,
-        bind_group: &TextureViewBindGroupState<A>,
+        bind_group: &TextureViewBindGroupState,
     ) -> Result<(), ResourceUsageCompatibilityError> {
         for (view, usage) in bind_group.views.iter() {
             unsafe { self.merge_single(&view.parent, Some(view.selector.clone()), *usage)? };
@@ -329,7 +328,7 @@ impl<A: HalApi> TextureUsageScope<A> {
     /// method is called.
     pub unsafe fn merge_single(
         &mut self,
-        texture: &Arc<Texture<A>>,
+        texture: &Arc<Texture>,
         selector: Option<TextureSelector>,
         new_state: TextureUses,
     ) -> Result<(), ResourceUsageCompatibilityError> {
@@ -353,26 +352,26 @@ impl<A: HalApi> TextureUsageScope<A> {
     }
 }
 
-pub(crate) trait TextureTrackerSetSingle<A: HalApi> {
+pub(crate) trait TextureTrackerSetSingle {
     fn set_single(
         &mut self,
-        texture: &Arc<Texture<A>>,
+        texture: &Arc<Texture>,
         selector: TextureSelector,
         new_state: TextureUses,
     ) -> Drain<'_, PendingTransition<TextureUses>>;
 }
 
 /// Stores all texture state within a command buffer.
-pub(crate) struct TextureTracker<A: HalApi> {
+pub(crate) struct TextureTracker {
     start_set: TextureStateSet,
     end_set: TextureStateSet,
 
-    metadata: ResourceMetadata<Arc<Texture<A>>>,
+    metadata: ResourceMetadata<Arc<Texture>>,
 
     temp: Vec<PendingTransition<TextureUses>>,
 }
 
-impl<A: HalApi> TextureTracker<A> {
+impl TextureTracker {
     pub fn new() -> Self {
         Self {
             start_set: TextureStateSet::new(),
@@ -425,12 +424,12 @@ impl<A: HalApi> TextureTracker<A> {
     }
 
     /// Returns true if the tracker owns the given texture.
-    pub fn contains(&self, texture: &Texture<A>) -> bool {
+    pub fn contains(&self, texture: &Texture) -> bool {
         self.metadata.contains(texture.tracker_index().as_usize())
     }
 
     /// Returns a list of all textures tracked.
-    pub fn used_resources(&self) -> impl Iterator<Item = Arc<Texture<A>>> + '_ {
+    pub fn used_resources(&self) -> impl Iterator<Item = Arc<Texture>> + '_ {
         self.metadata.owned_resources()
     }
 
@@ -461,7 +460,7 @@ impl<A: HalApi> TextureTracker<A> {
     /// the vectors will be extended. A call to set_size is not needed.
     pub fn set_single(
         &mut self,
-        texture: &Arc<Texture<A>>,
+        texture: &Arc<Texture>,
         selector: TextureSelector,
         new_state: TextureUses,
     ) -> Drain<'_, PendingTransition<TextureUses>> {
@@ -539,7 +538,7 @@ impl<A: HalApi> TextureTracker<A> {
     ///
     /// If the ID is higher than the length of internal vectors,
     /// the vectors will be extended. A call to set_size is not needed.
-    pub fn set_from_usage_scope(&mut self, scope: &TextureUsageScope<A>) {
+    pub fn set_from_usage_scope(&mut self, scope: &TextureUsageScope) {
         let incoming_size = scope.set.simple.len();
         if incoming_size > self.start_set.simple.len() {
             self.set_size(incoming_size);
@@ -587,8 +586,8 @@ impl<A: HalApi> TextureTracker<A> {
     /// method is called.
     pub unsafe fn set_and_remove_from_usage_scope_sparse(
         &mut self,
-        scope: &mut TextureUsageScope<A>,
-        bind_group_state: &TextureViewBindGroupState<A>,
+        scope: &mut TextureUsageScope,
+        bind_group_state: &TextureViewBindGroupState,
     ) {
         let incoming_size = scope.set.simple.len();
         if incoming_size > self.start_set.simple.len() {
@@ -624,10 +623,10 @@ impl<A: HalApi> TextureTracker<A> {
     }
 }
 
-impl<A: HalApi> TextureTrackerSetSingle<A> for TextureTracker<A> {
+impl TextureTrackerSetSingle for TextureTracker {
     fn set_single(
         &mut self,
-        texture: &Arc<Texture<A>>,
+        texture: &Arc<Texture>,
         selector: TextureSelector,
         new_state: TextureUses,
     ) -> Drain<'_, PendingTransition<TextureUses>> {
@@ -636,13 +635,13 @@ impl<A: HalApi> TextureTrackerSetSingle<A> for TextureTracker<A> {
 }
 
 /// Stores all texture state within a device.
-pub(crate) struct DeviceTextureTracker<A: HalApi> {
+pub(crate) struct DeviceTextureTracker {
     current_state_set: TextureStateSet,
-    metadata: ResourceMetadata<Weak<Texture<A>>>,
+    metadata: ResourceMetadata<Weak<Texture>>,
     temp: Vec<PendingTransition<TextureUses>>,
 }
 
-impl<A: HalApi> DeviceTextureTracker<A> {
+impl DeviceTextureTracker {
     pub fn new() -> Self {
         Self {
             current_state_set: TextureStateSet::new(),
@@ -674,14 +673,14 @@ impl<A: HalApi> DeviceTextureTracker<A> {
     }
 
     /// Returns a list of all textures tracked.
-    pub fn used_resources(&self) -> impl Iterator<Item = Weak<Texture<A>>> + '_ {
+    pub fn used_resources(&self) -> impl Iterator<Item = Weak<Texture>> + '_ {
         self.metadata.owned_resources()
     }
 
     /// Inserts a single texture and a state into the resource tracker.
     ///
     /// If the resource already exists in the tracker, it will be overwritten.
-    pub fn insert_single(&mut self, texture: &Arc<Texture<A>>, usage: TextureUses) {
+    pub fn insert_single(&mut self, texture: &Arc<Texture>, usage: TextureUses) {
         let index = texture.tracker_index().as_usize();
 
         self.allow_index(index);
@@ -710,7 +709,7 @@ impl<A: HalApi> DeviceTextureTracker<A> {
     /// is returned.
     pub fn set_single(
         &mut self,
-        texture: &Arc<Texture<A>>,
+        texture: &Arc<Texture>,
         selector: TextureSelector,
         new_state: TextureUses,
     ) -> Drain<'_, PendingTransition<TextureUses>> {
@@ -752,7 +751,7 @@ impl<A: HalApi> DeviceTextureTracker<A> {
     /// those transitions are returned.
     pub fn set_from_tracker_and_drain_transitions<'a, 'b: 'a>(
         &'a mut self,
-        tracker: &'a TextureTracker<A>,
+        tracker: &'a TextureTracker,
         snatch_guard: &'b SnatchGuard<'b>,
     ) -> impl Iterator<Item = TextureBarrier<'a, dyn hal::DynTexture>> {
         for index in tracker.metadata.owned_indices() {
@@ -796,7 +795,7 @@ impl<A: HalApi> DeviceTextureTracker<A> {
     /// those transitions are returned.
     pub fn set_from_usage_scope_and_drain_transitions<'a, 'b: 'a>(
         &'a mut self,
-        scope: &'a TextureUsageScope<A>,
+        scope: &'a TextureUsageScope,
         snatch_guard: &'b SnatchGuard<'b>,
     ) -> impl Iterator<Item = TextureBarrier<'a, dyn hal::DynTexture>> {
         for index in scope.metadata.owned_indices() {
@@ -856,10 +855,10 @@ impl<A: HalApi> DeviceTextureTracker<A> {
     }
 }
 
-impl<A: HalApi> TextureTrackerSetSingle<A> for DeviceTextureTracker<A> {
+impl TextureTrackerSetSingle for DeviceTextureTracker {
     fn set_single(
         &mut self,
-        texture: &Arc<Texture<A>>,
+        texture: &Arc<Texture>,
         selector: TextureSelector,
         new_state: TextureUses,
     ) -> Drain<'_, PendingTransition<TextureUses>> {
@@ -978,13 +977,13 @@ impl<'a> TextureStateProvider<'a> {
 /// Indexes must be valid indexes into all arrays passed in
 /// to this function, either directly or via metadata or provider structs.
 #[inline(always)]
-unsafe fn insert_or_merge<A: HalApi>(
+unsafe fn insert_or_merge(
     texture_selector: &TextureSelector,
     current_state_set: &mut TextureStateSet,
-    resource_metadata: &mut ResourceMetadata<Arc<Texture<A>>>,
+    resource_metadata: &mut ResourceMetadata<Arc<Texture>>,
     index: usize,
     state_provider: TextureStateProvider<'_>,
-    metadata_provider: ResourceMetadataProvider<'_, Arc<Texture<A>>>,
+    metadata_provider: ResourceMetadataProvider<'_, Arc<Texture>>,
 ) -> Result<(), ResourceUsageCompatibilityError> {
     let currently_owned = unsafe { resource_metadata.contains_unchecked(index) };
 
@@ -1034,15 +1033,15 @@ unsafe fn insert_or_merge<A: HalApi>(
 /// Indexes must be valid indexes into all arrays passed in
 /// to this function, either directly or via metadata or provider structs.
 #[inline(always)]
-unsafe fn insert_or_barrier_update<A: HalApi>(
+unsafe fn insert_or_barrier_update(
     texture_selector: &TextureSelector,
     start_state: Option<&mut TextureStateSet>,
     current_state_set: &mut TextureStateSet,
-    resource_metadata: &mut ResourceMetadata<Arc<Texture<A>>>,
+    resource_metadata: &mut ResourceMetadata<Arc<Texture>>,
     index: usize,
     start_state_provider: TextureStateProvider<'_>,
     end_state_provider: Option<TextureStateProvider<'_>>,
-    metadata_provider: ResourceMetadataProvider<'_, Arc<Texture<A>>>,
+    metadata_provider: ResourceMetadataProvider<'_, Arc<Texture>>,
     barriers: &mut Vec<PendingTransition<TextureUses>>,
 ) {
     let currently_owned = unsafe { resource_metadata.contains_unchecked(index) };
@@ -1163,12 +1162,12 @@ unsafe fn insert<T: Clone>(
 }
 
 #[inline(always)]
-unsafe fn merge<A: HalApi>(
+unsafe fn merge(
     texture_selector: &TextureSelector,
     current_state_set: &mut TextureStateSet,
     index: usize,
     state_provider: TextureStateProvider<'_>,
-    metadata_provider: ResourceMetadataProvider<'_, Arc<Texture<A>>>,
+    metadata_provider: ResourceMetadataProvider<'_, Arc<Texture>>,
 ) -> Result<(), ResourceUsageCompatibilityError> {
     let current_simple = unsafe { current_state_set.simple.get_unchecked_mut(index) };
     let current_state = if *current_simple == TextureUses::COMPLEX {
diff --git a/wgpu/src/api/surface_texture.rs b/wgpu/src/api/surface_texture.rs
index 9431683528..417ad56169 100644
--- a/wgpu/src/api/surface_texture.rs
+++ b/wgpu/src/api/surface_texture.rs
@@ -36,7 +36,6 @@ impl SurfaceTexture {
         self.presented = true;
         DynContext::surface_present(
             &*self.texture.context,
-            &self.texture.id,
             // This call to as_ref is essential because we want the DynContext implementation to see the inner
             // value of the Box (T::SurfaceOutputDetail), not the Box itself.
             self.detail.as_ref(),
@@ -49,7 +48,6 @@ impl Drop for SurfaceTexture {
         if !self.presented && !thread::panicking() {
             DynContext::surface_texture_discard(
                 &*self.texture.context,
-                &self.texture.id,
                 // This call to as_ref is essential because we want the DynContext implementation to see the inner
                 // value of the Box (T::SurfaceOutputDetail), not the Box itself.
                 self.detail.as_ref(),
diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs
index 6865c439a1..702f170837 100644
--- a/wgpu/src/backend/webgpu.rs
+++ b/wgpu/src/backend/webgpu.rs
@@ -1495,15 +1495,11 @@ impl crate::context::Context for ContextWebGpu {
         )
     }
 
-    fn surface_present(&self, _texture: &Self::TextureId, _detail: &Self::SurfaceOutputDetail) {
+    fn surface_present(&self, _detail: &Self::SurfaceOutputDetail) {
         // Swapchain is presented automatically
     }
 
-    fn surface_texture_discard(
-        &self,
-        _texture: &Self::TextureId,
-        _detail: &Self::SurfaceOutputDetail,
-    ) {
+    fn surface_texture_discard(&self, _detail: &Self::SurfaceOutputDetail) {
         // Can't really discard this on the Web
     }
 
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 32ee37183f..08acd37595 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -72,10 +72,7 @@ impl ContextWgpuCore {
         &self,
         hal_adapter: hal::ExposedAdapter<A>,
     ) -> wgc::id::AdapterId {
-        unsafe {
-            self.0
-                .create_adapter_from_hal::<A>(hal_adapter.into(), None)
-        }
+        unsafe { self.0.create_adapter_from_hal(hal_adapter.into(), None) }
     }
 
     pub unsafe fn adapter_as_hal<
@@ -112,7 +109,7 @@ impl ContextWgpuCore {
             log::error!("Feature 'trace' has been removed temporarily, see https://github.com/gfx-rs/wgpu/issues/5974");
         }
         let (device_id, queue_id, error) = unsafe {
-            self.0.create_device_from_hal::<A>(
+            self.0.create_device_from_hal(
                 *adapter,
                 hal_device.into(),
                 &desc.map_label(|l| l.map(Borrowed)),
@@ -146,7 +143,7 @@ impl ContextWgpuCore {
         let descriptor = desc.map_label_and_view_formats(|l| l.map(Borrowed), |v| v.to_vec());
         let (id, error) = unsafe {
             self.0
-                .create_texture_from_hal::<A>(Box::new(hal_texture), device.id, &descriptor, None)
+                .create_texture_from_hal(Box::new(hal_texture), device.id, &descriptor, None)
         };
         if let Some(cause) = error {
             self.handle_error(
@@ -795,20 +792,14 @@ impl crate::Context for ContextWgpuCore {
     fn surface_get_current_texture(
         &self,
         surface: &Self::SurfaceId,
-        surface_data: &Self::SurfaceData,
+        _surface_data: &Self::SurfaceData,
     ) -> (
         Option<Self::TextureId>,
         Option<Self::TextureData>,
         SurfaceStatus,
         Self::SurfaceOutputDetail,
     ) {
-        let device_id = surface_data
-            .configured_device
-            .lock()
-            .expect("Surface was not configured?");
-        match wgc::gfx_select!(
-            device_id => self.0.surface_get_current_texture(*surface, None)
-        ) {
+        match self.0.surface_get_current_texture(*surface, None) {
             Ok(wgc::present::SurfaceOutput { status, texture_id }) => {
                 let (id, data) = {
                     (
@@ -833,19 +824,15 @@ impl crate::Context for ContextWgpuCore {
         }
     }
 
-    fn surface_present(&self, texture: &Self::TextureId, detail: &Self::SurfaceOutputDetail) {
-        match wgc::gfx_select!(texture => self.0.surface_present(detail.surface_id)) {
+    fn surface_present(&self, detail: &Self::SurfaceOutputDetail) {
+        match self.0.surface_present(detail.surface_id) {
             Ok(_status) => (),
             Err(err) => self.handle_error_fatal(err, "Surface::present"),
         }
     }
 
-    fn surface_texture_discard(
-        &self,
-        texture: &Self::TextureId,
-        detail: &Self::SurfaceOutputDetail,
-    ) {
-        match wgc::gfx_select!(texture => self.0.surface_texture_discard(detail.surface_id)) {
+    fn surface_texture_discard(&self, detail: &Self::SurfaceOutputDetail) {
+        match self.0.surface_texture_discard(detail.surface_id) {
             Ok(_status) => (),
             Err(err) => self.handle_error_fatal(err, "Surface::discard_texture"),
         }
diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs
index 2c2c82c4bc..d28e4bc692 100644
--- a/wgpu/src/context.rs
+++ b/wgpu/src/context.rs
@@ -178,12 +178,8 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
         SurfaceStatus,
         Self::SurfaceOutputDetail,
     );
-    fn surface_present(&self, texture: &Self::TextureId, detail: &Self::SurfaceOutputDetail);
-    fn surface_texture_discard(
-        &self,
-        texture: &Self::TextureId,
-        detail: &Self::SurfaceOutputDetail,
-    );
+    fn surface_present(&self, detail: &Self::SurfaceOutputDetail);
+    fn surface_texture_discard(&self, detail: &Self::SurfaceOutputDetail);
 
     fn device_features(&self, device: &Self::DeviceId, device_data: &Self::DeviceData) -> Features;
     fn device_limits(&self, device: &Self::DeviceId, device_data: &Self::DeviceData) -> Limits;
@@ -1241,8 +1237,8 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync {
         SurfaceStatus,
         Box<dyn AnyWasmNotSendSync>,
     );
-    fn surface_present(&self, texture: &ObjectId, detail: &dyn AnyWasmNotSendSync);
-    fn surface_texture_discard(&self, texture: &ObjectId, detail: &dyn AnyWasmNotSendSync);
+    fn surface_present(&self, detail: &dyn AnyWasmNotSendSync);
+    fn surface_texture_discard(&self, detail: &dyn AnyWasmNotSendSync);
 
     fn device_features(&self, device: &ObjectId, device_data: &crate::Data) -> Features;
     fn device_limits(&self, device: &ObjectId, device_data: &crate::Data) -> Limits;
@@ -2204,14 +2200,12 @@ where
         )
     }
 
-    fn surface_present(&self, texture: &ObjectId, detail: &dyn AnyWasmNotSendSync) {
-        let texture = <T::TextureId>::from(*texture);
-        Context::surface_present(self, &texture, detail.downcast_ref().unwrap())
+    fn surface_present(&self, detail: &dyn AnyWasmNotSendSync) {
+        Context::surface_present(self, detail.downcast_ref().unwrap())
     }
 
-    fn surface_texture_discard(&self, texture: &ObjectId, detail: &dyn AnyWasmNotSendSync) {
-        let texture = <T::TextureId>::from(*texture);
-        Context::surface_texture_discard(self, &texture, detail.downcast_ref().unwrap())
+    fn surface_texture_discard(&self, detail: &dyn AnyWasmNotSendSync) {
+        Context::surface_texture_discard(self, detail.downcast_ref().unwrap())
     }
 
     fn device_features(&self, device: &ObjectId, device_data: &crate::Data) -> Features {

From 1bfe8845d5effac738fbc02624f65a81700329ce Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sat, 10 Aug 2024 12:36:04 +0200
Subject: [PATCH 216/226] ignore 'arc instead of rc' warnings on wasm

---
 wgpu-core/src/instance.rs | 1 -
 wgpu-core/src/lib.rs      | 7 +++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index 8c7585be99..a71117cfe1 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -495,7 +495,6 @@ impl Global {
                 surface_per_backend,
             };
 
-            #[allow(clippy::arc_with_non_send_sync)]
             let id = self
                 .surfaces
                 .prepare(wgt::Backend::Empty, id_in) // No specific backend for Surface, since it's not specific.
diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs
index 179664490c..9b27d64a7b 100644
--- a/wgpu-core/src/lib.rs
+++ b/wgpu-core/src/lib.rs
@@ -62,6 +62,13 @@ the documentation for `wgpu-core` is empty unless built with
     unused_extern_crates,
     unused_qualifications
 )]
+// We use `Arc` in wgpu-core, but on wasm (unless opted out via `fragile-send-sync-non-atomic-wasm`)
+// wgpu-hal resources are not Send/Sync, causing a clippy warning for unnecessary `Arc`s.
+// We could use `Rc`s in this case as recommended, but unless atomics are enabled
+// this doesn't make a difference.
+// Therefore, this is only really a concern for users targeting WebGL
+// (the only reason to use wgpu-core on the web in the first place) that have atomics enabled.
+#![cfg_attr(not(send_sync), allow(clippy::arc_with_non_send_sync))]
 
 pub mod binding_model;
 pub mod command;

From 9794f338235309e3cee9908baaecbcac004cf36b Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sat, 10 Aug 2024 23:47:33 +0200
Subject: [PATCH 217/226] Handle webgl's `queue_copy_external_image_to_texture`

---
 wgpu-core/src/device/queue.rs | 36 +++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index deab6bff21..e516e0dac7 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -1020,16 +1020,36 @@ impl Global {
             size: hal_copy_size,
         };
 
+        let mut trackers = device.trackers.lock();
+        let transitions = trackers
+            .textures
+            .set_single(&dst, selector, hal::TextureUses::COPY_DST);
+
+        // `copy_external_image_to_texture` is exclusive to the WebGL backend.
+        // Don't go through the `DynCommandEncoder` abstraction; go directly to the WebGL backend.
+        let encoder_webgl = encoder
+            .as_any_mut()
+            .downcast_mut::<hal::gles::CommandEncoder>()
+            .unwrap();
+        let dst_raw_webgl = dst_raw
+            .as_any()
+            .downcast_ref::<hal::gles::Texture>()
+            .unwrap();
+        let transitions_webgl = transitions.map(|pending| {
+            let dyn_transition = pending.into_hal(dst_raw);
+            hal::TextureBarrier {
+                texture: dst_raw_webgl,
+                range: dyn_transition.range,
+                usage: dyn_transition.usage,
+            }
+        });
+
+        use hal::CommandEncoder as _;
         unsafe {
-            let mut trackers = device.trackers.lock();
-            let transitions =
-                trackers
-                    .textures
-                    .set_single(&dst, selector, hal::TextureUses::COPY_DST);
-            encoder.transition_textures(transitions.map(|pending| pending.into_hal(dst_raw)));
-            encoder.copy_external_image_to_texture(
+            encoder_webgl.transition_textures(transitions_webgl);
+            encoder_webgl.copy_external_image_to_texture(
                 source,
-                dst_raw,
+                dst_raw_webgl,
                 destination.premultiplied_alpha,
                 iter::once(regions),
             );

From 0287eaf022e7eee072ca4e21aa51672c271091e9 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Tue, 6 Aug 2024 23:17:47 +0200
Subject: [PATCH 218/226] Remove `gfx_select`.

---
 deno_webgpu/binding.rs         |  10 +-
 deno_webgpu/buffer.rs          |  45 ++--
 deno_webgpu/bundle.rs          |   4 +-
 deno_webgpu/command_encoder.rs | 102 +++++----
 deno_webgpu/lib.rs             |  58 ++---
 deno_webgpu/pipeline.rs        |  28 +--
 deno_webgpu/queue.rs           |  22 +-
 deno_webgpu/sampler.rs         |   4 +-
 deno_webgpu/shader.rs          |   4 +-
 deno_webgpu/surface.rs         |   4 +-
 deno_webgpu/texture.rs         |  20 +-
 player/src/bin/play.rs         | 159 +++++++-------
 player/tests/test.rs           |  59 ++---
 wgpu-core/src/hub.rs           |   6 +-
 wgpu-core/src/lib.rs           | 163 --------------
 wgpu/src/backend/wgpu_core.rs  | 383 +++++++++++++++------------------
 16 files changed, 431 insertions(+), 640 deletions(-)

diff --git a/deno_webgpu/binding.rs b/deno_webgpu/binding.rs
index 0efeb6716a..f1f3a80d35 100644
--- a/deno_webgpu/binding.rs
+++ b/deno_webgpu/binding.rs
@@ -21,7 +21,7 @@ impl Resource for WebGpuBindGroupLayout {
     }
 
     fn close(self: Rc<Self>) {
-        gfx_select!(self.1 => self.0.bind_group_layout_drop(self.1));
+        self.0.bind_group_layout_drop(self.1);
     }
 }
 
@@ -35,7 +35,7 @@ impl Resource for WebGpuBindGroup {
     }
 
     fn close(self: Rc<Self>) {
-        gfx_select!(self.1 => self.0.bind_group_drop(self.1));
+        self.0.bind_group_drop(self.1);
     }
 }
 
@@ -191,7 +191,7 @@ pub fn op_webgpu_create_bind_group_layout(
         entries: Cow::from(entries),
     };
 
-    gfx_put!(device => instance.device_create_bind_group_layout(
+    gfx_put!(instance.device_create_bind_group_layout(
     device,
     &descriptor,
     None
@@ -226,7 +226,7 @@ pub fn op_webgpu_create_pipeline_layout(
         push_constant_ranges: Default::default(),
     };
 
-    gfx_put!(device => instance.device_create_pipeline_layout(
+    gfx_put!(instance.device_create_pipeline_layout(
     device,
     &descriptor,
     None
@@ -305,7 +305,7 @@ pub fn op_webgpu_create_bind_group(
         entries: Cow::from(entries),
     };
 
-    gfx_put!(device => instance.device_create_bind_group(
+    gfx_put!(instance.device_create_bind_group(
     device,
     &descriptor,
     None
diff --git a/deno_webgpu/buffer.rs b/deno_webgpu/buffer.rs
index 9a4900112a..08afcd133d 100644
--- a/deno_webgpu/buffer.rs
+++ b/deno_webgpu/buffer.rs
@@ -27,7 +27,7 @@ impl Resource for WebGpuBuffer {
     }
 
     fn close(self: Rc<Self>) {
-        gfx_select!(self.1 => self.0.buffer_drop(self.1));
+        self.0.buffer_drop(self.1);
     }
 }
 
@@ -62,7 +62,7 @@ pub fn op_webgpu_create_buffer(
         mapped_at_creation,
     };
 
-    gfx_put!(device => instance.device_create_buffer(
+    gfx_put!(instance.device_create_buffer(
     device,
     &descriptor,
     None
@@ -97,20 +97,21 @@ pub async fn op_webgpu_buffer_get_map_async(
         });
 
         // TODO(lucacasonato): error handling
-        let maybe_err = gfx_select!(buffer => instance.buffer_map_async(
-            buffer,
-            offset,
-            Some(size),
-            wgpu_core::resource::BufferMapOperation {
-                host: match mode {
-                    1 => wgpu_core::device::HostMap::Read,
-                    2 => wgpu_core::device::HostMap::Write,
-                    _ => unreachable!(),
+        let maybe_err = instance
+            .buffer_map_async(
+                buffer,
+                offset,
+                Some(size),
+                wgpu_core::resource::BufferMapOperation {
+                    host: match mode {
+                        1 => wgpu_core::device::HostMap::Read,
+                        2 => wgpu_core::device::HostMap::Write,
+                        _ => unreachable!(),
+                    },
+                    callback: Some(wgpu_core::resource::BufferMapCallback::from_rust(callback)),
                 },
-                callback: Some(wgpu_core::resource::BufferMapCallback::from_rust(callback)),
-            }
-        ))
-        .err();
+            )
+            .err();
 
         if maybe_err.is_some() {
             return Ok(WebGpuResult::maybe_err(maybe_err));
@@ -124,7 +125,8 @@ pub async fn op_webgpu_buffer_get_map_async(
             {
                 let state = state.borrow();
                 let instance = state.borrow::<super::Instance>();
-                gfx_select!(device => instance.device_poll(device, wgpu_types::Maintain::wait()))
+                instance
+                    .device_poll(device, wgpu_types::Maintain::wait())
                     .unwrap();
             }
             tokio::time::sleep(Duration::from_millis(10)).await;
@@ -157,12 +159,9 @@ pub fn op_webgpu_buffer_get_mapped_range(
     let buffer_resource = state.resource_table.get::<WebGpuBuffer>(buffer_rid)?;
     let buffer = buffer_resource.1;
 
-    let (slice_pointer, range_size) = gfx_select!(buffer => instance.buffer_get_mapped_range(
-      buffer,
-      offset,
-      size
-    ))
-    .map_err(|e| DomExceptionOperationError::new(&e.to_string()))?;
+    let (slice_pointer, range_size) = instance
+        .buffer_get_mapped_range(buffer, offset, size)
+        .map_err(|e| DomExceptionOperationError::new(&e.to_string()))?;
 
     // SAFETY: guarantee to be safe from wgpu
     let slice =
@@ -199,5 +198,5 @@ pub fn op_webgpu_buffer_unmap(
         slice.copy_from_slice(buf);
     }
 
-    gfx_ok!(buffer => instance.buffer_unmap(buffer))
+    gfx_ok!(instance.buffer_unmap(buffer))
 }
diff --git a/deno_webgpu/bundle.rs b/deno_webgpu/bundle.rs
index dfe5ccf494..0d1421d202 100644
--- a/deno_webgpu/bundle.rs
+++ b/deno_webgpu/bundle.rs
@@ -30,7 +30,7 @@ impl Resource for WebGpuRenderBundle {
     }
 
     fn close(self: Rc<Self>) {
-        gfx_select!(self.1 => self.0.render_bundle_drop(self.1));
+        self.0.render_bundle_drop(self.1);
     }
 }
 
@@ -108,7 +108,7 @@ pub fn op_webgpu_render_bundle_encoder_finish(
         .into_inner();
     let instance = state.borrow::<super::Instance>();
 
-    gfx_put!(render_bundle_encoder.parent() => instance.render_bundle_encoder_finish(
+    gfx_put!(instance.render_bundle_encoder_finish(
     render_bundle_encoder,
     &wgpu_core::command::RenderBundleDescriptor {
       label: Some(label),
diff --git a/deno_webgpu/command_encoder.rs b/deno_webgpu/command_encoder.rs
index ba21bb05b5..84537f3c0b 100644
--- a/deno_webgpu/command_encoder.rs
+++ b/deno_webgpu/command_encoder.rs
@@ -23,7 +23,7 @@ impl Resource for WebGpuCommandEncoder {
     }
 
     fn close(self: Rc<Self>) {
-        gfx_select!(self.1 => self.0.command_encoder_drop(self.1));
+        self.0.command_encoder_drop(self.1);
     }
 }
 
@@ -38,7 +38,7 @@ impl Resource for WebGpuCommandBuffer {
 
     fn close(self: Rc<Self>) {
         if let Some(id) = *self.1.borrow() {
-            gfx_select!(id => self.0.command_buffer_drop(id));
+            self.0.command_buffer_drop(id);
         }
     }
 }
@@ -58,7 +58,7 @@ pub fn op_webgpu_create_command_encoder(
 
     let descriptor = wgpu_types::CommandEncoderDescriptor { label: Some(label) };
 
-    gfx_put!(device => instance.device_create_command_encoder(
+    gfx_put!(instance.device_create_command_encoder(
     device,
     &descriptor,
     None
@@ -210,7 +210,8 @@ pub fn op_webgpu_command_encoder_begin_render_pass(
         occlusion_query_set: occlusion_query_set_resource,
     };
 
-    let (render_pass, error) = gfx_select!(command_encoder => instance.command_encoder_create_render_pass_dyn(*command_encoder, &descriptor));
+    let (render_pass, error) =
+        instance.command_encoder_create_render_pass_dyn(*command_encoder, &descriptor);
     let rid = state
         .resource_table
         .add(super::render_pass::WebGpuRenderPass(RefCell::new(
@@ -262,7 +263,8 @@ pub fn op_webgpu_command_encoder_begin_compute_pass(
         timestamp_writes: timestamp_writes.as_ref(),
     };
 
-    let (compute_pass, error) = gfx_select!(command_encoder => instance.command_encoder_create_compute_pass_dyn(*command_encoder, &descriptor));
+    let (compute_pass, error) =
+        instance.command_encoder_create_compute_pass_dyn(*command_encoder, &descriptor);
     let rid = state
         .resource_table
         .add(super::compute_pass::WebGpuComputePass(RefCell::new(
@@ -297,13 +299,13 @@ pub fn op_webgpu_command_encoder_copy_buffer_to_buffer(
         .get::<super::buffer::WebGpuBuffer>(destination)?;
     let destination_buffer = destination_buffer_resource.1;
 
-    gfx_ok!(command_encoder => instance.command_encoder_copy_buffer_to_buffer(
-      command_encoder,
-      source_buffer,
-      source_offset,
-      destination_buffer,
-      destination_offset,
-      size
+    gfx_ok!(instance.command_encoder_copy_buffer_to_buffer(
+        command_encoder,
+        source_buffer,
+        source_offset,
+        destination_buffer,
+        destination_offset,
+        size
     ))
 }
 
@@ -360,11 +362,11 @@ pub fn op_webgpu_command_encoder_copy_buffer_to_texture(
         origin: destination.origin,
         aspect: destination.aspect,
     };
-    gfx_ok!(command_encoder => instance.command_encoder_copy_buffer_to_texture(
-      command_encoder,
-      &source,
-      &destination,
-      &copy_size
+    gfx_ok!(instance.command_encoder_copy_buffer_to_texture(
+        command_encoder,
+        &source,
+        &destination,
+        &copy_size
     ))
 }
 
@@ -403,11 +405,11 @@ pub fn op_webgpu_command_encoder_copy_texture_to_buffer(
             rows_per_image: destination.rows_per_image,
         },
     };
-    gfx_ok!(command_encoder => instance.command_encoder_copy_texture_to_buffer(
-      command_encoder,
-      &source,
-      &destination,
-      &copy_size
+    gfx_ok!(instance.command_encoder_copy_texture_to_buffer(
+        command_encoder,
+        &source,
+        &destination,
+        &copy_size
     ))
 }
 
@@ -444,11 +446,11 @@ pub fn op_webgpu_command_encoder_copy_texture_to_texture(
         origin: destination.origin,
         aspect: destination.aspect,
     };
-    gfx_ok!(command_encoder => instance.command_encoder_copy_texture_to_texture(
-      command_encoder,
-      &source,
-      &destination,
-      &copy_size
+    gfx_ok!(instance.command_encoder_copy_texture_to_texture(
+        command_encoder,
+        &source,
+        &destination,
+        &copy_size
     ))
 }
 
@@ -470,11 +472,11 @@ pub fn op_webgpu_command_encoder_clear_buffer(
         .resource_table
         .get::<super::buffer::WebGpuBuffer>(buffer_rid)?;
 
-    gfx_ok!(command_encoder => instance.command_encoder_clear_buffer(
-      command_encoder,
-      destination_resource.1,
-      offset,
-      Some(size)
+    gfx_ok!(instance.command_encoder_clear_buffer(
+        command_encoder,
+        destination_resource.1,
+        offset,
+        Some(size)
     ))
 }
 
@@ -491,7 +493,7 @@ pub fn op_webgpu_command_encoder_push_debug_group(
         .get::<WebGpuCommandEncoder>(command_encoder_rid)?;
     let command_encoder = command_encoder_resource.1;
 
-    gfx_ok!(command_encoder => instance.command_encoder_push_debug_group(command_encoder, group_label))
+    gfx_ok!(instance.command_encoder_push_debug_group(command_encoder, group_label))
 }
 
 #[op2]
@@ -506,7 +508,7 @@ pub fn op_webgpu_command_encoder_pop_debug_group(
         .get::<WebGpuCommandEncoder>(command_encoder_rid)?;
     let command_encoder = command_encoder_resource.1;
 
-    gfx_ok!(command_encoder => instance.command_encoder_pop_debug_group(command_encoder))
+    gfx_ok!(instance.command_encoder_pop_debug_group(command_encoder))
 }
 
 #[op2]
@@ -522,10 +524,7 @@ pub fn op_webgpu_command_encoder_insert_debug_marker(
         .get::<WebGpuCommandEncoder>(command_encoder_rid)?;
     let command_encoder = command_encoder_resource.1;
 
-    gfx_ok!(command_encoder => instance.command_encoder_insert_debug_marker(
-      command_encoder,
-      marker_label
-    ))
+    gfx_ok!(instance.command_encoder_insert_debug_marker(command_encoder, marker_label))
 }
 
 #[op2]
@@ -545,10 +544,10 @@ pub fn op_webgpu_command_encoder_write_timestamp(
         .resource_table
         .get::<super::WebGpuQuerySet>(query_set)?;
 
-    gfx_ok!(command_encoder => instance.command_encoder_write_timestamp(
-      command_encoder,
-      query_set_resource.1,
-      query_index
+    gfx_ok!(instance.command_encoder_write_timestamp(
+        command_encoder,
+        query_set_resource.1,
+        query_index
     ))
 }
 
@@ -575,13 +574,13 @@ pub fn op_webgpu_command_encoder_resolve_query_set(
         .resource_table
         .get::<super::buffer::WebGpuBuffer>(destination)?;
 
-    gfx_ok!(command_encoder => instance.command_encoder_resolve_query_set(
-      command_encoder,
-      query_set_resource.1,
-      first_query,
-      query_count,
-      destination_resource.1,
-      destination_offset
+    gfx_ok!(instance.command_encoder_resolve_query_set(
+        command_encoder,
+        query_set_resource.1,
+        first_query,
+        query_count,
+        destination_resource.1,
+        destination_offset
     ))
 }
 
@@ -600,10 +599,7 @@ pub fn op_webgpu_command_encoder_finish(
 
     let descriptor = wgpu_types::CommandBufferDescriptor { label: Some(label) };
 
-    let (val, maybe_err) = gfx_select!(command_encoder => instance.command_encoder_finish(
-      command_encoder,
-      &descriptor
-    ));
+    let (val, maybe_err) = instance.command_encoder_finish(command_encoder, &descriptor);
 
     let rid = state.resource_table.add(WebGpuCommandBuffer(
         instance.clone(),
diff --git a/deno_webgpu/lib.rs b/deno_webgpu/lib.rs
index 86120be713..c2dfb240fa 100644
--- a/deno_webgpu/lib.rs
+++ b/deno_webgpu/lib.rs
@@ -23,32 +23,17 @@ pub const UNSTABLE_FEATURE_NAME: &str = "webgpu";
 
 #[macro_use]
 mod macros {
-    // TODO(#5124): remove this macro.
-    macro_rules! gfx_select {
-    ($id:expr => $p0:ident.$p1:tt.$method:ident $params:tt) => {
-      gfx_select!($id => {$p0.$p1}, $method $params)
-    };
-
-    ($id:expr => $p0:ident.$method:ident $params:tt) => {
-      gfx_select!($id => {$p0}, $method $params)
-    };
-
-    ($id:expr => {$($c:tt)*}, $method:ident $params:tt) => {
-        $($c)*.$method $params
-    };
-  }
-
     macro_rules! gfx_put {
-    ($id:expr => $global:ident.$method:ident( $($param:expr),* ) => $state:expr, $rc:expr) => {{
-      let (val, maybe_err) = gfx_select!($id => $global.$method($($param),*));
+    ($global:ident.$method:ident( $($param:expr),* ) => $state:expr, $rc:expr) => {{
+      let (val, maybe_err) = $global.$method($($param),*);
       let rid = $state.resource_table.add($rc($global.clone(), val));
       Ok(WebGpuResult::rid_err(rid, maybe_err))
     }};
   }
 
     macro_rules! gfx_ok {
-    ($id:expr => $global:ident.$method:ident( $($param:expr),* )) => {{
-      let maybe_err = gfx_select!($id => $global.$method($($param),*)).err();
+    ($global:ident.$method:ident( $($param:expr),* )) => {{
+      let maybe_err = $global.$method($($param),*).err();
       Ok(WebGpuResult::maybe_err(maybe_err))
     }};
   }
@@ -78,7 +63,7 @@ impl Resource for WebGpuAdapter {
     }
 
     fn close(self: Rc<Self>) {
-        gfx_select!(self.1 => self.0.adapter_drop(self.1));
+        self.0.adapter_drop(self.1);
     }
 }
 
@@ -89,7 +74,7 @@ impl Resource for WebGpuDevice {
     }
 
     fn close(self: Rc<Self>) {
-        gfx_select!(self.1 => self.0.device_drop(self.1));
+        self.0.device_drop(self.1);
     }
 }
 
@@ -100,7 +85,7 @@ impl Resource for WebGpuQuerySet {
     }
 
     fn close(self: Rc<Self>) {
-        gfx_select!(self.1 => self.0.query_set_drop(self.1));
+        self.0.query_set_drop(self.1);
     }
 }
 
@@ -429,9 +414,9 @@ pub fn op_webgpu_request_adapter(
             })
         }
     };
-    let adapter_features = gfx_select!(adapter => instance.adapter_features(adapter))?;
+    let adapter_features = instance.adapter_features(adapter)?;
     let features = deserialize_features(&adapter_features);
-    let adapter_limits = gfx_select!(adapter => instance.adapter_limits(adapter))?;
+    let adapter_limits = instance.adapter_limits(adapter)?;
 
     let instance = instance.clone();
 
@@ -664,21 +649,24 @@ pub fn op_webgpu_request_device(
         memory_hints: wgpu_types::MemoryHints::default(),
     };
 
-    let (device, queue, maybe_err) = gfx_select!(adapter => instance.adapter_request_device(
-      adapter,
-      &descriptor,
-      std::env::var("DENO_WEBGPU_TRACE").ok().as_ref().map(std::path::Path::new),
-      None,
-      None
-    ));
+    let (device, queue, maybe_err) = instance.adapter_request_device(
+        adapter,
+        &descriptor,
+        std::env::var("DENO_WEBGPU_TRACE")
+            .ok()
+            .as_ref()
+            .map(std::path::Path::new),
+        None,
+        None,
+    );
     adapter_resource.close();
     if let Some(err) = maybe_err {
         return Err(DomExceptionOperationError::new(&err.to_string()).into());
     }
 
-    let device_features = gfx_select!(device => instance.device_features(device))?;
+    let device_features = instance.device_features(device)?;
     let features = deserialize_features(&device_features);
-    let limits = gfx_select!(device => instance.device_limits(device))?;
+    let limits = instance.device_limits(device)?;
 
     let instance = instance.clone();
     let instance2 = instance.clone();
@@ -717,7 +705,7 @@ pub fn op_webgpu_request_adapter_info(
     let adapter = adapter_resource.1;
     let instance = state.borrow::<Instance>();
 
-    let info = gfx_select!(adapter => instance.adapter_get_info(adapter))?;
+    let info = instance.adapter_get_info(adapter)?;
     adapter_resource.close();
 
     Ok(GPUAdapterInfo {
@@ -770,7 +758,7 @@ pub fn op_webgpu_create_query_set(
         count: args.count,
     };
 
-    gfx_put!(device => instance.device_create_query_set(
+    gfx_put!(instance.device_create_query_set(
     device,
     &descriptor,
     None
diff --git a/deno_webgpu/pipeline.rs b/deno_webgpu/pipeline.rs
index 86d530332f..0ab3c40262 100644
--- a/deno_webgpu/pipeline.rs
+++ b/deno_webgpu/pipeline.rs
@@ -24,7 +24,7 @@ impl Resource for WebGpuPipelineLayout {
     }
 
     fn close(self: Rc<Self>) {
-        gfx_select!(self.1 => self.0.pipeline_layout_drop(self.1));
+        self.0.pipeline_layout_drop(self.1);
     }
 }
 
@@ -38,7 +38,7 @@ impl Resource for WebGpuComputePipeline {
     }
 
     fn close(self: Rc<Self>) {
-        gfx_select!(self.1 => self.0.compute_pipeline_drop(self.1));
+        self.0.compute_pipeline_drop(self.1);
     }
 }
 
@@ -52,7 +52,7 @@ impl Resource for WebGpuRenderPipeline {
     }
 
     fn close(self: Rc<Self>) {
-        gfx_select!(self.1 => self.0.render_pipeline_drop(self.1));
+        self.0.render_pipeline_drop(self.1);
     }
 }
 
@@ -116,12 +116,8 @@ pub fn op_webgpu_create_compute_pipeline(
         cache: None,
     };
 
-    let (compute_pipeline, maybe_err) = gfx_select!(device => instance.device_create_compute_pipeline(
-      device,
-      &descriptor,
-      None,
-      None,
-    ));
+    let (compute_pipeline, maybe_err) =
+        instance.device_create_compute_pipeline(device, &descriptor, None, None);
 
     let rid = state
         .resource_table
@@ -150,7 +146,8 @@ pub fn op_webgpu_compute_pipeline_get_bind_group_layout(
         .get::<WebGpuComputePipeline>(compute_pipeline_rid)?;
     let compute_pipeline = compute_pipeline_resource.1;
 
-    let (bind_group_layout, maybe_err) = gfx_select!(compute_pipeline => instance.compute_pipeline_get_bind_group_layout(compute_pipeline, index, None));
+    let (bind_group_layout, maybe_err) =
+        instance.compute_pipeline_get_bind_group_layout(compute_pipeline, index, None);
 
     let rid = state
         .resource_table
@@ -383,12 +380,8 @@ pub fn op_webgpu_create_render_pipeline(
         cache: None,
     };
 
-    let (render_pipeline, maybe_err) = gfx_select!(device => instance.device_create_render_pipeline(
-      device,
-      &descriptor,
-      None,
-      None,
-    ));
+    let (render_pipeline, maybe_err) =
+        instance.device_create_render_pipeline(device, &descriptor, None, None);
 
     let rid = state
         .resource_table
@@ -410,7 +403,8 @@ pub fn op_webgpu_render_pipeline_get_bind_group_layout(
         .get::<WebGpuRenderPipeline>(render_pipeline_rid)?;
     let render_pipeline = render_pipeline_resource.1;
 
-    let (bind_group_layout, maybe_err) = gfx_select!(render_pipeline => instance.render_pipeline_get_bind_group_layout(render_pipeline, index, None));
+    let (bind_group_layout, maybe_err) =
+        instance.render_pipeline_get_bind_group_layout(render_pipeline, index, None);
 
     let rid = state
         .resource_table
diff --git a/deno_webgpu/queue.rs b/deno_webgpu/queue.rs
index 2640134455..fdbf993f8c 100644
--- a/deno_webgpu/queue.rs
+++ b/deno_webgpu/queue.rs
@@ -20,7 +20,7 @@ impl Resource for WebGpuQueue {
     }
 
     fn close(self: Rc<Self>) {
-        gfx_select!(self.1 => self.0.queue_drop(self.1));
+        self.0.queue_drop(self.1);
     }
 }
 
@@ -44,7 +44,7 @@ pub fn op_webgpu_queue_submit(
         })
         .collect::<Result<Vec<_>, AnyError>>()?;
 
-    let maybe_err = gfx_select!(queue => instance.queue_submit(queue, &ids)).err();
+    let maybe_err = instance.queue_submit(queue, &ids).err();
 
     for rid in command_buffers {
         let resource = state.resource_table.take::<WebGpuCommandBuffer>(rid)?;
@@ -95,13 +95,9 @@ pub fn op_webgpu_write_buffer(
         Some(size) => &buf[data_offset..(data_offset + size)],
         None => &buf[data_offset..],
     };
-    let maybe_err = gfx_select!(queue => instance.queue_write_buffer(
-      queue,
-      buffer,
-      buffer_offset,
-      data
-    ))
-    .err();
+    let maybe_err = instance
+        .queue_write_buffer(queue, buffer, buffer_offset, data)
+        .err();
 
     Ok(WebGpuResult::maybe_err(maybe_err))
 }
@@ -131,11 +127,5 @@ pub fn op_webgpu_write_texture(
     };
     let data_layout = data_layout.into();
 
-    gfx_ok!(queue => instance.queue_write_texture(
-      queue,
-      &destination,
-      buf,
-      &data_layout,
-      &size
-    ))
+    gfx_ok!(instance.queue_write_texture(queue, &destination, buf, &data_layout, &size))
 }
diff --git a/deno_webgpu/sampler.rs b/deno_webgpu/sampler.rs
index 822c4bda14..59b6f4e302 100644
--- a/deno_webgpu/sampler.rs
+++ b/deno_webgpu/sampler.rs
@@ -21,7 +21,7 @@ impl Resource for WebGpuSampler {
     }
 
     fn close(self: Rc<Self>) {
-        gfx_select!(self.1 => self.0.sampler_drop(self.1));
+        self.0.sampler_drop(self.1);
     }
 }
 
@@ -71,7 +71,7 @@ pub fn op_webgpu_create_sampler(
         border_color: None, // native-only
     };
 
-    gfx_put!(device => instance.device_create_sampler(
+    gfx_put!(instance.device_create_sampler(
     device,
     &descriptor,
     None
diff --git a/deno_webgpu/shader.rs b/deno_webgpu/shader.rs
index 17cde43936..4c7a30b2bd 100644
--- a/deno_webgpu/shader.rs
+++ b/deno_webgpu/shader.rs
@@ -20,7 +20,7 @@ impl Resource for WebGpuShaderModule {
     }
 
     fn close(self: Rc<Self>) {
-        gfx_select!(self.1 => self.0.shader_module_drop(self.1));
+        self.0.shader_module_drop(self.1);
     }
 }
 
@@ -45,7 +45,7 @@ pub fn op_webgpu_create_shader_module(
         shader_bound_checks: wgpu_types::ShaderBoundChecks::default(),
     };
 
-    gfx_put!(device => instance.device_create_shader_module(
+    gfx_put!(instance.device_create_shader_module(
     device,
     &descriptor,
     source,
diff --git a/deno_webgpu/surface.rs b/deno_webgpu/surface.rs
index 9d9ba0d573..b48dbd2c8b 100644
--- a/deno_webgpu/surface.rs
+++ b/deno_webgpu/surface.rs
@@ -63,7 +63,7 @@ pub fn op_webgpu_surface_configure(
         desired_maximum_frame_latency: 2,
     };
 
-    let err = gfx_select!(device => instance.surface_configure(surface, device, &conf));
+    let err = instance.surface_configure(surface, device, &conf);
 
     Ok(WebGpuResult::maybe_err(err))
 }
@@ -79,7 +79,7 @@ pub fn op_webgpu_surface_get_current_texture(
     let surface_resource = state.resource_table.get::<WebGpuSurface>(surface_rid)?;
     let surface = surface_resource.1;
 
-    let output = gfx_select!(device => instance.surface_get_current_texture(surface, None))?;
+    let output = instance.surface_get_current_texture(surface, None)?;
 
     match output.status {
         SurfaceStatus::Good | SurfaceStatus::Suboptimal => {
diff --git a/deno_webgpu/texture.rs b/deno_webgpu/texture.rs
index 8acba24998..a432c7b627 100644
--- a/deno_webgpu/texture.rs
+++ b/deno_webgpu/texture.rs
@@ -24,7 +24,7 @@ impl Resource for WebGpuTexture {
     fn close(self: Rc<Self>) {
         if self.owned {
             let instance = &self.instance;
-            gfx_select!(self.id => instance.texture_drop(self.id));
+            instance.texture_drop(self.id);
         }
     }
 }
@@ -39,7 +39,7 @@ impl Resource for WebGpuTextureView {
     }
 
     fn close(self: Rc<Self>) {
-        gfx_select!(self.1 => self.0.texture_view_drop(self.1)).unwrap();
+        self.0.texture_view_drop(self.1).unwrap();
     }
 }
 
@@ -80,11 +80,7 @@ pub fn op_webgpu_create_texture(
         view_formats: args.view_formats,
     };
 
-    let (val, maybe_err) = gfx_select!(device => instance.device_create_texture(
-      device,
-      &descriptor,
-      None
-    ));
+    let (val, maybe_err) = instance.device_create_texture(device, &descriptor, None);
 
     let rid = state.resource_table.add(WebGpuTexture {
         instance: instance.clone(),
@@ -125,9 +121,9 @@ pub fn op_webgpu_create_texture_view(
         range: args.range,
     };
 
-    gfx_put!(texture => instance.texture_create_view(
-    texture,
-    &descriptor,
-    None
-  ) => state, WebGpuTextureView)
+    gfx_put!(instance.texture_create_view(
+        texture,
+        &descriptor,
+        None
+    ) => state, WebGpuTextureView)
 }
diff --git a/player/src/bin/play.rs b/player/src/bin/play.rs
index 8b6555369f..4726fe63a7 100644
--- a/player/src/bin/play.rs
+++ b/player/src/bin/play.rs
@@ -3,7 +3,7 @@
 #[cfg(not(target_arch = "wasm32"))]
 fn main() {
     use player::GlobalPlay as _;
-    use wgc::{device::trace, gfx_select};
+    use wgc::device::trace;
 
     use std::{
         fs,
@@ -78,17 +78,17 @@ fn main() {
                 )
                 .expect("Unable to find an adapter for selected backend");
 
-            let info = gfx_select!(adapter => global.adapter_get_info(adapter)).unwrap();
+            let info = global.adapter_get_info(adapter).unwrap();
             log::info!("Picked '{}'", info.name);
             let device_id = wgc::id::Id::zip(1, 0, backend);
             let queue_id = wgc::id::Id::zip(1, 0, backend);
-            let (_, _, error) = gfx_select!(adapter => global.adapter_request_device(
+            let (_, _, error) = global.adapter_request_device(
                 adapter,
                 &desc,
                 None,
                 Some(device_id),
-                Some(queue_id)
-            ));
+                Some(queue_id),
+            );
             if let Some(e) = error {
                 panic!("{:?}", e);
             }
@@ -100,14 +100,14 @@ fn main() {
     log::info!("Executing actions");
     #[cfg(not(feature = "winit"))]
     {
-        gfx_select!(device => global.device_start_capture(device));
+        global.device_start_capture(device);
 
         while let Some(action) = actions.pop() {
-            gfx_select!(device => global.process(device, queue, action, &dir, &mut command_buffer_id_manager));
+            global.process(device, queue, action, &dir, &mut command_buffer_id_manager);
         }
 
-        gfx_select!(device => global.device_stop_capture(device));
-        gfx_select!(device => global.device_poll(device, wgt::Maintain::wait())).unwrap();
+        global.device_stop_capture(device);
+        global.device_poll(device, wgt::Maintain::wait()).unwrap();
     }
     #[cfg(feature = "winit")]
     {
@@ -119,81 +119,92 @@ fn main() {
         let mut resize_config = None;
         let mut frame_count = 0;
         let mut done = false;
-        event_loop.run(move |event, target| {
-            target.set_control_flow(ControlFlow::Poll);
-
-            match event {
-                Event::WindowEvent { event, .. } => match event {
-                    WindowEvent::RedrawRequested if resize_config.is_none() => {
-
-                    match actions.pop() {
-                        Some(trace::Action::ConfigureSurface(_device_id, config)) => {
-                            log::info!("Configuring the surface");
-                            let current_size: (u32, u32) = window.inner_size().into();
-                            let size = (config.width, config.height);
-                            if current_size != size {
-                                let _ = window.request_inner_size(winit::dpi::PhysicalSize::new(
-                                    config.width,
-                                    config.height,
-                                ));
-                                resize_config = Some(config);
-                                target.exit();
-                            } else {
-                                let error = gfx_select!(device => global.surface_configure(surface, device, &config));
+        event_loop
+            .run(move |event, target| {
+                target.set_control_flow(ControlFlow::Poll);
+
+                match event {
+                    Event::WindowEvent { event, .. } => match event {
+                        WindowEvent::RedrawRequested if resize_config.is_none() => {
+                            match actions.pop() {
+                                Some(trace::Action::ConfigureSurface(_device_id, config)) => {
+                                    log::info!("Configuring the surface");
+                                    let current_size: (u32, u32) = window.inner_size().into();
+                                    let size = (config.width, config.height);
+                                    if current_size != size {
+                                        let _ = window.request_inner_size(
+                                            winit::dpi::PhysicalSize::new(
+                                                config.width,
+                                                config.height,
+                                            ),
+                                        );
+                                        resize_config = Some(config);
+                                        target.exit();
+                                    } else {
+                                        let error =
+                                            global.surface_configure(surface, device, &config);
+                                        if let Some(e) = error {
+                                            panic!("{:?}", e);
+                                        }
+                                    }
+                                }
+                                Some(trace::Action::Present(id)) => {
+                                    frame_count += 1;
+                                    log::debug!("Presenting frame {}", frame_count);
+                                    global.surface_present(id).unwrap();
+                                    target.exit();
+                                }
+                                Some(trace::Action::DiscardSurfaceTexture(id)) => {
+                                    log::debug!("Discarding frame {}", frame_count);
+                                    global.surface_texture_discard(id).unwrap();
+                                    target.exit();
+                                }
+                                Some(action) => {
+                                    global.process(
+                                        device,
+                                        queue,
+                                        action,
+                                        &dir,
+                                        &mut command_buffer_id_manager,
+                                    );
+                                }
+                                None => {
+                                    if !done {
+                                        println!("Finished the end at frame {}", frame_count);
+                                        done = true;
+                                    }
+                                    target.exit();
+                                }
+                            }
+                        }
+                        WindowEvent::Resized(_) => {
+                            if let Some(config) = resize_config.take() {
+                                let error = global.surface_configure(surface, device, &config);
                                 if let Some(e) = error {
                                     panic!("{:?}", e);
                                 }
                             }
                         }
-                        Some(trace::Action::Present(id)) => {
-                            frame_count += 1;
-                            log::debug!("Presenting frame {}", frame_count);
-                            gfx_select!(device => global.surface_present(id)).unwrap();
-                                target.exit();
-                        }
-                        Some(trace::Action::DiscardSurfaceTexture(id)) => {
-                            log::debug!("Discarding frame {}", frame_count);
-                            gfx_select!(device => global.surface_texture_discard(id)).unwrap();
-                                target.exit();
-                        }
-                        Some(action) => {
-                            gfx_select!(device => global.process(device, queue, action, &dir, &mut command_buffer_id_manager));
-                        }
-                        None => {
-                            if !done {
-                                println!("Finished the end at frame {}", frame_count);
-                                done = true;
-                            }
-                                target.exit();
+                        WindowEvent::KeyboardInput {
+                            event:
+                                KeyEvent {
+                                    logical_key: Key::Named(NamedKey::Escape),
+                                    state: ElementState::Pressed,
+                                    ..
+                                },
+                            ..
                         }
-                    }
+                        | WindowEvent::CloseRequested => target.exit(),
+                        _ => {}
                     },
-                    WindowEvent::Resized(_) => {
-                        if let Some(config) = resize_config.take() {
-                            let error = gfx_select!(device => global.surface_configure(surface, device, &config));
-                            if let Some(e) = error {
-                                panic!("{:?}", e);
-                            }
-                        }
-                    }
-                    WindowEvent::KeyboardInput {
-                        event: KeyEvent {
-                            logical_key: Key::Named(NamedKey::Escape),
-                            state: ElementState::Pressed,
-                            ..
-                        },
-                        ..
+                    Event::LoopExiting => {
+                        log::info!("Closing");
+                        global.device_poll(device, wgt::Maintain::wait()).unwrap();
                     }
-                    | WindowEvent::CloseRequested => target.exit(),
                     _ => {}
-                },
-                Event::LoopExiting => {
-                    log::info!("Closing");
-                    gfx_select!(device => global.device_poll(device, wgt::Maintain::wait())).unwrap();
                 }
-                _ => {}
-            }
-        }).unwrap();
+            })
+            .unwrap();
     }
 }
 
diff --git a/player/tests/test.rs b/player/tests/test.rs
index f16e7fa32b..ee8e2ecc0d 100644
--- a/player/tests/test.rs
+++ b/player/tests/test.rs
@@ -107,7 +107,7 @@ impl Test<'_> {
         let backend = adapter.backend();
         let device_id = wgc::id::Id::zip(test_num, 0, backend);
         let queue_id = wgc::id::Id::zip(test_num, 0, backend);
-        let (_, _, error) = wgc::gfx_select!(adapter => global.adapter_request_device(
+        let (_, _, error) = global.adapter_request_device(
             adapter,
             &wgt::DeviceDescriptor {
                 label: None,
@@ -117,8 +117,8 @@ impl Test<'_> {
             },
             None,
             Some(device_id),
-            Some(queue_id)
-        ));
+            Some(queue_id),
+        );
         if let Some(e) = error {
             panic!("{:?}", e);
         }
@@ -126,35 +126,47 @@ impl Test<'_> {
         let mut command_buffer_id_manager = wgc::identity::IdentityManager::new();
         println!("\t\t\tRunning...");
         for action in self.actions {
-            wgc::gfx_select!(device_id => global.process(device_id, queue_id, action, dir, &mut command_buffer_id_manager));
+            global.process(
+                device_id,
+                queue_id,
+                action,
+                dir,
+                &mut command_buffer_id_manager,
+            );
         }
         println!("\t\t\tMapping...");
         for expect in &self.expectations {
             let buffer = wgc::id::Id::zip(expect.buffer.index, expect.buffer.epoch, backend);
-            wgc::gfx_select!(device_id => global.buffer_map_async(
-                buffer,
-                expect.offset,
-                Some(expect.data.len() as u64),
-                wgc::resource::BufferMapOperation {
-                    host: wgc::device::HostMap::Read,
-                    callback: Some(wgc::resource::BufferMapCallback::from_rust(
-                        Box::new(map_callback)
-                    )),
-                }
-            ))
-            .unwrap();
+            global
+                .buffer_map_async(
+                    buffer,
+                    expect.offset,
+                    Some(expect.data.len() as u64),
+                    wgc::resource::BufferMapOperation {
+                        host: wgc::device::HostMap::Read,
+                        callback: Some(wgc::resource::BufferMapCallback::from_rust(Box::new(
+                            map_callback,
+                        ))),
+                    },
+                )
+                .unwrap();
         }
 
         println!("\t\t\tWaiting...");
-        wgc::gfx_select!(device_id => global.device_poll(device_id, wgt::Maintain::wait()))
+        global
+            .device_poll(device_id, wgt::Maintain::wait())
             .unwrap();
 
         for expect in self.expectations {
             println!("\t\t\tChecking {}", expect.name);
             let buffer = wgc::id::Id::zip(expect.buffer.index, expect.buffer.epoch, backend);
-            let (ptr, size) =
-                wgc::gfx_select!(device_id => global.buffer_get_mapped_range(buffer, expect.offset, Some(expect.data.len() as wgt::BufferAddress)))
-                    .unwrap();
+            let (ptr, size) = global
+                .buffer_get_mapped_range(
+                    buffer,
+                    expect.offset,
+                    Some(expect.data.len() as wgt::BufferAddress),
+                )
+                .unwrap();
             let contents = unsafe { slice::from_raw_parts(ptr.as_ptr(), size as usize) };
             let expected_data = match expect.data {
                 ExpectedData::Raw(vec) => vec,
@@ -231,11 +243,8 @@ impl Corpus {
                 };
 
                 println!("\tBackend {:?}", backend);
-                let supported_features =
-                    wgc::gfx_select!(adapter => global.adapter_features(adapter)).unwrap();
-                let downlevel_caps =
-                    wgc::gfx_select!(adapter => global.adapter_downlevel_capabilities(adapter))
-                        .unwrap();
+                let supported_features = global.adapter_features(adapter).unwrap();
+                let downlevel_caps = global.adapter_downlevel_capabilities(adapter).unwrap();
 
                 let test = Test::load(dir.join(test_path), adapter.backend());
                 if !supported_features.contains(test.features) {
diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs
index cfdca16832..5cbb736301 100644
--- a/wgpu-core/src/hub.rs
+++ b/wgpu-core/src/hub.rs
@@ -10,10 +10,7 @@ of course `Debug`.
 [`id::BufferId`]: crate::id::BufferId
 
 Each `Id` contains not only an index for the resource it denotes but
-also a Backend indicating which `wgpu` backend it belongs to. You
-can use the [`gfx_select`] macro to dynamically dispatch on an id's
-backend to a function specialized at compile time for a specific
-backend. See that macro's documentation for details.
+also a Backend indicating which `wgpu` backend it belongs to.
 
 `Id`s also incorporate a generation number, for additional validation.
 
@@ -96,7 +93,6 @@ creation fails, the id supplied for that resource is marked to indicate
 as much, allowing subsequent operations using that id to be properly
 flagged as errors as well.
 
-[`gfx_select`]: crate::gfx_select
 [`process`]: crate::identity::IdentityManager::process
 [`Id<R>`]: crate::id::Id
 [wrapped in a mutex]: trait.IdentityHandler.html#impl-IdentityHandler%3CI%3E-for-Mutex%3CIdentityManager%3E
diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs
index 9b27d64a7b..ea7960fa57 100644
--- a/wgpu-core/src/lib.rs
+++ b/wgpu-core/src/lib.rs
@@ -153,169 +153,6 @@ If you are running this program on native and not in a browser and wish to work
 Adapter::downlevel_properties or Device::downlevel_properties to get a listing of the features the current \
 platform supports.";
 
-// #[cfg] attributes in exported macros are interesting!
-//
-// The #[cfg] conditions in a macro's expansion are evaluated using the
-// configuration options (features, target architecture and os, etc.) in force
-// where the macro is *used*, not where it is *defined*. That is, if crate A
-// defines a macro like this:
-//
-//     #[macro_export]
-//     macro_rules! if_bleep {
-//         { } => {
-//             #[cfg(feature = "bleep")]
-//             bleep();
-//         }
-//     }
-//
-// and then crate B uses it like this:
-//
-//     fn f() {
-//         if_bleep! { }
-//     }
-//
-// then it is crate B's `"bleep"` feature, not crate A's, that determines
-// whether the macro expands to a function call or an empty statement. The
-// entire configuration predicate is evaluated in the use's context, not the
-// definition's.
-//
-// Since `wgpu-core` selects back ends using features, we need to make sure the
-// arms of the `gfx_select!` macro are pruned according to `wgpu-core`'s
-// features, not those of whatever crate happens to be using `gfx_select!`. This
-// means we can't use `#[cfg]` attributes in `gfx_select!`s definition itself.
-// Instead, for each backend, `gfx_select!` must use a macro whose definition is
-// selected by `#[cfg]` in `wgpu-core`. The configuration predicate is still
-// evaluated when the macro is used; we've just moved the `#[cfg]` into a macro
-// used by `wgpu-core` itself.
-
-/// Define an exported macro named `$public` that expands to an expression if
-/// the feature `$feature` is enabled, or to a panic otherwise.
-///
-/// This is used in the definition of `gfx_select!`, to dispatch the
-/// call to the appropriate backend, but panic if that backend was not
-/// compiled in.
-///
-/// For a call like this:
-///
-/// ```ignore
-/// define_backend_caller! { name, private, "feature" if cfg_condition }
-/// ```
-///
-/// define a macro `name`, used like this:
-///
-/// ```ignore
-/// name!(expr)
-/// ```
-///
-/// that expands to `expr` if `#[cfg(cfg_condition)]` is enabled, or a
-/// panic otherwise. The panic message complains that `"feature"` is
-/// not enabled.
-///
-/// Because of odd technical limitations on exporting macros expanded
-/// by other macros, you must supply both a public-facing name for the
-/// macro and a private name, `$private`, which is never used
-/// outside this macro. For details:
-/// <https://github.com/rust-lang/rust/pull/52234#issuecomment-976702997>
-macro_rules! define_backend_caller {
-    { $public:ident, $private:ident, $feature:literal if $cfg:meta } => {
-        #[cfg($cfg)]
-        #[macro_export]
-        macro_rules! $private {
-            ( $call:expr ) => ( $call )
-        }
-
-        #[cfg(not($cfg))]
-        #[macro_export]
-        macro_rules! $private {
-            ( $call:expr ) => (
-                panic!("Identifier refers to disabled backend feature {:?}", $feature)
-            )
-        }
-
-        // See note about rust-lang#52234 above.
-        #[doc(hidden)] pub use $private as $public;
-    }
-}
-
-// Define a macro for each `gfx_select!` match arm. For example,
-//
-//     gfx_if_vulkan!(expr)
-//
-// expands to `expr` if the `"vulkan"` feature is enabled, or to a panic
-// otherwise.
-define_backend_caller! { gfx_if_vulkan, gfx_if_vulkan_hidden, "vulkan" if all(feature = "vulkan", not(target_arch = "wasm32")) }
-define_backend_caller! { gfx_if_metal, gfx_if_metal_hidden, "metal" if all(feature = "metal", any(target_os = "macos", target_os = "ios")) }
-define_backend_caller! { gfx_if_dx12, gfx_if_dx12_hidden, "dx12" if all(feature = "dx12", windows) }
-define_backend_caller! { gfx_if_gles, gfx_if_gles_hidden, "gles" if feature = "gles" }
-define_backend_caller! { gfx_if_empty, gfx_if_empty_hidden, "empty" if all(
-    not(any(feature = "metal", feature = "vulkan", feature = "gles")),
-    any(target_os = "macos", target_os = "ios"),
-) }
-
-/// Dispatch on an [`Id`]'s backend to a backend-generic method.
-///
-/// Uses of this macro have the form:
-///
-/// ```ignore
-///
-///     gfx_select!(id => value.method(args...))
-///
-/// ```
-///
-/// This expands to an expression that calls `value.method::<A>(args...)` for
-/// the backend `A` selected by `id`. The expansion matches on `id.backend()`,
-/// with an arm for each backend type in [`wgpu_types::Backend`] which calls the
-/// specialization of `method` for the given backend. This allows resource
-/// identifiers to select backends dynamically, even though many `wgpu_core`
-/// methods are compiled and optimized for a specific back end.
-///
-/// This macro is typically used to call methods on [`wgpu_core::global::Global`],
-/// many of which take a single `hal::Api` type parameter. For example, to
-/// create a new buffer on the device indicated by `device_id`, one would say:
-///
-/// ```ignore
-/// gfx_select!(device_id => global.device_create_buffer(device_id, ...))
-/// ```
-///
-/// where the `device_create_buffer` method is defined like this:
-///
-/// ```ignore
-/// impl Global {
-///    pub fn device_create_buffer<A: HalApi>(&self, ...) -> ...
-///    { ... }
-/// }
-/// ```
-///
-/// That `gfx_select!` call uses `device_id`'s backend to select the right
-/// backend type `A` for a call to `Global::device_create_buffer<A>`.
-///
-/// However, there's nothing about this macro that is specific to `hub::Global`.
-/// For example, Firefox's embedding of `wgpu_core` defines its own types with
-/// methods that take `hal::Api` type parameters. Firefox uses `gfx_select!` to
-/// dynamically dispatch to the right specialization based on the resource's id.
-///
-/// [`wgpu_types::Backend`]: wgt::Backend
-/// [`wgpu_core::global::Global`]: crate::global::Global
-/// [`Id`]: id::Id
-//
-// TODO(#5124): Remove this altogether.
-#[macro_export]
-macro_rules! gfx_select {
-    // Simple two-component expression, like `self.0.method(..)`.
-    ($id:expr => $c0:ident.$c1:tt.$method:ident $params:tt) => {
-        $crate::gfx_select!($id => {$c0.$c1}, $method $params)
-    };
-
-    // Simple identifier-only expression, like `global.method(..)`.
-    ($id:expr => $c0:ident.$method:ident $params:tt) => {
-        $crate::gfx_select!($id => {$c0}, $method $params)
-    };
-
-    ($id:expr => {$($c:tt)*}, $method:ident $params:tt) => {
-        $($c)*.$method $params
-    };
-}
-
 #[cfg(feature = "api_log_info")]
 macro_rules! api_log {
     ($($arg:tt)+) => (log::info!($($arg)+))
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 08acd37595..30a8743fb2 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -25,8 +25,8 @@ use std::{
     sync::Arc,
 };
 use wgc::{
-    command::bundle_ffi::*, device::DeviceLostClosure, gfx_select, id::CommandEncoderId,
-    id::TextureViewId, pipeline::CreateShaderModuleError,
+    command::bundle_ffi::*, device::DeviceLostClosure, id::CommandEncoderId, id::TextureViewId,
+    pipeline::CreateShaderModuleError,
 };
 use wgt::WasmNotSendSync;
 
@@ -646,13 +646,13 @@ impl crate::Context for ContextWgpuCore {
         if trace_dir.is_some() {
             log::error!("Feature 'trace' has been removed temporarily, see https://github.com/gfx-rs/wgpu/issues/5974");
         }
-        let (device_id, queue_id, error) = wgc::gfx_select!(*adapter => self.0.adapter_request_device(
+        let (device_id, queue_id, error) = self.0.adapter_request_device(
             *adapter,
             &desc.map_label(|l| l.map(Borrowed)),
             None,
             None,
-            None
-        ));
+            None,
+        );
         if let Some(err) = error {
             return ready(Err(err.into()));
         }
@@ -683,7 +683,7 @@ impl crate::Context for ContextWgpuCore {
         surface: &Self::SurfaceId,
         _surface_data: &Self::SurfaceData,
     ) -> bool {
-        match wgc::gfx_select!(adapter => self.0.adapter_is_surface_supported(*adapter, *surface)) {
+        match self.0.adapter_is_surface_supported(*adapter, *surface) {
             Ok(result) => result,
             Err(err) => self.handle_error_fatal(err, "Adapter::is_surface_supported"),
         }
@@ -694,7 +694,7 @@ impl crate::Context for ContextWgpuCore {
         adapter: &Self::AdapterId,
         _adapter_data: &Self::AdapterData,
     ) -> Features {
-        match wgc::gfx_select!(*adapter => self.0.adapter_features(*adapter)) {
+        match self.0.adapter_features(*adapter) {
             Ok(features) => features,
             Err(err) => self.handle_error_fatal(err, "Adapter::features"),
         }
@@ -705,7 +705,7 @@ impl crate::Context for ContextWgpuCore {
         adapter: &Self::AdapterId,
         _adapter_data: &Self::AdapterData,
     ) -> Limits {
-        match wgc::gfx_select!(*adapter => self.0.adapter_limits(*adapter)) {
+        match self.0.adapter_limits(*adapter) {
             Ok(limits) => limits,
             Err(err) => self.handle_error_fatal(err, "Adapter::limits"),
         }
@@ -716,7 +716,7 @@ impl crate::Context for ContextWgpuCore {
         adapter: &Self::AdapterId,
         _adapter_data: &Self::AdapterData,
     ) -> DownlevelCapabilities {
-        match wgc::gfx_select!(*adapter => self.0.adapter_downlevel_capabilities(*adapter)) {
+        match self.0.adapter_downlevel_capabilities(*adapter) {
             Ok(downlevel) => downlevel,
             Err(err) => self.handle_error_fatal(err, "Adapter::downlevel_properties"),
         }
@@ -727,7 +727,7 @@ impl crate::Context for ContextWgpuCore {
         adapter: &wgc::id::AdapterId,
         _adapter_data: &Self::AdapterData,
     ) -> AdapterInfo {
-        match wgc::gfx_select!(*adapter => self.0.adapter_get_info(*adapter)) {
+        match self.0.adapter_get_info(*adapter) {
             Ok(info) => info,
             Err(err) => self.handle_error_fatal(err, "Adapter::get_info"),
         }
@@ -739,8 +739,7 @@ impl crate::Context for ContextWgpuCore {
         _adapter_data: &Self::AdapterData,
         format: wgt::TextureFormat,
     ) -> wgt::TextureFormatFeatures {
-        match wgc::gfx_select!(*adapter => self.0.adapter_get_texture_format_features(*adapter, format))
-        {
+        match self.0.adapter_get_texture_format_features(*adapter, format) {
             Ok(info) => info,
             Err(err) => self.handle_error_fatal(err, "Adapter::get_texture_format_features"),
         }
@@ -751,7 +750,7 @@ impl crate::Context for ContextWgpuCore {
         adapter: &Self::AdapterId,
         _adapter_data: &Self::AdapterData,
     ) -> wgt::PresentationTimestamp {
-        match wgc::gfx_select!(*adapter => self.0.adapter_get_presentation_timestamp(*adapter)) {
+        match self.0.adapter_get_presentation_timestamp(*adapter) {
             Ok(timestamp) => timestamp,
             Err(err) => self.handle_error_fatal(err, "Adapter::correlate_presentation_timestamp"),
         }
@@ -764,7 +763,7 @@ impl crate::Context for ContextWgpuCore {
         adapter: &Self::AdapterId,
         _adapter_data: &Self::AdapterData,
     ) -> wgt::SurfaceCapabilities {
-        match wgc::gfx_select!(adapter => self.0.surface_get_capabilities(*surface, *adapter)) {
+        match self.0.surface_get_capabilities(*surface, *adapter) {
             Ok(caps) => caps,
             Err(wgc::instance::GetSurfaceSupportError::Unsupported) => {
                 wgt::SurfaceCapabilities::default()
@@ -781,7 +780,7 @@ impl crate::Context for ContextWgpuCore {
         _device_data: &Self::DeviceData,
         config: &crate::SurfaceConfiguration,
     ) {
-        let error = wgc::gfx_select!(device => self.0.surface_configure(*surface, *device, config));
+        let error = self.0.surface_configure(*surface, *device, config);
         if let Some(e) = error {
             self.handle_error_fatal(e, "Surface::configure");
         } else {
@@ -843,14 +842,14 @@ impl crate::Context for ContextWgpuCore {
         device: &Self::DeviceId,
         _device_data: &Self::DeviceData,
     ) -> Features {
-        match wgc::gfx_select!(device => self.0.device_features(*device)) {
+        match self.0.device_features(*device) {
             Ok(features) => features,
             Err(err) => self.handle_error_fatal(err, "Device::features"),
         }
     }
 
     fn device_limits(&self, device: &Self::DeviceId, _device_data: &Self::DeviceData) -> Limits {
-        match wgc::gfx_select!(device => self.0.device_limits(*device)) {
+        match self.0.device_limits(*device) {
             Ok(limits) => limits,
             Err(err) => self.handle_error_fatal(err, "Device::limits"),
         }
@@ -861,7 +860,7 @@ impl crate::Context for ContextWgpuCore {
         device: &Self::DeviceId,
         _device_data: &Self::DeviceData,
     ) -> DownlevelCapabilities {
-        match wgc::gfx_select!(device => self.0.device_downlevel_properties(*device)) {
+        match self.0.device_downlevel_properties(*device) {
             Ok(limits) => limits,
             Err(err) => self.handle_error_fatal(err, "Device::downlevel_properties"),
         }
@@ -913,9 +912,9 @@ impl crate::Context for ContextWgpuCore {
             ShaderSource::Naga(module) => wgc::pipeline::ShaderModuleSource::Naga(module),
             ShaderSource::Dummy(_) => panic!("found `ShaderSource::Dummy`"),
         };
-        let (id, error) = wgc::gfx_select!(
-            device => self.0.device_create_shader_module(*device, &descriptor, source, None)
-        );
+        let (id, error) = self
+            .0
+            .device_create_shader_module(*device, &descriptor, source, None);
         let compilation_info = match error {
             Some(cause) => {
                 self.handle_error(
@@ -944,9 +943,14 @@ impl crate::Context for ContextWgpuCore {
             // runtime checks
             shader_bound_checks: unsafe { wgt::ShaderBoundChecks::unchecked() },
         };
-        let (id, error) = wgc::gfx_select!(
-            device => self.0.device_create_shader_module_spirv(*device, &descriptor, Borrowed(&desc.source), None)
-        );
+        let (id, error) = unsafe {
+            self.0.device_create_shader_module_spirv(
+                *device,
+                &descriptor,
+                Borrowed(&desc.source),
+                None,
+            )
+        };
         let compilation_info = match error {
             Some(cause) => {
                 self.handle_error(
@@ -972,9 +976,9 @@ impl crate::Context for ContextWgpuCore {
             label: desc.label.map(Borrowed),
             entries: Borrowed(desc.entries),
         };
-        let (id, error) = wgc::gfx_select!(
-            device => self.0.device_create_bind_group_layout(*device, &descriptor, None)
-        );
+        let (id, error) = self
+            .0
+            .device_create_bind_group_layout(*device, &descriptor, None);
         if let Some(cause) = error {
             self.handle_error(
                 &device_data.error_sink,
@@ -1083,11 +1087,7 @@ impl crate::Context for ContextWgpuCore {
             entries: Borrowed(&entries),
         };
 
-        let (id, error) = wgc::gfx_select!(device => self.0.device_create_bind_group(
-            *device,
-            &descriptor,
-            None
-        ));
+        let (id, error) = self.0.device_create_bind_group(*device, &descriptor, None);
         if let Some(cause) = error {
             self.handle_error(
                 &device_data.error_sink,
@@ -1124,11 +1124,9 @@ impl crate::Context for ContextWgpuCore {
             push_constant_ranges: Borrowed(desc.push_constant_ranges),
         };
 
-        let (id, error) = wgc::gfx_select!(device => self.0.device_create_pipeline_layout(
-            *device,
-            &descriptor,
-            None
-        ));
+        let (id, error) = self
+            .0
+            .device_create_pipeline_layout(*device, &descriptor, None);
         if let Some(cause) = error {
             self.handle_error(
                 &device_data.error_sink,
@@ -1191,12 +1189,9 @@ impl crate::Context for ContextWgpuCore {
             cache: desc.cache.map(|c| c.id.into()),
         };
 
-        let (id, error) = wgc::gfx_select!(device => self.0.device_create_render_pipeline(
-            *device,
-            &descriptor,
-            None,
-            None,
-        ));
+        let (id, error) = self
+            .0
+            .device_create_render_pipeline(*device, &descriptor, None, None);
         if let Some(cause) = error {
             if let wgc::pipeline::CreateRenderPipelineError::Internal { stage, ref error } = cause {
                 log::error!("Shader translation error for stage {:?}: {}", stage, error);
@@ -1233,12 +1228,9 @@ impl crate::Context for ContextWgpuCore {
             cache: desc.cache.map(|c| c.id.into()),
         };
 
-        let (id, error) = wgc::gfx_select!(device => self.0.device_create_compute_pipeline(
-            *device,
-            &descriptor,
-            None,
-            None,
-        ));
+        let (id, error) = self
+            .0
+            .device_create_compute_pipeline(*device, &descriptor, None, None);
         if let Some(cause) = error {
             if let wgc::pipeline::CreateComputePipelineError::Internal(ref error) = cause {
                 log::error!(
@@ -1271,11 +1263,10 @@ impl crate::Context for ContextWgpuCore {
             data: desc.data.map(Borrowed),
             fallback: desc.fallback,
         };
-        let (id, error) = wgc::gfx_select!(device => self.0.device_create_pipeline_cache(
-            *device,
-            &descriptor,
-            None
-        ));
+        let (id, error) = unsafe {
+            self.0
+                .device_create_pipeline_cache(*device, &descriptor, None)
+        };
         if let Some(cause) = error {
             self.handle_error(
                 &device_data.error_sink,
@@ -1293,11 +1284,9 @@ impl crate::Context for ContextWgpuCore {
         device_data: &Self::DeviceData,
         desc: &crate::BufferDescriptor<'_>,
     ) -> (Self::BufferId, Self::BufferData) {
-        let (id, error) = wgc::gfx_select!(device => self.0.device_create_buffer(
-            *device,
-            &desc.map_label(|l| l.map(Borrowed)),
-            None
-        ));
+        let (id, error) =
+            self.0
+                .device_create_buffer(*device, &desc.map_label(|l| l.map(Borrowed)), None);
         if let Some(cause) = error {
             self.handle_error(
                 &device_data.error_sink,
@@ -1320,11 +1309,7 @@ impl crate::Context for ContextWgpuCore {
         desc: &TextureDescriptor<'_>,
     ) -> (Self::TextureId, Self::TextureData) {
         let wgt_desc = desc.map_label_and_view_formats(|l| l.map(Borrowed), |v| v.to_vec());
-        let (id, error) = wgc::gfx_select!(device => self.0.device_create_texture(
-            *device,
-            &wgt_desc,
-            None
-        ));
+        let (id, error) = self.0.device_create_texture(*device, &wgt_desc, None);
         if let Some(cause) = error {
             self.handle_error(
                 &device_data.error_sink,
@@ -1364,11 +1349,7 @@ impl crate::Context for ContextWgpuCore {
             border_color: desc.border_color,
         };
 
-        let (id, error) = wgc::gfx_select!(device => self.0.device_create_sampler(
-            *device,
-            &descriptor,
-            None
-        ));
+        let (id, error) = self.0.device_create_sampler(*device, &descriptor, None);
         if let Some(cause) = error {
             self.handle_error(
                 &device_data.error_sink,
@@ -1385,11 +1366,9 @@ impl crate::Context for ContextWgpuCore {
         device_data: &Self::DeviceData,
         desc: &wgt::QuerySetDescriptor<Label<'_>>,
     ) -> (Self::QuerySetId, Self::QuerySetData) {
-        let (id, error) = wgc::gfx_select!(device => self.0.device_create_query_set(
-            *device,
-            &desc.map_label(|l| l.map(Borrowed)),
-            None
-        ));
+        let (id, error) =
+            self.0
+                .device_create_query_set(*device, &desc.map_label(|l| l.map(Borrowed)), None);
         if let Some(cause) = error {
             self.handle_error_nolabel(&device_data.error_sink, cause, "Device::create_query_set");
         }
@@ -1401,11 +1380,11 @@ impl crate::Context for ContextWgpuCore {
         device_data: &Self::DeviceData,
         desc: &CommandEncoderDescriptor<'_>,
     ) -> (Self::CommandEncoderId, Self::CommandEncoderData) {
-        let (id, error) = wgc::gfx_select!(device => self.0.device_create_command_encoder(
+        let (id, error) = self.0.device_create_command_encoder(
             *device,
             &desc.map_label(|l| l.map(Borrowed)),
-            None
-        ));
+            None,
+        );
         if let Some(cause) = error {
             self.handle_error(
                 &device_data.error_sink,
@@ -1442,7 +1421,7 @@ impl crate::Context for ContextWgpuCore {
     }
     #[doc(hidden)]
     fn device_make_invalid(&self, device: &Self::DeviceId, _device_data: &Self::DeviceData) {
-        wgc::gfx_select!(device => self.0.device_make_invalid(*device));
+        self.0.device_make_invalid(*device);
     }
     #[cfg_attr(not(any(native, Emscripten)), allow(unused))]
     fn device_drop(&self, device: &Self::DeviceId, _device_data: &Self::DeviceData) {
@@ -1450,13 +1429,13 @@ impl crate::Context for ContextWgpuCore {
         {
             // Call device_poll, but don't check for errors. We have to use its
             // return value, but we just drop it.
-            let _ = wgc::gfx_select!(device => self.0.device_poll(*device, wgt::Maintain::wait()));
-            wgc::gfx_select!(device => self.0.device_drop(*device));
+            let _ = self.0.device_poll(*device, wgt::Maintain::wait());
+            self.0.device_drop(*device);
         }
     }
     #[cfg_attr(target_arch = "wasm32", allow(unused))]
     fn queue_drop(&self, queue: &Self::QueueId, _device_data: &Self::QueueData) {
-        wgc::gfx_select!(queue => self.0.queue_drop(*queue));
+        self.0.queue_drop(*queue);
     }
     fn device_set_device_lost_callback(
         &self,
@@ -1465,10 +1444,11 @@ impl crate::Context for ContextWgpuCore {
         device_lost_callback: crate::context::DeviceLostCallback,
     ) {
         let device_lost_closure = DeviceLostClosure::from_rust(device_lost_callback);
-        wgc::gfx_select!(device => self.0.device_set_device_lost_closure(*device, device_lost_closure));
+        self.0
+            .device_set_device_lost_closure(*device, device_lost_closure);
     }
     fn device_destroy(&self, device: &Self::DeviceId, _device_data: &Self::DeviceData) {
-        wgc::gfx_select!(device => self.0.device_destroy(*device));
+        self.0.device_destroy(*device);
     }
     fn device_mark_lost(
         &self,
@@ -1478,7 +1458,7 @@ impl crate::Context for ContextWgpuCore {
     ) {
         // We do not provide a reason to device_lose, because all reasons other than
         // destroyed (which this is not) are "unknown".
-        wgc::gfx_select!(device => self.0.device_mark_lost(*device, message));
+        self.0.device_mark_lost(*device, message);
     }
     fn device_poll(
         &self,
@@ -1487,10 +1467,7 @@ impl crate::Context for ContextWgpuCore {
         maintain: crate::Maintain,
     ) -> wgt::MaintainResult {
         let maintain_inner = maintain.map_index(|i| *i.0.as_ref().downcast_ref().unwrap());
-        match wgc::gfx_select!(device => self.0.device_poll(
-            *device,
-            maintain_inner
-        )) {
+        match self.0.device_poll(*device, maintain_inner) {
             Ok(done) => match done {
                 true => wgt::MaintainResult::SubmissionQueueEmpty,
                 false => wgt::MaintainResult::Ok,
@@ -1550,8 +1527,12 @@ impl crate::Context for ContextWgpuCore {
             ))),
         };
 
-        match wgc::gfx_select!(buffer => self.0.buffer_map_async(*buffer, range.start, Some(range.end-range.start), operation))
-        {
+        match self.0.buffer_map_async(
+            *buffer,
+            range.start,
+            Some(range.end - range.start),
+            operation,
+        ) {
             Ok(()) => (),
             Err(cause) => {
                 self.handle_error_nolabel(&buffer_data.error_sink, cause, "Buffer::map_async")
@@ -1565,11 +1546,10 @@ impl crate::Context for ContextWgpuCore {
         sub_range: Range<wgt::BufferAddress>,
     ) -> Box<dyn crate::context::BufferMappedRange> {
         let size = sub_range.end - sub_range.start;
-        match wgc::gfx_select!(buffer => self.0.buffer_get_mapped_range(
-            *buffer,
-            sub_range.start,
-            Some(size)
-        )) {
+        match self
+            .0
+            .buffer_get_mapped_range(*buffer, sub_range.start, Some(size))
+        {
             Ok((ptr, size)) => Box::new(BufferMappedRange {
                 ptr,
                 size: size as usize,
@@ -1579,7 +1559,7 @@ impl crate::Context for ContextWgpuCore {
     }
 
     fn buffer_unmap(&self, buffer: &Self::BufferId, buffer_data: &Self::BufferData) {
-        match wgc::gfx_select!(buffer => self.0.buffer_unmap(*buffer)) {
+        match self.0.buffer_unmap(*buffer) {
             Ok(()) => (),
             Err(cause) => {
                 self.handle_error_nolabel(&buffer_data.error_sink, cause, "Buffer::buffer_unmap")
@@ -1613,9 +1593,7 @@ impl crate::Context for ContextWgpuCore {
                 array_layer_count: desc.array_layer_count,
             },
         };
-        let (id, error) = wgc::gfx_select!(
-            texture => self.0.texture_create_view(*texture, &descriptor, None)
-        );
+        let (id, error) = self.0.texture_create_view(*texture, &descriptor, None);
         if let Some(cause) = error {
             self.handle_error(
                 &texture_data.error_sink,
@@ -1632,25 +1610,25 @@ impl crate::Context for ContextWgpuCore {
     }
 
     fn adapter_drop(&self, adapter: &Self::AdapterId, _adapter_data: &Self::AdapterData) {
-        wgc::gfx_select!(*adapter => self.0.adapter_drop(*adapter))
+        self.0.adapter_drop(*adapter)
     }
 
     fn buffer_destroy(&self, buffer: &Self::BufferId, _buffer_data: &Self::BufferData) {
         // Per spec, no error to report. Even calling destroy multiple times is valid.
-        let _ = wgc::gfx_select!(buffer => self.0.buffer_destroy(*buffer));
+        let _ = self.0.buffer_destroy(*buffer);
     }
 
     fn buffer_drop(&self, buffer: &Self::BufferId, _buffer_data: &Self::BufferData) {
-        wgc::gfx_select!(buffer => self.0.buffer_drop(*buffer))
+        self.0.buffer_drop(*buffer)
     }
 
     fn texture_destroy(&self, texture: &Self::TextureId, _texture_data: &Self::TextureData) {
         // Per spec, no error to report. Even calling destroy multiple times is valid.
-        let _ = wgc::gfx_select!(texture => self.0.texture_destroy(*texture));
+        let _ = self.0.texture_destroy(*texture);
     }
 
     fn texture_drop(&self, texture: &Self::TextureId, _texture_data: &Self::TextureData) {
-        wgc::gfx_select!(texture => self.0.texture_drop(*texture))
+        self.0.texture_drop(*texture)
     }
 
     fn texture_view_drop(
@@ -1658,15 +1636,15 @@ impl crate::Context for ContextWgpuCore {
         texture_view: &Self::TextureViewId,
         __texture_view_data: &Self::TextureViewData,
     ) {
-        let _ = wgc::gfx_select!(*texture_view => self.0.texture_view_drop(*texture_view));
+        let _ = self.0.texture_view_drop(*texture_view);
     }
 
     fn sampler_drop(&self, sampler: &Self::SamplerId, _sampler_data: &Self::SamplerData) {
-        wgc::gfx_select!(*sampler => self.0.sampler_drop(*sampler))
+        self.0.sampler_drop(*sampler)
     }
 
     fn query_set_drop(&self, query_set: &Self::QuerySetId, _query_set_data: &Self::QuerySetData) {
-        wgc::gfx_select!(*query_set => self.0.query_set_drop(*query_set))
+        self.0.query_set_drop(*query_set)
     }
 
     fn bind_group_drop(
@@ -1674,7 +1652,7 @@ impl crate::Context for ContextWgpuCore {
         bind_group: &Self::BindGroupId,
         _bind_group_data: &Self::BindGroupData,
     ) {
-        wgc::gfx_select!(*bind_group => self.0.bind_group_drop(*bind_group))
+        self.0.bind_group_drop(*bind_group)
     }
 
     fn bind_group_layout_drop(
@@ -1682,7 +1660,7 @@ impl crate::Context for ContextWgpuCore {
         bind_group_layout: &Self::BindGroupLayoutId,
         _bind_group_layout_data: &Self::BindGroupLayoutData,
     ) {
-        wgc::gfx_select!(*bind_group_layout => self.0.bind_group_layout_drop(*bind_group_layout))
+        self.0.bind_group_layout_drop(*bind_group_layout)
     }
 
     fn pipeline_layout_drop(
@@ -1690,14 +1668,14 @@ impl crate::Context for ContextWgpuCore {
         pipeline_layout: &Self::PipelineLayoutId,
         _pipeline_layout_data: &Self::PipelineLayoutData,
     ) {
-        wgc::gfx_select!(*pipeline_layout => self.0.pipeline_layout_drop(*pipeline_layout))
+        self.0.pipeline_layout_drop(*pipeline_layout)
     }
     fn shader_module_drop(
         &self,
         shader_module: &Self::ShaderModuleId,
         _shader_module_data: &Self::ShaderModuleData,
     ) {
-        wgc::gfx_select!(*shader_module => self.0.shader_module_drop(*shader_module))
+        self.0.shader_module_drop(*shader_module)
     }
     fn command_encoder_drop(
         &self,
@@ -1705,7 +1683,7 @@ impl crate::Context for ContextWgpuCore {
         command_encoder_data: &Self::CommandEncoderData,
     ) {
         if command_encoder_data.open {
-            wgc::gfx_select!(command_encoder => self.0.command_encoder_drop(*command_encoder))
+            self.0.command_encoder_drop(*command_encoder)
         }
     }
 
@@ -1714,7 +1692,7 @@ impl crate::Context for ContextWgpuCore {
         command_buffer: &Self::CommandBufferId,
         _command_buffer_data: &Self::CommandBufferData,
     ) {
-        wgc::gfx_select!(*command_buffer => self.0.command_buffer_drop(*command_buffer))
+        self.0.command_buffer_drop(*command_buffer)
     }
 
     fn render_bundle_drop(
@@ -1722,7 +1700,7 @@ impl crate::Context for ContextWgpuCore {
         render_bundle: &Self::RenderBundleId,
         _render_bundle_data: &Self::RenderBundleData,
     ) {
-        wgc::gfx_select!(*render_bundle => self.0.render_bundle_drop(*render_bundle))
+        self.0.render_bundle_drop(*render_bundle)
     }
 
     fn compute_pipeline_drop(
@@ -1730,7 +1708,7 @@ impl crate::Context for ContextWgpuCore {
         pipeline: &Self::ComputePipelineId,
         _pipeline_data: &Self::ComputePipelineData,
     ) {
-        wgc::gfx_select!(*pipeline => self.0.compute_pipeline_drop(*pipeline))
+        self.0.compute_pipeline_drop(*pipeline)
     }
 
     fn render_pipeline_drop(
@@ -1738,7 +1716,7 @@ impl crate::Context for ContextWgpuCore {
         pipeline: &Self::RenderPipelineId,
         _pipeline_data: &Self::RenderPipelineData,
     ) {
-        wgc::gfx_select!(*pipeline => self.0.render_pipeline_drop(*pipeline))
+        self.0.render_pipeline_drop(*pipeline)
     }
 
     fn pipeline_cache_drop(
@@ -1746,7 +1724,7 @@ impl crate::Context for ContextWgpuCore {
         cache: &Self::PipelineCacheId,
         _cache_data: &Self::PipelineCacheData,
     ) {
-        wgc::gfx_select!(*cache => self.0.pipeline_cache_drop(*cache))
+        self.0.pipeline_cache_drop(*cache)
     }
 
     fn compute_pipeline_get_bind_group_layout(
@@ -1755,7 +1733,9 @@ impl crate::Context for ContextWgpuCore {
         _pipeline_data: &Self::ComputePipelineData,
         index: u32,
     ) -> (Self::BindGroupLayoutId, Self::BindGroupLayoutData) {
-        let (id, error) = wgc::gfx_select!(*pipeline => self.0.compute_pipeline_get_bind_group_layout(*pipeline, index, None));
+        let (id, error) = self
+            .0
+            .compute_pipeline_get_bind_group_layout(*pipeline, index, None);
         if let Some(err) = error {
             panic!("Error reflecting bind group {index}: {err}");
         }
@@ -1768,7 +1748,9 @@ impl crate::Context for ContextWgpuCore {
         _pipeline_data: &Self::RenderPipelineData,
         index: u32,
     ) -> (Self::BindGroupLayoutId, Self::BindGroupLayoutData) {
-        let (id, error) = wgc::gfx_select!(*pipeline => self.0.render_pipeline_get_bind_group_layout(*pipeline, index, None));
+        let (id, error) = self
+            .0
+            .render_pipeline_get_bind_group_layout(*pipeline, index, None);
         if let Some(err) = error {
             panic!("Error reflecting bind group {index}: {err}");
         }
@@ -1787,14 +1769,14 @@ impl crate::Context for ContextWgpuCore {
         destination_offset: wgt::BufferAddress,
         copy_size: wgt::BufferAddress,
     ) {
-        if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_copy_buffer_to_buffer(
+        if let Err(cause) = self.0.command_encoder_copy_buffer_to_buffer(
             *encoder,
             *source,
             source_offset,
             *destination,
             destination_offset,
-            copy_size
-        )) {
+            copy_size,
+        ) {
             self.handle_error_nolabel(
                 &encoder_data.error_sink,
                 cause,
@@ -1811,12 +1793,12 @@ impl crate::Context for ContextWgpuCore {
         destination: crate::ImageCopyTexture<'_>,
         copy_size: wgt::Extent3d,
     ) {
-        if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_copy_buffer_to_texture(
+        if let Err(cause) = self.0.command_encoder_copy_buffer_to_texture(
             *encoder,
             &map_buffer_copy_view(source),
             &map_texture_copy_view(destination),
-            &copy_size
-        )) {
+            &copy_size,
+        ) {
             self.handle_error_nolabel(
                 &encoder_data.error_sink,
                 cause,
@@ -1833,12 +1815,12 @@ impl crate::Context for ContextWgpuCore {
         destination: crate::ImageCopyBuffer<'_>,
         copy_size: wgt::Extent3d,
     ) {
-        if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_copy_texture_to_buffer(
+        if let Err(cause) = self.0.command_encoder_copy_texture_to_buffer(
             *encoder,
             &map_texture_copy_view(source),
             &map_buffer_copy_view(destination),
-            &copy_size
-        )) {
+            &copy_size,
+        ) {
             self.handle_error_nolabel(
                 &encoder_data.error_sink,
                 cause,
@@ -1855,12 +1837,12 @@ impl crate::Context for ContextWgpuCore {
         destination: crate::ImageCopyTexture<'_>,
         copy_size: wgt::Extent3d,
     ) {
-        if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_copy_texture_to_texture(
+        if let Err(cause) = self.0.command_encoder_copy_texture_to_texture(
             *encoder,
             &map_texture_copy_view(source),
             &map_texture_copy_view(destination),
-            &copy_size
-        )) {
+            &copy_size,
+        ) {
             self.handle_error_nolabel(
                 &encoder_data.error_sink,
                 cause,
@@ -1884,10 +1866,13 @@ impl crate::Context for ContextWgpuCore {
                     end_of_pass_write_index: tw.end_of_pass_write_index,
                 });
 
-        let (pass, err) = gfx_select!(encoder => self.0.command_encoder_create_compute_pass_dyn(*encoder, &wgc::command::ComputePassDescriptor {
-            label: desc.label.map(Borrowed),
-            timestamp_writes: timestamp_writes.as_ref(),
-        }));
+        let (pass, err) = self.0.command_encoder_create_compute_pass_dyn(
+            *encoder,
+            &wgc::command::ComputePassDescriptor {
+                label: desc.label.map(Borrowed),
+                timestamp_writes: timestamp_writes.as_ref(),
+            },
+        );
 
         if let Some(cause) = err {
             self.handle_error(
@@ -1943,13 +1928,18 @@ impl crate::Context for ContextWgpuCore {
                     end_of_pass_write_index: tw.end_of_pass_write_index,
                 });
 
-        let (pass, err) = gfx_select!(encoder => self.0.command_encoder_create_render_pass_dyn(*encoder, &wgc::command::RenderPassDescriptor {
-            label: desc.label.map(Borrowed),
-            timestamp_writes: timestamp_writes.as_ref(),
-            color_attachments: std::borrow::Cow::Borrowed(&colors),
-            depth_stencil_attachment: depth_stencil.as_ref(),
-            occlusion_query_set: desc.occlusion_query_set.map(|query_set| query_set.id.into()),
-        }));
+        let (pass, err) = self.0.command_encoder_create_render_pass_dyn(
+            *encoder,
+            &wgc::command::RenderPassDescriptor {
+                label: desc.label.map(Borrowed),
+                timestamp_writes: timestamp_writes.as_ref(),
+                color_attachments: std::borrow::Cow::Borrowed(&colors),
+                depth_stencil_attachment: depth_stencil.as_ref(),
+                occlusion_query_set: desc
+                    .occlusion_query_set
+                    .map(|query_set| query_set.id.into()),
+            },
+        );
 
         if let Some(cause) = err {
             self.handle_error(
@@ -1976,8 +1966,7 @@ impl crate::Context for ContextWgpuCore {
     ) -> (Self::CommandBufferId, Self::CommandBufferData) {
         let descriptor = wgt::CommandBufferDescriptor::default();
         encoder_data.open = false; // prevent the drop
-        let (id, error) =
-            wgc::gfx_select!(encoder => self.0.command_encoder_finish(encoder, &descriptor));
+        let (id, error) = self.0.command_encoder_finish(encoder, &descriptor);
         if let Some(cause) = error {
             self.handle_error_nolabel(&encoder_data.error_sink, cause, "a CommandEncoder");
         }
@@ -1991,11 +1980,10 @@ impl crate::Context for ContextWgpuCore {
         texture: &crate::Texture,
         subresource_range: &wgt::ImageSubresourceRange,
     ) {
-        if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_clear_texture(
-            *encoder,
-            texture.id.into(),
-            subresource_range
-        )) {
+        if let Err(cause) =
+            self.0
+                .command_encoder_clear_texture(*encoder, texture.id.into(), subresource_range)
+        {
             self.handle_error_nolabel(
                 &encoder_data.error_sink,
                 cause,
@@ -2012,11 +2000,10 @@ impl crate::Context for ContextWgpuCore {
         offset: wgt::BufferAddress,
         size: Option<wgt::BufferAddress>,
     ) {
-        if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_clear_buffer(
-            *encoder,
-            buffer.id.into(),
-            offset, size
-        )) {
+        if let Err(cause) =
+            self.0
+                .command_encoder_clear_buffer(*encoder, buffer.id.into(), offset, size)
+        {
             self.handle_error_nolabel(
                 &encoder_data.error_sink,
                 cause,
@@ -2031,9 +2018,7 @@ impl crate::Context for ContextWgpuCore {
         encoder_data: &Self::CommandEncoderData,
         label: &str,
     ) {
-        if let Err(cause) =
-            wgc::gfx_select!(encoder => self.0.command_encoder_insert_debug_marker(*encoder, label))
-        {
+        if let Err(cause) = self.0.command_encoder_insert_debug_marker(*encoder, label) {
             self.handle_error_nolabel(
                 &encoder_data.error_sink,
                 cause,
@@ -2048,9 +2033,7 @@ impl crate::Context for ContextWgpuCore {
         encoder_data: &Self::CommandEncoderData,
         label: &str,
     ) {
-        if let Err(cause) =
-            wgc::gfx_select!(encoder => self.0.command_encoder_push_debug_group(*encoder, label))
-        {
+        if let Err(cause) = self.0.command_encoder_push_debug_group(*encoder, label) {
             self.handle_error_nolabel(
                 &encoder_data.error_sink,
                 cause,
@@ -2064,9 +2047,7 @@ impl crate::Context for ContextWgpuCore {
         encoder: &Self::CommandEncoderId,
         encoder_data: &Self::CommandEncoderData,
     ) {
-        if let Err(cause) =
-            wgc::gfx_select!(encoder => self.0.command_encoder_pop_debug_group(*encoder))
-        {
+        if let Err(cause) = self.0.command_encoder_pop_debug_group(*encoder) {
             self.handle_error_nolabel(
                 &encoder_data.error_sink,
                 cause,
@@ -2083,11 +2064,10 @@ impl crate::Context for ContextWgpuCore {
         _query_set_data: &Self::QuerySetData,
         query_index: u32,
     ) {
-        if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_write_timestamp(
-            *encoder,
-            *query_set,
-            query_index
-        )) {
+        if let Err(cause) =
+            self.0
+                .command_encoder_write_timestamp(*encoder, *query_set, query_index)
+        {
             self.handle_error_nolabel(
                 &encoder_data.error_sink,
                 cause,
@@ -2108,14 +2088,14 @@ impl crate::Context for ContextWgpuCore {
         _destination_data: &Self::BufferData,
         destination_offset: wgt::BufferAddress,
     ) {
-        if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_resolve_query_set(
+        if let Err(cause) = self.0.command_encoder_resolve_query_set(
             *encoder,
             *query_set,
             first_query,
             query_count,
             *destination,
-            destination_offset
-        )) {
+            destination_offset,
+        ) {
             self.handle_error_nolabel(
                 &encoder_data.error_sink,
                 cause,
@@ -2130,11 +2110,11 @@ impl crate::Context for ContextWgpuCore {
         encoder_data: Self::RenderBundleEncoderData,
         desc: &crate::RenderBundleDescriptor<'_>,
     ) -> (Self::RenderBundleId, Self::RenderBundleData) {
-        let (id, error) = wgc::gfx_select!(encoder_data.parent() => self.0.render_bundle_encoder_finish(
+        let (id, error) = self.0.render_bundle_encoder_finish(
             encoder_data,
             &desc.map_label(|l| l.map(Borrowed)),
-            None
-        ));
+            None,
+        );
         if let Some(err) = error {
             self.handle_error_fatal(err, "RenderBundleEncoder::finish");
         }
@@ -2150,9 +2130,7 @@ impl crate::Context for ContextWgpuCore {
         offset: wgt::BufferAddress,
         data: &[u8],
     ) {
-        match wgc::gfx_select!(
-            *queue => self.0.queue_write_buffer(*queue, *buffer, offset, data)
-        ) {
+        match self.0.queue_write_buffer(*queue, *buffer, offset, data) {
             Ok(()) => (),
             Err(err) => {
                 self.handle_error_nolabel(&queue_data.error_sink, err, "Queue::write_buffer")
@@ -2169,9 +2147,10 @@ impl crate::Context for ContextWgpuCore {
         offset: wgt::BufferAddress,
         size: wgt::BufferSize,
     ) -> Option<()> {
-        match wgc::gfx_select!(
-            *queue => self.0.queue_validate_write_buffer(*queue, *buffer, offset, size)
-        ) {
+        match self
+            .0
+            .queue_validate_write_buffer(*queue, *buffer, offset, size)
+        {
             Ok(()) => Some(()),
             Err(err) => {
                 self.handle_error_nolabel(&queue_data.error_sink, err, "Queue::write_buffer_with");
@@ -2186,9 +2165,7 @@ impl crate::Context for ContextWgpuCore {
         queue_data: &Self::QueueData,
         size: wgt::BufferSize,
     ) -> Option<Box<dyn crate::context::QueueWriteBuffer>> {
-        match wgc::gfx_select!(
-            *queue => self.0.queue_create_staging_buffer(*queue, size, None)
-        ) {
+        match self.0.queue_create_staging_buffer(*queue, size, None) {
             Ok((buffer_id, ptr)) => Some(Box::new(QueueWriteBuffer {
                 buffer_id,
                 mapping: BufferMappedRange {
@@ -2216,9 +2193,10 @@ impl crate::Context for ContextWgpuCore {
             .as_any()
             .downcast_ref::<QueueWriteBuffer>()
             .unwrap();
-        match wgc::gfx_select!(
-            *queue => self.0.queue_write_staging_buffer(*queue, *buffer, offset, staging_buffer.buffer_id)
-        ) {
+        match self
+            .0
+            .queue_write_staging_buffer(*queue, *buffer, offset, staging_buffer.buffer_id)
+        {
             Ok(()) => (),
             Err(err) => {
                 self.handle_error_nolabel(&queue_data.error_sink, err, "Queue::write_buffer_with");
@@ -2235,13 +2213,13 @@ impl crate::Context for ContextWgpuCore {
         data_layout: wgt::ImageDataLayout,
         size: wgt::Extent3d,
     ) {
-        match wgc::gfx_select!(*queue => self.0.queue_write_texture(
+        match self.0.queue_write_texture(
             *queue,
             &map_texture_copy_view(texture),
             data,
             &data_layout,
-            &size
-        )) {
+            &size,
+        ) {
             Ok(()) => (),
             Err(err) => {
                 self.handle_error_nolabel(&queue_data.error_sink, err, "Queue::write_texture")
@@ -2258,12 +2236,12 @@ impl crate::Context for ContextWgpuCore {
         dest: crate::ImageCopyTextureTagged<'_>,
         size: wgt::Extent3d,
     ) {
-        match wgc::gfx_select!(*queue => self.0.queue_copy_external_image_to_texture(
+        match self.0.queue_copy_external_image_to_texture(
             *queue,
             source,
             map_texture_tagged_copy_view(dest),
-            size
-        )) {
+            size,
+        ) {
             Ok(()) => (),
             Err(err) => self.handle_error_nolabel(
                 &queue_data.error_sink,
@@ -2283,14 +2261,13 @@ impl crate::Context for ContextWgpuCore {
             .map(|(i, _)| i)
             .collect::<SmallVec<[_; 4]>>();
 
-        let index = match wgc::gfx_select!(*queue => self.0.queue_submit(*queue, &temp_command_buffers))
-        {
+        let index = match self.0.queue_submit(*queue, &temp_command_buffers) {
             Ok(index) => index,
             Err(err) => self.handle_error_fatal(err, "Queue::submit"),
         };
 
         for cmdbuf in &temp_command_buffers {
-            wgc::gfx_select!(*queue => self.0.command_buffer_drop(*cmdbuf));
+            self.0.command_buffer_drop(*cmdbuf);
         }
 
         index
@@ -2301,9 +2278,7 @@ impl crate::Context for ContextWgpuCore {
         queue: &Self::QueueId,
         _queue_data: &Self::QueueData,
     ) -> f32 {
-        let res = wgc::gfx_select!(queue => self.0.queue_get_timestamp_period(
-            *queue
-        ));
+        let res = self.0.queue_get_timestamp_period(*queue);
         match res {
             Ok(v) => v,
             Err(cause) => {
@@ -2320,18 +2295,18 @@ impl crate::Context for ContextWgpuCore {
     ) {
         let closure = wgc::device::queue::SubmittedWorkDoneClosure::from_rust(callback);
 
-        let res = wgc::gfx_select!(queue => self.0.queue_on_submitted_work_done(*queue, closure));
+        let res = self.0.queue_on_submitted_work_done(*queue, closure);
         if let Err(cause) = res {
             self.handle_error_fatal(cause, "Queue::on_submitted_work_done");
         }
     }
 
     fn device_start_capture(&self, device: &Self::DeviceId, _device_data: &Self::DeviceData) {
-        wgc::gfx_select!(device => self.0.device_start_capture(*device));
+        self.0.device_start_capture(*device);
     }
 
     fn device_stop_capture(&self, device: &Self::DeviceId, _device_data: &Self::DeviceData) {
-        wgc::gfx_select!(device => self.0.device_stop_capture(*device));
+        self.0.device_stop_capture(*device);
     }
 
     fn device_get_internal_counters(
@@ -2339,7 +2314,7 @@ impl crate::Context for ContextWgpuCore {
         device: &Self::DeviceId,
         _device_data: &Self::DeviceData,
     ) -> wgt::InternalCounters {
-        wgc::gfx_select!(device => self.0.device_get_internal_counters(*device))
+        self.0.device_get_internal_counters(*device)
     }
 
     fn device_generate_allocator_report(
@@ -2347,7 +2322,7 @@ impl crate::Context for ContextWgpuCore {
         device: &Self::DeviceId,
         _device_data: &Self::DeviceData,
     ) -> Option<wgt::AllocatorReport> {
-        wgc::gfx_select!(device => self.0.device_generate_allocator_report(*device))
+        self.0.device_generate_allocator_report(*device)
     }
 
     fn pipeline_cache_get_data(
@@ -2356,7 +2331,7 @@ impl crate::Context for ContextWgpuCore {
         // TODO: Used for error handling?
         _cache_data: &Self::PipelineCacheData,
     ) -> Option<Vec<u8>> {
-        wgc::gfx_select!(cache => self.0.pipeline_cache_get_data(*cache))
+        self.0.pipeline_cache_get_data(*cache)
     }
 
     fn compute_pass_set_pipeline(

From 0fb772b5dffd370f27920c52747a556372b6689f Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Tue, 6 Aug 2024 23:29:21 +0200
Subject: [PATCH 219/226] remove dyn render & compute pass

---
 deno_webgpu/command_encoder.rs            |   4 +-
 deno_webgpu/compute_pass.rs               |  81 ++--
 deno_webgpu/render_pass.rs                | 219 ++++++-----
 wgpu-core/src/command/compute.rs          |  15 +-
 wgpu-core/src/command/dyn_compute_pass.rs | 178 ---------
 wgpu-core/src/command/dyn_render_pass.rs  | 458 ----------------------
 wgpu-core/src/command/mod.rs              |   7 +-
 wgpu-core/src/command/render.rs           |  15 +-
 wgpu/src/backend/wgpu_core.rs             | 233 ++++++-----
 9 files changed, 312 insertions(+), 898 deletions(-)
 delete mode 100644 wgpu-core/src/command/dyn_compute_pass.rs
 delete mode 100644 wgpu-core/src/command/dyn_render_pass.rs

diff --git a/deno_webgpu/command_encoder.rs b/deno_webgpu/command_encoder.rs
index 84537f3c0b..d7306a37a7 100644
--- a/deno_webgpu/command_encoder.rs
+++ b/deno_webgpu/command_encoder.rs
@@ -211,7 +211,7 @@ pub fn op_webgpu_command_encoder_begin_render_pass(
     };
 
     let (render_pass, error) =
-        instance.command_encoder_create_render_pass_dyn(*command_encoder, &descriptor);
+        instance.command_encoder_create_render_pass(*command_encoder, &descriptor);
     let rid = state
         .resource_table
         .add(super::render_pass::WebGpuRenderPass(RefCell::new(
@@ -264,7 +264,7 @@ pub fn op_webgpu_command_encoder_begin_compute_pass(
     };
 
     let (compute_pass, error) =
-        instance.command_encoder_create_compute_pass_dyn(*command_encoder, &descriptor);
+        instance.command_encoder_create_compute_pass(*command_encoder, &descriptor);
     let rid = state
         .resource_table
         .add(super::compute_pass::WebGpuComputePass(RefCell::new(
diff --git a/deno_webgpu/compute_pass.rs b/deno_webgpu/compute_pass.rs
index 3b653ef349..e3e69860ab 100644
--- a/deno_webgpu/compute_pass.rs
+++ b/deno_webgpu/compute_pass.rs
@@ -10,9 +10,7 @@ use std::cell::RefCell;
 
 use super::error::WebGpuResult;
 
-pub(crate) struct WebGpuComputePass(
-    pub(crate) RefCell<Box<dyn wgpu_core::command::DynComputePass>>,
-);
+pub(crate) struct WebGpuComputePass(pub(crate) RefCell<wgpu_core::command::ComputePass>);
 impl Resource for WebGpuComputePass {
     fn name(&self) -> Cow<str> {
         "webGPUComputePass".into()
@@ -33,10 +31,12 @@ pub fn op_webgpu_compute_pass_set_pipeline(
         .resource_table
         .get::<WebGpuComputePass>(compute_pass_rid)?;
 
-    compute_pass_resource
-        .0
-        .borrow_mut()
-        .set_pipeline(state.borrow(), compute_pipeline_resource.1)?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .compute_pass_set_pipeline(
+            &mut compute_pass_resource.0.borrow_mut(),
+            compute_pipeline_resource.1,
+        )?;
 
     Ok(WebGpuResult::empty())
 }
@@ -54,10 +54,9 @@ pub fn op_webgpu_compute_pass_dispatch_workgroups(
         .resource_table
         .get::<WebGpuComputePass>(compute_pass_rid)?;
 
-    compute_pass_resource
-        .0
-        .borrow_mut()
-        .dispatch_workgroups(state.borrow(), x, y, z)?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .compute_pass_dispatch_workgroups(&mut compute_pass_resource.0.borrow_mut(), x, y, z)?;
 
     Ok(WebGpuResult::empty())
 }
@@ -77,10 +76,13 @@ pub fn op_webgpu_compute_pass_dispatch_workgroups_indirect(
         .resource_table
         .get::<WebGpuComputePass>(compute_pass_rid)?;
 
-    compute_pass_resource
-        .0
-        .borrow_mut()
-        .dispatch_workgroups_indirect(state.borrow(), buffer_resource.1, indirect_offset)?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .compute_pass_dispatch_workgroups_indirect(
+            &mut compute_pass_resource.0.borrow_mut(),
+            buffer_resource.1,
+            indirect_offset,
+        )?;
 
     Ok(WebGpuResult::empty())
 }
@@ -95,7 +97,9 @@ pub fn op_webgpu_compute_pass_end(
         .resource_table
         .take::<WebGpuComputePass>(compute_pass_rid)?;
 
-    compute_pass_resource.0.borrow_mut().end(state.borrow())?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .compute_pass_end(&mut compute_pass_resource.0.borrow_mut())?;
 
     Ok(WebGpuResult::empty())
 }
@@ -127,12 +131,14 @@ pub fn op_webgpu_compute_pass_set_bind_group(
 
     let dynamic_offsets_data: &[u32] = &dynamic_offsets_data[start..start + len];
 
-    compute_pass_resource.0.borrow_mut().set_bind_group(
-        state.borrow(),
-        index,
-        bind_group_resource.1,
-        dynamic_offsets_data,
-    )?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .compute_pass_set_bind_group(
+            &mut compute_pass_resource.0.borrow_mut(),
+            index,
+            bind_group_resource.1,
+            dynamic_offsets_data,
+        )?;
 
     Ok(WebGpuResult::empty())
 }
@@ -148,11 +154,13 @@ pub fn op_webgpu_compute_pass_push_debug_group(
         .resource_table
         .get::<WebGpuComputePass>(compute_pass_rid)?;
 
-    compute_pass_resource.0.borrow_mut().push_debug_group(
-        state.borrow(),
-        group_label,
-        0, // wgpu#975
-    )?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .compute_pass_push_debug_group(
+            &mut compute_pass_resource.0.borrow_mut(),
+            group_label,
+            0, // wgpu#975
+        )?;
 
     Ok(WebGpuResult::empty())
 }
@@ -167,10 +175,9 @@ pub fn op_webgpu_compute_pass_pop_debug_group(
         .resource_table
         .get::<WebGpuComputePass>(compute_pass_rid)?;
 
-    compute_pass_resource
-        .0
-        .borrow_mut()
-        .pop_debug_group(state.borrow())?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .compute_pass_pop_debug_group(&mut compute_pass_resource.0.borrow_mut())?;
 
     Ok(WebGpuResult::empty())
 }
@@ -186,11 +193,13 @@ pub fn op_webgpu_compute_pass_insert_debug_marker(
         .resource_table
         .get::<WebGpuComputePass>(compute_pass_rid)?;
 
-    compute_pass_resource.0.borrow_mut().insert_debug_marker(
-        state.borrow(),
-        marker_label,
-        0, // wgpu#975
-    )?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .compute_pass_insert_debug_marker(
+            &mut compute_pass_resource.0.borrow_mut(),
+            marker_label,
+            0, // wgpu#975
+        )?;
 
     Ok(WebGpuResult::empty())
 }
diff --git a/deno_webgpu/render_pass.rs b/deno_webgpu/render_pass.rs
index 941245971c..2d4557cf03 100644
--- a/deno_webgpu/render_pass.rs
+++ b/deno_webgpu/render_pass.rs
@@ -12,7 +12,7 @@ use std::cell::RefCell;
 
 use super::error::WebGpuResult;
 
-pub(crate) struct WebGpuRenderPass(pub(crate) RefCell<Box<dyn wgpu_core::command::DynRenderPass>>);
+pub(crate) struct WebGpuRenderPass(pub(crate) RefCell<wgpu_core::command::RenderPass>);
 impl Resource for WebGpuRenderPass {
     fn name(&self) -> Cow<str> {
         "webGPURenderPass".into()
@@ -41,15 +41,17 @@ pub fn op_webgpu_render_pass_set_viewport(
         .resource_table
         .get::<WebGpuRenderPass>(args.render_pass_rid)?;
 
-    render_pass_resource.0.borrow_mut().set_viewport(
-        state.borrow(),
-        args.x,
-        args.y,
-        args.width,
-        args.height,
-        args.min_depth,
-        args.max_depth,
-    )?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_set_viewport(
+            &mut render_pass_resource.0.borrow_mut(),
+            args.x,
+            args.y,
+            args.width,
+            args.height,
+            args.min_depth,
+            args.max_depth,
+        )?;
 
     Ok(WebGpuResult::empty())
 }
@@ -68,10 +70,15 @@ pub fn op_webgpu_render_pass_set_scissor_rect(
         .resource_table
         .get::<WebGpuRenderPass>(render_pass_rid)?;
 
-    render_pass_resource
-        .0
-        .borrow_mut()
-        .set_scissor_rect(state.borrow(), x, y, width, height)?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_set_scissor_rect(
+            &mut render_pass_resource.0.borrow_mut(),
+            x,
+            y,
+            width,
+            height,
+        )?;
 
     Ok(WebGpuResult::empty())
 }
@@ -87,10 +94,9 @@ pub fn op_webgpu_render_pass_set_blend_constant(
         .resource_table
         .get::<WebGpuRenderPass>(render_pass_rid)?;
 
-    render_pass_resource
-        .0
-        .borrow_mut()
-        .set_blend_constant(state.borrow(), color)?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_set_blend_constant(&mut render_pass_resource.0.borrow_mut(), color)?;
 
     Ok(WebGpuResult::empty())
 }
@@ -106,10 +112,9 @@ pub fn op_webgpu_render_pass_set_stencil_reference(
         .resource_table
         .get::<WebGpuRenderPass>(render_pass_rid)?;
 
-    render_pass_resource
-        .0
-        .borrow_mut()
-        .set_stencil_reference(state.borrow(), reference)?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_set_stencil_reference(&mut render_pass_resource.0.borrow_mut(), reference)?;
 
     Ok(WebGpuResult::empty())
 }
@@ -125,10 +130,9 @@ pub fn op_webgpu_render_pass_begin_occlusion_query(
         .resource_table
         .get::<WebGpuRenderPass>(render_pass_rid)?;
 
-    render_pass_resource
-        .0
-        .borrow_mut()
-        .begin_occlusion_query(state.borrow(), query_index)?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_begin_occlusion_query(&mut render_pass_resource.0.borrow_mut(), query_index)?;
 
     Ok(WebGpuResult::empty())
 }
@@ -143,10 +147,9 @@ pub fn op_webgpu_render_pass_end_occlusion_query(
         .resource_table
         .get::<WebGpuRenderPass>(render_pass_rid)?;
 
-    render_pass_resource
-        .0
-        .borrow_mut()
-        .end_occlusion_query(state.borrow())?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_end_occlusion_query(&mut render_pass_resource.0.borrow_mut())?;
 
     Ok(WebGpuResult::empty())
 }
@@ -172,10 +175,9 @@ pub fn op_webgpu_render_pass_execute_bundles(
         .resource_table
         .get::<WebGpuRenderPass>(render_pass_rid)?;
 
-    render_pass_resource
-        .0
-        .borrow_mut()
-        .execute_bundles(state.borrow(), &bundles)?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_execute_bundles(&mut render_pass_resource.0.borrow_mut(), &bundles)?;
 
     Ok(WebGpuResult::empty())
 }
@@ -190,7 +192,9 @@ pub fn op_webgpu_render_pass_end(
         .resource_table
         .take::<WebGpuRenderPass>(render_pass_rid)?;
 
-    render_pass_resource.0.borrow_mut().end(state.borrow())?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_end(&mut render_pass_resource.0.borrow_mut())?;
 
     Ok(WebGpuResult::empty())
 }
@@ -222,12 +226,14 @@ pub fn op_webgpu_render_pass_set_bind_group(
 
     let dynamic_offsets_data: &[u32] = &dynamic_offsets_data[start..start + len];
 
-    render_pass_resource.0.borrow_mut().set_bind_group(
-        state.borrow(),
-        index,
-        bind_group_resource.1,
-        dynamic_offsets_data,
-    )?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_set_bind_group(
+            &mut render_pass_resource.0.borrow_mut(),
+            index,
+            bind_group_resource.1,
+            dynamic_offsets_data,
+        )?;
 
     Ok(WebGpuResult::empty())
 }
@@ -243,11 +249,13 @@ pub fn op_webgpu_render_pass_push_debug_group(
         .resource_table
         .get::<WebGpuRenderPass>(render_pass_rid)?;
 
-    render_pass_resource.0.borrow_mut().push_debug_group(
-        state.borrow(),
-        group_label,
-        0, // wgpu#975
-    )?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_push_debug_group(
+            &mut render_pass_resource.0.borrow_mut(),
+            group_label,
+            0, // wgpu#975
+        )?;
 
     Ok(WebGpuResult::empty())
 }
@@ -262,10 +270,9 @@ pub fn op_webgpu_render_pass_pop_debug_group(
         .resource_table
         .get::<WebGpuRenderPass>(render_pass_rid)?;
 
-    render_pass_resource
-        .0
-        .borrow_mut()
-        .pop_debug_group(state.borrow())?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_pop_debug_group(&mut render_pass_resource.0.borrow_mut())?;
 
     Ok(WebGpuResult::empty())
 }
@@ -281,11 +288,13 @@ pub fn op_webgpu_render_pass_insert_debug_marker(
         .resource_table
         .get::<WebGpuRenderPass>(render_pass_rid)?;
 
-    render_pass_resource.0.borrow_mut().insert_debug_marker(
-        state.borrow(),
-        marker_label,
-        0, // wgpu#975
-    )?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_insert_debug_marker(
+            &mut render_pass_resource.0.borrow_mut(),
+            marker_label,
+            0, // wgpu#975
+        )?;
 
     Ok(WebGpuResult::empty())
 }
@@ -304,10 +313,12 @@ pub fn op_webgpu_render_pass_set_pipeline(
         .resource_table
         .get::<WebGpuRenderPass>(render_pass_rid)?;
 
-    render_pass_resource
-        .0
-        .borrow_mut()
-        .set_pipeline(state.borrow(), render_pipeline_resource.1)?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_set_pipeline(
+            &mut render_pass_resource.0.borrow_mut(),
+            render_pipeline_resource.1,
+        )?;
 
     Ok(WebGpuResult::empty())
 }
@@ -338,13 +349,15 @@ pub fn op_webgpu_render_pass_set_index_buffer(
         None
     };
 
-    render_pass_resource.0.borrow_mut().set_index_buffer(
-        state.borrow(),
-        buffer_resource.1,
-        index_format,
-        offset,
-        size,
-    )?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_set_index_buffer(
+            &mut render_pass_resource.0.borrow_mut(),
+            buffer_resource.1,
+            index_format,
+            offset,
+            size,
+        )?;
 
     Ok(WebGpuResult::empty())
 }
@@ -375,13 +388,15 @@ pub fn op_webgpu_render_pass_set_vertex_buffer(
         None
     };
 
-    render_pass_resource.0.borrow_mut().set_vertex_buffer(
-        state.borrow(),
-        slot,
-        buffer_resource.1,
-        offset,
-        size,
-    )?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_set_vertex_buffer(
+            &mut render_pass_resource.0.borrow_mut(),
+            slot,
+            buffer_resource.1,
+            offset,
+            size,
+        )?;
 
     Ok(WebGpuResult::empty())
 }
@@ -400,13 +415,15 @@ pub fn op_webgpu_render_pass_draw(
         .resource_table
         .get::<WebGpuRenderPass>(render_pass_rid)?;
 
-    render_pass_resource.0.borrow_mut().draw(
-        state.borrow(),
-        vertex_count,
-        instance_count,
-        first_vertex,
-        first_instance,
-    )?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_draw(
+            &mut render_pass_resource.0.borrow_mut(),
+            vertex_count,
+            instance_count,
+            first_vertex,
+            first_instance,
+        )?;
 
     Ok(WebGpuResult::empty())
 }
@@ -426,14 +443,16 @@ pub fn op_webgpu_render_pass_draw_indexed(
         .resource_table
         .get::<WebGpuRenderPass>(render_pass_rid)?;
 
-    render_pass_resource.0.borrow_mut().draw_indexed(
-        state.borrow(),
-        index_count,
-        instance_count,
-        first_index,
-        base_vertex,
-        first_instance,
-    )?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_draw_indexed(
+            &mut render_pass_resource.0.borrow_mut(),
+            index_count,
+            instance_count,
+            first_index,
+            base_vertex,
+            first_instance,
+        )?;
 
     Ok(WebGpuResult::empty())
 }
@@ -453,11 +472,13 @@ pub fn op_webgpu_render_pass_draw_indirect(
         .resource_table
         .get::<WebGpuRenderPass>(render_pass_rid)?;
 
-    render_pass_resource.0.borrow_mut().draw_indirect(
-        state.borrow(),
-        buffer_resource.1,
-        indirect_offset,
-    )?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_draw_indirect(
+            &mut render_pass_resource.0.borrow_mut(),
+            buffer_resource.1,
+            indirect_offset,
+        )?;
 
     Ok(WebGpuResult::empty())
 }
@@ -477,11 +498,13 @@ pub fn op_webgpu_render_pass_draw_indexed_indirect(
         .resource_table
         .get::<WebGpuRenderPass>(render_pass_rid)?;
 
-    render_pass_resource.0.borrow_mut().draw_indexed_indirect(
-        state.borrow(),
-        buffer_resource.1,
-        indirect_offset,
-    )?;
+    state
+        .borrow::<wgpu_core::global::Global>()
+        .render_pass_draw_indexed_indirect(
+            &mut render_pass_resource.0.borrow_mut(),
+            buffer_resource.1,
+            indirect_offset,
+        )?;
 
     Ok(WebGpuResult::empty())
 }
diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs
index 93e7c15168..5f23fb7221 100644
--- a/wgpu-core/src/command/compute.rs
+++ b/wgpu-core/src/command/compute.rs
@@ -31,7 +31,7 @@ use wgt::{BufferAddress, DynamicOffset};
 use std::sync::Arc;
 use std::{fmt, mem, str};
 
-use super::{bind::BinderError, memory_init::CommandBufferTextureMemoryActions, DynComputePass};
+use super::{bind::BinderError, memory_init::CommandBufferTextureMemoryActions};
 
 pub struct ComputePass {
     /// All pass data & records is stored here.
@@ -328,19 +328,6 @@ impl Global {
         (ComputePass::new(Some(cmd_buf), arc_desc), None)
     }
 
-    /// Creates a type erased compute pass.
-    ///
-    /// If creation fails, an invalid pass is returned.
-    /// Any operation on an invalid pass will return an error.
-    pub fn command_encoder_create_compute_pass_dyn(
-        &self,
-        encoder_id: id::CommandEncoderId,
-        desc: &ComputePassDescriptor,
-    ) -> (Box<dyn DynComputePass>, Option<CommandEncoderError>) {
-        let (pass, err) = self.command_encoder_create_compute_pass(encoder_id, desc);
-        (Box::new(pass), err)
-    }
-
     pub fn compute_pass_end(&self, pass: &mut ComputePass) -> Result<(), ComputePassError> {
         let scope = PassErrorScope::Pass;
 
diff --git a/wgpu-core/src/command/dyn_compute_pass.rs b/wgpu-core/src/command/dyn_compute_pass.rs
deleted file mode 100644
index 273feaddf7..0000000000
--- a/wgpu-core/src/command/dyn_compute_pass.rs
+++ /dev/null
@@ -1,178 +0,0 @@
-use wgt::WasmNotSendSync;
-
-use crate::{global, id};
-
-use super::{ComputePass, ComputePassError};
-
-/// Trait for type erasing ComputePass.
-// TODO(#5124): wgpu-core's ComputePass trait should not be hal type dependent.
-// Practically speaking this allows us merge gfx_select with type erasure:
-// The alternative would be to introduce ComputePassId which then first needs to be looked up and then dispatch via gfx_select.
-pub trait DynComputePass: std::fmt::Debug + WasmNotSendSync {
-    fn set_bind_group(
-        &mut self,
-        context: &global::Global,
-        index: u32,
-        bind_group_id: id::BindGroupId,
-        offsets: &[wgt::DynamicOffset],
-    ) -> Result<(), ComputePassError>;
-    fn set_pipeline(
-        &mut self,
-        context: &global::Global,
-        pipeline_id: id::ComputePipelineId,
-    ) -> Result<(), ComputePassError>;
-    fn set_push_constants(
-        &mut self,
-        context: &global::Global,
-        offset: u32,
-        data: &[u8],
-    ) -> Result<(), ComputePassError>;
-    fn dispatch_workgroups(
-        &mut self,
-        context: &global::Global,
-        groups_x: u32,
-        groups_y: u32,
-        groups_z: u32,
-    ) -> Result<(), ComputePassError>;
-    fn dispatch_workgroups_indirect(
-        &mut self,
-        context: &global::Global,
-        buffer_id: id::BufferId,
-        offset: wgt::BufferAddress,
-    ) -> Result<(), ComputePassError>;
-    fn push_debug_group(
-        &mut self,
-        context: &global::Global,
-        label: &str,
-        color: u32,
-    ) -> Result<(), ComputePassError>;
-    fn pop_debug_group(&mut self, context: &global::Global) -> Result<(), ComputePassError>;
-    fn insert_debug_marker(
-        &mut self,
-        context: &global::Global,
-        label: &str,
-        color: u32,
-    ) -> Result<(), ComputePassError>;
-    fn write_timestamp(
-        &mut self,
-        context: &global::Global,
-        query_set_id: id::QuerySetId,
-        query_index: u32,
-    ) -> Result<(), ComputePassError>;
-    fn begin_pipeline_statistics_query(
-        &mut self,
-        context: &global::Global,
-        query_set_id: id::QuerySetId,
-        query_index: u32,
-    ) -> Result<(), ComputePassError>;
-    fn end_pipeline_statistics_query(
-        &mut self,
-        context: &global::Global,
-    ) -> Result<(), ComputePassError>;
-    fn end(&mut self, context: &global::Global) -> Result<(), ComputePassError>;
-
-    fn label(&self) -> Option<&str>;
-}
-
-impl DynComputePass for ComputePass {
-    fn set_bind_group(
-        &mut self,
-        context: &global::Global,
-        index: u32,
-        bind_group_id: id::BindGroupId,
-        offsets: &[wgt::DynamicOffset],
-    ) -> Result<(), ComputePassError> {
-        context.compute_pass_set_bind_group(self, index, bind_group_id, offsets)
-    }
-
-    fn set_pipeline(
-        &mut self,
-        context: &global::Global,
-        pipeline_id: id::ComputePipelineId,
-    ) -> Result<(), ComputePassError> {
-        context.compute_pass_set_pipeline(self, pipeline_id)
-    }
-
-    fn set_push_constants(
-        &mut self,
-        context: &global::Global,
-        offset: u32,
-        data: &[u8],
-    ) -> Result<(), ComputePassError> {
-        context.compute_pass_set_push_constants(self, offset, data)
-    }
-
-    fn dispatch_workgroups(
-        &mut self,
-        context: &global::Global,
-        groups_x: u32,
-        groups_y: u32,
-        groups_z: u32,
-    ) -> Result<(), ComputePassError> {
-        context.compute_pass_dispatch_workgroups(self, groups_x, groups_y, groups_z)
-    }
-
-    fn dispatch_workgroups_indirect(
-        &mut self,
-        context: &global::Global,
-        buffer_id: id::BufferId,
-        offset: wgt::BufferAddress,
-    ) -> Result<(), ComputePassError> {
-        context.compute_pass_dispatch_workgroups_indirect(self, buffer_id, offset)
-    }
-
-    fn push_debug_group(
-        &mut self,
-        context: &global::Global,
-        label: &str,
-        color: u32,
-    ) -> Result<(), ComputePassError> {
-        context.compute_pass_push_debug_group(self, label, color)
-    }
-
-    fn pop_debug_group(&mut self, context: &global::Global) -> Result<(), ComputePassError> {
-        context.compute_pass_pop_debug_group(self)
-    }
-
-    fn insert_debug_marker(
-        &mut self,
-        context: &global::Global,
-        label: &str,
-        color: u32,
-    ) -> Result<(), ComputePassError> {
-        context.compute_pass_insert_debug_marker(self, label, color)
-    }
-
-    fn write_timestamp(
-        &mut self,
-        context: &global::Global,
-        query_set_id: id::QuerySetId,
-        query_index: u32,
-    ) -> Result<(), ComputePassError> {
-        context.compute_pass_write_timestamp(self, query_set_id, query_index)
-    }
-
-    fn begin_pipeline_statistics_query(
-        &mut self,
-        context: &global::Global,
-        query_set_id: id::QuerySetId,
-        query_index: u32,
-    ) -> Result<(), ComputePassError> {
-        context.compute_pass_begin_pipeline_statistics_query(self, query_set_id, query_index)
-    }
-
-    fn end_pipeline_statistics_query(
-        &mut self,
-        context: &global::Global,
-    ) -> Result<(), ComputePassError> {
-        context.compute_pass_end_pipeline_statistics_query(self)
-    }
-
-    fn end(&mut self, context: &global::Global) -> Result<(), ComputePassError> {
-        context.compute_pass_end(self)
-    }
-
-    fn label(&self) -> Option<&str> {
-        self.label()
-    }
-}
diff --git a/wgpu-core/src/command/dyn_render_pass.rs b/wgpu-core/src/command/dyn_render_pass.rs
deleted file mode 100644
index d20ca09780..0000000000
--- a/wgpu-core/src/command/dyn_render_pass.rs
+++ /dev/null
@@ -1,458 +0,0 @@
-use wgt::WasmNotSendSync;
-
-use crate::{global, id};
-
-use super::{RenderPass, RenderPassError};
-
-/// Trait for type erasing RenderPass.
-// TODO(#5124): wgpu-core's RenderPass trait should not be hal type dependent.
-// Practically speaking this allows us merge gfx_select with type erasure:
-// The alternative would be to introduce RenderPassId which then first needs to be looked up and then dispatch via gfx_select.
-pub trait DynRenderPass: std::fmt::Debug + WasmNotSendSync {
-    fn set_bind_group(
-        &mut self,
-        context: &global::Global,
-        index: u32,
-        bind_group_id: id::BindGroupId,
-        offsets: &[wgt::DynamicOffset],
-    ) -> Result<(), RenderPassError>;
-    fn set_index_buffer(
-        &mut self,
-        context: &global::Global,
-        buffer_id: id::BufferId,
-        index_format: wgt::IndexFormat,
-        offset: wgt::BufferAddress,
-        size: Option<wgt::BufferSize>,
-    ) -> Result<(), RenderPassError>;
-    fn set_vertex_buffer(
-        &mut self,
-        context: &global::Global,
-        slot: u32,
-        buffer_id: id::BufferId,
-        offset: wgt::BufferAddress,
-        size: Option<wgt::BufferSize>,
-    ) -> Result<(), RenderPassError>;
-    fn set_pipeline(
-        &mut self,
-        context: &global::Global,
-        pipeline_id: id::RenderPipelineId,
-    ) -> Result<(), RenderPassError>;
-    fn set_push_constants(
-        &mut self,
-        context: &global::Global,
-        stages: wgt::ShaderStages,
-        offset: u32,
-        data: &[u8],
-    ) -> Result<(), RenderPassError>;
-    fn draw(
-        &mut self,
-        context: &global::Global,
-        vertex_count: u32,
-        instance_count: u32,
-        first_vertex: u32,
-        first_instance: u32,
-    ) -> Result<(), RenderPassError>;
-    fn draw_indexed(
-        &mut self,
-        context: &global::Global,
-        index_count: u32,
-        instance_count: u32,
-        first_index: u32,
-        base_vertex: i32,
-        first_instance: u32,
-    ) -> Result<(), RenderPassError>;
-    fn draw_indirect(
-        &mut self,
-        context: &global::Global,
-        buffer_id: id::BufferId,
-        offset: wgt::BufferAddress,
-    ) -> Result<(), RenderPassError>;
-    fn draw_indexed_indirect(
-        &mut self,
-        context: &global::Global,
-        buffer_id: id::BufferId,
-        offset: wgt::BufferAddress,
-    ) -> Result<(), RenderPassError>;
-    fn multi_draw_indirect(
-        &mut self,
-        context: &global::Global,
-        buffer_id: id::BufferId,
-        offset: wgt::BufferAddress,
-        count: u32,
-    ) -> Result<(), RenderPassError>;
-    fn multi_draw_indexed_indirect(
-        &mut self,
-        context: &global::Global,
-        buffer_id: id::BufferId,
-        offset: wgt::BufferAddress,
-        count: u32,
-    ) -> Result<(), RenderPassError>;
-    fn multi_draw_indirect_count(
-        &mut self,
-        context: &global::Global,
-        buffer_id: id::BufferId,
-        offset: wgt::BufferAddress,
-        count_buffer_id: id::BufferId,
-        count_buffer_offset: wgt::BufferAddress,
-        max_count: u32,
-    ) -> Result<(), RenderPassError>;
-    fn multi_draw_indexed_indirect_count(
-        &mut self,
-        context: &global::Global,
-        buffer_id: id::BufferId,
-        offset: wgt::BufferAddress,
-        count_buffer_id: id::BufferId,
-        count_buffer_offset: wgt::BufferAddress,
-        max_count: u32,
-    ) -> Result<(), RenderPassError>;
-    fn set_blend_constant(
-        &mut self,
-        context: &global::Global,
-        color: wgt::Color,
-    ) -> Result<(), RenderPassError>;
-    fn set_scissor_rect(
-        &mut self,
-        context: &global::Global,
-        x: u32,
-        y: u32,
-        width: u32,
-        height: u32,
-    ) -> Result<(), RenderPassError>;
-    fn set_viewport(
-        &mut self,
-        context: &global::Global,
-        x: f32,
-        y: f32,
-        width: f32,
-        height: f32,
-        min_depth: f32,
-        max_depth: f32,
-    ) -> Result<(), RenderPassError>;
-    fn set_stencil_reference(
-        &mut self,
-        context: &global::Global,
-        reference: u32,
-    ) -> Result<(), RenderPassError>;
-    fn push_debug_group(
-        &mut self,
-        context: &global::Global,
-        label: &str,
-        color: u32,
-    ) -> Result<(), RenderPassError>;
-    fn pop_debug_group(&mut self, context: &global::Global) -> Result<(), RenderPassError>;
-    fn insert_debug_marker(
-        &mut self,
-        context: &global::Global,
-        label: &str,
-        color: u32,
-    ) -> Result<(), RenderPassError>;
-    fn write_timestamp(
-        &mut self,
-        context: &global::Global,
-        query_set_id: id::QuerySetId,
-        query_index: u32,
-    ) -> Result<(), RenderPassError>;
-    fn begin_occlusion_query(
-        &mut self,
-        context: &global::Global,
-        query_index: u32,
-    ) -> Result<(), RenderPassError>;
-    fn end_occlusion_query(&mut self, context: &global::Global) -> Result<(), RenderPassError>;
-    fn begin_pipeline_statistics_query(
-        &mut self,
-        context: &global::Global,
-        query_set_id: id::QuerySetId,
-        query_index: u32,
-    ) -> Result<(), RenderPassError>;
-    fn end_pipeline_statistics_query(
-        &mut self,
-        context: &global::Global,
-    ) -> Result<(), RenderPassError>;
-    fn execute_bundles(
-        &mut self,
-        context: &global::Global,
-        bundles: &[id::RenderBundleId],
-    ) -> Result<(), RenderPassError>;
-    fn end(&mut self, context: &global::Global) -> Result<(), RenderPassError>;
-
-    fn label(&self) -> Option<&str>;
-}
-
-impl DynRenderPass for RenderPass {
-    fn set_index_buffer(
-        &mut self,
-        context: &global::Global,
-        buffer_id: id::BufferId,
-        index_format: wgt::IndexFormat,
-        offset: wgt::BufferAddress,
-        size: Option<wgt::BufferSize>,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_set_index_buffer(self, buffer_id, index_format, offset, size)
-    }
-
-    fn set_vertex_buffer(
-        &mut self,
-        context: &global::Global,
-        slot: u32,
-        buffer_id: id::BufferId,
-        offset: wgt::BufferAddress,
-        size: Option<wgt::BufferSize>,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_set_vertex_buffer(self, slot, buffer_id, offset, size)
-    }
-
-    fn set_bind_group(
-        &mut self,
-        context: &global::Global,
-        index: u32,
-        bind_group_id: id::BindGroupId,
-        offsets: &[wgt::DynamicOffset],
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_set_bind_group(self, index, bind_group_id, offsets)
-    }
-
-    fn set_pipeline(
-        &mut self,
-        context: &global::Global,
-        pipeline_id: id::RenderPipelineId,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_set_pipeline(self, pipeline_id)
-    }
-
-    fn set_push_constants(
-        &mut self,
-        context: &global::Global,
-        stages: wgt::ShaderStages,
-        offset: u32,
-        data: &[u8],
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_set_push_constants(self, stages, offset, data)
-    }
-
-    fn draw(
-        &mut self,
-        context: &global::Global,
-        vertex_count: u32,
-        instance_count: u32,
-        first_vertex: u32,
-        first_instance: u32,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_draw(
-            self,
-            vertex_count,
-            instance_count,
-            first_vertex,
-            first_instance,
-        )
-    }
-
-    fn draw_indexed(
-        &mut self,
-        context: &global::Global,
-        index_count: u32,
-        instance_count: u32,
-        first_index: u32,
-        base_vertex: i32,
-        first_instance: u32,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_draw_indexed(
-            self,
-            index_count,
-            instance_count,
-            first_index,
-            base_vertex,
-            first_instance,
-        )
-    }
-
-    fn draw_indirect(
-        &mut self,
-        context: &global::Global,
-        buffer_id: id::BufferId,
-        offset: wgt::BufferAddress,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_draw_indirect(self, buffer_id, offset)
-    }
-
-    fn draw_indexed_indirect(
-        &mut self,
-        context: &global::Global,
-        buffer_id: id::BufferId,
-        offset: wgt::BufferAddress,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_draw_indexed_indirect(self, buffer_id, offset)
-    }
-
-    fn multi_draw_indirect(
-        &mut self,
-        context: &global::Global,
-        buffer_id: id::BufferId,
-        offset: wgt::BufferAddress,
-        count: u32,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_multi_draw_indirect(self, buffer_id, offset, count)
-    }
-
-    fn multi_draw_indexed_indirect(
-        &mut self,
-        context: &global::Global,
-        buffer_id: id::BufferId,
-        offset: wgt::BufferAddress,
-        count: u32,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_multi_draw_indexed_indirect(self, buffer_id, offset, count)
-    }
-
-    fn multi_draw_indirect_count(
-        &mut self,
-        context: &global::Global,
-        buffer_id: id::BufferId,
-        offset: wgt::BufferAddress,
-        count_buffer_id: id::BufferId,
-        count_buffer_offset: wgt::BufferAddress,
-        max_count: u32,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_multi_draw_indirect_count(
-            self,
-            buffer_id,
-            offset,
-            count_buffer_id,
-            count_buffer_offset,
-            max_count,
-        )
-    }
-
-    fn multi_draw_indexed_indirect_count(
-        &mut self,
-        context: &global::Global,
-        buffer_id: id::BufferId,
-        offset: wgt::BufferAddress,
-        count_buffer_id: id::BufferId,
-        count_buffer_offset: wgt::BufferAddress,
-        max_count: u32,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_multi_draw_indexed_indirect_count(
-            self,
-            buffer_id,
-            offset,
-            count_buffer_id,
-            count_buffer_offset,
-            max_count,
-        )
-    }
-
-    fn set_blend_constant(
-        &mut self,
-        context: &global::Global,
-        color: wgt::Color,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_set_blend_constant(self, color)
-    }
-
-    fn set_scissor_rect(
-        &mut self,
-        context: &global::Global,
-        x: u32,
-        y: u32,
-        width: u32,
-        height: u32,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_set_scissor_rect(self, x, y, width, height)
-    }
-
-    fn set_viewport(
-        &mut self,
-        context: &global::Global,
-        x: f32,
-        y: f32,
-        width: f32,
-        height: f32,
-        min_depth: f32,
-        max_depth: f32,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_set_viewport(self, x, y, width, height, min_depth, max_depth)
-    }
-
-    fn set_stencil_reference(
-        &mut self,
-        context: &global::Global,
-        reference: u32,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_set_stencil_reference(self, reference)
-    }
-
-    fn push_debug_group(
-        &mut self,
-        context: &global::Global,
-        label: &str,
-        color: u32,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_push_debug_group(self, label, color)
-    }
-
-    fn pop_debug_group(&mut self, context: &global::Global) -> Result<(), RenderPassError> {
-        context.render_pass_pop_debug_group(self)
-    }
-
-    fn insert_debug_marker(
-        &mut self,
-        context: &global::Global,
-        label: &str,
-        color: u32,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_insert_debug_marker(self, label, color)
-    }
-
-    fn write_timestamp(
-        &mut self,
-        context: &global::Global,
-        query_set_id: id::QuerySetId,
-        query_index: u32,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_write_timestamp(self, query_set_id, query_index)
-    }
-
-    fn begin_occlusion_query(
-        &mut self,
-        context: &global::Global,
-        query_index: u32,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_begin_occlusion_query(self, query_index)
-    }
-
-    fn end_occlusion_query(&mut self, context: &global::Global) -> Result<(), RenderPassError> {
-        context.render_pass_end_occlusion_query(self)
-    }
-
-    fn begin_pipeline_statistics_query(
-        &mut self,
-        context: &global::Global,
-        query_set_id: id::QuerySetId,
-        query_index: u32,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_begin_pipeline_statistics_query(self, query_set_id, query_index)
-    }
-
-    fn end_pipeline_statistics_query(
-        &mut self,
-        context: &global::Global,
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_end_pipeline_statistics_query(self)
-    }
-
-    fn execute_bundles(
-        &mut self,
-        context: &global::Global,
-        bundles: &[id::RenderBundleId],
-    ) -> Result<(), RenderPassError> {
-        context.render_pass_execute_bundles(self, bundles)
-    }
-
-    fn end(&mut self, context: &global::Global) -> Result<(), RenderPassError> {
-        context.render_pass_end(self)
-    }
-
-    fn label(&self) -> Option<&str> {
-        self.label()
-    }
-}
diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs
index d2714087df..313bf813a1 100644
--- a/wgpu-core/src/command/mod.rs
+++ b/wgpu-core/src/command/mod.rs
@@ -5,8 +5,6 @@ mod clear;
 mod compute;
 mod compute_command;
 mod draw;
-mod dyn_compute_pass;
-mod dyn_render_pass;
 mod memory_init;
 mod query;
 mod render;
@@ -18,9 +16,8 @@ use std::sync::Arc;
 
 pub(crate) use self::clear::clear_texture;
 pub use self::{
-    bundle::*, clear::ClearError, compute::*, compute_command::ComputeCommand, draw::*,
-    dyn_compute_pass::DynComputePass, dyn_render_pass::DynRenderPass, query::*, render::*,
-    render_command::RenderCommand, transfer::*,
+    bundle::*, clear::ClearError, compute::*, compute_command::ComputeCommand, draw::*, query::*,
+    render::*, render_command::RenderCommand, transfer::*,
 };
 pub(crate) use allocator::CommandAllocator;
 
diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index 1128e60a54..1f11ba0937 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -52,7 +52,7 @@ use super::{
     memory_init::TextureSurfaceDiscard, CommandBufferTextureMemoryActions, CommandEncoder,
     QueryResetMap,
 };
-use super::{DrawKind, DynRenderPass, Rect};
+use super::{DrawKind, Rect};
 
 /// Operation to perform to the output attachment at the start of a renderpass.
 #[repr(C)]
@@ -1461,19 +1461,6 @@ impl Global {
         (RenderPass::new(Some(cmd_buf), arc_desc), err)
     }
 
-    /// Creates a type erased render pass.
-    ///
-    /// If creation fails, an invalid pass is returned.
-    /// Any operation on an invalid pass will return an error.
-    pub fn command_encoder_create_render_pass_dyn(
-        &self,
-        encoder_id: id::CommandEncoderId,
-        desc: &RenderPassDescriptor<'_>,
-    ) -> (Box<dyn DynRenderPass>, Option<CommandEncoderError>) {
-        let (pass, err) = self.command_encoder_create_render_pass(encoder_id, desc);
-        (Box::new(pass), err)
-    }
-
     #[doc(hidden)]
     #[cfg(any(feature = "serde", feature = "replay"))]
     pub fn render_pass_end_with_unresolved_commands(
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 30a8743fb2..413524ab0d 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -489,13 +489,13 @@ impl Queue {
 
 #[derive(Debug)]
 pub struct ComputePass {
-    pass: Box<dyn wgc::command::DynComputePass>,
+    pass: wgc::command::ComputePass,
     error_sink: ErrorSink,
 }
 
 #[derive(Debug)]
 pub struct RenderPass {
-    pass: Box<dyn wgc::command::DynRenderPass>,
+    pass: wgc::command::RenderPass,
     error_sink: ErrorSink,
 }
 
@@ -1866,7 +1866,7 @@ impl crate::Context for ContextWgpuCore {
                     end_of_pass_write_index: tw.end_of_pass_write_index,
                 });
 
-        let (pass, err) = self.0.command_encoder_create_compute_pass_dyn(
+        let (pass, err) = self.0.command_encoder_create_compute_pass(
             *encoder,
             &wgc::command::ComputePassDescriptor {
                 label: desc.label.map(Borrowed),
@@ -1928,7 +1928,7 @@ impl crate::Context for ContextWgpuCore {
                     end_of_pass_write_index: tw.end_of_pass_write_index,
                 });
 
-        let (pass, err) = self.0.command_encoder_create_render_pass_dyn(
+        let (pass, err) = self.0.command_encoder_create_render_pass(
             *encoder,
             &wgc::command::RenderPassDescriptor {
                 label: desc.label.map(Borrowed),
@@ -2341,7 +2341,10 @@ impl crate::Context for ContextWgpuCore {
         pipeline: &Self::ComputePipelineId,
         _pipeline_data: &Self::ComputePipelineData,
     ) {
-        if let Err(cause) = pass_data.pass.set_pipeline(&self.0, *pipeline) {
+        if let Err(cause) = self
+            .0
+            .compute_pass_set_pipeline(&mut pass_data.pass, *pipeline)
+        {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -2360,9 +2363,9 @@ impl crate::Context for ContextWgpuCore {
         _bind_group_data: &Self::BindGroupData,
         offsets: &[wgt::DynamicOffset],
     ) {
-        if let Err(cause) = pass_data
-            .pass
-            .set_bind_group(&self.0, index, *bind_group, offsets)
+        if let Err(cause) =
+            self.0
+                .compute_pass_set_bind_group(&mut pass_data.pass, index, *bind_group, offsets)
         {
             self.handle_error(
                 &pass_data.error_sink,
@@ -2380,7 +2383,10 @@ impl crate::Context for ContextWgpuCore {
         offset: u32,
         data: &[u8],
     ) {
-        if let Err(cause) = pass_data.pass.set_push_constants(&self.0, offset, data) {
+        if let Err(cause) =
+            self.0
+                .compute_pass_set_push_constants(&mut pass_data.pass, offset, data)
+        {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -2396,7 +2402,10 @@ impl crate::Context for ContextWgpuCore {
         pass_data: &mut Self::ComputePassData,
         label: &str,
     ) {
-        if let Err(cause) = pass_data.pass.insert_debug_marker(&self.0, label, 0) {
+        if let Err(cause) = self
+            .0
+            .compute_pass_insert_debug_marker(&mut pass_data.pass, label, 0)
+        {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -2412,7 +2421,10 @@ impl crate::Context for ContextWgpuCore {
         pass_data: &mut Self::ComputePassData,
         group_label: &str,
     ) {
-        if let Err(cause) = pass_data.pass.push_debug_group(&self.0, group_label, 0) {
+        if let Err(cause) =
+            self.0
+                .compute_pass_push_debug_group(&mut pass_data.pass, group_label, 0)
+        {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -2427,7 +2439,7 @@ impl crate::Context for ContextWgpuCore {
         _pass: &mut Self::ComputePassId,
         pass_data: &mut Self::ComputePassData,
     ) {
-        if let Err(cause) = pass_data.pass.pop_debug_group(&self.0) {
+        if let Err(cause) = self.0.compute_pass_pop_debug_group(&mut pass_data.pass) {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -2445,9 +2457,9 @@ impl crate::Context for ContextWgpuCore {
         _query_set_data: &Self::QuerySetData,
         query_index: u32,
     ) {
-        if let Err(cause) = pass_data
-            .pass
-            .write_timestamp(&self.0, *query_set, query_index)
+        if let Err(cause) =
+            self.0
+                .compute_pass_write_timestamp(&mut pass_data.pass, *query_set, query_index)
         {
             self.handle_error(
                 &pass_data.error_sink,
@@ -2466,11 +2478,11 @@ impl crate::Context for ContextWgpuCore {
         _query_set_data: &Self::QuerySetData,
         query_index: u32,
     ) {
-        if let Err(cause) =
-            pass_data
-                .pass
-                .begin_pipeline_statistics_query(&self.0, *query_set, query_index)
-        {
+        if let Err(cause) = self.0.compute_pass_begin_pipeline_statistics_query(
+            &mut pass_data.pass,
+            *query_set,
+            query_index,
+        ) {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -2485,7 +2497,10 @@ impl crate::Context for ContextWgpuCore {
         _pass: &mut Self::ComputePassId,
         pass_data: &mut Self::ComputePassData,
     ) {
-        if let Err(cause) = pass_data.pass.end_pipeline_statistics_query(&self.0) {
+        if let Err(cause) = self
+            .0
+            .compute_pass_end_pipeline_statistics_query(&mut pass_data.pass)
+        {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -2503,7 +2518,10 @@ impl crate::Context for ContextWgpuCore {
         y: u32,
         z: u32,
     ) {
-        if let Err(cause) = pass_data.pass.dispatch_workgroups(&self.0, x, y, z) {
+        if let Err(cause) = self
+            .0
+            .compute_pass_dispatch_workgroups(&mut pass_data.pass, x, y, z)
+        {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -2521,11 +2539,11 @@ impl crate::Context for ContextWgpuCore {
         _indirect_buffer_data: &Self::BufferData,
         indirect_offset: wgt::BufferAddress,
     ) {
-        if let Err(cause) =
-            pass_data
-                .pass
-                .dispatch_workgroups_indirect(&self.0, *indirect_buffer, indirect_offset)
-        {
+        if let Err(cause) = self.0.compute_pass_dispatch_workgroups_indirect(
+            &mut pass_data.pass,
+            *indirect_buffer,
+            indirect_offset,
+        ) {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -2540,7 +2558,7 @@ impl crate::Context for ContextWgpuCore {
         _pass: &mut Self::ComputePassId,
         pass_data: &mut Self::ComputePassData,
     ) {
-        if let Err(cause) = pass_data.pass.end(&self.0) {
+        if let Err(cause) = self.0.compute_pass_end(&mut pass_data.pass) {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -2742,7 +2760,10 @@ impl crate::Context for ContextWgpuCore {
         pipeline: &Self::RenderPipelineId,
         _pipeline_data: &Self::RenderPipelineData,
     ) {
-        if let Err(cause) = pass_data.pass.set_pipeline(&self.0, *pipeline) {
+        if let Err(cause) = self
+            .0
+            .render_pass_set_pipeline(&mut pass_data.pass, *pipeline)
+        {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -2761,9 +2782,9 @@ impl crate::Context for ContextWgpuCore {
         _bind_group_data: &Self::BindGroupData,
         offsets: &[wgt::DynamicOffset],
     ) {
-        if let Err(cause) = pass_data
-            .pass
-            .set_bind_group(&self.0, index, *bind_group, offsets)
+        if let Err(cause) =
+            self.0
+                .render_pass_set_bind_group(&mut pass_data.pass, index, *bind_group, offsets)
         {
             self.handle_error(
                 &pass_data.error_sink,
@@ -2784,11 +2805,13 @@ impl crate::Context for ContextWgpuCore {
         offset: wgt::BufferAddress,
         size: Option<wgt::BufferSize>,
     ) {
-        if let Err(cause) =
-            pass_data
-                .pass
-                .set_index_buffer(&self.0, *buffer, index_format, offset, size)
-        {
+        if let Err(cause) = self.0.render_pass_set_index_buffer(
+            &mut pass_data.pass,
+            *buffer,
+            index_format,
+            offset,
+            size,
+        ) {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -2808,9 +2831,9 @@ impl crate::Context for ContextWgpuCore {
         offset: wgt::BufferAddress,
         size: Option<wgt::BufferSize>,
     ) {
-        if let Err(cause) = pass_data
-            .pass
-            .set_vertex_buffer(&self.0, slot, *buffer, offset, size)
+        if let Err(cause) =
+            self.0
+                .render_pass_set_vertex_buffer(&mut pass_data.pass, slot, *buffer, offset, size)
         {
             self.handle_error(
                 &pass_data.error_sink,
@@ -2829,9 +2852,9 @@ impl crate::Context for ContextWgpuCore {
         offset: u32,
         data: &[u8],
     ) {
-        if let Err(cause) = pass_data
-            .pass
-            .set_push_constants(&self.0, stages, offset, data)
+        if let Err(cause) =
+            self.0
+                .render_pass_set_push_constants(&mut pass_data.pass, stages, offset, data)
         {
             self.handle_error(
                 &pass_data.error_sink,
@@ -2849,8 +2872,8 @@ impl crate::Context for ContextWgpuCore {
         vertices: Range<u32>,
         instances: Range<u32>,
     ) {
-        if let Err(cause) = pass_data.pass.draw(
-            &self.0,
+        if let Err(cause) = self.0.render_pass_draw(
+            &mut pass_data.pass,
             vertices.end - vertices.start,
             instances.end - instances.start,
             vertices.start,
@@ -2873,8 +2896,8 @@ impl crate::Context for ContextWgpuCore {
         base_vertex: i32,
         instances: Range<u32>,
     ) {
-        if let Err(cause) = pass_data.pass.draw_indexed(
-            &self.0,
+        if let Err(cause) = self.0.render_pass_draw_indexed(
+            &mut pass_data.pass,
             indices.end - indices.start,
             instances.end - instances.start,
             indices.start,
@@ -2898,9 +2921,9 @@ impl crate::Context for ContextWgpuCore {
         _indirect_buffer_data: &Self::BufferData,
         indirect_offset: wgt::BufferAddress,
     ) {
-        if let Err(cause) = pass_data
-            .pass
-            .draw_indirect(&self.0, *indirect_buffer, indirect_offset)
+        if let Err(cause) =
+            self.0
+                .render_pass_draw_indirect(&mut pass_data.pass, *indirect_buffer, indirect_offset)
         {
             self.handle_error(
                 &pass_data.error_sink,
@@ -2919,11 +2942,11 @@ impl crate::Context for ContextWgpuCore {
         _indirect_buffer_data: &Self::BufferData,
         indirect_offset: wgt::BufferAddress,
     ) {
-        if let Err(cause) =
-            pass_data
-                .pass
-                .draw_indexed_indirect(&self.0, *indirect_buffer, indirect_offset)
-        {
+        if let Err(cause) = self.0.render_pass_draw_indexed_indirect(
+            &mut pass_data.pass,
+            *indirect_buffer,
+            indirect_offset,
+        ) {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -2942,11 +2965,12 @@ impl crate::Context for ContextWgpuCore {
         indirect_offset: wgt::BufferAddress,
         count: u32,
     ) {
-        if let Err(cause) =
-            pass_data
-                .pass
-                .multi_draw_indirect(&self.0, *indirect_buffer, indirect_offset, count)
-        {
+        if let Err(cause) = self.0.render_pass_multi_draw_indirect(
+            &mut pass_data.pass,
+            *indirect_buffer,
+            indirect_offset,
+            count,
+        ) {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -2965,8 +2989,8 @@ impl crate::Context for ContextWgpuCore {
         indirect_offset: wgt::BufferAddress,
         count: u32,
     ) {
-        if let Err(cause) = pass_data.pass.multi_draw_indexed_indirect(
-            &self.0,
+        if let Err(cause) = self.0.render_pass_multi_draw_indexed_indirect(
+            &mut pass_data.pass,
             *indirect_buffer,
             indirect_offset,
             count,
@@ -2992,8 +3016,8 @@ impl crate::Context for ContextWgpuCore {
         count_buffer_offset: wgt::BufferAddress,
         max_count: u32,
     ) {
-        if let Err(cause) = pass_data.pass.multi_draw_indirect_count(
-            &self.0,
+        if let Err(cause) = self.0.render_pass_multi_draw_indirect_count(
+            &mut pass_data.pass,
             *indirect_buffer,
             indirect_offset,
             *count_buffer,
@@ -3021,8 +3045,8 @@ impl crate::Context for ContextWgpuCore {
         count_buffer_offset: wgt::BufferAddress,
         max_count: u32,
     ) {
-        if let Err(cause) = pass_data.pass.multi_draw_indexed_indirect_count(
-            &self.0,
+        if let Err(cause) = self.0.render_pass_multi_draw_indexed_indirect_count(
+            &mut pass_data.pass,
             *indirect_buffer,
             indirect_offset,
             *count_buffer,
@@ -3044,7 +3068,10 @@ impl crate::Context for ContextWgpuCore {
         pass_data: &mut Self::RenderPassData,
         color: wgt::Color,
     ) {
-        if let Err(cause) = pass_data.pass.set_blend_constant(&self.0, color) {
+        if let Err(cause) = self
+            .0
+            .render_pass_set_blend_constant(&mut pass_data.pass, color)
+        {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -3063,9 +3090,9 @@ impl crate::Context for ContextWgpuCore {
         width: u32,
         height: u32,
     ) {
-        if let Err(cause) = pass_data
-            .pass
-            .set_scissor_rect(&self.0, x, y, width, height)
+        if let Err(cause) =
+            self.0
+                .render_pass_set_scissor_rect(&mut pass_data.pass, x, y, width, height)
         {
             self.handle_error(
                 &pass_data.error_sink,
@@ -3087,10 +3114,15 @@ impl crate::Context for ContextWgpuCore {
         min_depth: f32,
         max_depth: f32,
     ) {
-        if let Err(cause) = pass_data
-            .pass
-            .set_viewport(&self.0, x, y, width, height, min_depth, max_depth)
-        {
+        if let Err(cause) = self.0.render_pass_set_viewport(
+            &mut pass_data.pass,
+            x,
+            y,
+            width,
+            height,
+            min_depth,
+            max_depth,
+        ) {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -3106,7 +3138,10 @@ impl crate::Context for ContextWgpuCore {
         pass_data: &mut Self::RenderPassData,
         reference: u32,
     ) {
-        if let Err(cause) = pass_data.pass.set_stencil_reference(&self.0, reference) {
+        if let Err(cause) = self
+            .0
+            .render_pass_set_stencil_reference(&mut pass_data.pass, reference)
+        {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -3122,7 +3157,10 @@ impl crate::Context for ContextWgpuCore {
         pass_data: &mut Self::RenderPassData,
         label: &str,
     ) {
-        if let Err(cause) = pass_data.pass.insert_debug_marker(&self.0, label, 0) {
+        if let Err(cause) = self
+            .0
+            .render_pass_insert_debug_marker(&mut pass_data.pass, label, 0)
+        {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -3138,7 +3176,10 @@ impl crate::Context for ContextWgpuCore {
         pass_data: &mut Self::RenderPassData,
         group_label: &str,
     ) {
-        if let Err(cause) = pass_data.pass.push_debug_group(&self.0, group_label, 0) {
+        if let Err(cause) = self
+            .0
+            .render_pass_push_debug_group(&mut pass_data.pass, group_label, 0)
+        {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -3153,7 +3194,7 @@ impl crate::Context for ContextWgpuCore {
         _pass: &mut Self::RenderPassId,
         pass_data: &mut Self::RenderPassData,
     ) {
-        if let Err(cause) = pass_data.pass.pop_debug_group(&self.0) {
+        if let Err(cause) = self.0.render_pass_pop_debug_group(&mut pass_data.pass) {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -3171,9 +3212,9 @@ impl crate::Context for ContextWgpuCore {
         _query_set_data: &Self::QuerySetData,
         query_index: u32,
     ) {
-        if let Err(cause) = pass_data
-            .pass
-            .write_timestamp(&self.0, *query_set, query_index)
+        if let Err(cause) =
+            self.0
+                .render_pass_write_timestamp(&mut pass_data.pass, *query_set, query_index)
         {
             self.handle_error(
                 &pass_data.error_sink,
@@ -3190,7 +3231,10 @@ impl crate::Context for ContextWgpuCore {
         pass_data: &mut Self::RenderPassData,
         query_index: u32,
     ) {
-        if let Err(cause) = pass_data.pass.begin_occlusion_query(&self.0, query_index) {
+        if let Err(cause) = self
+            .0
+            .render_pass_begin_occlusion_query(&mut pass_data.pass, query_index)
+        {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -3205,7 +3249,7 @@ impl crate::Context for ContextWgpuCore {
         _pass: &mut Self::RenderPassId,
         pass_data: &mut Self::RenderPassData,
     ) {
-        if let Err(cause) = pass_data.pass.end_occlusion_query(&self.0) {
+        if let Err(cause) = self.0.render_pass_end_occlusion_query(&mut pass_data.pass) {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -3223,11 +3267,11 @@ impl crate::Context for ContextWgpuCore {
         _query_set_data: &Self::QuerySetData,
         query_index: u32,
     ) {
-        if let Err(cause) =
-            pass_data
-                .pass
-                .begin_pipeline_statistics_query(&self.0, *query_set, query_index)
-        {
+        if let Err(cause) = self.0.render_pass_begin_pipeline_statistics_query(
+            &mut pass_data.pass,
+            *query_set,
+            query_index,
+        ) {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -3242,7 +3286,10 @@ impl crate::Context for ContextWgpuCore {
         _pass: &mut Self::RenderPassId,
         pass_data: &mut Self::RenderPassData,
     ) {
-        if let Err(cause) = pass_data.pass.end_pipeline_statistics_query(&self.0) {
+        if let Err(cause) = self
+            .0
+            .render_pass_end_pipeline_statistics_query(&mut pass_data.pass)
+        {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,
@@ -3259,9 +3306,9 @@ impl crate::Context for ContextWgpuCore {
         render_bundles: &mut dyn Iterator<Item = (Self::RenderBundleId, &Self::RenderBundleData)>,
     ) {
         let temp_render_bundles = render_bundles.map(|(i, _)| i).collect::<SmallVec<[_; 4]>>();
-        if let Err(cause) = pass_data
-            .pass
-            .execute_bundles(&self.0, &temp_render_bundles)
+        if let Err(cause) = self
+            .0
+            .render_pass_execute_bundles(&mut pass_data.pass, &temp_render_bundles)
         {
             self.handle_error(
                 &pass_data.error_sink,
@@ -3277,7 +3324,7 @@ impl crate::Context for ContextWgpuCore {
         _pass: &mut Self::RenderPassId,
         pass_data: &mut Self::RenderPassData,
     ) {
-        if let Err(cause) = pass_data.pass.end(&self.0) {
+        if let Err(cause) = self.0.render_pass_end(&mut pass_data.pass) {
             self.handle_error(
                 &pass_data.error_sink,
                 cause,

From ab17d29237443d4f6fdd49efe7cc15a0ccbf687a Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sat, 10 Aug 2024 12:54:50 +0200
Subject: [PATCH 220/226] re-enable docs for wgpu-core

---
 wgpu-core/src/lib.rs | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs
index ea7960fa57..ccbe64d527 100644
--- a/wgpu-core/src/lib.rs
+++ b/wgpu-core/src/lib.rs
@@ -2,20 +2,6 @@
 //! It is designed for integration into browsers, as well as wrapping
 //! into other language-specific user-friendly libraries.
 //!
-#![cfg_attr(
-    not(any(not(doc), wgpu_core_doc)),
-    doc = r#"\
-## Documentation hidden
-
-As a workaround for [an issue in rustdoc](https://github.com/rust-lang/rust/issues/114891)
-that [affects `wgpu-core` documentation builds \
-severely](https://github.com/gfx-rs/wgpu/issues/4905),
-the documentation for `wgpu-core` is empty unless built with
-`RUSTFLAGS="--cfg wgpu_core_doc"`, which may take a very long time.
-"#
-)]
-#![cfg(any(not(doc), wgpu_core_doc))]
-//!
 //! ## Feature flags
 #![doc = document_features::document_features!()]
 //!

From 92ecafebac1a152173b066f9a8bea55e5f157386 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sat, 10 Aug 2024 23:00:38 +0200
Subject: [PATCH 221/226] changelog entry

---
 CHANGELOG.md | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9f0366ea2c..fb1bc4a0ca 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -39,6 +39,22 @@ Bottom level categories:
 
 ## Unreleased
 
+### Major Changes
+
+#### `wgpu-core` is no longer generic over `wgpu-hal` backends
+Dynamic dispatch between different backends has been moved from the user-facing `wgpu` crate
+to a new dynamic dispatch mechanism inside the backend abstraction layer `wgpu-hal`.
+
+Whenever targeting more than a single backend (default on Windows & Linux) this leads to faster compile times and smaller binaries!
+This also solves a long-standing issue with `cargo doc` failing to run for `wgpu-core`.
+
+Benchmarking indicated that compute pass recording is slower as a consequence,
+whereas on render passes speed improvements have been observed.
+However, this effort simplifies many of the internals of the wgpu family of crates
+which we're hoping to build performance improvements upon in the future.
+
+By @wumpf in [#6069](https://github.com/gfx-rs/wgpu/pull/6069), [#6099](https://github.com/gfx-rs/wgpu/pull/6099), [#6100](https://github.com/gfx-rs/wgpu/pull/6100).
+
 ### New Features
 
 #### Naga

From 9706f3c721452e33c8570352e0c6ff3950d4c3c9 Mon Sep 17 00:00:00 2001
From: Vecvec <vectorsofvectors@gmail.com>
Date: Thu, 15 Aug 2024 10:53:42 +1200
Subject: [PATCH 222/226] fix merge

---
 examples/src/ray_cube_compute/mod.rs  |   6 +-
 examples/src/ray_cube_fragment/mod.rs |   4 +-
 examples/src/ray_scene/mod.rs         |   4 +-
 player/src/lib.rs                     |  12 +-
 wgpu-core/src/binding_model.rs        |   2 +-
 wgpu-core/src/command/compute.rs      |  10 +-
 wgpu-core/src/command/mod.rs          |   8 +-
 wgpu-core/src/command/ray_tracing.rs  | 373 +++++++++-----------------
 wgpu-core/src/command/render.rs       |   1 -
 wgpu-core/src/device/global.rs        |   2 +-
 wgpu-core/src/device/life.rs          |  73 +++++
 wgpu-core/src/device/queue.rs         |   7 +-
 wgpu-core/src/device/ray_tracing.rs   | 148 +++++-----
 wgpu-core/src/device/resource.rs      |  16 +-
 wgpu-core/src/hub.rs                  |   4 +-
 wgpu-core/src/ray_tracing.rs          |  62 ++---
 wgpu-core/src/resource.rs             | 100 +++++--
 wgpu-core/src/track/mod.rs            |  29 +-
 wgpu-core/src/track/ray_tracing.rs    |  99 +++++++
 wgpu-core/src/track/stateless.rs      |  11 +
 wgpu/src/api/bind_group.rs            |   2 +
 wgpu/src/backend/wgpu_core.rs         |  24 +-
 22 files changed, 555 insertions(+), 442 deletions(-)
 create mode 100644 wgpu-core/src/track/ray_tracing.rs

diff --git a/examples/src/ray_cube_compute/mod.rs b/examples/src/ray_cube_compute/mod.rs
index b814bb8286..1cf2dc52de 100644
--- a/examples/src/ray_cube_compute/mod.rs
+++ b/examples/src/ray_cube_compute/mod.rs
@@ -395,7 +395,7 @@ impl crate::framework::Example for Example {
             label: Some("rt"),
             layout: None,
             module: &shader,
-            entry_point: "main",
+            entry_point: Some("main"),
             compilation_options: Default::default(),
             cache: None,
         });
@@ -426,13 +426,13 @@ impl crate::framework::Example for Example {
             layout: None,
             vertex: wgpu::VertexState {
                 module: &blit_shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &[],
             },
             fragment: Some(wgpu::FragmentState {
                 module: &blit_shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(config.format.into())],
             }),
diff --git a/examples/src/ray_cube_fragment/mod.rs b/examples/src/ray_cube_fragment/mod.rs
index b42e6b94e1..854d0caa41 100644
--- a/examples/src/ray_cube_fragment/mod.rs
+++ b/examples/src/ray_cube_fragment/mod.rs
@@ -201,13 +201,13 @@ impl crate::framework::Example for Example {
             layout: None,
             vertex: wgpu::VertexState {
                 module: &shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &[],
             },
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(config.format.into())],
             }),
diff --git a/examples/src/ray_scene/mod.rs b/examples/src/ray_scene/mod.rs
index d2c34d2da4..ef6a6bfb69 100644
--- a/examples/src/ray_scene/mod.rs
+++ b/examples/src/ray_scene/mod.rs
@@ -378,13 +378,13 @@ impl crate::framework::Example for Example {
             layout: None,
             vertex: wgpu::VertexState {
                 module: &shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &[],
             },
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(config.format.into())],
             }),
diff --git a/player/src/lib.rs b/player/src/lib.rs
index 2a6f84d91f..c340b828a2 100644
--- a/player/src/lib.rs
+++ b/player/src/lib.rs
@@ -453,22 +453,22 @@ impl GlobalPlay for wgc::global::Global {
                 self.queue_submit(queue, &[cmdbuf]).unwrap();
             }
             Action::CreateBlas { id, desc, sizes } => {
-                self.device_create_blas::<A>(device, &desc, sizes, Some(id));
+                self.device_create_blas(device, &desc, sizes, Some(id));
             }
             Action::FreeBlas(id) => {
-                self.blas_destroy::<A>(id).unwrap();
+                self.blas_destroy(id).unwrap();
             }
             Action::DestroyBlas(id) => {
-                self.blas_drop::<A>(id, true);
+                self.blas_drop(id);
             }
             Action::CreateTlas { id, desc } => {
-                self.device_create_tlas::<A>(device, &desc, Some(id));
+                self.device_create_tlas(device, &desc, Some(id));
             }
             Action::FreeTlas(id) => {
-                self.tlas_destroy::<A>(id).unwrap();
+                self.tlas_destroy(id).unwrap();
             }
             Action::DestroyTlas(id) => {
-                self.tlas_drop::<A>(id, true);
+                self.tlas_drop(id);
             }
         }
     }
diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs
index 5e2341b6d7..9fd344c48c 100644
--- a/wgpu-core/src/binding_model.rs
+++ b/wgpu-core/src/binding_model.rs
@@ -811,7 +811,7 @@ pub enum ResolvedBindingResource<'a> {
     SamplerArray(Cow<'a, [Arc<Sampler>]>),
     TextureView(Arc<TextureView>),
     TextureViewArray(Cow<'a, [Arc<TextureView>]>),
-    AccelerationStructure(Arc<Tlas<A>>),
+    AccelerationStructure(Arc<Tlas>),
 }
 
 #[derive(Clone, Debug, Error)]
diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs
index c0e05093d9..2c1d62cbb7 100644
--- a/wgpu-core/src/command/compute.rs
+++ b/wgpu-core/src/command/compute.rs
@@ -28,13 +28,11 @@ use crate::{
 use thiserror::Error;
 use wgt::{BufferAddress, DynamicOffset};
 
-use super::{bind::BinderError, memory_init::CommandBufferTextureMemoryActions, DynComputePass};
+use super::{bind::BinderError, memory_init::CommandBufferTextureMemoryActions};
 use crate::ray_tracing::TlasAction;
 use std::sync::Arc;
 use std::{fmt, mem, str};
 
-use super::{bind::BinderError, memory_init::CommandBufferTextureMemoryActions};
-
 pub struct ComputePass {
     /// All pass data & records is stored here.
     ///
@@ -216,7 +214,7 @@ struct State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder> {
     tracker: &'cmd_buf mut Tracker,
     buffer_memory_init_actions: &'cmd_buf mut Vec<BufferInitTrackerAction>,
     texture_memory_actions: &'cmd_buf mut CommandBufferTextureMemoryActions,
-    tlas_actions: &'cmd_buf mut Vec<TlasAction<A>>,
+    tlas_actions: &'cmd_buf mut Vec<TlasAction>,
 
     temp_offsets: Vec<u32>,
     dynamic_offset_count: usize,
@@ -694,9 +692,9 @@ fn set_bind_group(
     let used_resource = bind_group
         .used
         .acceleration_structures
-        .used_resources()
+        .into_iter()
         .map(|tlas| TlasAction {
-            tlas,
+            tlas: tlas.clone(),
             kind: crate::ray_tracing::TlasActionKind::Use,
         });
 
diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs
index 4839b162b6..d31a41bd8a 100644
--- a/wgpu-core/src/command/mod.rs
+++ b/wgpu-core/src/command/mod.rs
@@ -245,8 +245,8 @@ pub(crate) struct BakedCommands {
     pub(crate) trackers: Tracker,
     buffer_memory_init_actions: Vec<BufferInitTrackerAction>,
     texture_memory_actions: CommandBufferTextureMemoryActions,
-    blas_actions: Vec<BlasAction<A>>,
-    tlas_actions: Vec<TlasAction<A>>,
+    blas_actions: Vec<BlasAction>,
+    tlas_actions: Vec<TlasAction>,
 }
 
 /// The mutable state of a [`CommandBuffer`].
@@ -273,8 +273,8 @@ pub struct CommandBufferMutable {
     texture_memory_actions: CommandBufferTextureMemoryActions,
 
     pub(crate) pending_query_resets: QueryResetMap,
-    blas_actions: Vec<BlasAction<A>>,
-    tlas_actions: Vec<TlasAction<A>>,
+    blas_actions: Vec<BlasAction>,
+    tlas_actions: Vec<TlasAction>,
     #[cfg(feature = "trace")]
     pub(crate) commands: Option<Vec<TraceCommand>>,
 }
diff --git a/wgpu-core/src/command/ray_tracing.rs b/wgpu-core/src/command/ray_tracing.rs
index 640853a632..b20592e60f 100644
--- a/wgpu-core/src/command/ray_tracing.rs
+++ b/wgpu-core/src/command/ray_tracing.rs
@@ -19,7 +19,7 @@ use wgt::{math::align_to, BufferAddress, BufferUsages};
 use super::{BakedCommands, CommandBufferMutable, CommandEncoderError};
 use crate::lock::rank;
 use crate::ray_tracing::BlasTriangleGeometry;
-use crate::resource::{Buffer, Labeled, StagingBuffer, Trackable};
+use crate::resource::{AccelerationStructure, Buffer, Labeled, ScratchBuffer, StagingBuffer, Trackable};
 use crate::snatch::SnatchGuard;
 use crate::storage::Storage;
 use crate::track::PendingTransition;
@@ -28,22 +28,22 @@ use std::ops::Deref;
 use std::sync::Arc;
 use std::{cmp::max, iter, num::NonZeroU64, ops::Range, ptr};
 
-type BufferStorage<'a, A> = Vec<(
-    Arc<Buffer<A>>,
+type BufferStorage<'a> = Vec<(
+    Arc<Buffer>,
     Option<PendingTransition<BufferUses>>,
-    Option<(Arc<Buffer<A>>, Option<PendingTransition<BufferUses>>)>,
-    Option<(Arc<Buffer<A>>, Option<PendingTransition<BufferUses>>)>,
+    Option<(Arc<Buffer>, Option<PendingTransition<BufferUses>>)>,
+    Option<(Arc<Buffer>, Option<PendingTransition<BufferUses>>)>,
     BlasTriangleGeometry<'a>,
-    Option<Arc<Blas<A>>>,
+    Option<Arc<Blas>>,
 )>;
 
-type BlasStorage<'a, A> = Vec<(Arc<Blas<A>>, hal::AccelerationStructureEntries<'a, A>, u64)>;
+type BlasStorage<'a> = Vec<(Arc<Blas>, hal::AccelerationStructureEntries<'a, dyn hal::DynBuffer>, u64)>;
 
 // This should be queried from the device, maybe the the hal api should pre aline it, since I am unsure how else we can idiomatically get this value.
 const SCRATCH_BUFFER_ALIGNMENT: u32 = 256;
 
 impl Global {
-    pub fn command_encoder_build_acceleration_structures_unsafe_tlas<'a, A: HalApi>(
+    pub fn command_encoder_build_acceleration_structures_unsafe_tlas<'a, >(
         &self,
         command_encoder_id: CommandEncoderId,
         blas_iter: impl Iterator<Item = BlasBuildEntry<'a>>,
@@ -51,7 +51,7 @@ impl Global {
     ) -> Result<(), BuildAccelerationStructureError> {
         profiling::scope!("CommandEncoder::build_acceleration_structures_unsafe_tlas");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let cmd_buf = match hub
             .command_buffers
@@ -147,7 +147,7 @@ impl Global {
         #[cfg(feature = "trace")]
         let tlas_iter = trace_tlas.iter();
 
-        let mut input_barriers = Vec::<hal::BufferBarrier<A>>::new();
+        let mut input_barriers = Vec::<hal::BufferBarrier<dyn hal::DynBuffer>>::new();
         let mut buf_storage = BufferStorage::new();
 
         let mut scratch_buffer_blas_size = 0;
@@ -176,9 +176,9 @@ impl Global {
         )?;
 
         let mut scratch_buffer_tlas_size = 0;
-        let mut tlas_storage = Vec::<(&Tlas<A>, hal::AccelerationStructureEntries<A>, u64)>::new();
+        let mut tlas_storage = Vec::<(&Tlas, hal::AccelerationStructureEntries<dyn hal::DynBuffer>, u64)>::new();
         let mut tlas_buf_storage = Vec::<(
-            Arc<Buffer<A>>,
+            Arc<Buffer>,
             Option<PendingTransition<BufferUses>>,
             TlasBuildEntry,
         )>::new();
@@ -221,13 +221,7 @@ impl Global {
             let tlas = tlas_guard
                 .get(entry.tlas_id)
                 .map_err(|_| BuildAccelerationStructureError::InvalidTlasId)?;
-            cmd_buf_data.trackers.tlas_s.insert_single(tlas.clone());
-
-            if tlas.raw.is_none() {
-                return Err(BuildAccelerationStructureError::InvalidTlas(
-                    tlas.error_ident(),
-                ));
-            }
+            cmd_buf_data.trackers.tlas_s.set_single(tlas.clone());
 
             cmd_buf_data.tlas_actions.push(TlasAction {
                 tlas: tlas.clone(),
@@ -246,7 +240,7 @@ impl Global {
             tlas_storage.push((
                 tlas,
                 hal::AccelerationStructureEntries::Instances(hal::AccelerationStructureInstances {
-                    buffer: Some(instance_buffer),
+                    buffer: Some(instance_buffer.as_ref()),
                     offset: 0,
                     count: entry.instance_count,
                 }),
@@ -254,31 +248,22 @@ impl Global {
             ));
         }
 
-        if max(scratch_buffer_blas_size, scratch_buffer_tlas_size) == 0 {
-            return Ok(());
-        }
-
-        let scratch_buffer = unsafe {
-            device
-                .raw()
-                .create_buffer(&hal::BufferDescriptor {
-                    label: Some("(wgpu) scratch buffer"),
-                    size: max(scratch_buffer_blas_size, scratch_buffer_tlas_size),
-                    usage: hal::BufferUses::ACCELERATION_STRUCTURE_SCRATCH | BufferUses::MAP_WRITE,
-                    memory_flags: hal::MemoryFlags::empty(),
-                })
-                .map_err(crate::device::DeviceError::from)?
+        let scratch_size = match wgt::BufferSize::new(max(scratch_buffer_blas_size, scratch_buffer_tlas_size)) {
+            None => return Ok(()),
+            Some(size) => size,
         };
 
-        let scratch_buffer_barrier = hal::BufferBarrier::<A> {
-            buffer: &scratch_buffer,
+        let scratch_buffer = ScratchBuffer::new(device, scratch_size).map_err(crate::device::DeviceError::from)?;
+
+        let scratch_buffer_barrier = hal::BufferBarrier::<dyn hal::DynBuffer> {
+            buffer: scratch_buffer.raw(),
             usage: BufferUses::ACCELERATION_STRUCTURE_SCRATCH
                 ..BufferUses::ACCELERATION_STRUCTURE_SCRATCH,
         };
 
         let blas_descriptors = blas_storage
             .iter()
-            .map(|storage| map_blas(storage, &scratch_buffer));
+            .map(|storage| map_blas(storage, scratch_buffer.raw()));
 
         let tlas_descriptors =
             tlas_storage
@@ -292,8 +277,8 @@ impl Global {
                         mode: hal::AccelerationStructureBuildMode::Build,
                         flags: tlas.flags,
                         source_acceleration_structure: None,
-                        destination_acceleration_structure: tlas.raw.as_ref().unwrap(),
-                        scratch_buffer: &scratch_buffer,
+                        destination_acceleration_structure: tlas.raw(),
+                        scratch_buffer: scratch_buffer.raw(),
                         scratch_buffer_offset: *scratch_buffer_offset,
                     }
                 });
@@ -308,15 +293,14 @@ impl Global {
             blas_present,
             tlas_present,
             input_barriers,
-            blas_storage.len() as u32,
-            blas_descriptors,
+            &blas_descriptors.collect::<Vec<_>>(),
             scratch_buffer_barrier,
         );
 
         if tlas_present {
             unsafe {
                 cmd_buf_raw
-                    .build_acceleration_structures(tlas_storage.len() as u32, tlas_descriptors);
+                    .build_acceleration_structures(&tlas_descriptors.collect::<Vec<_>>());
 
                 cmd_buf_raw.place_acceleration_structure_barrier(
                     hal::AccelerationStructureBarrier {
@@ -327,31 +311,15 @@ impl Global {
             }
         }
 
-        let scratch_mapping = unsafe {
-            device
-                .raw()
-                .map_buffer(
-                    &scratch_buffer,
-                    0..max(scratch_buffer_blas_size, scratch_buffer_tlas_size),
-                )
-                .map_err(crate::device::DeviceError::from)?
-        };
         device
             .pending_writes
             .lock()
-            .as_mut()
-            .unwrap()
-            .consume_temp(TempResource::StagingBuffer(StagingBuffer {
-                raw: Mutex::new(rank::BLAS, Some(scratch_buffer)),
-                device: device.clone(),
-                size: max(scratch_buffer_blas_size, scratch_buffer_tlas_size),
-                is_coherent: scratch_mapping.is_coherent,
-            }));
+            .consume_temp(TempResource::ScratchBuffer(scratch_buffer));
 
         Ok(())
     }
 
-    pub fn command_encoder_build_acceleration_structures<'a, A: HalApi>(
+    pub fn command_encoder_build_acceleration_structures<'a, >(
         &self,
         command_encoder_id: CommandEncoderId,
         blas_iter: impl Iterator<Item = BlasBuildEntry<'a>>,
@@ -359,7 +327,7 @@ impl Global {
     ) -> Result<(), BuildAccelerationStructureError> {
         profiling::scope!("CommandEncoder::build_acceleration_structures");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let cmd_buf = match hub
             .command_buffers
@@ -486,7 +454,7 @@ impl Global {
             }
         });
 
-        let mut input_barriers = Vec::<hal::BufferBarrier<A>>::new();
+        let mut input_barriers = Vec::<hal::BufferBarrier<dyn hal::DynBuffer>>::new();
         let mut buf_storage = BufferStorage::new();
 
         let mut scratch_buffer_blas_size = 0;
@@ -514,9 +482,9 @@ impl Global {
             &mut blas_storage,
         )?;
         let mut tlas_lock_store = Vec::<(
-            RwLockReadGuard<Option<A::Buffer>>,
+            &dyn hal::DynBuffer,
             Option<TlasPackage>,
-            Arc<Tlas<A>>,
+            Arc<Tlas>,
         )>::new();
 
         for package in tlas_iter {
@@ -524,14 +492,14 @@ impl Global {
                 .get(package.tlas_id)
                 .map_err(|_| BuildAccelerationStructureError::InvalidTlasId)?;
 
-            cmd_buf_data.trackers.tlas_s.insert_single(tlas.clone());
-            tlas_lock_store.push((tlas.instance_buffer.read(), Some(package), tlas.clone()))
+            cmd_buf_data.trackers.tlas_s.set_single(tlas.clone());
+            tlas_lock_store.push((tlas.instance_buffer.as_ref(), Some(package), tlas.clone()))
         }
 
         let mut scratch_buffer_tlas_size = 0;
         let mut tlas_storage = Vec::<(
-            &Tlas<A>,
-            hal::AccelerationStructureEntries<A>,
+            &Tlas,
+            hal::AccelerationStructureEntries<dyn hal::DynBuffer>,
             u64,
             Range<usize>,
         )>::new();
@@ -540,11 +508,6 @@ impl Global {
         for entry in &mut tlas_lock_store {
             let package = entry.1.take().unwrap();
             let tlas = &entry.2;
-            if tlas.raw.is_none() {
-                return Err(BuildAccelerationStructureError::InvalidTlas(
-                    tlas.error_ident(),
-                ));
-            }
 
             let scratch_buffer_offset = scratch_buffer_tlas_size;
             scratch_buffer_tlas_size += align_to(
@@ -568,10 +531,10 @@ impl Global {
                     .map_err(|_| BuildAccelerationStructureError::InvalidBlasIdForInstance)?
                     .clone();
 
-                cmd_buf_data.trackers.blas_s.insert_single(blas.clone());
+                cmd_buf_data.trackers.blas_s.set_single(blas.clone());
 
                 instance_buffer_staging_source
-                    .extend(tlas_instance_into_bytes::<A>(&instance, blas.handle));
+                    .extend(tlas_instance_into_bytes(&instance, blas.handle));
 
                 instance_count += 1;
 
@@ -602,7 +565,7 @@ impl Global {
             tlas_storage.push((
                 tlas,
                 hal::AccelerationStructureEntries::Instances(hal::AccelerationStructureInstances {
-                    buffer: Some(entry.0.as_ref().unwrap()),
+                    buffer: Some(entry.0),
                     offset: 0,
                     count: instance_count,
                 }),
@@ -611,106 +574,41 @@ impl Global {
             ));
         }
 
-        if max(scratch_buffer_blas_size, scratch_buffer_tlas_size) == 0 {
-            return Ok(());
-        }
-
-        let staging_buffer = if !instance_buffer_staging_source.is_empty() {
-            unsafe {
-                let staging_buffer = device
-                    .raw()
-                    .create_buffer(&hal::BufferDescriptor {
-                        label: Some("(wgpu) instance staging buffer"),
-                        size: instance_buffer_staging_source.len() as u64,
-                        usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC,
-                        memory_flags: hal::MemoryFlags::empty(),
-                    })
-                    .map_err(crate::device::DeviceError::from)?;
-                let mapping = device
-                    .raw()
-                    .map_buffer(
-                        &staging_buffer,
-                        0..instance_buffer_staging_source.len() as u64,
-                    )
-                    .map_err(crate::device::DeviceError::from)?;
-                ptr::copy_nonoverlapping(
-                    instance_buffer_staging_source.as_ptr(),
-                    mapping.ptr.as_ptr(),
-                    instance_buffer_staging_source.len(),
-                );
-                device
-                    .raw()
-                    .unmap_buffer(&staging_buffer)
-                    .map_err(crate::device::DeviceError::from)?;
-                assert!(mapping.is_coherent);
-                Some(StagingBuffer {
-                    raw: Mutex::new(rank::STAGING_BUFFER_RAW, Some(staging_buffer)),
-                    device: device.clone(),
-                    size: instance_buffer_staging_source.len() as u64,
-                    is_coherent: mapping.is_coherent,
-                })
-            }
-        } else {
-            None
+        let scratch_size = match wgt::BufferSize::new(max(scratch_buffer_blas_size, scratch_buffer_tlas_size)) {
+            // if the size is zero there is nothing to build
+            None => return Ok(()),
+            Some(size) => size,
         };
 
-        let scratch_buffer = unsafe {
-            device
-                .raw()
-                .create_buffer(&hal::BufferDescriptor {
-                    label: Some("(wgpu) scratch buffer"),
-                    size: max(scratch_buffer_blas_size, scratch_buffer_tlas_size),
-                    usage: hal::BufferUses::ACCELERATION_STRUCTURE_SCRATCH | BufferUses::MAP_WRITE,
-                    memory_flags: hal::MemoryFlags::empty(),
-                })
-                .map_err(crate::device::DeviceError::from)?
-        };
+        let scratch_buffer = ScratchBuffer::new(device, scratch_size).map_err(crate::device::DeviceError::from)?;
 
-        let scratch_buffer_barrier = hal::BufferBarrier::<A> {
-            buffer: &scratch_buffer,
+        let scratch_buffer_barrier = hal::BufferBarrier::<dyn hal::DynBuffer> {
+            buffer: scratch_buffer.raw(),
             usage: BufferUses::ACCELERATION_STRUCTURE_SCRATCH
                 ..BufferUses::ACCELERATION_STRUCTURE_SCRATCH,
         };
 
         let blas_descriptors = blas_storage
             .iter()
-            .map(|storage| map_blas(storage, &scratch_buffer));
+            .map(|storage| map_blas(storage, scratch_buffer.raw()));
 
-        let tlas_descriptors = tlas_storage.iter().map(
-            |&(tlas, ref entries, ref scratch_buffer_offset, ref _range)| {
-                if tlas.update_mode == wgt::AccelerationStructureUpdateMode::PreferUpdate {
-                    log::info!("only rebuild implemented")
-                }
-                hal::BuildAccelerationStructureDescriptor {
-                    entries,
-                    mode: hal::AccelerationStructureBuildMode::Build,
-                    flags: tlas.flags,
-                    source_acceleration_structure: None,
-                    destination_acceleration_structure: tlas.raw.as_ref().unwrap(),
-                    scratch_buffer: &scratch_buffer,
-                    scratch_buffer_offset: *scratch_buffer_offset,
-                }
-            },
-        );
-
-        let mut lock_vec = Vec::<Option<RwLockReadGuard<Option<<A>::Buffer>>>>::new();
+        let mut tlas_descriptors = Vec::with_capacity(tlas_storage.len());
 
-        for tlas in &tlas_storage {
-            let size = (tlas.3.end - tlas.3.start) as u64;
-            lock_vec.push(if size == 0 {
-                None
-            } else {
-                Some(tlas.0.instance_buffer.read())
+        for &(tlas, ref entries, ref scratch_buffer_offset, ref range) in &tlas_storage {
+            if tlas.update_mode == wgt::AccelerationStructureUpdateMode::PreferUpdate {
+                log::info!("only rebuild implemented")
+            }
+            tlas_descriptors.push(hal::BuildAccelerationStructureDescriptor {
+                entries,
+                mode: hal::AccelerationStructureBuildMode::Build,
+                flags: tlas.flags,
+                source_acceleration_structure: None,
+                destination_acceleration_structure: tlas.raw.as_ref(),
+                scratch_buffer: scratch_buffer.raw(),
+                scratch_buffer_offset: *scratch_buffer_offset,
             })
         }
 
-        let instance_buffer_barriers = lock_vec.iter().filter_map(|lock| {
-            lock.as_ref().map(|lock| hal::BufferBarrier::<A> {
-                buffer: lock.as_ref().unwrap(),
-                usage: BufferUses::COPY_DST..BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT,
-            })
-        });
-
         let blas_present = !blas_storage.is_empty();
         let tlas_present = !tlas_storage.is_empty();
 
@@ -721,55 +619,69 @@ impl Global {
             blas_present,
             tlas_present,
             input_barriers,
-            blas_storage.len() as u32,
-            blas_descriptors,
+            &blas_descriptors.collect::<Vec<_>>(),
             scratch_buffer_barrier,
         );
 
         if tlas_present {
+            let staging_buffer = if !instance_buffer_staging_source.is_empty() {
+                unsafe {
+                    let mut staging_buffer = StagingBuffer::new(device, wgt::BufferSize::new(instance_buffer_staging_source.len() as u64).unwrap()).map_err(crate::device::DeviceError::from)?;
+                    staging_buffer.write(&instance_buffer_staging_source);
+                    let flushed = staging_buffer.flush();
+                    Some(flushed)
+                }
+            } else {
+                None
+            };
+
             unsafe {
                 if let Some(ref staging_buffer) = staging_buffer {
-                    cmd_buf_raw.transition_buffers(iter::once(hal::BufferBarrier::<A> {
-                        buffer: staging_buffer.raw.lock().as_ref().unwrap(),
+                    cmd_buf_raw.transition_buffers(&[hal::BufferBarrier::<dyn hal::DynBuffer> {
+                        buffer: staging_buffer.raw(),
                         usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC,
-                    }));
+                    }]);
                 }
             }
 
-            for &(tlas, ref _entries, ref _scratch_buffer_offset, ref range) in &tlas_storage {
-                let size = (range.end - range.start) as u64;
-                if size == 0 {
-                    continue;
-                }
+            let mut instance_buffer_barriers = Vec::new();
+            for &(tlas, _, _, ref range) in &tlas_storage {
+                let size = match wgt::BufferSize::new((range.end - range.start) as u64) {
+                    None => continue,
+                    Some(size) => size,
+                };
+                instance_buffer_barriers.push(hal::BufferBarrier::<dyn hal::DynBuffer> {
+                    buffer: tlas.instance_buffer.as_ref(),
+                    usage: BufferUses::COPY_DST..BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT,
+                });
                 unsafe {
-                    cmd_buf_raw.transition_buffers(iter::once(hal::BufferBarrier::<A> {
-                        buffer: tlas.instance_buffer.read().as_ref().unwrap(),
+                    cmd_buf_raw.transition_buffers(&[hal::BufferBarrier::<dyn hal::DynBuffer> {
+                        buffer: tlas.instance_buffer.as_ref(),
                         usage: hal::BufferUses::MAP_READ..hal::BufferUses::COPY_DST,
-                    }));
+                    }]);
                     let temp = hal::BufferCopy {
                         src_offset: range.start as u64,
                         dst_offset: 0,
-                        size: NonZeroU64::new(size).unwrap(),
+                        size,
                     };
                     cmd_buf_raw.copy_buffer_to_buffer(
+                        // the range whose size we just checked ended (at that point in time) at instance_buffer_staging_source.len(),
+                        // and since instance_buffer_staging_source doesn't shrink, the staging buffer exists and we can unwrap this without a panic
                         staging_buffer
                             .as_ref()
                             .unwrap()
-                            .raw
-                            .lock()
-                            .as_ref()
-                            .unwrap(),
-                        tlas.instance_buffer.read().as_ref().unwrap(),
-                        iter::once(temp),
+                            .raw(),
+                        tlas.instance_buffer.as_ref(),
+                        &[temp],
                     );
                 }
             }
 
             unsafe {
-                cmd_buf_raw.transition_buffers(instance_buffer_barriers);
+                cmd_buf_raw.transition_buffers(&instance_buffer_barriers);
 
                 cmd_buf_raw
-                    .build_acceleration_structures(tlas_storage.len() as u32, tlas_descriptors);
+                    .build_acceleration_structures(&tlas_descriptors);
 
                 cmd_buf_raw.place_acceleration_structure_barrier(
                     hal::AccelerationStructureBarrier {
@@ -783,40 +695,20 @@ impl Global {
                 device
                     .pending_writes
                     .lock()
-                    .as_mut()
-                    .unwrap()
                     .consume_temp(TempResource::StagingBuffer(staging_buffer));
             }
         }
-        let scratch_mapping = unsafe {
-            device
-                .raw()
-                .map_buffer(
-                    &scratch_buffer,
-                    0..max(scratch_buffer_blas_size, scratch_buffer_tlas_size),
-                )
-                .map_err(crate::device::DeviceError::from)?
-        };
-
-        let buf = StagingBuffer {
-            raw: Mutex::new(rank::STAGING_BUFFER_RAW, Some(scratch_buffer)),
-            device: device.clone(),
-            size: max(scratch_buffer_blas_size, scratch_buffer_tlas_size),
-            is_coherent: scratch_mapping.is_coherent,
-        };
 
         device
             .pending_writes
             .lock()
-            .as_mut()
-            .unwrap()
-            .consume_temp(TempResource::StagingBuffer(buf));
+            .consume_temp(TempResource::ScratchBuffer(scratch_buffer));
 
         Ok(())
     }
 }
 
-impl<A: HalApi> BakedCommands<A> {
+impl BakedCommands {
     // makes sure a blas is build before it is used
     pub(crate) fn validate_blas_actions(&mut self) -> Result<(), ValidateBlasActionsError> {
         profiling::scope!("CommandEncoder::[submission]::validate_blas_actions");
@@ -884,25 +776,19 @@ impl<A: HalApi> BakedCommands<A> {
 }
 
 ///iterates over the blas iterator, and it's geometry, pushing the buffers into a storage vector (and also some validation).
-fn iter_blas<'a, A: HalApi>(
+fn iter_blas<'a>(
     blas_iter: impl Iterator<Item = BlasBuildEntry<'a>>,
-    cmd_buf_data: &mut CommandBufferMutable<A>,
+    cmd_buf_data: &mut CommandBufferMutable,
     build_command_index: NonZeroU64,
-    buffer_guard: &RwLockReadGuard<Storage<Buffer<A>>>,
-    blas_guard: &RwLockReadGuard<Storage<Blas<A>>>,
-    buf_storage: &mut BufferStorage<'a, A>,
+    buffer_guard: &RwLockReadGuard<Storage<Buffer>>,
+    blas_guard: &RwLockReadGuard<Storage<Blas>>,
+    buf_storage: &mut BufferStorage<'a>,
 ) -> Result<(), BuildAccelerationStructureError> {
     for entry in blas_iter {
         let blas = blas_guard
             .get(entry.blas_id)
             .map_err(|_| BuildAccelerationStructureError::InvalidBlasId)?;
-        cmd_buf_data.trackers.blas_s.insert_single(blas.clone());
-
-        if blas.raw.is_none() {
-            return Err(BuildAccelerationStructureError::InvalidBlas(
-                blas.error_ident(),
-            ));
-        }
+        cmd_buf_data.trackers.blas_s.set_single(blas.clone());
 
         cmd_buf_data.blas_actions.push(BlasAction {
             blas: blas.clone(),
@@ -1009,16 +895,16 @@ fn iter_blas<'a, A: HalApi>(
 }
 
 /// Iterates over the buffers generated [iter_blas] and convert the barriers into hal barriers, and the triangles into hal [AccelerationStructureEntries] (and also some validation).
-fn iter_buffers<'a, 'b, A: HalApi>(
-    buf_storage: &'a mut BufferStorage<'b, A>,
+fn iter_buffers<'a, 'b>(
+    buf_storage: &'a mut BufferStorage<'b>,
     snatch_guard: &'a SnatchGuard,
-    input_barriers: &mut Vec<hal::BufferBarrier<'a, A>>,
-    cmd_buf_data: &mut CommandBufferMutable<A>,
-    buffer_guard: &RwLockReadGuard<Storage<Buffer<A>>>,
+    input_barriers: &mut Vec<hal::BufferBarrier<'a, dyn hal::DynBuffer>>,
+    cmd_buf_data: &mut CommandBufferMutable,
+    buffer_guard: &RwLockReadGuard<Storage<Buffer>>,
     scratch_buffer_blas_size: &mut u64,
-    blas_storage: &mut BlasStorage<'a, A>,
+    blas_storage: &mut BlasStorage<'a>,
 ) -> Result<(), BuildAccelerationStructureError> {
-    let mut triangle_entries = Vec::<hal::AccelerationStructureTriangles<A>>::new();
+    let mut triangle_entries = Vec::<hal::AccelerationStructureTriangles<dyn hal::DynBuffer>>::new();
     for buf in buf_storage {
         let mesh = &buf.4;
         let vertex_buffer = {
@@ -1164,22 +1050,22 @@ fn iter_buffers<'a, 'b, A: HalApi>(
         };
 
         let triangles = hal::AccelerationStructureTriangles {
-            vertex_buffer: Some(vertex_buffer),
+            vertex_buffer: Some(vertex_buffer.as_ref()),
             vertex_format: mesh.size.vertex_format,
             first_vertex: mesh.first_vertex,
             vertex_count: mesh.size.vertex_count,
             vertex_stride: mesh.vertex_stride,
             indices: index_buffer.map(|index_buffer| hal::AccelerationStructureTriangleIndices::<
-                A,
+                dyn hal::DynBuffer,
             > {
                 format: mesh.size.index_format.unwrap(),
-                buffer: Some(index_buffer),
+                buffer: Some(index_buffer.as_ref()),
                 offset: mesh.index_buffer_offset.unwrap() as u32,
                 count: mesh.size.index_count.unwrap(),
             }),
             transform: transform_buffer.map(|transform_buffer| {
                 hal::AccelerationStructureTriangleTransform {
-                    buffer: transform_buffer,
+                    buffer: transform_buffer.as_ref(),
                     offset: mesh.transform_buffer_offset.unwrap() as u32,
                 }
             }),
@@ -1204,14 +1090,14 @@ fn iter_buffers<'a, 'b, A: HalApi>(
     Ok(())
 }
 
-fn map_blas<'a, A: HalApi>(
+fn map_blas<'a>(
     storage: &'a (
-        Arc<Blas<A>>,
-        hal::AccelerationStructureEntries<A>,
+        Arc<Blas>,
+        hal::AccelerationStructureEntries<dyn hal::DynBuffer>,
         BufferAddress,
     ),
-    scratch_buffer: &'a <A as Api>::Buffer,
-) -> hal::BuildAccelerationStructureDescriptor<'a, A> {
+    scratch_buffer: &'a dyn hal::DynBuffer,
+) -> hal::BuildAccelerationStructureDescriptor<'a, dyn hal::DynBuffer, dyn hal::DynAccelerationStructure> {
     let (blas, entries, scratch_buffer_offset) = storage;
     if blas.update_mode == wgt::AccelerationStructureUpdateMode::PreferUpdate {
         log::info!("only rebuild implemented")
@@ -1221,23 +1107,22 @@ fn map_blas<'a, A: HalApi>(
         mode: hal::AccelerationStructureBuildMode::Build,
         flags: blas.flags,
         source_acceleration_structure: None,
-        destination_acceleration_structure: blas.raw.as_ref().unwrap(),
+        destination_acceleration_structure: blas.raw.as_ref(),
         scratch_buffer,
         scratch_buffer_offset: *scratch_buffer_offset,
     }
 }
 
-fn build_blas<'a, A: HalApi>(
-    cmd_buf_raw: &mut A::CommandEncoder,
+fn build_blas<'a>(
+    cmd_buf_raw: &mut dyn hal::DynCommandEncoder,
     blas_present: bool,
     tlas_present: bool,
-    input_barriers: Vec<hal::BufferBarrier<A>>,
-    desc_len: u32,
-    blas_descriptors: impl Iterator<Item = hal::BuildAccelerationStructureDescriptor<'a, A>>,
-    scratch_buffer_barrier: hal::BufferBarrier<A>,
+    input_barriers: Vec<hal::BufferBarrier<dyn hal::DynBuffer>>,
+    blas_descriptors: &[hal::BuildAccelerationStructureDescriptor<'a, dyn hal::DynBuffer, dyn hal::DynAccelerationStructure>],
+    scratch_buffer_barrier: hal::BufferBarrier<dyn hal::DynBuffer>,
 ) {
     unsafe {
-        cmd_buf_raw.transition_buffers(input_barriers.into_iter());
+        cmd_buf_raw.transition_buffers(&input_barriers);
     }
 
     if blas_present {
@@ -1247,13 +1132,13 @@ fn build_blas<'a, A: HalApi>(
                     ..hal::AccelerationStructureUses::BUILD_OUTPUT,
             });
 
-            cmd_buf_raw.build_acceleration_structures(desc_len, blas_descriptors);
+            cmd_buf_raw.build_acceleration_structures(blas_descriptors);
         }
     }
 
     if blas_present && tlas_present {
         unsafe {
-            cmd_buf_raw.transition_buffers(iter::once(scratch_buffer_barrier));
+            cmd_buf_raw.transition_buffers(&[scratch_buffer_barrier]);
         }
     }
 
diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index aace14d831..1f11ba0937 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -1613,7 +1613,6 @@ impl Global {
             let indices = &device.tracker_indices;
             tracker.buffers.set_size(indices.buffers.size());
             tracker.textures.set_size(indices.textures.size());
-            tracker.tlas_s.set_size(indices.tlas_s.size());
 
             let mut state = State {
                 pipeline_flags: PipelineFlags::empty(),
diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index c9d797904d..2eb1466d65 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -800,7 +800,7 @@ impl Global {
                 buffer_storage: &Storage<resource::Buffer>,
                 sampler_storage: &Storage<resource::Sampler>,
                 texture_view_storage: &Storage<resource::TextureView>,
-                tlas_storage: &Storage<resource::Tlas<A>>,
+                tlas_storage: &Storage<resource::Tlas>,
             ) -> Result<ResolvedBindGroupEntry<'a>, binding_model::CreateBindGroupError>
             {
                 let map_buffer = |bb: &BufferBinding| {
diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index e6aed78a08..4937b45343 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -11,6 +11,7 @@ use smallvec::SmallVec;
 
 use std::sync::Arc;
 use thiserror::Error;
+use crate::resource::{Blas, Tlas};
 
 /// A command submitted to the GPU for execution.
 ///
@@ -103,6 +104,50 @@ impl ActiveSubmission {
 
         false
     }
+
+    pub fn contains_blas(&self, blas: &Blas) -> bool {
+        for encoder in &self.encoders {
+            // The ownership location of blas's depends on where the command encoder
+            // came from. If it is the staging command encoder on the queue, it is
+            // in the pending buffer list. If it came from a user command encoder,
+            // it is in the tracker.
+
+            if encoder.trackers.blas_s.contains(blas) {
+                return true;
+            }
+
+            if encoder
+                .pending_buffers
+                .contains_key(&blas.tracker_index())
+            {
+                return true;
+            }
+        }
+
+        false
+    }
+
+    pub fn contains_tlas(&self, tlas: &Tlas) -> bool {
+        for encoder in &self.encoders {
+            // The ownership location of tlas's depends on where the command encoder
+            // came from. If it is the staging command encoder on the queue, it is
+            // in the pending buffer list. If it came from a user command encoder,
+            // it is in the tracker.
+
+            if encoder.trackers.tlas_s.contains(tlas) {
+                return true;
+            }
+
+            if encoder
+                .pending_buffers
+                .contains_key(&tlas.tracker_index())
+            {
+                return true;
+            }
+        }
+
+        false
+    }
 }
 
 #[derive(Clone, Debug, Error)]
@@ -229,6 +274,34 @@ impl LifetimeTracker {
         })
     }
 
+    /// Returns the submission index of the most recent submission that uses the
+    /// given blas.
+    pub fn get_blas_latest_submission_index(&self, blas: &Blas) -> Option<SubmissionIndex> {
+        // We iterate in reverse order, so that we can bail out early as soon
+        // as we find a hit.
+        self.active.iter().rev().find_map(|submission| {
+            if submission.contains_blas(blas) {
+                Some(submission.index)
+            } else {
+                None
+            }
+        })
+    }
+
+    /// Returns the submission index of the most recent submission that uses the
+    /// given tlas.
+    pub fn get_tlas_latest_submission_index(&self, tlas: &Tlas) -> Option<SubmissionIndex> {
+        // We iterate in reverse order, so that we can bail out early as soon
+        // as we find a hit.
+        self.active.iter().rev().find_map(|submission| {
+            if submission.contains_tlas(tlas) {
+                Some(submission.index)
+            } else {
+                None
+            }
+        })
+    }
+
     /// Returns the submission index of the most recent submission that uses the
     /// given texture.
     pub fn get_texture_latest_submission_index(
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 8f570785a9..c4c6335d7a 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -26,7 +26,7 @@ use crate::{
 
 use smallvec::SmallVec;
 
-use crate::resource::{Blas, Tlas};
+use crate::resource::{Blas, ScratchBuffer, Tlas};
 use std::{
     iter,
     mem::{self, ManuallyDrop},
@@ -143,10 +143,11 @@ impl SubmittedWorkDoneClosure {
 #[derive(Debug)]
 pub enum TempResource {
     StagingBuffer(FlushedStagingBuffer),
+    ScratchBuffer(ScratchBuffer),
     DestroyedBuffer(DestroyedBuffer),
     DestroyedTexture(DestroyedTexture),
-    Blas(Arc<Blas<A>>),
-    Tlas(Arc<Tlas<A>>),
+    Blas(Arc<Blas>),
+    Tlas(Arc<Tlas>),
 }
 
 /// A series of raw [`CommandBuffer`]s that have been submitted to a
diff --git a/wgpu-core/src/device/ray_tracing.rs b/wgpu-core/src/device/ray_tracing.rs
index 5e80ce52f8..c7d49007c3 100644
--- a/wgpu-core/src/device/ray_tracing.rs
+++ b/wgpu-core/src/device/ray_tracing.rs
@@ -1,46 +1,45 @@
-#[cfg(feature = "trace")]
-use crate::device::trace;
+use std::mem::ManuallyDrop;
+use std::sync::Arc;
+
+use hal::{AccelerationStructureTriangleIndices, Device as _};
+
 use crate::{
-    device::{queue::TempResource, Device, DeviceError},
+    device::{Device, DeviceError, queue::TempResource},
     global::Global,
     hal_api::HalApi,
     id::{self, BlasId, TlasId},
+    LabelHelpers,
     lock::RwLock,
-    ray_tracing::{get_raw_tlas_instance_size, CreateBlasError, CreateTlasError},
-    resource, LabelHelpers,
+    ray_tracing::{CreateBlasError, CreateTlasError, get_raw_tlas_instance_size}, resource,
 };
-use std::sync::Arc;
-
+#[cfg(feature = "trace")]
+use crate::device::trace;
 use crate::lock::rank;
 use crate::resource::{Trackable, TrackingData};
-use hal::{AccelerationStructureTriangleIndices, Device as _};
 
-impl<A: HalApi> Device<A> {
+impl Device {
     fn create_blas(
         self: &Arc<Self>,
-        self_id: id::DeviceId,
         blas_desc: &resource::BlasDescriptor,
         sizes: wgt::BlasGeometrySizeDescriptors,
-    ) -> Result<Arc<resource::Blas<A>>, CreateBlasError> {
-        debug_assert_eq!(self_id.backend(), A::VARIANT);
-
+    ) -> Result<Arc<resource::Blas>, CreateBlasError> {
         let size_info = match &sizes {
             wgt::BlasGeometrySizeDescriptors::Triangles { desc } => {
                 let mut entries =
-                    Vec::<hal::AccelerationStructureTriangles<A>>::with_capacity(desc.len());
+                    Vec::<hal::AccelerationStructureTriangles<dyn hal::DynBuffer>>::with_capacity(desc.len());
                 for x in desc {
                     if x.index_count.is_some() != x.index_format.is_some() {
                         return Err(CreateBlasError::MissingIndexData);
                     }
                     let indices =
                         x.index_count
-                            .map(|count| AccelerationStructureTriangleIndices::<A> {
+                            .map(|count| AccelerationStructureTriangleIndices::<dyn hal::DynBuffer> {
                                 format: x.index_format.unwrap(),
                                 buffer: None,
                                 offset: 0,
                                 count,
                             });
-                    entries.push(hal::AccelerationStructureTriangles::<A> {
+                    entries.push(hal::AccelerationStructureTriangles::<dyn hal::DynBuffer> {
                         vertex_buffer: None,
                         vertex_format: x.vertex_format,
                         first_vertex: 0,
@@ -72,10 +71,10 @@ impl<A: HalApi> Device<A> {
         }
         .map_err(DeviceError::from)?;
 
-        let handle = unsafe { self.raw().get_acceleration_structure_device_address(&raw) };
+        let handle = unsafe { self.raw().get_acceleration_structure_device_address(raw.as_ref()) };
 
         Ok(Arc::new(resource::Blas {
-            raw: Some(raw),
+            raw: ManuallyDrop::new(raw),
             device: self.clone(),
             size_info,
             sizes,
@@ -90,11 +89,8 @@ impl<A: HalApi> Device<A> {
 
     fn create_tlas(
         self: &Arc<Self>,
-        self_id: id::DeviceId,
         desc: &resource::TlasDescriptor,
-    ) -> Result<Arc<resource::Tlas<A>>, CreateTlasError> {
-        debug_assert_eq!(self_id.backend(), A::VARIANT);
-
+    ) -> Result<Arc<resource::Tlas>, CreateTlasError> {
         let size_info = unsafe {
             self.raw().get_acceleration_structure_build_sizes(
                 &hal::GetAccelerationStructureBuildSizesDescriptor {
@@ -121,7 +117,7 @@ impl<A: HalApi> Device<A> {
         .map_err(DeviceError::from)?;
 
         let instance_buffer_size =
-            get_raw_tlas_instance_size::<A>() * std::cmp::max(desc.max_instances, 1) as usize;
+            get_raw_tlas_instance_size() * std::cmp::max(desc.max_instances, 1) as usize;
         let instance_buffer = unsafe {
             self.raw().create_buffer(&hal::BufferDescriptor {
                 label: Some("(wgpu-core) instances_buffer"),
@@ -134,14 +130,14 @@ impl<A: HalApi> Device<A> {
         .map_err(DeviceError::from)?;
 
         Ok(Arc::new(resource::Tlas {
-            raw: Some(raw),
+            raw: ManuallyDrop::new(raw),
             device: self.clone(),
             size_info,
             flags: desc.flags,
             update_mode: desc.update_mode,
             built_index: RwLock::new(rank::TLAS_BUILT_INDEX, None),
             dependencies: RwLock::new(rank::TLAS_DEPENDENCIES, Vec::new()),
-            instance_buffer: RwLock::new(rank::TLAS_INSTANCE_BUFFER, Some(instance_buffer)),
+            instance_buffer: ManuallyDrop::new(instance_buffer),
             label: desc.label.to_string(),
             max_instance_count: desc.max_instances,
             tracking_data: TrackingData::new(self.tracker_indices.tlas_s.clone()),
@@ -150,7 +146,7 @@ impl<A: HalApi> Device<A> {
 }
 
 impl Global {
-    pub fn device_create_blas<A: HalApi>(
+    pub fn device_create_blas(
         &self,
         device_id: id::DeviceId,
         desc: &resource::BlasDescriptor,
@@ -159,8 +155,8 @@ impl Global {
     ) -> (BlasId, Option<u64>, Option<CreateBlasError>) {
         profiling::scope!("Device::create_blas");
 
-        let hub = A::hub(self);
-        let fid = hub.blas_s.prepare(id_in);
+        let hub = &self.hub;
+        let fid = hub.blas_s.prepare(device_id.backend(), id_in);
 
         let device_guard = hub.devices.read();
         let error = 'error: {
@@ -181,7 +177,7 @@ impl Global {
                 });
             }
 
-            let blas = match device.create_blas(device_id, desc, sizes) {
+            let blas = match device.create_blas(desc, sizes) {
                 Ok(blas) => blas,
                 Err(e) => break 'error e,
             };
@@ -197,7 +193,7 @@ impl Global {
         (id, None, Some(error))
     }
 
-    pub fn device_create_tlas<A: HalApi>(
+    pub fn device_create_tlas(
         &self,
         device_id: id::DeviceId,
         desc: &resource::TlasDescriptor,
@@ -205,8 +201,8 @@ impl Global {
     ) -> (TlasId, Option<CreateTlasError>) {
         profiling::scope!("Device::create_tlas");
 
-        let hub = A::hub(self);
-        let fid = hub.tlas_s.prepare(id_in);
+        let hub = &self.hub;
+        let fid = hub.tlas_s.prepare(device_id.backend(), id_in);
 
         let device_guard = hub.devices.read();
         let error = 'error: {
@@ -222,7 +218,7 @@ impl Global {
                 });
             }
 
-            let tlas = match device.create_tlas(device_id, desc) {
+            let tlas = match device.create_tlas(desc) {
                 Ok(tlas) => tlas,
                 Err(e) => break 'error e,
             };
@@ -237,10 +233,10 @@ impl Global {
         (id, Some(error))
     }
 
-    pub fn blas_destroy<A: HalApi>(&self, blas_id: BlasId) -> Result<(), resource::DestroyError> {
+    pub fn blas_destroy(&self, blas_id: BlasId) -> Result<(), resource::DestroyError> {
         profiling::scope!("Blas::destroy");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         log::info!("Blas {:?} is destroyed", blas_id);
         let blas_guard = hub.blas_s.write();
@@ -248,7 +244,7 @@ impl Global {
             .get(blas_id)
             .map_err(|_| resource::DestroyError::Invalid)?
             .clone();
-
+        drop(blas_guard);
         let device = &blas.device;
 
         #[cfg(feature = "trace")]
@@ -258,49 +254,49 @@ impl Global {
 
         let temp = TempResource::Blas(blas.clone());
         {
-            let last_submit_index = blas.submission_index();
-            drop(blas_guard);
-            device
-                .lock_life()
-                .schedule_resource_destruction(temp, last_submit_index);
+            let mut device_lock = device
+                .lock_life();
+            let last_submit_index = device_lock.get_blas_latest_submission_index(blas.as_ref());
+            if let Some(last_submit_index) = last_submit_index {
+                device_lock
+                    .schedule_resource_destruction(temp, last_submit_index);
+            }
         }
 
         Ok(())
     }
 
-    pub fn blas_drop<A: HalApi>(&self, blas_id: BlasId, wait: bool) {
+    pub fn blas_drop(&self, blas_id: BlasId) {
         profiling::scope!("Blas::drop");
         log::debug!("blas {:?} is dropped", blas_id);
 
-        let hub = A::hub(self);
-
-        if let Some(blas) = hub.blas_s.unregister(blas_id) {
-            let last_submit_index = blas.submission_index();
+        let hub = &self.hub;
 
-            #[cfg(feature = "trace")]
-            if let Some(t) = blas.device.trace.lock().as_mut() {
-                t.add(trace::Action::DestroyBlas(blas_id));
+        let blas = match hub.blas_s.unregister(blas_id) {
+            Some(blas) => blas,
+            None => {
+                return;
             }
+        };
 
-            if wait {
-                match blas.device.wait_for_submit(last_submit_index) {
-                    Ok(()) => (),
-                    Err(e) => log::error!("Failed to wait for blas {:?}: {:?}", blas_id, e),
-                }
-            }
+        #[cfg(feature = "trace")]
+        if let Some(t) = blas.device.trace.lock().as_mut() {
+            t.add(trace::Action::DestroyBlas(blas_id));
         }
     }
 
-    pub fn tlas_destroy<A: HalApi>(&self, tlas_id: TlasId) -> Result<(), resource::DestroyError> {
+    pub fn tlas_destroy(&self, tlas_id: TlasId) -> Result<(), resource::DestroyError> {
         profiling::scope!("Tlas::destroy");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         log::info!("Tlas {:?} is destroyed", tlas_id);
         let tlas_guard = hub.tlas_s.write();
         let tlas = tlas_guard
             .get(tlas_id)
-            .map_err(|_| resource::DestroyError::Invalid)?;
+            .map_err(|_| resource::DestroyError::Invalid)?
+            .clone();
+        drop(tlas_guard);
 
         let device = &mut tlas.device.clone();
 
@@ -311,36 +307,34 @@ impl Global {
 
         let temp = TempResource::Tlas(tlas.clone());
         {
-            let last_submit_index = tlas.submission_index();
-            drop(tlas_guard);
-            let guard = &mut device.lock_life();
-
-            guard.schedule_resource_destruction(temp, last_submit_index);
+            let mut device_lock = device
+                .lock_life();
+            let last_submit_index = device_lock.get_tlas_latest_submission_index(tlas.as_ref());
+            if let Some(last_submit_index) = last_submit_index {
+                device_lock
+                    .schedule_resource_destruction(temp, last_submit_index);
+            }
         }
 
         Ok(())
     }
 
-    pub fn tlas_drop<A: HalApi>(&self, tlas_id: TlasId, wait: bool) {
+    pub fn tlas_drop(&self, tlas_id: TlasId) {
         profiling::scope!("Tlas::drop");
         log::debug!("tlas {:?} is dropped", tlas_id);
 
-        let hub = A::hub(self);
-
-        if let Some(tlas) = hub.tlas_s.unregister(tlas_id) {
-            let last_submit_index = tlas.submission_index();
+        let hub = &self.hub;
 
-            #[cfg(feature = "trace")]
-            if let Some(t) = tlas.device.trace.lock().as_mut() {
-                t.add(trace::Action::DestroyTlas(tlas_id));
+        let tlas = match hub.tlas_s.unregister(tlas_id) {
+            Some(tlas) => tlas,
+            None => {
+                return;
             }
+        };
 
-            if wait {
-                match tlas.device.wait_for_submit(last_submit_index) {
-                    Ok(()) => (),
-                    Err(e) => log::error!("Failed to wait for blas {:?}: {:?}", tlas_id, e),
-                }
-            }
+        #[cfg(feature = "trace")]
+        if let Some(t) = tlas.device.trace.lock().as_mut() {
+            t.add(trace::Action::DestroyTlas(tlas_id));
         }
     }
 }
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index e038ebd530..49dc487b82 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -41,11 +41,7 @@ use smallvec::SmallVec;
 use thiserror::Error;
 use wgt::{DeviceLostReason, TextureFormat, TextureSampleType, TextureViewDimension};
 
-use super::{
-    queue::{self, Queue},
-    DeviceDescriptor, DeviceError, UserClosures, ENTRYPOINT_FAILURE_ERROR, ZERO_BUFFER_SIZE,
-};
-use crate::resource::Tlas;
+use crate::resource::{AccelerationStructure, Tlas};
 use std::{
     borrow::Cow,
     mem::ManuallyDrop,
@@ -2118,14 +2114,14 @@ impl Device {
 
     fn create_tlas_binding<'a>(
         self: &Arc<Self>,
-        used: &BindGroupStates<A>,
+        used: &mut BindGroupStates,
         binding: u32,
         decl: &wgt::BindGroupLayoutEntry,
-        tlas: &'a Arc<Tlas<A>>,
-    ) -> Result<&'a A::AccelerationStructure, binding_model::CreateBindGroupError> {
+        tlas: &'a Arc<Tlas>,
+    ) -> Result<&'a dyn hal::DynAccelerationStructure, binding_model::CreateBindGroupError> {
         use crate::binding_model::CreateBindGroupError as Error;
 
-        used.acceleration_structures.add_single(tlas);
+        used.acceleration_structures.insert_single(tlas.clone());
 
         tlas.same_device(self)?;
 
@@ -2285,7 +2281,7 @@ impl Device {
                     (res_index, num_bindings)
                 }
                 Br::AccelerationStructure(ref tlas) => {
-                    let tlas = self.create_tlas_binding(&used, binding, decl, tlas)?;
+                    let tlas = self.create_tlas_binding(&mut used, binding, decl, tlas)?;
                     let res_index = hal_tlas_s.len();
                     hal_tlas_s.push(tlas);
                     (res_index, 1)
diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs
index e9a4defbb1..82d45c964b 100644
--- a/wgpu-core/src/hub.rs
+++ b/wgpu-core/src/hub.rs
@@ -180,8 +180,8 @@ pub struct Hub {
     pub(crate) textures: Registry<Texture>,
     pub(crate) texture_views: Registry<TextureView>,
     pub(crate) samplers: Registry<Sampler>,
-    pub(crate) blas_s: Registry<Blas<A>>,
-    pub(crate) tlas_s: Registry<Tlas<A>>,
+    pub(crate) blas_s: Registry<Blas>,
+    pub(crate) tlas_s: Registry<Tlas>,
 }
 
 impl Hub {
diff --git a/wgpu-core/src/ray_tracing.rs b/wgpu-core/src/ray_tracing.rs
index ae8b24df74..fd1bdb8616 100644
--- a/wgpu-core/src/ray_tracing.rs
+++ b/wgpu-core/src/ray_tracing.rs
@@ -194,24 +194,24 @@ pub(crate) enum BlasActionKind {
 }
 
 #[derive(Debug, Clone)]
-pub(crate) enum TlasActionKind<A: HalApi> {
+pub(crate) enum TlasActionKind {
     Build {
         build_index: NonZeroU64,
-        dependencies: Vec<Arc<Blas<A>>>,
+        dependencies: Vec<Arc<Blas>>,
     },
     Use,
 }
 
 #[derive(Debug, Clone)]
-pub(crate) struct BlasAction<A: HalApi> {
-    pub blas: Arc<Blas<A>>,
+pub(crate) struct BlasAction {
+    pub blas: Arc<Blas>,
     pub kind: BlasActionKind,
 }
 
 #[derive(Debug, Clone)]
-pub(crate) struct TlasAction<A: HalApi> {
-    pub tlas: Arc<Tlas<A>>,
-    pub kind: TlasActionKind<A>,
+pub(crate) struct TlasAction {
+    pub tlas: Arc<Tlas>,
+    pub kind: TlasActionKind,
 }
 
 #[derive(Debug, Clone)]
@@ -257,12 +257,9 @@ pub struct TraceTlasPackage {
     pub lowest_unmodified: u32,
 }
 
-pub(crate) fn get_raw_tlas_instance_size<A: HalApi>() -> usize {
-    match A::VARIANT {
-        wgt::Backend::Empty => 0,
-        wgt::Backend::Vulkan => 64,
-        _ => unimplemented!(),
-    }
+pub(crate) fn get_raw_tlas_instance_size() -> usize {
+    // TODO: this should be provided by the backend
+    64
 }
 
 #[derive(Clone)]
@@ -274,30 +271,25 @@ struct RawTlasInstance {
     acceleration_structure_reference: u64,
 }
 
-pub(crate) fn tlas_instance_into_bytes<A: HalApi>(
+pub(crate) fn tlas_instance_into_bytes(
     instance: &TlasInstance,
     blas_address: u64,
 ) -> Vec<u8> {
-    match A::VARIANT {
-        wgt::Backend::Empty => vec![],
-        wgt::Backend::Vulkan => {
-            const MAX_U24: u32 = (1u32 << 24u32) - 1u32;
-            let temp = RawTlasInstance {
-                transform: *instance.transform,
-                custom_index_and_mask: (instance.custom_index & MAX_U24)
-                    | (u32::from(instance.mask) << 24),
-                shader_binding_table_record_offset_and_flags: 0,
-                acceleration_structure_reference: blas_address,
-            };
-            let temp: *const _ = &temp;
-            unsafe {
-                slice::from_raw_parts::<u8>(
-                    temp as *const u8,
-                    std::mem::size_of::<RawTlasInstance>(),
-                )
-                .to_vec()
-            }
-        }
-        _ => unimplemented!(),
+    // TODO: get the device to do this
+    const MAX_U24: u32 = (1u32 << 24u32) - 1u32;
+    let temp = RawTlasInstance {
+        transform: *instance.transform,
+        custom_index_and_mask: (instance.custom_index & MAX_U24)
+            | (u32::from(instance.mask) << 24),
+        shader_binding_table_record_offset_and_flags: 0,
+        acceleration_structure_reference: blas_address,
+    };
+    let temp: *const _ = &temp;
+    unsafe {
+        slice::from_raw_parts::<u8>(
+            temp as *const u8,
+            std::mem::size_of::<RawTlasInstance>(),
+        )
+            .to_vec()
     }
 }
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 6ac736b35c..7c710f64f2 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -28,8 +28,9 @@ use std::{
     ptr::NonNull,
     sync::{Arc, Weak},
 };
-
+use std::cmp::max;
 use std::num::NonZeroU64;
+use hal::{BufferUses, DynAccelerationStructure};
 
 /// Information about the wgpu-core resource.
 ///
@@ -940,6 +941,46 @@ impl Drop for FlushedStagingBuffer {
     }
 }
 
+#[derive(Debug)]
+pub struct ScratchBuffer {
+    raw: ManuallyDrop<Box<dyn hal::DynBuffer>>,
+    device: Arc<Device>,
+    pub(crate) size: wgt::BufferSize,
+}
+
+impl ScratchBuffer {
+    pub(crate) fn new(device: &Arc<Device>, size: wgt::BufferSize) -> Result<Self, DeviceError> {
+        let raw = unsafe {
+            device
+                .raw()
+                .create_buffer(&hal::BufferDescriptor {
+                    label: Some("(wgpu) scratch buffer"),
+                    size: size.get(),
+                    usage: BufferUses::ACCELERATION_STRUCTURE_SCRATCH | BufferUses::MAP_WRITE,
+                    memory_flags: hal::MemoryFlags::empty(),
+                })
+                .map_err(crate::device::DeviceError::from)?
+        };
+        Ok(Self {
+            raw: ManuallyDrop::new(raw),
+            device: device.clone(),
+            size,
+        })
+    }
+    pub(crate) fn raw(&self) -> &dyn hal::DynBuffer {
+        self.raw.as_ref()
+    }
+}
+
+impl Drop for ScratchBuffer {
+    fn drop(&mut self) {
+        resource_log!("Destroy raw StagingBuffer");
+        // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
+        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
+        unsafe { self.device.raw().destroy_buffer(raw) };
+    }
+}
+
 pub type TextureDescriptor<'a> = wgt::TextureDescriptor<Label<'a>, Vec<wgt::TextureFormat>>;
 
 #[derive(Debug)]
@@ -1843,10 +1884,14 @@ pub enum DestroyError {
 pub type BlasDescriptor<'a> = wgt::CreateBlasDescriptor<Label<'a>>;
 pub type TlasDescriptor<'a> = wgt::CreateTlasDescriptor<Label<'a>>;
 
+pub(crate) trait AccelerationStructure: Trackable {
+    fn raw(&self) -> &dyn hal::DynAccelerationStructure;
+}
+
 #[derive(Debug)]
-pub struct Blas<A: HalApi> {
-    pub(crate) raw: Option<A::AccelerationStructure>,
-    pub(crate) device: Arc<Device<A>>,
+pub struct Blas {
+    pub(crate) raw: ManuallyDrop<Box<dyn hal::DynAccelerationStructure>>,
+    pub(crate) device: Arc<Device>,
     pub(crate) size_info: hal::AccelerationStructureBuildSizes,
     pub(crate) sizes: wgt::BlasGeometrySizeDescriptors,
     pub(crate) flags: wgt::AccelerationStructureFlags,
@@ -1858,18 +1903,25 @@ pub struct Blas<A: HalApi> {
     pub(crate) tracking_data: TrackingData,
 }
 
-impl<A: HalApi> Drop for Blas<A> {
+impl Drop for Blas {
     fn drop(&mut self) {
         unsafe {
-            if let Some(structure) = self.raw.take() {
-                resource_log!("Destroy raw {}", self.error_ident());
-                use hal::Device;
-                self.device.raw().destroy_acceleration_structure(structure);
+            resource_log!("Destroy raw {}", self.error_ident());
+            // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
+            let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
+            unsafe {
+                self.device.raw().destroy_acceleration_structure(raw);
             }
         }
     }
 }
 
+impl AccelerationStructure for Blas {
+    fn raw(&self) -> &dyn hal::DynAccelerationStructure {
+        self.raw.as_ref()
+    }
+}
+
 crate::impl_resource_type!(Blas);
 crate::impl_labeled!(Blas);
 crate::impl_parent_device!(Blas);
@@ -1877,39 +1929,37 @@ crate::impl_storage_item!(Blas);
 crate::impl_trackable!(Blas);
 
 #[derive(Debug)]
-pub struct Tlas<A: HalApi> {
-    pub(crate) raw: Option<A::AccelerationStructure>,
-    pub(crate) device: Arc<Device<A>>,
+pub struct Tlas {
+    pub(crate) raw: ManuallyDrop<Box<dyn hal::DynAccelerationStructure>>,
+    pub(crate) device: Arc<Device>,
     pub(crate) size_info: hal::AccelerationStructureBuildSizes,
     pub(crate) max_instance_count: u32,
     pub(crate) flags: wgt::AccelerationStructureFlags,
     pub(crate) update_mode: wgt::AccelerationStructureUpdateMode,
     pub(crate) built_index: RwLock<Option<NonZeroU64>>,
-    pub(crate) dependencies: RwLock<Vec<Arc<Blas<A>>>>,
-    pub(crate) instance_buffer: RwLock<Option<A::Buffer>>,
+    pub(crate) dependencies: RwLock<Vec<Arc<Blas>>>,
+    pub(crate) instance_buffer: ManuallyDrop<Box<dyn hal::DynBuffer>>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
     pub(crate) tracking_data: TrackingData,
 }
 
-impl<A: HalApi> Drop for Tlas<A> {
+impl Drop for Tlas {
     fn drop(&mut self) {
         unsafe {
             use hal::Device;
-            if let Some(structure) = self.raw.take() {
-                resource_log!("Destroy raw {}", self.error_ident());
-                self.device.raw().destroy_acceleration_structure(structure);
-            }
-            if let Some(buffer) = self.instance_buffer.write().take() {
-                self.device.raw().destroy_buffer(buffer)
-            }
+            let structure = ManuallyDrop::take(&mut self.raw);
+            let buffer = ManuallyDrop::take(&mut self.instance_buffer);
+            resource_log!("Destroy raw {}", self.error_ident());
+            self.device.raw().destroy_acceleration_structure(structure);
+            self.device.raw().destroy_buffer(buffer);
         }
     }
 }
 
-impl<A: HalApi> Tlas<A> {
-    pub(crate) fn raw(&self) -> &A::AccelerationStructure {
-        self.raw.as_ref().unwrap()
+impl AccelerationStructure for Tlas {
+    fn raw(&self) -> &dyn hal::DynAccelerationStructure {
+        self.raw.as_ref()
     }
 }
 
diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs
index 5864015bae..a57ebd1e6f 100644
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -100,6 +100,7 @@ mod metadata;
 mod range;
 mod stateless;
 mod texture;
+mod ray_tracing;
 
 use crate::{
     binding_model, command,
@@ -122,6 +123,8 @@ pub(crate) use texture::{
     TextureUsageScope, TextureViewBindGroupState,
 };
 use wgt::strict_assert_ne;
+use crate::resource::AccelerationStructure;
+use crate::track::ray_tracing::AccelerationStructureTracker;
 
 #[repr(transparent)]
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
@@ -302,6 +305,17 @@ impl PendingTransition<hal::TextureUses> {
     }
 }
 
+impl PendingTransition<hal::AccelerationStructureUses> {
+    /// Produce the hal barrier corresponding to the transition.
+    pub fn into_hal<'a>(
+        self,
+    ) -> hal::AccelerationStructureBarrier {
+        hal::AccelerationStructureBarrier {
+            usage: self.usage,
+        }
+    }
+}
+
 /// The uses that a resource or subresource can be in.
 pub(crate) trait ResourceUses:
     fmt::Debug + ops::BitAnd<Output = Self> + ops::BitOr<Output = Self> + PartialEq + Sized + Copy
@@ -424,7 +438,7 @@ pub(crate) struct BindGroupStates {
     pub buffers: BufferBindGroupState,
     pub views: TextureViewBindGroupState,
     pub samplers: StatelessTracker<resource::Sampler>,
-    pub acceleration_structures: StatelessBindGroupState<resource::Tlas<A>>,
+    pub acceleration_structures: StatelessTracker<resource::Tlas>,
 }
 
 impl BindGroupStates {
@@ -433,7 +447,7 @@ impl BindGroupStates {
             buffers: BufferBindGroupState::new(),
             views: TextureViewBindGroupState::new(),
             samplers: StatelessTracker::new(),
-            acceleration_structures: StatelessBindGroupState::new(),
+            acceleration_structures: StatelessTracker::new(),
         }
     }
 
@@ -446,9 +460,8 @@ impl BindGroupStates {
         // Views are stateless, however, `TextureViewBindGroupState`
         // is special as it will be merged with other texture trackers.
         self.views.optimize();
-        // Samplers are stateless and don't need to be optimized
+        // Samplers and Tlas's are stateless and don't need to be optimized
         // since the tracker is never merged with any other tracker.
-        self.acceleration_structures.optimize();
     }
 }
 
@@ -601,14 +614,14 @@ impl DeviceTracker {
 pub(crate) struct Tracker {
     pub buffers: BufferTracker,
     pub textures: TextureTracker,
+    pub blas_s: AccelerationStructureTracker<resource::Blas>,
+    pub tlas_s: AccelerationStructureTracker<resource::Tlas>,
     pub views: StatelessTracker<resource::TextureView>,
     pub bind_groups: StatelessTracker<binding_model::BindGroup>,
     pub compute_pipelines: StatelessTracker<pipeline::ComputePipeline>,
     pub render_pipelines: StatelessTracker<pipeline::RenderPipeline>,
     pub bundles: StatelessTracker<command::RenderBundle>,
     pub query_sets: StatelessTracker<resource::QuerySet>,
-    pub blas_s: StatelessTracker<resource::Blas<A>>,
-    pub tlas_s: StatelessTracker<resource::Tlas<A>>,
 }
 
 impl Tracker {
@@ -616,14 +629,14 @@ impl Tracker {
         Self {
             buffers: BufferTracker::new(),
             textures: TextureTracker::new(),
+            blas_s: AccelerationStructureTracker::new(),
+            tlas_s: AccelerationStructureTracker::new(),
             views: StatelessTracker::new(),
             bind_groups: StatelessTracker::new(),
             compute_pipelines: StatelessTracker::new(),
             render_pipelines: StatelessTracker::new(),
             bundles: StatelessTracker::new(),
             query_sets: StatelessTracker::new(),
-            blas_s: StatelessTracker::new(),
-            tlas_s: StatelessTracker::new(),
         }
     }
 
diff --git a/wgpu-core/src/track/ray_tracing.rs b/wgpu-core/src/track/ray_tracing.rs
new file mode 100644
index 0000000000..1d673af401
--- /dev/null
+++ b/wgpu-core/src/track/ray_tracing.rs
@@ -0,0 +1,99 @@
+use std::sync::Arc;
+use hal::{AccelerationStructureBarrier, AccelerationStructureUses, BufferUses};
+use wgt::strict_assert;
+use crate::resource::{AccelerationStructure, Trackable};
+use crate::track::metadata::ResourceMetadata;
+use crate::track::{PendingTransition, ResourceUses};
+
+pub(crate) struct AccelerationStructureTracker<T: AccelerationStructure> {
+    start: Vec<AccelerationStructureUses>,
+    end: Vec<AccelerationStructureUses>,
+
+    metadata: ResourceMetadata<Arc<T>>,
+
+    temp: Vec<PendingTransition<AccelerationStructureUses>>,
+}
+
+impl<T: AccelerationStructure> AccelerationStructureTracker<T> {
+    pub fn new() -> Self {
+        Self {
+            start: Vec::new(),
+            end: Vec::new(),
+
+            metadata: ResourceMetadata::new(),
+
+            temp: Vec::new(),
+        }
+    }
+
+    fn tracker_assert_in_bounds(&self, index: usize) {
+        strict_assert!(index < self.start.len());
+        strict_assert!(index < self.end.len());
+        self.metadata.tracker_assert_in_bounds(index);
+    }
+
+    /// Sets the size of all the vectors inside the tracker.
+    ///
+    /// `size` must exceed the highest acceleration-structure tracker index
+    /// that will be used; `allow_index` grows the vectors on demand.
+    pub fn set_size(&mut self, size: usize) {
+        self.start.resize(size, AccelerationStructureUses::empty());
+        self.end.resize(size, AccelerationStructureUses::empty());
+
+        self.metadata.set_size(size);
+    }
+
+    /// Extend the vectors to let the given index be valid.
+    fn allow_index(&mut self, index: usize) {
+        if index >= self.start.len() {
+            self.set_size(index + 1);
+        }
+    }
+
+    /// Returns true if the given buffer is tracked.
+    pub fn contains(&self, acceleration_structure: &T) -> bool {
+        self.metadata.contains(acceleration_structure.tracker_index().as_usize())
+    }
+
+    /// Returns a list of all buffers tracked.
+    pub fn used_resources(&self) -> impl Iterator<Item = Arc<T>> + '_ {
+        self.metadata.owned_resources()
+    }
+
+    /// Drains all currently pending transitions.
+    pub fn drain_transitions<'a, 'b: 'a>(
+        &'b mut self,
+    ) -> impl Iterator<Item = AccelerationStructureBarrier> + 'b {
+        let buffer_barriers = self.temp.drain(..).map(|pending| {
+            pending.into_hal()
+        });
+        buffer_barriers
+    }
+
+    /// Inserts a single resource into the resource tracker.
+    pub fn set_single(&mut self, resource: Arc<T>) {
+        let index: usize = resource.tracker_index().as_usize();
+
+        self.allow_index(index);
+
+        self.tracker_assert_in_bounds(index);
+    }
+}
+
+impl ResourceUses for AccelerationStructureUses {
+    const EXCLUSIVE: Self = Self::empty();
+
+    type Selector = ();
+
+    fn bits(self) -> u16 {
+        Self::bits(&self) as u16
+    }
+
+    fn all_ordered(self) -> bool {
+        true
+    }
+
+    fn any_exclusive(self) -> bool {
+        self.intersects(Self::EXCLUSIVE)
+    }
+}
\ No newline at end of file
diff --git a/wgpu-core/src/track/stateless.rs b/wgpu-core/src/track/stateless.rs
index d1c2c87dd5..8378ec4573 100644
--- a/wgpu-core/src/track/stateless.rs
+++ b/wgpu-core/src/track/stateless.rs
@@ -1,4 +1,6 @@
+use std::slice::Iter;
 use std::sync::Arc;
+use std::vec::IntoIter;
 
 /// A tracker that holds strong references to resources.
 ///
@@ -24,3 +26,12 @@ impl<T> StatelessTracker<T> {
         unsafe { self.resources.last().unwrap_unchecked() }
     }
 }
+
+impl<'a, T> IntoIterator for &'a StatelessTracker<T> {
+    type Item = &'a Arc<T>;
+    type IntoIter = Iter<'a, Arc<T>>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.resources.as_slice().into_iter()
+    }
+}
diff --git a/wgpu/src/api/bind_group.rs b/wgpu/src/api/bind_group.rs
index 51c1efac74..05e47511db 100644
--- a/wgpu/src/api/bind_group.rs
+++ b/wgpu/src/api/bind_group.rs
@@ -80,6 +80,8 @@ pub enum BindingResource<'a> {
     /// Corresponds to [`wgt::BindingType::Texture`] and [`wgt::BindingType::StorageTexture`] with
     /// [`BindGroupLayoutEntry::count`] set to Some.
     TextureViewArray(&'a [&'a TextureView]),
+    /// Binding is a top-level acceleration structure ([`ray_tracing::Tlas`]).
+    AccelerationStructure(&'a ray_tracing::Tlas),
 }
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(BindingResource<'_>: Send, Sync);
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 6d9dda3229..3b8bf54c11 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -3360,12 +3360,12 @@ impl crate::Context for ContextWgpuCore {
         sizes: wgt::BlasGeometrySizeDescriptors,
     ) -> (Self::BlasId, Option<u64>, Self::BlasData) {
         let global = &self.0;
-        let (id, handle, error) = wgc::gfx_select!(device => global.device_create_blas(
+        let (id, handle, error) = global.device_create_blas(
             *device,
             &desc.map_label(|l| l.map(Borrowed)),
             sizes,
             None,
-        ));
+        );
         if let Some(cause) = error {
             self.handle_error(
                 &device_data.error_sink,
@@ -3390,11 +3390,11 @@ impl crate::Context for ContextWgpuCore {
         desc: &crate::ray_tracing::CreateTlasDescriptor<'_>,
     ) -> (Self::TlasId, Self::TlasData) {
         let global = &self.0;
-        let (id, error) = wgc::gfx_select!(device => global.device_create_tlas(
+        let (id, error) = global.device_create_tlas(
             *device,
             &desc.map_label(|l| l.map(Borrowed)),
             None,
-        ));
+        );
         if let Some(cause) = error {
             self.handle_error(
                 &device_data.error_sink,
@@ -3456,11 +3456,11 @@ impl crate::Context for ContextWgpuCore {
             },
         );
 
-        if let Err(cause) = wgc::gfx_select!(encoder => global.command_encoder_build_acceleration_structures_unsafe_tlas(
+        if let Err(cause) = global.command_encoder_build_acceleration_structures_unsafe_tlas(
             *encoder,
             blas,
             tlas
-        )) {
+        ) {
             self.handle_error_nolabel(
                 &encoder_data.error_sink,
                 cause,
@@ -3522,11 +3522,11 @@ impl crate::Context for ContextWgpuCore {
             }
         });
 
-        if let Err(cause) = wgc::gfx_select!(encoder => global.command_encoder_build_acceleration_structures(
+        if let Err(cause) = global.command_encoder_build_acceleration_structures(
             *encoder,
             blas,
             tlas
-        )) {
+        ) {
             self.handle_error_nolabel(
                 &encoder_data.error_sink,
                 cause,
@@ -3537,22 +3537,22 @@ impl crate::Context for ContextWgpuCore {
 
     fn blas_destroy(&self, blas: &Self::BlasId, _blas_data: &Self::BlasData) {
         let global = &self.0;
-        let _ = wgc::gfx_select!(blas => global.blas_destroy(*blas));
+        let _ = global.blas_destroy(*blas);
     }
 
     fn blas_drop(&self, blas: &Self::BlasId, _blas_data: &Self::BlasData) {
         let global = &self.0;
-        wgc::gfx_select!(blas => global.blas_drop(*blas, false))
+        global.blas_drop(*blas)
     }
 
     fn tlas_destroy(&self, tlas: &Self::TlasId, _tlas_data: &Self::TlasData) {
         let global = &self.0;
-        let _ = wgc::gfx_select!(tlas => global.tlas_destroy(*tlas));
+        let _ = global.tlas_destroy(*tlas);
     }
 
     fn tlas_drop(&self, tlas: &Self::TlasId, _tlas_data: &Self::TlasData) {
         let global = &self.0;
-        wgc::gfx_select!(tlas => global.tlas_drop(*tlas, false))
+        global.tlas_drop(*tlas)
     }
 }
 

From 49cfa7d73ec2c32a62af743e44172ae635ee98e6 Mon Sep 17 00:00:00 2001
From: Vecvec <vectorsofvectors@gmail.com>
Date: Thu, 15 Aug 2024 11:00:36 +1200
Subject: [PATCH 223/226] fix warnings

---
 wgpu-core/src/command/ray_tracing.rs | 20 ++++++++------------
 wgpu-core/src/device/ray_tracing.rs  | 13 ++++++-------
 wgpu-core/src/lock/rank.rs           |  2 --
 wgpu-core/src/ray_tracing.rs         |  1 -
 wgpu-core/src/resource.rs            | 16 +++++-----------
 wgpu-core/src/track/mod.rs           | 12 ------------
 wgpu-core/src/track/ray_tracing.rs   | 25 +++----------------------
 wgpu-core/src/track/stateless.rs     |  1 -
 8 files changed, 22 insertions(+), 68 deletions(-)

diff --git a/wgpu-core/src/command/ray_tracing.rs b/wgpu-core/src/command/ray_tracing.rs
index b20592e60f..fc13909bf5 100644
--- a/wgpu-core/src/command/ray_tracing.rs
+++ b/wgpu-core/src/command/ray_tracing.rs
@@ -1,10 +1,9 @@
 use crate::{
     device::queue::TempResource,
     global::Global,
-    hal_api::HalApi,
     id::CommandEncoderId,
     init_tracker::MemoryInitKind,
-    lock::{Mutex, RwLockReadGuard},
+    lock::RwLockReadGuard,
     ray_tracing::{
         tlas_instance_into_bytes, BlasAction, BlasBuildEntry, BlasGeometries,
         BuildAccelerationStructureError, TlasAction, TlasBuildEntry, TlasPackage,
@@ -17,16 +16,15 @@ use crate::{
 use wgt::{math::align_to, BufferAddress, BufferUsages};
 
 use super::{BakedCommands, CommandBufferMutable, CommandEncoderError};
-use crate::lock::rank;
 use crate::ray_tracing::BlasTriangleGeometry;
 use crate::resource::{AccelerationStructure, Buffer, Labeled, ScratchBuffer, StagingBuffer, Trackable};
 use crate::snatch::SnatchGuard;
 use crate::storage::Storage;
 use crate::track::PendingTransition;
-use hal::{Api, BufferUses, CommandEncoder, Device};
+use hal::BufferUses;
 use std::ops::Deref;
 use std::sync::Arc;
-use std::{cmp::max, iter, num::NonZeroU64, ops::Range, ptr};
+use std::{cmp::max, num::NonZeroU64, ops::Range};
 
 type BufferStorage<'a> = Vec<(
     Arc<Buffer>,
@@ -594,7 +592,7 @@ impl Global {
 
         let mut tlas_descriptors = Vec::with_capacity(tlas_storage.len());
 
-        for &(tlas, ref entries, ref scratch_buffer_offset, ref range) in &tlas_storage {
+        for &(tlas, ref entries, ref scratch_buffer_offset, _) in &tlas_storage {
             if tlas.update_mode == wgt::AccelerationStructureUpdateMode::PreferUpdate {
                 log::info!("only rebuild implemented")
             }
@@ -625,12 +623,10 @@ impl Global {
 
         if tlas_present {
             let staging_buffer = if !instance_buffer_staging_source.is_empty() {
-                unsafe {
-                    let mut staging_buffer = StagingBuffer::new(device, wgt::BufferSize::new(instance_buffer_staging_source.len() as u64).unwrap()).map_err(crate::device::DeviceError::from)?;
-                    staging_buffer.write(&instance_buffer_staging_source);
-                    let flushed = staging_buffer.flush();
-                    Some(flushed)
-                }
+                let mut staging_buffer = StagingBuffer::new(device, wgt::BufferSize::new(instance_buffer_staging_source.len() as u64).unwrap()).map_err(crate::device::DeviceError::from)?;
+                staging_buffer.write(&instance_buffer_staging_source);
+                let flushed = staging_buffer.flush();
+                Some(flushed)
             } else {
                 None
             };
diff --git a/wgpu-core/src/device/ray_tracing.rs b/wgpu-core/src/device/ray_tracing.rs
index c7d49007c3..7fb845aac9 100644
--- a/wgpu-core/src/device/ray_tracing.rs
+++ b/wgpu-core/src/device/ray_tracing.rs
@@ -1,12 +1,11 @@
 use std::mem::ManuallyDrop;
 use std::sync::Arc;
 
-use hal::{AccelerationStructureTriangleIndices, Device as _};
+use hal::AccelerationStructureTriangleIndices;
 
 use crate::{
     device::{Device, DeviceError, queue::TempResource},
     global::Global,
-    hal_api::HalApi,
     id::{self, BlasId, TlasId},
     LabelHelpers,
     lock::RwLock,
@@ -15,7 +14,7 @@ use crate::{
 #[cfg(feature = "trace")]
 use crate::device::trace;
 use crate::lock::rank;
-use crate::resource::{Trackable, TrackingData};
+use crate::resource::TrackingData;
 
 impl Device {
     fn create_blas(
@@ -272,7 +271,7 @@ impl Global {
 
         let hub = &self.hub;
 
-        let blas = match hub.blas_s.unregister(blas_id) {
+        let _blas = match hub.blas_s.unregister(blas_id) {
             Some(blas) => blas,
             None => {
                 return;
@@ -280,7 +279,7 @@ impl Global {
         };
 
         #[cfg(feature = "trace")]
-        if let Some(t) = blas.device.trace.lock().as_mut() {
+        if let Some(t) = _blas.device.trace.lock().as_mut() {
             t.add(trace::Action::DestroyBlas(blas_id));
         }
     }
@@ -325,7 +324,7 @@ impl Global {
 
         let hub = &self.hub;
 
-        let tlas = match hub.tlas_s.unregister(tlas_id) {
+        let _tlas = match hub.tlas_s.unregister(tlas_id) {
             Some(tlas) => tlas,
             None => {
                 return;
@@ -333,7 +332,7 @@ impl Global {
         };
 
         #[cfg(feature = "trace")]
-        if let Some(t) = tlas.device.trace.lock().as_mut() {
+        if let Some(t) = _tlas.device.trace.lock().as_mut() {
             t.add(trace::Action::DestroyTlas(tlas_id));
         }
     }
diff --git a/wgpu-core/src/lock/rank.rs b/wgpu-core/src/lock/rank.rs
index 9fa74a7e2d..8b55b27d8f 100644
--- a/wgpu-core/src/lock/rank.rs
+++ b/wgpu-core/src/lock/rank.rs
@@ -134,11 +134,9 @@ define_lock_ranks! {
     rank TEXTURE_BIND_GROUPS "Texture::bind_groups" followed by { }
     rank TEXTURE_INITIALIZATION_STATUS "Texture::initialization_status" followed by { }
     rank TEXTURE_VIEWS "Texture::views" followed by { }
-    rank BLAS "Blas::raw" followed by { }
     rank BLAS_BUILT_INDEX "Blas::built_index" followed by { }
     rank TLAS_BUILT_INDEX "Tlas::built_index" followed by { }
     rank TLAS_DEPENDENCIES "Tlas::dependencies" followed by { }
-    rank TLAS_INSTANCE_BUFFER "Tlas::instance_buffer" followed by { }
 
     #[cfg(test)]
     rank PAWN "pawn" followed by { ROOK, BISHOP }
diff --git a/wgpu-core/src/ray_tracing.rs b/wgpu-core/src/ray_tracing.rs
index fd1bdb8616..f747660855 100644
--- a/wgpu-core/src/ray_tracing.rs
+++ b/wgpu-core/src/ray_tracing.rs
@@ -1,7 +1,6 @@
 use crate::{
     command::CommandEncoderError,
     device::DeviceError,
-    hal_api::HalApi,
     id::{BlasId, BufferId, TlasId},
     resource::CreateBufferError,
 };
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 7c710f64f2..237a33d514 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -28,9 +28,8 @@ use std::{
     ptr::NonNull,
     sync::{Arc, Weak},
 };
-use std::cmp::max;
 use std::num::NonZeroU64;
-use hal::{BufferUses, DynAccelerationStructure};
+use hal::BufferUses;
 
 /// Information about the wgpu-core resource.
 ///
@@ -945,7 +944,6 @@ impl Drop for FlushedStagingBuffer {
 pub struct ScratchBuffer {
     raw: ManuallyDrop<Box<dyn hal::DynBuffer>>,
     device: Arc<Device>,
-    pub(crate) size: wgt::BufferSize,
 }
 
 impl ScratchBuffer {
@@ -964,7 +962,6 @@ impl ScratchBuffer {
         Ok(Self {
             raw: ManuallyDrop::new(raw),
             device: device.clone(),
-            size,
         })
     }
     pub(crate) fn raw(&self) -> &dyn hal::DynBuffer {
@@ -1905,13 +1902,11 @@ pub struct Blas {
 
 impl Drop for Blas {
     fn drop(&mut self) {
+        resource_log!("Destroy raw {}", self.error_ident());
+        // SAFETY: We are in the Drop impl, and we don't use self.raw anymore after this point.
+        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
         unsafe {
-            resource_log!("Destroy raw {}", self.error_ident());
-            // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
-            let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
-            unsafe {
-                self.device.raw().destroy_acceleration_structure(raw);
-            }
+            self.device.raw().destroy_acceleration_structure(raw);
         }
     }
 }
@@ -1947,7 +1942,6 @@ pub struct Tlas {
 impl Drop for Tlas {
     fn drop(&mut self) {
         unsafe {
-            use hal::Device;
             let structure = ManuallyDrop::take(&mut self.raw);
             let buffer = ManuallyDrop::take(&mut self.instance_buffer);
             resource_log!("Destroy raw {}", self.error_ident());
diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs
index a57ebd1e6f..9f15b2edfd 100644
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -123,7 +123,6 @@ pub(crate) use texture::{
     TextureUsageScope, TextureViewBindGroupState,
 };
 use wgt::strict_assert_ne;
-use crate::resource::AccelerationStructure;
 use crate::track::ray_tracing::AccelerationStructureTracker;
 
 #[repr(transparent)]
@@ -305,17 +304,6 @@ impl PendingTransition<hal::TextureUses> {
     }
 }
 
-impl PendingTransition<hal::AccelerationStructureUses> {
-    /// Produce the hal barrier corresponding to the transition.
-    pub fn into_hal<'a>(
-        self,
-    ) -> hal::AccelerationStructureBarrier {
-        hal::AccelerationStructureBarrier {
-            usage: self.usage,
-        }
-    }
-}
-
 /// The uses that a resource or subresource can be in.
 pub(crate) trait ResourceUses:
     fmt::Debug + ops::BitAnd<Output = Self> + ops::BitOr<Output = Self> + PartialEq + Sized + Copy
diff --git a/wgpu-core/src/track/ray_tracing.rs b/wgpu-core/src/track/ray_tracing.rs
index 1d673af401..08aa968715 100644
--- a/wgpu-core/src/track/ray_tracing.rs
+++ b/wgpu-core/src/track/ray_tracing.rs
@@ -1,17 +1,15 @@
 use std::sync::Arc;
-use hal::{AccelerationStructureBarrier, AccelerationStructureUses, BufferUses};
+use hal::AccelerationStructureUses;
 use wgt::strict_assert;
-use crate::resource::{AccelerationStructure, Trackable};
+use crate::resource::AccelerationStructure;
 use crate::track::metadata::ResourceMetadata;
-use crate::track::{PendingTransition, ResourceUses};
+use crate::track::ResourceUses;
 
 pub(crate) struct AccelerationStructureTracker<T: AccelerationStructure> {
     start: Vec<AccelerationStructureUses>,
     end: Vec<AccelerationStructureUses>,
 
     metadata: ResourceMetadata<Arc<T>>,
-
-    temp: Vec<PendingTransition<AccelerationStructureUses>>,
 }
 
 impl<T: AccelerationStructure> AccelerationStructureTracker<T> {
@@ -21,8 +19,6 @@ impl<T: AccelerationStructure> AccelerationStructureTracker<T> {
             end: Vec::new(),
 
             metadata: ResourceMetadata::new(),
-
-            temp: Vec::new(),
         }
     }
 
@@ -55,21 +51,6 @@ impl<T: AccelerationStructure> AccelerationStructureTracker<T> {
         self.metadata.contains(acceleration_structure.tracker_index().as_usize())
     }
 
-    /// Returns a list of all buffers tracked.
-    pub fn used_resources(&self) -> impl Iterator<Item = Arc<T>> + '_ {
-        self.metadata.owned_resources()
-    }
-
-    /// Drains all currently pending transitions.
-    pub fn drain_transitions<'a, 'b: 'a>(
-        &'b mut self,
-    ) -> impl Iterator<Item = AccelerationStructureBarrier> + 'b {
-        let buffer_barriers = self.temp.drain(..).map(|pending| {
-            pending.into_hal()
-        });
-        buffer_barriers
-    }
-
     /// Inserts a single resource into the resource tracker.
     pub fn set_single(&mut self, resource: Arc<T>) {
         let index: usize = resource.tracker_index().as_usize();
diff --git a/wgpu-core/src/track/stateless.rs b/wgpu-core/src/track/stateless.rs
index 8378ec4573..74db1025c6 100644
--- a/wgpu-core/src/track/stateless.rs
+++ b/wgpu-core/src/track/stateless.rs
@@ -1,6 +1,5 @@
 use std::slice::Iter;
 use std::sync::Arc;
-use std::vec::IntoIter;
 
 /// A tracker that holds strong references to resources.
 ///

From 2810f134649ef222fc3911d31d8692bdf46fc991 Mon Sep 17 00:00:00 2001
From: Vecvec <vectorsofvectors@gmail.com>
Date: Thu, 15 Aug 2024 11:09:59 +1200
Subject: [PATCH 224/226] clippy & fmt

---
 examples/src/ray_cube_compute/mod.rs  |  1 +
 examples/src/ray_cube_fragment/mod.rs |  1 +
 examples/src/ray_scene/mod.rs         |  1 +
 player/src/lib.rs                     |  4 +-
 wgpu-core/src/command/ray_tracing.rs  | 85 ++++++++++++++++-----------
 wgpu-core/src/device/life.rs          | 12 +---
 wgpu-core/src/device/ray_tracing.rs   | 55 ++++++++++-------
 wgpu-core/src/ray_tracing.rs          | 13 +---
 wgpu-core/src/resource.rs             |  4 +-
 wgpu-core/src/track/mod.rs            |  4 +-
 wgpu-core/src/track/ray_tracing.rs    | 11 ++--
 wgpu-core/src/track/stateless.rs      |  2 +-
 wgpu/src/backend/wgpu_core.rs         | 31 ++++------
 13 files changed, 117 insertions(+), 107 deletions(-)

diff --git a/examples/src/ray_cube_compute/mod.rs b/examples/src/ray_cube_compute/mod.rs
index 1cf2dc52de..4d2c238844 100644
--- a/examples/src/ray_cube_compute/mod.rs
+++ b/examples/src/ray_cube_compute/mod.rs
@@ -623,6 +623,7 @@ static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTest
     base_test_parameters: wgpu_test::TestParameters {
         required_features: <Example as crate::framework::Example>::required_features(),
         required_limits: <Example as crate::framework::Example>::required_limits(),
+        force_fxc: false,
         skips: vec![],
         failures: Vec::new(),
         required_downlevel_caps:
diff --git a/examples/src/ray_cube_fragment/mod.rs b/examples/src/ray_cube_fragment/mod.rs
index 854d0caa41..cfada8fd21 100644
--- a/examples/src/ray_cube_fragment/mod.rs
+++ b/examples/src/ray_cube_fragment/mod.rs
@@ -388,6 +388,7 @@ static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTest
     base_test_parameters: wgpu_test::TestParameters {
         required_features: <Example as crate::framework::Example>::required_features(),
         required_limits: <Example as crate::framework::Example>::required_limits(),
+        force_fxc: false,
         skips: vec![],
         failures: Vec::new(),
         required_downlevel_caps:
diff --git a/examples/src/ray_scene/mod.rs b/examples/src/ray_scene/mod.rs
index ef6a6bfb69..25f663d4bf 100644
--- a/examples/src/ray_scene/mod.rs
+++ b/examples/src/ray_scene/mod.rs
@@ -564,6 +564,7 @@ static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTest
     base_test_parameters: wgpu_test::TestParameters {
         required_features: <Example as crate::framework::Example>::required_features(),
         required_limits: <Example as crate::framework::Example>::required_limits(),
+        force_fxc: false,
         skips: vec![],
         failures: Vec::new(),
         required_downlevel_caps:
diff --git a/player/src/lib.rs b/player/src/lib.rs
index c340b828a2..c9ee55aa6d 100644
--- a/player/src/lib.rs
+++ b/player/src/lib.rs
@@ -150,7 +150,7 @@ impl GlobalPlay for wgc::global::Global {
                         log::error!("a trace of command_encoder_build_acceleration_structures_unsafe_tlas containing a tlas build is not replayable! skipping tlas build");
                     }
 
-                    self.command_encoder_build_acceleration_structures_unsafe_tlas::<A>(
+                    self.command_encoder_build_acceleration_structures_unsafe_tlas(
                         encoder,
                         blas_iter,
                         std::iter::empty(),
@@ -202,7 +202,7 @@ impl GlobalPlay for wgc::global::Global {
                         }
                     });
 
-                    self.command_encoder_build_acceleration_structures::<A>(
+                    self.command_encoder_build_acceleration_structures(
                         encoder, blas_iter, tlas_iter,
                     )
                     .unwrap();
diff --git a/wgpu-core/src/command/ray_tracing.rs b/wgpu-core/src/command/ray_tracing.rs
index fc13909bf5..a945bd9fba 100644
--- a/wgpu-core/src/command/ray_tracing.rs
+++ b/wgpu-core/src/command/ray_tracing.rs
@@ -17,7 +17,9 @@ use wgt::{math::align_to, BufferAddress, BufferUsages};
 
 use super::{BakedCommands, CommandBufferMutable, CommandEncoderError};
 use crate::ray_tracing::BlasTriangleGeometry;
-use crate::resource::{AccelerationStructure, Buffer, Labeled, ScratchBuffer, StagingBuffer, Trackable};
+use crate::resource::{
+    AccelerationStructure, Buffer, Labeled, ScratchBuffer, StagingBuffer, Trackable,
+};
 use crate::snatch::SnatchGuard;
 use crate::storage::Storage;
 use crate::track::PendingTransition;
@@ -35,13 +37,17 @@ type BufferStorage<'a> = Vec<(
     Option<Arc<Blas>>,
 )>;
 
-type BlasStorage<'a> = Vec<(Arc<Blas>, hal::AccelerationStructureEntries<'a, dyn hal::DynBuffer>, u64)>;
+type BlasStorage<'a> = Vec<(
+    Arc<Blas>,
+    hal::AccelerationStructureEntries<'a, dyn hal::DynBuffer>,
+    u64,
+)>;
 
 // This should be queried from the device, maybe the the hal api should pre aline it, since I am unsure how else we can idiomatically get this value.
 const SCRATCH_BUFFER_ALIGNMENT: u32 = 256;
 
 impl Global {
-    pub fn command_encoder_build_acceleration_structures_unsafe_tlas<'a, >(
+    pub fn command_encoder_build_acceleration_structures_unsafe_tlas<'a>(
         &self,
         command_encoder_id: CommandEncoderId,
         blas_iter: impl Iterator<Item = BlasBuildEntry<'a>>,
@@ -174,7 +180,11 @@ impl Global {
         )?;
 
         let mut scratch_buffer_tlas_size = 0;
-        let mut tlas_storage = Vec::<(&Tlas, hal::AccelerationStructureEntries<dyn hal::DynBuffer>, u64)>::new();
+        let mut tlas_storage = Vec::<(
+            &Tlas,
+            hal::AccelerationStructureEntries<dyn hal::DynBuffer>,
+            u64,
+        )>::new();
         let mut tlas_buf_storage = Vec::<(
             Arc<Buffer>,
             Option<PendingTransition<BufferUses>>,
@@ -246,12 +256,14 @@ impl Global {
             ));
         }
 
-        let scratch_size = match wgt::BufferSize::new(max(scratch_buffer_blas_size, scratch_buffer_tlas_size)) {
-            None => return Ok(()),
-            Some(size) => size,
-        };
+        let scratch_size =
+            match wgt::BufferSize::new(max(scratch_buffer_blas_size, scratch_buffer_tlas_size)) {
+                None => return Ok(()),
+                Some(size) => size,
+            };
 
-        let scratch_buffer = ScratchBuffer::new(device, scratch_size).map_err(crate::device::DeviceError::from)?;
+        let scratch_buffer =
+            ScratchBuffer::new(device, scratch_size).map_err(crate::device::DeviceError::from)?;
 
         let scratch_buffer_barrier = hal::BufferBarrier::<dyn hal::DynBuffer> {
             buffer: scratch_buffer.raw(),
@@ -297,8 +309,7 @@ impl Global {
 
         if tlas_present {
             unsafe {
-                cmd_buf_raw
-                    .build_acceleration_structures(&tlas_descriptors.collect::<Vec<_>>());
+                cmd_buf_raw.build_acceleration_structures(&tlas_descriptors.collect::<Vec<_>>());
 
                 cmd_buf_raw.place_acceleration_structure_barrier(
                     hal::AccelerationStructureBarrier {
@@ -317,7 +328,7 @@ impl Global {
         Ok(())
     }
 
-    pub fn command_encoder_build_acceleration_structures<'a, >(
+    pub fn command_encoder_build_acceleration_structures<'a>(
         &self,
         command_encoder_id: CommandEncoderId,
         blas_iter: impl Iterator<Item = BlasBuildEntry<'a>>,
@@ -479,11 +490,8 @@ impl Global {
             &mut scratch_buffer_blas_size,
             &mut blas_storage,
         )?;
-        let mut tlas_lock_store = Vec::<(
-            &dyn hal::DynBuffer,
-            Option<TlasPackage>,
-            Arc<Tlas>,
-        )>::new();
+        let mut tlas_lock_store =
+            Vec::<(&dyn hal::DynBuffer, Option<TlasPackage>, Arc<Tlas>)>::new();
 
         for package in tlas_iter {
             let tlas = tlas_guard
@@ -572,13 +580,15 @@ impl Global {
             ));
         }
 
-        let scratch_size = match wgt::BufferSize::new(max(scratch_buffer_blas_size, scratch_buffer_tlas_size)) {
-            // if the size is zero there is nothing to build
-            None => return Ok(()),
-            Some(size) => size,
-        };
+        let scratch_size =
+            match wgt::BufferSize::new(max(scratch_buffer_blas_size, scratch_buffer_tlas_size)) {
+                // if the size is zero there is nothing to build
+                None => return Ok(()),
+                Some(size) => size,
+            };
 
-        let scratch_buffer = ScratchBuffer::new(device, scratch_size).map_err(crate::device::DeviceError::from)?;
+        let scratch_buffer =
+            ScratchBuffer::new(device, scratch_size).map_err(crate::device::DeviceError::from)?;
 
         let scratch_buffer_barrier = hal::BufferBarrier::<dyn hal::DynBuffer> {
             buffer: scratch_buffer.raw(),
@@ -623,7 +633,11 @@ impl Global {
 
         if tlas_present {
             let staging_buffer = if !instance_buffer_staging_source.is_empty() {
-                let mut staging_buffer = StagingBuffer::new(device, wgt::BufferSize::new(instance_buffer_staging_source.len() as u64).unwrap()).map_err(crate::device::DeviceError::from)?;
+                let mut staging_buffer = StagingBuffer::new(
+                    device,
+                    wgt::BufferSize::new(instance_buffer_staging_source.len() as u64).unwrap(),
+                )
+                .map_err(crate::device::DeviceError::from)?;
                 staging_buffer.write(&instance_buffer_staging_source);
                 let flushed = staging_buffer.flush();
                 Some(flushed)
@@ -663,10 +677,7 @@ impl Global {
                     cmd_buf_raw.copy_buffer_to_buffer(
                         // the range whose size we just checked end is at (at that point in time) instance_buffer_staging_source.len()
                         // and since instance_buffer_staging_source doesn't shrink we can un wrap this without a panic
-                        staging_buffer
-                            .as_ref()
-                            .unwrap()
-                            .raw(),
+                        staging_buffer.as_ref().unwrap().raw(),
                         tlas.instance_buffer.as_ref(),
                         &[temp],
                     );
@@ -676,8 +687,7 @@ impl Global {
             unsafe {
                 cmd_buf_raw.transition_buffers(&instance_buffer_barriers);
 
-                cmd_buf_raw
-                    .build_acceleration_structures(&tlas_descriptors);
+                cmd_buf_raw.build_acceleration_structures(&tlas_descriptors);
 
                 cmd_buf_raw.place_acceleration_structure_barrier(
                     hal::AccelerationStructureBarrier {
@@ -900,7 +910,8 @@ fn iter_buffers<'a, 'b>(
     scratch_buffer_blas_size: &mut u64,
     blas_storage: &mut BlasStorage<'a>,
 ) -> Result<(), BuildAccelerationStructureError> {
-    let mut triangle_entries = Vec::<hal::AccelerationStructureTriangles<dyn hal::DynBuffer>>::new();
+    let mut triangle_entries =
+        Vec::<hal::AccelerationStructureTriangles<dyn hal::DynBuffer>>::new();
     for buf in buf_storage {
         let mesh = &buf.4;
         let vertex_buffer = {
@@ -1093,7 +1104,11 @@ fn map_blas<'a>(
         BufferAddress,
     ),
     scratch_buffer: &'a dyn hal::DynBuffer,
-) -> hal::BuildAccelerationStructureDescriptor<'a, dyn hal::DynBuffer, dyn hal::DynAccelerationStructure> {
+) -> hal::BuildAccelerationStructureDescriptor<
+    'a,
+    dyn hal::DynBuffer,
+    dyn hal::DynAccelerationStructure,
+> {
     let (blas, entries, scratch_buffer_offset) = storage;
     if blas.update_mode == wgt::AccelerationStructureUpdateMode::PreferUpdate {
         log::info!("only rebuild implemented")
@@ -1114,7 +1129,11 @@ fn build_blas<'a>(
     blas_present: bool,
     tlas_present: bool,
     input_barriers: Vec<hal::BufferBarrier<dyn hal::DynBuffer>>,
-    blas_descriptors: &[hal::BuildAccelerationStructureDescriptor<'a, dyn hal::DynBuffer, dyn hal::DynAccelerationStructure>],
+    blas_descriptors: &[hal::BuildAccelerationStructureDescriptor<
+        'a,
+        dyn hal::DynBuffer,
+        dyn hal::DynAccelerationStructure,
+    >],
     scratch_buffer_barrier: hal::BufferBarrier<dyn hal::DynBuffer>,
 ) {
     unsafe {
diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index 4937b45343..588f962000 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -9,9 +9,9 @@ use crate::{
 };
 use smallvec::SmallVec;
 
+use crate::resource::{Blas, Tlas};
 use std::sync::Arc;
 use thiserror::Error;
-use crate::resource::{Blas, Tlas};
 
 /// A command submitted to the GPU for execution.
 ///
@@ -116,10 +116,7 @@ impl ActiveSubmission {
                 return true;
             }
 
-            if encoder
-                .pending_buffers
-                .contains_key(&blas.tracker_index())
-            {
+            if encoder.pending_buffers.contains_key(&blas.tracker_index()) {
                 return true;
             }
         }
@@ -138,10 +135,7 @@ impl ActiveSubmission {
                 return true;
             }
 
-            if encoder
-                .pending_buffers
-                .contains_key(&tlas.tracker_index())
-            {
+            if encoder.pending_buffers.contains_key(&tlas.tracker_index()) {
                 return true;
             }
         }
diff --git a/wgpu-core/src/device/ray_tracing.rs b/wgpu-core/src/device/ray_tracing.rs
index 7fb845aac9..c4b785683d 100644
--- a/wgpu-core/src/device/ray_tracing.rs
+++ b/wgpu-core/src/device/ray_tracing.rs
@@ -3,18 +3,18 @@ use std::sync::Arc;
 
 use hal::AccelerationStructureTriangleIndices;
 
+#[cfg(feature = "trace")]
+use crate::device::trace;
+use crate::lock::rank;
+use crate::resource::TrackingData;
 use crate::{
-    device::{Device, DeviceError, queue::TempResource},
+    device::{queue::TempResource, Device, DeviceError},
     global::Global,
     id::{self, BlasId, TlasId},
-    LabelHelpers,
     lock::RwLock,
-    ray_tracing::{CreateBlasError, CreateTlasError, get_raw_tlas_instance_size}, resource,
+    ray_tracing::{get_raw_tlas_instance_size, CreateBlasError, CreateTlasError},
+    resource, LabelHelpers,
 };
-#[cfg(feature = "trace")]
-use crate::device::trace;
-use crate::lock::rank;
-use crate::resource::TrackingData;
 
 impl Device {
     fn create_blas(
@@ -25,14 +25,18 @@ impl Device {
         let size_info = match &sizes {
             wgt::BlasGeometrySizeDescriptors::Triangles { desc } => {
                 let mut entries =
-                    Vec::<hal::AccelerationStructureTriangles<dyn hal::DynBuffer>>::with_capacity(desc.len());
+                    Vec::<hal::AccelerationStructureTriangles<dyn hal::DynBuffer>>::with_capacity(
+                        desc.len(),
+                    );
                 for x in desc {
                     if x.index_count.is_some() != x.index_format.is_some() {
                         return Err(CreateBlasError::MissingIndexData);
                     }
                     let indices =
                         x.index_count
-                            .map(|count| AccelerationStructureTriangleIndices::<dyn hal::DynBuffer> {
+                            .map(|count| AccelerationStructureTriangleIndices::<
+                                dyn hal::DynBuffer,
+                            > {
                                 format: x.index_format.unwrap(),
                                 buffer: None,
                                 offset: 0,
@@ -70,7 +74,10 @@ impl Device {
         }
         .map_err(DeviceError::from)?;
 
-        let handle = unsafe { self.raw().get_acceleration_structure_device_address(raw.as_ref()) };
+        let handle = unsafe {
+            self.raw()
+                .get_acceleration_structure_device_address(raw.as_ref())
+        };
 
         Ok(Arc::new(resource::Blas {
             raw: ManuallyDrop::new(raw),
@@ -253,12 +260,10 @@ impl Global {
 
         let temp = TempResource::Blas(blas.clone());
         {
-            let mut device_lock = device
-                .lock_life();
+            let mut device_lock = device.lock_life();
             let last_submit_index = device_lock.get_blas_latest_submission_index(blas.as_ref());
             if let Some(last_submit_index) = last_submit_index {
-                device_lock
-                    .schedule_resource_destruction(temp, last_submit_index);
+                device_lock.schedule_resource_destruction(temp, last_submit_index);
             }
         }
 
@@ -279,8 +284,12 @@ impl Global {
         };
 
         #[cfg(feature = "trace")]
-        if let Some(t) = _blas.device.trace.lock().as_mut() {
-            t.add(trace::Action::DestroyBlas(blas_id));
+        {
+            let mut lock = _blas.device.trace.lock();
+
+            if let Some(t) = lock.as_mut() {
+                t.add(trace::Action::DestroyBlas(blas_id));
+            }
         }
     }
 
@@ -306,12 +315,10 @@ impl Global {
 
         let temp = TempResource::Tlas(tlas.clone());
         {
-            let mut device_lock = device
-                .lock_life();
+            let mut device_lock = device.lock_life();
             let last_submit_index = device_lock.get_tlas_latest_submission_index(tlas.as_ref());
             if let Some(last_submit_index) = last_submit_index {
-                device_lock
-                    .schedule_resource_destruction(temp, last_submit_index);
+                device_lock.schedule_resource_destruction(temp, last_submit_index);
             }
         }
 
@@ -332,8 +339,12 @@ impl Global {
         };
 
         #[cfg(feature = "trace")]
-        if let Some(t) = _tlas.device.trace.lock().as_mut() {
-            t.add(trace::Action::DestroyTlas(tlas_id));
+        {
+            let mut lock = _tlas.device.trace.lock();
+
+            if let Some(t) = lock.as_mut() {
+                t.add(trace::Action::DestroyTlas(tlas_id));
+            }
         }
     }
 }
diff --git a/wgpu-core/src/ray_tracing.rs b/wgpu-core/src/ray_tracing.rs
index f747660855..a52a750c96 100644
--- a/wgpu-core/src/ray_tracing.rs
+++ b/wgpu-core/src/ray_tracing.rs
@@ -270,25 +270,18 @@ struct RawTlasInstance {
     acceleration_structure_reference: u64,
 }
 
-pub(crate) fn tlas_instance_into_bytes(
-    instance: &TlasInstance,
-    blas_address: u64,
-) -> Vec<u8> {
+pub(crate) fn tlas_instance_into_bytes(instance: &TlasInstance, blas_address: u64) -> Vec<u8> {
     // TODO: get the device to do this
     const MAX_U24: u32 = (1u32 << 24u32) - 1u32;
     let temp = RawTlasInstance {
         transform: *instance.transform,
-        custom_index_and_mask: (instance.custom_index & MAX_U24)
-            | (u32::from(instance.mask) << 24),
+        custom_index_and_mask: (instance.custom_index & MAX_U24) | (u32::from(instance.mask) << 24),
         shader_binding_table_record_offset_and_flags: 0,
         acceleration_structure_reference: blas_address,
     };
     let temp: *const _ = &temp;
     unsafe {
-        slice::from_raw_parts::<u8>(
-            temp as *const u8,
-            std::mem::size_of::<RawTlasInstance>(),
-        )
+        slice::from_raw_parts::<u8>(temp.cast::<u8>(), std::mem::size_of::<RawTlasInstance>())
             .to_vec()
     }
 }
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 237a33d514..83e45c7f2c 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -20,6 +20,8 @@ use crate::{
 use smallvec::SmallVec;
 use thiserror::Error;
 
+use hal::BufferUses;
+use std::num::NonZeroU64;
 use std::{
     borrow::{Borrow, Cow},
     fmt::Debug,
@@ -28,8 +30,6 @@ use std::{
     ptr::NonNull,
     sync::{Arc, Weak},
 };
-use std::num::NonZeroU64;
-use hal::BufferUses;
 
 /// Information about the wgpu-core resource.
 ///
diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs
index 9f15b2edfd..9a66b5f903 100644
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -98,9 +98,9 @@ Device <- CommandBuffer = insert(device.start, device.end, buffer.start, buffer.
 mod buffer;
 mod metadata;
 mod range;
+mod ray_tracing;
 mod stateless;
 mod texture;
-mod ray_tracing;
 
 use crate::{
     binding_model, command,
@@ -113,6 +113,7 @@ use crate::{
 use std::{fmt, ops, sync::Arc};
 use thiserror::Error;
 
+use crate::track::ray_tracing::AccelerationStructureTracker;
 pub(crate) use buffer::{
     BufferBindGroupState, BufferTracker, BufferUsageScope, DeviceBufferTracker,
 };
@@ -123,7 +124,6 @@ pub(crate) use texture::{
     TextureUsageScope, TextureViewBindGroupState,
 };
 use wgt::strict_assert_ne;
-use crate::track::ray_tracing::AccelerationStructureTracker;
 
 #[repr(transparent)]
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
diff --git a/wgpu-core/src/track/ray_tracing.rs b/wgpu-core/src/track/ray_tracing.rs
index 08aa968715..c344526dfb 100644
--- a/wgpu-core/src/track/ray_tracing.rs
+++ b/wgpu-core/src/track/ray_tracing.rs
@@ -1,9 +1,9 @@
-use std::sync::Arc;
-use hal::AccelerationStructureUses;
-use wgt::strict_assert;
 use crate::resource::AccelerationStructure;
 use crate::track::metadata::ResourceMetadata;
 use crate::track::ResourceUses;
+use hal::AccelerationStructureUses;
+use std::sync::Arc;
+use wgt::strict_assert;
 
 pub(crate) struct AccelerationStructureTracker<T: AccelerationStructure> {
     start: Vec<AccelerationStructureUses>,
@@ -48,7 +48,8 @@ impl<T: AccelerationStructure> AccelerationStructureTracker<T> {
 
     /// Returns true if the given buffer is tracked.
     pub fn contains(&self, acceleration_structure: &T) -> bool {
-        self.metadata.contains(acceleration_structure.tracker_index().as_usize())
+        self.metadata
+            .contains(acceleration_structure.tracker_index().as_usize())
     }
 
     /// Inserts a single resource into the resource tracker.
@@ -77,4 +78,4 @@ impl ResourceUses for AccelerationStructureUses {
     fn any_exclusive(self) -> bool {
         self.intersects(Self::EXCLUSIVE)
     }
-}
\ No newline at end of file
+}
diff --git a/wgpu-core/src/track/stateless.rs b/wgpu-core/src/track/stateless.rs
index 74db1025c6..975a850f36 100644
--- a/wgpu-core/src/track/stateless.rs
+++ b/wgpu-core/src/track/stateless.rs
@@ -31,6 +31,6 @@ impl<'a, T> IntoIterator for &'a StatelessTracker<T> {
     type IntoIter = Iter<'a, Arc<T>>;
 
     fn into_iter(self) -> Self::IntoIter {
-        self.resources.as_slice().into_iter()
+        self.resources.as_slice().iter()
     }
 }
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 3b8bf54c11..4d8b820f70 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -3360,12 +3360,8 @@ impl crate::Context for ContextWgpuCore {
         sizes: wgt::BlasGeometrySizeDescriptors,
     ) -> (Self::BlasId, Option<u64>, Self::BlasData) {
         let global = &self.0;
-        let (id, handle, error) = global.device_create_blas(
-            *device,
-            &desc.map_label(|l| l.map(Borrowed)),
-            sizes,
-            None,
-        );
+        let (id, handle, error) =
+            global.device_create_blas(*device, &desc.map_label(|l| l.map(Borrowed)), sizes, None);
         if let Some(cause) = error {
             self.handle_error(
                 &device_data.error_sink,
@@ -3390,11 +3386,8 @@ impl crate::Context for ContextWgpuCore {
         desc: &crate::ray_tracing::CreateTlasDescriptor<'_>,
     ) -> (Self::TlasId, Self::TlasData) {
         let global = &self.0;
-        let (id, error) = global.device_create_tlas(
-            *device,
-            &desc.map_label(|l| l.map(Borrowed)),
-            None,
-        );
+        let (id, error) =
+            global.device_create_tlas(*device, &desc.map_label(|l| l.map(Borrowed)), None);
         if let Some(cause) = error {
             self.handle_error(
                 &device_data.error_sink,
@@ -3456,11 +3449,9 @@ impl crate::Context for ContextWgpuCore {
             },
         );
 
-        if let Err(cause) = global.command_encoder_build_acceleration_structures_unsafe_tlas(
-            *encoder,
-            blas,
-            tlas
-        ) {
+        if let Err(cause) =
+            global.command_encoder_build_acceleration_structures_unsafe_tlas(*encoder, blas, tlas)
+        {
             self.handle_error_nolabel(
                 &encoder_data.error_sink,
                 cause,
@@ -3522,11 +3513,9 @@ impl crate::Context for ContextWgpuCore {
             }
         });
 
-        if let Err(cause) = global.command_encoder_build_acceleration_structures(
-            *encoder,
-            blas,
-            tlas
-        ) {
+        if let Err(cause) =
+            global.command_encoder_build_acceleration_structures(*encoder, blas, tlas)
+        {
             self.handle_error_nolabel(
                 &encoder_data.error_sink,
                 cause,

From 78665572e5558432063156293af3d2bf9e27c62e Mon Sep 17 00:00:00 2001
From: Vecvec <vectorsofvectors@gmail.com>
Date: Thu, 15 Aug 2024 13:00:17 +1200
Subject: [PATCH 225/226] re-add matching to get_raw_tlas_instance_size and
 tlas_instance_into_bytes

---
 wgpu-core/src/command/ray_tracing.rs |  2 +-
 wgpu-core/src/device/ray_tracing.rs  |  2 +-
 wgpu-core/src/ray_tracing.rs         | 42 ++++++++++++++++++----------
 3 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/wgpu-core/src/command/ray_tracing.rs b/wgpu-core/src/command/ray_tracing.rs
index a945bd9fba..02fac9fc9f 100644
--- a/wgpu-core/src/command/ray_tracing.rs
+++ b/wgpu-core/src/command/ray_tracing.rs
@@ -540,7 +540,7 @@ impl Global {
                 cmd_buf_data.trackers.blas_s.set_single(blas.clone());
 
                 instance_buffer_staging_source
-                    .extend(tlas_instance_into_bytes(&instance, blas.handle));
+                    .extend(tlas_instance_into_bytes(&instance, blas.handle, device.backend()));
 
                 instance_count += 1;
 
diff --git a/wgpu-core/src/device/ray_tracing.rs b/wgpu-core/src/device/ray_tracing.rs
index c4b785683d..7350d5963a 100644
--- a/wgpu-core/src/device/ray_tracing.rs
+++ b/wgpu-core/src/device/ray_tracing.rs
@@ -123,7 +123,7 @@ impl Device {
         .map_err(DeviceError::from)?;
 
         let instance_buffer_size =
-            get_raw_tlas_instance_size() * std::cmp::max(desc.max_instances, 1) as usize;
+            get_raw_tlas_instance_size(self.backend()) * std::cmp::max(desc.max_instances, 1) as usize;
         let instance_buffer = unsafe {
             self.raw().create_buffer(&hal::BufferDescriptor {
                 label: Some("(wgpu-core) instances_buffer"),
diff --git a/wgpu-core/src/ray_tracing.rs b/wgpu-core/src/ray_tracing.rs
index a52a750c96..f86159a7f0 100644
--- a/wgpu-core/src/ray_tracing.rs
+++ b/wgpu-core/src/ray_tracing.rs
@@ -256,9 +256,13 @@ pub struct TraceTlasPackage {
     pub lowest_unmodified: u32,
 }
 
-pub(crate) fn get_raw_tlas_instance_size() -> usize {
+pub(crate) fn get_raw_tlas_instance_size(backend: wgt::Backend) -> usize {
     // TODO: this should be provided by the backend
-    64
+    match backend {
+        wgt::Backend::Empty => 0,
+        wgt::Backend::Vulkan => 64,
+        _ => unimplemented!(),
+    }
 }
 
 #[derive(Clone)]
@@ -270,18 +274,28 @@ struct RawTlasInstance {
     acceleration_structure_reference: u64,
 }
 
-pub(crate) fn tlas_instance_into_bytes(instance: &TlasInstance, blas_address: u64) -> Vec<u8> {
+pub(crate) fn tlas_instance_into_bytes(instance: &TlasInstance, blas_address: u64, backend: wgt::Backend) -> Vec<u8> {
     // TODO: get the device to do this
-    const MAX_U24: u32 = (1u32 << 24u32) - 1u32;
-    let temp = RawTlasInstance {
-        transform: *instance.transform,
-        custom_index_and_mask: (instance.custom_index & MAX_U24) | (u32::from(instance.mask) << 24),
-        shader_binding_table_record_offset_and_flags: 0,
-        acceleration_structure_reference: blas_address,
-    };
-    let temp: *const _ = &temp;
-    unsafe {
-        slice::from_raw_parts::<u8>(temp.cast::<u8>(), std::mem::size_of::<RawTlasInstance>())
-            .to_vec()
+    match backend {
+        wgt::Backend::Empty => vec![],
+        wgt::Backend::Vulkan => {
+            const MAX_U24: u32 = (1u32 << 24u32) - 1u32;
+            let temp = RawTlasInstance {
+                transform: *instance.transform,
+                custom_index_and_mask: (instance.custom_index & MAX_U24)
+                    | (u32::from(instance.mask) << 24),
+                shader_binding_table_record_offset_and_flags: 0,
+                acceleration_structure_reference: blas_address,
+            };
+            let temp: *const _ = &temp;
+            unsafe {
+                slice::from_raw_parts::<u8>(
+                    temp.cast::<u8>(),
+                    std::mem::size_of::<RawTlasInstance>(),
+                )
+                    .to_vec()
+            }
+        }
+        _ => unimplemented!(),
     }
 }

From af183c5d74212df339cd2a7284538a1376b54e5e Mon Sep 17 00:00:00 2001
From: Vecvec <vectorsofvectors@gmail.com>
Date: Thu, 15 Aug 2024 13:02:26 +1200
Subject: [PATCH 226/226] fmt

---
 wgpu-core/src/command/ray_tracing.rs | 7 +++++--
 wgpu-core/src/device/ray_tracing.rs  | 4 ++--
 wgpu-core/src/ray_tracing.rs         | 8 ++++++--
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/wgpu-core/src/command/ray_tracing.rs b/wgpu-core/src/command/ray_tracing.rs
index 02fac9fc9f..038ff0ece2 100644
--- a/wgpu-core/src/command/ray_tracing.rs
+++ b/wgpu-core/src/command/ray_tracing.rs
@@ -539,8 +539,11 @@ impl Global {
 
                 cmd_buf_data.trackers.blas_s.set_single(blas.clone());
 
-                instance_buffer_staging_source
-                    .extend(tlas_instance_into_bytes(&instance, blas.handle, device.backend()));
+                instance_buffer_staging_source.extend(tlas_instance_into_bytes(
+                    &instance,
+                    blas.handle,
+                    device.backend(),
+                ));
 
                 instance_count += 1;
 
diff --git a/wgpu-core/src/device/ray_tracing.rs b/wgpu-core/src/device/ray_tracing.rs
index 7350d5963a..3d1e102b6d 100644
--- a/wgpu-core/src/device/ray_tracing.rs
+++ b/wgpu-core/src/device/ray_tracing.rs
@@ -122,8 +122,8 @@ impl Device {
         }
         .map_err(DeviceError::from)?;
 
-        let instance_buffer_size =
-            get_raw_tlas_instance_size(self.backend()) * std::cmp::max(desc.max_instances, 1) as usize;
+        let instance_buffer_size = get_raw_tlas_instance_size(self.backend())
+            * std::cmp::max(desc.max_instances, 1) as usize;
         let instance_buffer = unsafe {
             self.raw().create_buffer(&hal::BufferDescriptor {
                 label: Some("(wgpu-core) instances_buffer"),
diff --git a/wgpu-core/src/ray_tracing.rs b/wgpu-core/src/ray_tracing.rs
index f86159a7f0..678d66e0cd 100644
--- a/wgpu-core/src/ray_tracing.rs
+++ b/wgpu-core/src/ray_tracing.rs
@@ -274,7 +274,11 @@ struct RawTlasInstance {
     acceleration_structure_reference: u64,
 }
 
-pub(crate) fn tlas_instance_into_bytes(instance: &TlasInstance, blas_address: u64, backend: wgt::Backend) -> Vec<u8> {
+pub(crate) fn tlas_instance_into_bytes(
+    instance: &TlasInstance,
+    blas_address: u64,
+    backend: wgt::Backend,
+) -> Vec<u8> {
     // TODO: get the device to do this
     match backend {
         wgt::Backend::Empty => vec![],
@@ -293,7 +297,7 @@ pub(crate) fn tlas_instance_into_bytes(instance: &TlasInstance, blas_address: u6
                     temp.cast::<u8>(),
                     std::mem::size_of::<RawTlasInstance>(),
                 )
-                    .to_vec()
+                .to_vec()
             }
         }
         _ => unimplemented!(),