From 82b73408dc94e4b52e2021aad0c3f72a15ae1713 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Fri, 31 Mar 2023 22:52:22 +0200
Subject: [PATCH 1/6] (unvalidated) picking world pos retrieval

---
 crates/re_renderer/examples/picking.rs        |  18 +-
 .../src/allocator/gpu_readback_belt.rs        |   3 +-
 .../src/draw_phases/picking_layer.rs          | 182 ++++++++++++------
 crates/re_renderer/src/wgpu_resources/mod.rs  |   4 +-
 .../src/ui/view_spatial/scene/picking.rs      |   2 +-
 5 files changed, 138 insertions(+), 71 deletions(-)
diff --git a/crates/re_renderer/examples/picking.rs b/crates/re_renderer/examples/picking.rs
index ed21f8f3875f..5344e044d26d 100644
--- a/crates/re_renderer/examples/picking.rs
+++ b/crates/re_renderer/examples/picking.rs
@@ -102,19 +102,19 @@ impl framework::Example for Picking {
             PickingLayerProcessor::next_readback_result::<()>(re_ctx, READBACK_IDENTIFIER)
         {
             // Grab the middle pixel. usually we'd want to do something clever that snaps the the closest object of interest.
-            let picked_pixel = picking_result.picking_data[(picking_result.rect.extent.x / 2
-                + (picking_result.rect.extent.y / 2) * picking_result.rect.extent.x)
-                as usize];
+            let picked_id = picking_result.picked_id(picking_result.rect.extent / 2);
+            //let picked_position =
+            //    picking_result.picked_world_position(picking_result.rect.extent / 2);
+            //dbg!(picked_position, picked_id);
 
             self.mesh_is_hovered = false;
-            if picked_pixel == MESH_ID {
+            if picked_id == MESH_ID {
                 self.mesh_is_hovered = true;
-            } else if picked_pixel.object.0 != 0
-                && picked_pixel.object.0 <= self.point_sets.len() as u64
+            } else if picked_id.object.0 != 0 && picked_id.object.0 <= self.point_sets.len() as u64
             {
-                let point_set = &mut self.point_sets[picked_pixel.object.0 as usize - 1];
-                point_set.radii[picked_pixel.instance.0 as usize] = Size::new_scene(0.1);
-                point_set.colors[picked_pixel.instance.0 as usize] = Color32::DEBUG_COLOR;
+                let point_set = &mut self.point_sets[picked_id.object.0 as usize - 1];
+                point_set.radii[picked_id.instance.0 as usize] = Size::new_scene(0.1);
+                point_set.colors[picked_id.instance.0 as usize] = Color32::DEBUG_COLOR;
             }
         }
 
diff --git a/crates/re_renderer/src/allocator/gpu_readback_belt.rs b/crates/re_renderer/src/allocator/gpu_readback_belt.rs
index 6facafc3e761..8e5f413743e9 100644
--- a/crates/re_renderer/src/allocator/gpu_readback_belt.rs
+++ b/crates/re_renderer/src/allocator/gpu_readback_belt.rs
@@ -86,7 +86,8 @@ impl GpuReadbackBuffer {
                 },
             );
 
-            self.range_in_chunk = start_offset..self.range_in_chunk.end;
+            self.range_in_chunk =
+                (start_offset + buffer_info.buffer_size_padded)..self.range_in_chunk.end;
         }
     }
 
diff --git a/crates/re_renderer/src/draw_phases/picking_layer.rs b/crates/re_renderer/src/draw_phases/picking_layer.rs
index 5270f4bf6093..8214a195bd62 100644
--- a/crates/re_renderer/src/draw_phases/picking_layer.rs
+++ b/crates/re_renderer/src/draw_phases/picking_layer.rs
@@ -26,16 +26,54 @@ pub struct PickingResult<T: 'static + Send + Sync> {
     /// Describes the area of the picking layer that was read back.
     pub rect: IntRect,
 
-    /// Picking data for the requested rectangle.
+    /// Picking id data for the requested rectangle.
     ///
     /// GPU internal row padding has already been removed.
     /// Data is stored row wise, left to right, top to bottom.
-    pub picking_data: Vec<PickingLayerId>,
+    pub picking_id_data: Vec<PickingLayerId>,
+
+    /// Picking depth data for the requested rectangle.
+    /// TODO: refer to utility for interpretation
+    ///
+    /// GPU internal row padding has already been removed.
+    /// Data is stored row wise, left to right, top to bottom.
+    pub picking_depth_data: Vec<f32>,
+
+    /// Transforms a position on the picking rect to a world position.
+    world_from_cropped_projection: glam::Mat4,
+}
+
+impl<T: 'static + Send + Sync> PickingResult<T> {
+    /// Returns the picked world position.
+    ///
+    /// Panics if the position is outside of the picking rect.
+    ///
+    /// Keep in mind that the picked position may be (negative) infinity if nothing was picked.
+    #[inline]
+    pub fn picked_world_position(&self, pos_on_picking_rect: glam::UVec2) -> glam::Vec3 {
+        let raw_depth = self.picking_depth_data
+            [(pos_on_picking_rect.y * self.rect.width() + pos_on_picking_rect.x) as usize];
+
+        self.world_from_cropped_projection.project_point3(
+            pixel_coord_to_ndc(pos_on_picking_rect.as_vec2(), self.rect.extent.as_vec2())
+                .extend(raw_depth),
+        )
+    }
+
+    /// Returns the picked picking id.
+    ///
+    /// Panics if the position is outside of the picking rect.
+    #[inline]
+    pub fn picked_id(&self, pos_on_picking_rect: glam::UVec2) -> PickingLayerId {
+        self.picking_id_data
+            [(pos_on_picking_rect.y * self.rect.width() + pos_on_picking_rect.x) as usize]
+    }
 }
 
 /// Type used as user data on the gpu readback belt.
 struct ReadbackBeltMetadata<T: 'static + Send + Sync> {
     picking_rect: IntRect,
+    world_from_cropped_projection: glam::Mat4,
     user_data: T,
 }
 
@@ -76,6 +114,14 @@ impl From<PickingLayerId> for [u32; 4] {
     }
 }
 
+/// Converts a pixel coordinate to normalized device coordinates.
+fn pixel_coord_to_ndc(coord: glam::Vec2, target_resolution: glam::Vec2) -> glam::Vec2 {
+    glam::vec2(
+        coord.x / target_resolution.x * 2.0 - 1.0,
+        1.0 - coord.y / target_resolution.y * 2.0,
+    )
+}
+
 /// Manages the rendering of the picking layer pass, its render targets & readback buffer.
 ///
 /// The view builder creates this for every frame that requests a picking result.
@@ -89,9 +135,8 @@ pub struct PickingLayerProcessor {
 impl PickingLayerProcessor {
     /// The texture format used for the picking layer.
     pub const PICKING_LAYER_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba32Uint;
-
-    pub const PICKING_LAYER_DEPTH_FORMAT: wgpu::TextureFormat =
-        ViewBuilder::MAIN_TARGET_DEPTH_FORMAT;
+    /// The depth format used for the picking layer - f32 makes it easiest to deal with retrieved depth and is guaranteed to be copyable.
+    pub const PICKING_LAYER_DEPTH_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Depth32Float;
 
     pub const PICKING_LAYER_MSAA_STATE: wgpu::MultisampleState = wgpu::MultisampleState {
         count: 1,
@@ -122,30 +167,6 @@ impl PickingLayerProcessor {
         readback_identifier: GpuReadbackIdentifier,
         readback_user_data: T,
     ) -> Self {
-        let row_info_id = Texture2DBufferInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.extent);
-        //let row_info_depth =
-        //Texture2DBufferInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.extent);
-
-        // Offset of the depth buffer in the readback buffer needs to be aligned to size of a depth pixel.
-        // This is "trivially true" if the size of the depth format is a multiple of the size of the id format.
-        debug_assert!(
-            Self::PICKING_LAYER_FORMAT.describe().block_size
-                % Self::PICKING_LAYER_DEPTH_FORMAT.describe().block_size
-                == 0
-        );
-        let buffer_size = row_info_id.buffer_size_padded; // + row_info_depth.buffer_size_padded;
-
-        let readback_buffer = ctx.gpu_readback_belt.lock().allocate(
-            &ctx.device,
-            &ctx.gpu_resources.buffers,
-            buffer_size,
-            readback_identifier,
-            Box::new(ReadbackBeltMetadata {
-                picking_rect,
-                user_data: readback_user_data,
-            }),
-        );
-
         let mut picking_target_usage =
             wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_SRC;
         picking_target_usage.set(
@@ -170,7 +191,6 @@ impl PickingLayerProcessor {
             &TextureDesc {
                 label: format!("{view_name} - picking_layer depth").into(),
                 format: Self::PICKING_LAYER_DEPTH_FORMAT,
-                usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
                 ..picking_target.creation_desc
             },
         );
@@ -178,14 +198,11 @@ impl PickingLayerProcessor {
         let rect_min = picking_rect.top_left_corner.as_vec2();
         let rect_max = rect_min + picking_rect.extent.as_vec2();
         let screen_resolution = screen_resolution.as_vec2();
-        let rect_min_ndc = glam::vec2(
-            rect_min.x / screen_resolution.x * 2.0 - 1.0,
-            1.0 - rect_max.y / screen_resolution.y * 2.0,
-        );
-        let rect_max_ndc = glam::vec2(
-            rect_max.x / screen_resolution.x * 2.0 - 1.0,
-            1.0 - rect_min.y / screen_resolution.y * 2.0,
-        );
+        // y axis is flipped in NDC, therefore we need to flip the y axis of the rect.
+        let rect_min_ndc =
+            pixel_coord_to_ndc(glam::vec2(rect_min.x, rect_max.y), screen_resolution);
+        let rect_max_ndc =
+            pixel_coord_to_ndc(glam::vec2(rect_max.x, rect_min.y), screen_resolution);
         let rect_center_ndc = (rect_min_ndc + rect_max_ndc) * 0.5;
         let cropped_projection_from_projection =
             glam::Mat4::from_scale(2.0 / (rect_max_ndc - rect_min_ndc).extend(1.0))
@@ -194,15 +211,16 @@ impl PickingLayerProcessor {
         // Setup frame uniform buffer
         let previous_projection_from_world: glam::Mat4 =
             frame_uniform_buffer_content.projection_from_world.into();
+        let cropped_projection_from_world =
+            cropped_projection_from_projection * previous_projection_from_world;
         let previous_projection_from_view: glam::Mat4 =
             frame_uniform_buffer_content.projection_from_view.into();
+        let cropped_projection_from_view =
+            cropped_projection_from_projection * previous_projection_from_view;
+
         let frame_uniform_buffer_content = FrameUniformBuffer {
-            projection_from_world: (cropped_projection_from_projection
-                * previous_projection_from_world)
-                .into(),
-            projection_from_view: (cropped_projection_from_projection
-                * previous_projection_from_view)
-                .into(),
+            projection_from_world: cropped_projection_from_world.into(),
+            projection_from_view: cropped_projection_from_view.into(),
             ..*frame_uniform_buffer_content
         };
 
@@ -218,6 +236,31 @@ impl PickingLayerProcessor {
             frame_uniform_buffer,
         );
 
+        let row_info_id = Texture2DBufferInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.extent);
+        let row_info_depth =
+            Texture2DBufferInfo::new(Self::PICKING_LAYER_DEPTH_FORMAT, picking_rect.extent);
+
+        // Offset of the depth buffer in the readback buffer needs to be aligned to size of a depth pixel.
+        // This is "trivially true" if the size of the depth format is a multiple of the size of the id format.
+        debug_assert!(
+            Self::PICKING_LAYER_FORMAT.describe().block_size
+                % Self::PICKING_LAYER_DEPTH_FORMAT.describe().block_size
+                == 0
+        );
+        let buffer_size = row_info_id.buffer_size_padded + row_info_depth.buffer_size_padded;
+
+        let readback_buffer = ctx.gpu_readback_belt.lock().allocate(
+            &ctx.device,
+            &ctx.gpu_resources.buffers,
+            buffer_size,
+            readback_identifier,
+            Box::new(ReadbackBeltMetadata {
+                picking_rect,
+                user_data: readback_user_data,
+                world_from_cropped_projection: cropped_projection_from_world.inverse(),
+            }),
+        );
+
         PickingLayerProcessor {
             bind_group_0,
             picking_target,
@@ -247,7 +290,7 @@ impl PickingLayerProcessor {
                 view: &self.picking_depth.default_view,
                 depth_ops: Some(wgpu::Operations {
                     load: ViewBuilder::DEFAULT_DEPTH_CLEAR,
-                    store: false,
+                    store: true,
                 }),
                 stencil_ops: None,
             }),
@@ -259,18 +302,32 @@ impl PickingLayerProcessor {
     }
 
     pub fn end_render_pass(self, encoder: &mut wgpu::CommandEncoder) {
-        self.readback_buffer.read_texture2d(
+        let extent = glam::uvec2(
+            self.picking_target.texture.width(),
+            self.picking_target.texture.height(),
+        );
+        self.readback_buffer.read_multiple_texture2d(
             encoder,
-            wgpu::ImageCopyTexture {
-                texture: &self.picking_target.texture,
-                mip_level: 0,
-                origin: wgpu::Origin3d::ZERO,
-                aspect: wgpu::TextureAspect::All,
-            },
-            glam::uvec2(
-                self.picking_target.texture.width(),
-                self.picking_target.texture.height(),
-            ),
+            &[
+                (
+                    wgpu::ImageCopyTexture {
+                        texture: &self.picking_target.texture,
+                        mip_level: 0,
+                        origin: wgpu::Origin3d::ZERO,
+                        aspect: wgpu::TextureAspect::All,
+                    },
+                    extent,
+                ),
+                (
+                    wgpu::ImageCopyTexture {
+                        texture: &self.picking_depth.texture,
+                        mip_level: 0,
+                        origin: wgpu::Origin3d::ZERO,
+                        aspect: wgpu::TextureAspect::DepthOnly,
+                    },
+                    extent,
+                ),
+            ],
         );
     }
 
@@ -294,13 +351,22 @@ impl PickingLayerProcessor {
                     Self::PICKING_LAYER_FORMAT,
                     metadata.picking_rect.extent,
                 );
+                let buffer_info_depth = Texture2DBufferInfo::new(
+                    Self::PICKING_LAYER_DEPTH_FORMAT,
+                    metadata.picking_rect.extent,
+                );
 
-                let picking_data = buffer_info_id.remove_padding_and_convert(data);
+                let picking_id_data = buffer_info_id
+                    .remove_padding_and_convert(&data[..buffer_info_id.buffer_size_padded as _]);
+                let picking_depth_data = buffer_info_depth
+                    .remove_padding_and_convert(&data[buffer_info_id.buffer_size_padded as _..]);
 
                 result = Some(PickingResult {
-                    picking_data,
+                    picking_id_data,
+                    picking_depth_data,
                     user_data: metadata.user_data,
                     rect: metadata.picking_rect,
+                    world_from_cropped_projection: metadata.world_from_cropped_projection,
                 });
             });
         result
diff --git a/crates/re_renderer/src/wgpu_resources/mod.rs b/crates/re_renderer/src/wgpu_resources/mod.rs
index 403e46350b47..06f30b6292d3 100644
--- a/crates/re_renderer/src/wgpu_resources/mod.rs
+++ b/crates/re_renderer/src/wgpu_resources/mod.rs
@@ -169,7 +169,7 @@ impl Texture2DBufferInfo {
     pub fn remove_padding<'a>(&self, buffer: &'a [u8]) -> Cow<'a, [u8]> {
         crate::profile_function!();
 
-        assert!(buffer.len() as wgpu::BufferAddress == self.buffer_size_padded);
+        assert_eq!(buffer.len() as wgpu::BufferAddress, self.buffer_size_padded);
 
         if self.bytes_per_row_padded == self.bytes_per_row_unpadded {
             return Cow::Borrowed(buffer);
@@ -196,7 +196,7 @@ impl Texture2DBufferInfo {
     pub fn remove_padding_and_convert<T: bytemuck::Pod>(&self, buffer: &[u8]) -> Vec<T> {
         crate::profile_function!();
 
-        assert!(buffer.len() as wgpu::BufferAddress == self.buffer_size_padded);
+        assert_eq!(buffer.len() as wgpu::BufferAddress, self.buffer_size_padded);
         assert!(self.bytes_per_row_unpadded % std::mem::size_of::<T>() as u32 == 0);
 
         // Due to https://github.com/gfx-rs/wgpu/issues/3508 the data might be completely unaligned,
diff --git a/crates/re_viewer/src/ui/view_spatial/scene/picking.rs b/crates/re_viewer/src/ui/view_spatial/scene/picking.rs
index 5585e555f5a0..196eaaf827a1 100644
--- a/crates/re_viewer/src/ui/view_spatial/scene/picking.rs
+++ b/crates/re_viewer/src/ui/view_spatial/scene/picking.rs
@@ -208,7 +208,7 @@ pub fn picking(
         if let Some(gpu_picking_result) = gpu_picking_result {
             // TODO(andreas): Pick middle pixel for now. But we soon want to snap to the closest object using a bigger picking rect.
             let rect = gpu_picking_result.rect;
-            let picked_id = gpu_picking_result.picking_data
+            let picked_id = gpu_picking_result.picking_id_data
                 [(rect.width() / 2 + (rect.height() / 2) * rect.width()) as usize];
             let picked_object = instance_path_hash_from_picking_layer_id(picked_id);
 

From 2149b16e745e6e6099caa11955ee49150e51b9a4 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Sat, 1 Apr 2023 00:20:57 +0200
Subject: [PATCH 2/6] gpu picking in the viewer picks up depth now

---
 .../src/ui/view_spatial/scene/picking.rs        | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/crates/re_viewer/src/ui/view_spatial/scene/picking.rs b/crates/re_viewer/src/ui/view_spatial/scene/picking.rs
index 196eaaf827a1..84179c62fb81 100644
--- a/crates/re_viewer/src/ui/view_spatial/scene/picking.rs
+++ b/crates/re_viewer/src/ui/view_spatial/scene/picking.rs
@@ -207,14 +207,19 @@ pub fn picking(
     if state.closest_opaque_pick.instance_path_hash == InstancePathHash::NONE {
         if let Some(gpu_picking_result) = gpu_picking_result {
             // TODO(andreas): Pick middle pixel for now. But we soon want to snap to the closest object using a bigger picking rect.
-            let rect = gpu_picking_result.rect;
-            let picked_id = gpu_picking_result.picking_id_data
-                [(rect.width() / 2 + (rect.height() / 2) * rect.width()) as usize];
+            let picking_rect_position = gpu_picking_result.rect.extent / 2;
+            let picked_id = gpu_picking_result.picked_id(picking_rect_position);
             let picked_object = instance_path_hash_from_picking_layer_id(picked_id);
 
-            // TODO(andreas): We're lacking depth information!
-            state.closest_opaque_pick.instance_path_hash = picked_object;
-            state.closest_opaque_pick.used_gpu_picking = true;
+            // It is old data, the object might be gone by now!
+            if picked_object.is_some() {
+                // TODO(andreas): Once this is the primary path we should not awkwardly reconstruct the ray_t here. It's entirely correct either!
+                state.closest_opaque_pick.ray_t = gpu_picking_result
+                    .picked_world_position(picking_rect_position)
+                    .distance(context.ray_in_world.origin);
+                state.closest_opaque_pick.instance_path_hash = picked_object;
+                state.closest_opaque_pick.used_gpu_picking = true;
+            }
         } else {
             // It is possible that some frames we don't get a picking result and the frame after we get several.
             // We need to cache the last picking result and use it until we get a new one or the mouse leaves the screen.

From 5ad8c09cb9eab24e02bfce1144a03eb8c29f132b Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Mon, 3 Apr 2023 12:55:40 +0200
Subject: [PATCH 3/6] WebGL workarounds. * can't read depth formats * and all
 the other weird things, see https://github.com/gfx-rs/wgpu/issues/3644

---
 crates/re_renderer/shader/copy_texture.wgsl   |  15 +
 crates/re_renderer/src/config.rs              |  20 +-
 .../src/draw_phases/picking_layer.rs          | 258 ++++++++++++++++--
 .../re_renderer/src/renderer/debug_overlay.rs |   4 +-
 crates/re_renderer/src/view_builder.rs        |   2 +-
 crates/re_renderer/src/workspace_shaders.rs   |   6 +
 6 files changed, 281 insertions(+), 24 deletions(-)
 create mode 100644 crates/re_renderer/shader/copy_texture.wgsl

diff --git a/crates/re_renderer/shader/copy_texture.wgsl b/crates/re_renderer/shader/copy_texture.wgsl
new file mode 100644
index 000000000000..aaa5bb4c36b6
--- /dev/null
+++ b/crates/re_renderer/shader/copy_texture.wgsl
@@ -0,0 +1,15 @@
+// Reads the content of a texture and writes it out as is.
+//
+// This is needed e.g. on WebGL to convert from a depth format to a regular color format that can be read back to the CPU.
+
+#import <./types.wgsl>
+#import <./global_bindings.wgsl>
+#import <./screen_triangle_vertex.wgsl>
+
+@group(1) @binding(0)
+var tex: texture_2d<f32>;
+
+@fragment
+fn main(in: FragmentInput) -> @location(0) Vec4 {
+    return textureSample(tex, nearest_sampler, in.texcoord);
+}
diff --git a/crates/re_renderer/src/config.rs b/crates/re_renderer/src/config.rs
index d168ab274373..9d4318ce9b5e 100644
--- a/crates/re_renderer/src/config.rs
+++ b/crates/re_renderer/src/config.rs
@@ -5,7 +5,7 @@
 #[derive(Clone, Copy, Debug)]
 pub enum HardwareTier {
     /// For WebGL and native OpenGL. Maintains strict WebGL capability.
-    Basic,
+    Web,
 
     /// Run natively with Vulkan/Metal but don't demand anything that isn't widely available.
     Native,
@@ -17,7 +17,15 @@ impl HardwareTier {
     /// Whether the current hardware tier supports sampling from textures with a sample count higher than 1.
     pub fn support_sampling_msaa_texture(&self) -> bool {
         match self {
-            HardwareTier::Basic => false,
+            HardwareTier::Web => false,
+            HardwareTier::Native => true,
+        }
+    }
+
+    /// Whether the current hardware tier supports sampling from textures with a sample count higher than 1.
+    pub fn support_depth_readback(&self) -> bool {
+        match self {
+            HardwareTier::Web => false,
             HardwareTier::Native => true,
         }
     }
@@ -27,7 +35,7 @@ impl Default for HardwareTier {
     fn default() -> Self {
         // Use "Basic" tier for actual web but also if someone forces the GL backend!
         if supported_backends() == wgpu::Backends::GL {
-            HardwareTier::Basic
+            HardwareTier::Web
         } else {
             HardwareTier::Native
         }
@@ -63,7 +71,11 @@ impl HardwareTier {
     /// Downlevel features required by the given tier.
     pub fn required_downlevel_capabilities(self) -> wgpu::DownlevelCapabilities {
         wgpu::DownlevelCapabilities {
-            flags: wgpu::DownlevelFlags::empty(),
+            flags: match self {
+                HardwareTier::Web => wgpu::DownlevelFlags::empty(),
+                // Require fully WebGPU compliance for the native tier.
+                HardwareTier::Native => wgpu::DownlevelFlags::all(),
+            },
             limits: Default::default(), // unused so far both here and in wgpu
             shader_model: wgpu::ShaderModel::Sm4,
         }
diff --git a/crates/re_renderer/src/draw_phases/picking_layer.rs b/crates/re_renderer/src/draw_phases/picking_layer.rs
index 8214a195bd62..4311c02ad8be 100644
--- a/crates/re_renderer/src/draw_phases/picking_layer.rs
+++ b/crates/re_renderer/src/draw_phases/picking_layer.rs
@@ -12,11 +12,18 @@
 use crate::{
     allocator::create_and_fill_uniform_buffer,
     global_bindings::FrameUniformBuffer,
+    include_shader_module,
     view_builder::ViewBuilder,
-    wgpu_resources::{GpuBindGroup, GpuTexture, Texture2DBufferInfo, TextureDesc},
+    wgpu_resources::{
+        BindGroupDesc, BindGroupEntry, BindGroupLayoutDesc, GpuBindGroup, GpuRenderPipelineHandle,
+        GpuTexture, GpuTextureHandle, PipelineLayoutDesc, PoolError, RenderPipelineDesc,
+        Texture2DBufferInfo, TextureDesc, WgpuResourcePools,
+    },
     DebugLabel, GpuReadbackBuffer, GpuReadbackIdentifier, IntRect, RenderContext,
 };
 
+use smallvec::smallvec;
+
 /// GPU retrieved & processed picking data result.
 pub struct PickingResult<T: 'static + Send + Sync> {
     /// User data supplied on picking request.
@@ -33,13 +40,14 @@ pub struct PickingResult<T: 'static + Send + Sync> {
     pub picking_id_data: Vec<PickingLayerId>,
 
     /// Picking depth data for the requested rectangle.
-    /// TODO: refer to utility for interpretation
+    ///
+    /// Use [`PickingResult::picked_world_position`] for easy interpretation of the data.
     ///
     /// GPU internal row padding has already been removed.
     /// Data is stored row wise, left to right, top to bottom.
     pub picking_depth_data: Vec<f32>,
 
-    /// Transforms a position on the picking rect to a world position.
+    /// Transforms a NDC position on the picking rect to a world position.
     world_from_cropped_projection: glam::Mat4,
 }
 
@@ -75,6 +83,8 @@ struct ReadbackBeltMetadata<T: 'static + Send + Sync> {
     picking_rect: IntRect,
     world_from_cropped_projection: glam::Mat4,
     user_data: T,
+
+    depth_readback_workaround_in_use: bool,
 }
 
 /// The first 64bit of the picking layer.
@@ -115,7 +125,7 @@ impl From<PickingLayerId> for [u32; 4] {
 }
 
 /// Converts a pixel coordinate to normalized device coordinates.
-fn pixel_coord_to_ndc(coord: glam::Vec2, target_resolution: glam::Vec2) -> glam::Vec2 {
+pub fn pixel_coord_to_ndc(coord: glam::Vec2, target_resolution: glam::Vec2) -> glam::Vec2 {
     glam::vec2(
         coord.x / target_resolution.x * 2.0 - 1.0,
         1.0 - coord.y / target_resolution.y * 2.0,
@@ -127,9 +137,11 @@ fn pixel_coord_to_ndc(coord: glam::Vec2, target_resolution: glam::Vec2) -> glam:
 /// The view builder creates this for every frame that requests a picking result.
 pub struct PickingLayerProcessor {
     pub picking_target: GpuTexture,
-    picking_depth: GpuTexture,
+    picking_depth_target: GpuTexture,
     readback_buffer: GpuReadbackBuffer,
     bind_group_0: GpuBindGroup,
+
+    depth_readback_workaround: Option<DepthReadbackWorkaround>,
 }
 
 impl PickingLayerProcessor {
@@ -186,15 +198,31 @@ impl PickingLayerProcessor {
                 usage: picking_target_usage,
             },
         );
-        let picking_depth = ctx.gpu_resources.textures.alloc(
+
+        let direct_depth_readback = ctx
+            .shared_renderer_data
+            .config
+            .hardware_tier
+            .support_depth_readback();
+
+        let picking_depth_target = ctx.gpu_resources.textures.alloc(
             &ctx.device,
             &TextureDesc {
-                label: format!("{view_name} - picking_layer depth").into(),
+                label: format!("{view_name} - picking_layer depth target").into(),
                 format: Self::PICKING_LAYER_DEPTH_FORMAT,
+                usage: if direct_depth_readback {
+                    wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_SRC
+                } else {
+                    wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::TEXTURE_BINDING
+                },
                 ..picking_target.creation_desc
             },
         );
 
+        let depth_readback_workaround = (!direct_depth_readback).then(|| {
+            DepthReadbackWorkaround::new(ctx, picking_rect.extent, picking_depth_target.handle)
+        });
+
         let rect_min = picking_rect.top_left_corner.as_vec2();
         let rect_max = rect_min + picking_rect.extent.as_vec2();
         let screen_resolution = screen_resolution.as_vec2();
@@ -237,8 +265,14 @@ impl PickingLayerProcessor {
         );
 
         let row_info_id = Texture2DBufferInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.extent);
-        let row_info_depth =
-            Texture2DBufferInfo::new(Self::PICKING_LAYER_DEPTH_FORMAT, picking_rect.extent);
+        let row_info_depth = Texture2DBufferInfo::new(
+            if direct_depth_readback {
+                Self::PICKING_LAYER_DEPTH_FORMAT
+            } else {
+                DepthReadbackWorkaround::READBACK_FORMAT
+            },
+            picking_rect.extent,
+        );
 
         // Offset of the depth buffer in the readback buffer needs to be aligned to size of a depth pixel.
         // This is "trivially true" if the size of the depth format is a multiple of the size of the id format.
@@ -258,14 +292,16 @@ impl PickingLayerProcessor {
                 picking_rect,
                 user_data: readback_user_data,
                 world_from_cropped_projection: cropped_projection_from_world.inverse(),
+                depth_readback_workaround_in_use: depth_readback_workaround.is_some(),
             }),
         );
 
         PickingLayerProcessor {
             bind_group_0,
             picking_target,
-            picking_depth,
+            picking_depth_target,
             readback_buffer,
+            depth_readback_workaround,
         }
     }
 
@@ -287,10 +323,10 @@ impl PickingLayerProcessor {
                 },
             })],
             depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment {
-                view: &self.picking_depth.default_view,
+                view: &self.picking_depth_target.default_view,
                 depth_ops: Some(wgpu::Operations {
                     load: ViewBuilder::DEFAULT_DEPTH_CLEAR,
-                    store: true,
+                    store: true, // Store for readback!
                 }),
                 stencil_ops: None,
             }),
@@ -301,11 +337,24 @@ impl PickingLayerProcessor {
         pass
     }
 
-    pub fn end_render_pass(self, encoder: &mut wgpu::CommandEncoder) {
+    pub fn end_render_pass(
+        self,
+        encoder: &mut wgpu::CommandEncoder,
+        pools: &WgpuResourcePools,
+    ) -> Result<(), PoolError> {
         let extent = glam::uvec2(
             self.picking_target.texture.width(),
             self.picking_target.texture.height(),
         );
+
+        let readable_depth_texture = if let Some(depth_copy_workaround) =
+            self.depth_readback_workaround.as_ref()
+        {
+            depth_copy_workaround.copy_to_readable_texture(encoder, pools, &self.bind_group_0)?
+        } else {
+            &self.picking_depth_target
+        };
+
         self.readback_buffer.read_multiple_texture2d(
             encoder,
             &[
@@ -320,15 +369,17 @@ impl PickingLayerProcessor {
                 ),
                 (
                     wgpu::ImageCopyTexture {
-                        texture: &self.picking_depth.texture,
+                        texture: &readable_depth_texture.texture,
                         mip_level: 0,
                         origin: wgpu::Origin3d::ZERO,
-                        aspect: wgpu::TextureAspect::DepthOnly,
+                        aspect: wgpu::TextureAspect::All,
                     },
                     extent,
                 ),
             ],
         );
+
+        Ok(())
     }
 
     /// Returns the oldest received picking results for a given identifier and user data type.
@@ -347,20 +398,46 @@ impl PickingLayerProcessor {
         ctx.gpu_readback_belt
             .lock()
             .readback_data::<ReadbackBeltMetadata<T>>(identifier, |data, metadata| {
+                // Assert that our texture data reinterpretation works out from a pixel size point of view.
+                debug_assert_eq!(
+                    Self::PICKING_LAYER_DEPTH_FORMAT.describe().block_size as usize,
+                    std::mem::size_of::<f32>()
+                );
+                debug_assert_eq!(
+                    Self::PICKING_LAYER_FORMAT.describe().block_size as usize,
+                    std::mem::size_of::<PickingLayerId>()
+                );
+
                 let buffer_info_id = Texture2DBufferInfo::new(
                     Self::PICKING_LAYER_FORMAT,
                     metadata.picking_rect.extent,
                 );
                 let buffer_info_depth = Texture2DBufferInfo::new(
-                    Self::PICKING_LAYER_DEPTH_FORMAT,
+                    if metadata.depth_readback_workaround_in_use {
+                        DepthReadbackWorkaround::READBACK_FORMAT
+                    } else {
+                        Self::PICKING_LAYER_DEPTH_FORMAT
+                    },
                     metadata.picking_rect.extent,
                 );
 
                 let picking_id_data = buffer_info_id
                     .remove_padding_and_convert(&data[..buffer_info_id.buffer_size_padded as _]);
-                let picking_depth_data = buffer_info_depth
+                let mut picking_depth_data = buffer_info_depth
                     .remove_padding_and_convert(&data[buffer_info_id.buffer_size_padded as _..]);
 
+                if metadata.depth_readback_workaround_in_use {
+                    // Can't read back depth textures & can't read back R32Float textures either!
+                    // See https://github.com/gfx-rs/wgpu/issues/3644
+                    debug_assert_eq!(
+                        DepthReadbackWorkaround::READBACK_FORMAT
+                            .describe()
+                            .block_size as usize,
+                        std::mem::size_of::<f32>() * 4
+                    );
+                    picking_depth_data = picking_depth_data.into_iter().step_by(4).collect();
+                }
+
                 result = Some(PickingResult {
                     picking_id_data,
                     picking_depth_data,
@@ -372,3 +449,150 @@ impl PickingLayerProcessor {
         result
     }
 }
+
+/// Utility for copying a depth texture when it can't be read-back directly to a [`wgpu::TextureFormat::R32Float`] which is readable texture.
+///
+/// Implementation note:
+/// This is a plain & simple "sample in shader and write to texture" utility.
+/// It might be worth abstracting this further into a general purpose operator.
+/// There is not much in here that is specific to the depth usecase!
+struct DepthReadbackWorkaround {
+    render_pipeline: GpuRenderPipelineHandle,
+    bind_group: GpuBindGroup,
+    readable_texture: GpuTexture,
+}
+
+impl DepthReadbackWorkaround {
+    /// There's two layers of workarounds here:
+    /// * WebGL (via spec) not being able to read back depth textures
+    /// * unclear behavior for any readback that isn't RGBA
+    ///     Furthermore, integer textures also seemed to be problematic, but it seems to work fine for RgbaU32 which we use for our picking ID
+    ///     Details see [wgpu#3644](https://github.com/gfx-rs/wgpu/issues/3644)
+    const READBACK_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba32Float;
+
+    fn new(
+        ctx: &mut RenderContext,
+        extent: glam::UVec2,
+        depth_target_handle: GpuTextureHandle,
+    ) -> DepthReadbackWorkaround {
+        let readable_texture = ctx.gpu_resources.textures.alloc(
+            &ctx.device,
+            &TextureDesc {
+                label: "DepthCopyWorkaround::readable_texture".into(),
+                format: Self::READBACK_FORMAT,
+                usage: wgpu::TextureUsages::COPY_SRC | wgpu::TextureUsages::RENDER_ATTACHMENT,
+                size: wgpu::Extent3d {
+                    width: extent.x,
+                    height: extent.y,
+                    depth_or_array_layers: 1,
+                },
+                mip_level_count: 1,
+                sample_count: 1,
+                dimension: wgpu::TextureDimension::D2,
+            },
+        );
+
+        let bind_group_layout = ctx.gpu_resources.bind_group_layouts.get_or_create(
+            &ctx.device,
+            &BindGroupLayoutDesc {
+                label: "DepthCopyWorkaround::bind_group_layout".into(),
+                entries: vec![wgpu::BindGroupLayoutEntry {
+                    binding: 0,
+                    visibility: wgpu::ShaderStages::FRAGMENT,
+                    ty: wgpu::BindingType::Texture {
+                        sample_type: wgpu::TextureSampleType::Float { filterable: false },
+                        view_dimension: wgpu::TextureViewDimension::D2,
+                        multisampled: false,
+                    },
+                    count: None,
+                }],
+            },
+        );
+
+        let bind_group = ctx.gpu_resources.bind_groups.alloc(
+            &ctx.device,
+            &ctx.gpu_resources,
+            &BindGroupDesc {
+                label: "DepthCopyWorkaround::bind_group".into(),
+                entries: smallvec![BindGroupEntry::DefaultTextureView(depth_target_handle)],
+                layout: bind_group_layout,
+            },
+        );
+
+        let render_pipeline = ctx.gpu_resources.render_pipelines.get_or_create(
+            &ctx.device,
+            &RenderPipelineDesc {
+                label: "DepthCopyWorkaround::render_pipeline".into(),
+                pipeline_layout: ctx.gpu_resources.pipeline_layouts.get_or_create(
+                    &ctx.device,
+                    &PipelineLayoutDesc {
+                        label: "DepthCopyWorkaround::render_pipeline".into(),
+                        entries: vec![
+                            ctx.shared_renderer_data.global_bindings.layout,
+                            bind_group_layout,
+                        ],
+                    },
+                    &ctx.gpu_resources.bind_group_layouts,
+                ),
+                vertex_entrypoint: "main".into(),
+                vertex_handle: ctx.gpu_resources.shader_modules.get_or_create(
+                    &ctx.device,
+                    &mut ctx.resolver,
+                    &include_shader_module!("../../shader/screen_triangle.wgsl"),
+                ),
+                fragment_entrypoint: "main".into(),
+                fragment_handle: ctx.gpu_resources.shader_modules.get_or_create(
+                    &ctx.device,
+                    &mut ctx.resolver,
+                    &include_shader_module!("../../shader/copy_texture.wgsl"),
+                ),
+                vertex_buffers: smallvec![],
+                render_targets: smallvec![Some(readable_texture.texture.format().into())],
+                primitive: wgpu::PrimitiveState {
+                    topology: wgpu::PrimitiveTopology::TriangleStrip,
+                    cull_mode: None,
+                    ..Default::default()
+                },
+                depth_stencil: None,
+                multisample: wgpu::MultisampleState::default(),
+            },
+            &ctx.gpu_resources.pipeline_layouts,
+            &ctx.gpu_resources.shader_modules,
+        );
+
+        Self {
+            render_pipeline,
+            bind_group,
+            readable_texture,
+        }
+    }
+
+    fn copy_to_readable_texture(
+        &self,
+        encoder: &mut wgpu::CommandEncoder,
+        pools: &WgpuResourcePools,
+        global_binding_bind_group: &GpuBindGroup,
+    ) -> Result<&GpuTexture, PoolError> {
+        // Copy depth texture to a readable (color) texture with a screen filling triangle.
+        let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+            label: DebugLabel::from("Depth copy workaround").get(),
+            color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+                view: &self.readable_texture.default_view,
+                resolve_target: None,
+                ops: wgpu::Operations {
+                    load: wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT),
+                    store: true, // Store for readback!
+                },
+            })],
+            depth_stencil_attachment: None,
+        });
+
+        let pipeline = pools.render_pipelines.get_resource(self.render_pipeline)?;
+        pass.set_pipeline(pipeline);
+        pass.set_bind_group(0, global_binding_bind_group, &[]);
+        pass.set_bind_group(1, &self.bind_group, &[]);
+        pass.draw(0..3, 0..1);
+
+        Ok(&self.readable_texture)
+    }
+}
diff --git a/crates/re_renderer/src/renderer/debug_overlay.rs b/crates/re_renderer/src/renderer/debug_overlay.rs
index f7dc4a4dbcec..276f8fc413da 100644
--- a/crates/re_renderer/src/renderer/debug_overlay.rs
+++ b/crates/re_renderer/src/renderer/debug_overlay.rs
@@ -189,7 +189,7 @@ impl Renderer for DebugOverlayRenderer {
         );
         let render_pipeline = pools.render_pipelines.get_or_create(
             device,
-            &(RenderPipelineDesc {
+            &RenderPipelineDesc {
                 label: "DebugOverlayDrawData::render_pipeline_regular".into(),
                 pipeline_layout: pools.pipeline_layouts.get_or_create(
                     device,
@@ -212,7 +212,7 @@ impl Renderer for DebugOverlayRenderer {
                 },
                 depth_stencil: None,
                 multisample: wgpu::MultisampleState::default(),
-            }),
+            },
             &pools.pipeline_layouts,
             &pools.shader_modules,
         );
diff --git a/crates/re_renderer/src/view_builder.rs b/crates/re_renderer/src/view_builder.rs
index 49fde9b8bf09..704f0a22f7be 100644
--- a/crates/re_renderer/src/view_builder.rs
+++ b/crates/re_renderer/src/view_builder.rs
@@ -582,7 +582,7 @@ impl ViewBuilder {
                 //pass.set_bind_group(0, &setup.bind_group_0, &[]);
                 self.draw_phase(ctx, DrawPhase::PickingLayer, &mut pass);
             }
-            picking_processor.end_render_pass(&mut encoder);
+            picking_processor.end_render_pass(&mut encoder, &ctx.gpu_resources)?;
         }
 
         if let Some(outline_mask_processor) = self.outline_mask_processor.take() {
diff --git a/crates/re_renderer/src/workspace_shaders.rs b/crates/re_renderer/src/workspace_shaders.rs
index c2f37fe7ae41..0dd9316c8c4c 100644
--- a/crates/re_renderer/src/workspace_shaders.rs
+++ b/crates/re_renderer/src/workspace_shaders.rs
@@ -25,6 +25,12 @@ pub fn init() {
         fs.create_file(virtpath, content).unwrap();
     }
 
+    {
+        let virtpath = Path::new("shader/copy_texture.wgsl");
+        let content = include_str!("../shader/copy_texture.wgsl").into();
+        fs.create_file(virtpath, content).unwrap();
+    }
+
     {
         let virtpath = Path::new("shader/debug_overlay.wgsl");
         let content = include_str!("../shader/debug_overlay.wgsl").into();

From 33755da0ab4e985efb9c09fcdf7501a4acabfd8e Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Mon, 3 Apr 2023 16:39:32 +0200
Subject: [PATCH 4/6] lint fix

---
 crates/re_renderer/src/draw_phases/picking_layer.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crates/re_renderer/src/draw_phases/picking_layer.rs b/crates/re_renderer/src/draw_phases/picking_layer.rs
index 4311c02ad8be..ce6e508adc6c 100644
--- a/crates/re_renderer/src/draw_phases/picking_layer.rs
+++ b/crates/re_renderer/src/draw_phases/picking_layer.rs
@@ -147,6 +147,7 @@ pub struct PickingLayerProcessor {
 impl PickingLayerProcessor {
     /// The texture format used for the picking layer.
     pub const PICKING_LAYER_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba32Uint;
+
     /// The depth format used for the picking layer - f32 makes it easiest to deal with retrieved depth and is guaranteed to be copyable.
     pub const PICKING_LAYER_DEPTH_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Depth32Float;
 

From eaf082ff688391a27328c47eb1bdc7a7a4cc5378 Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Mon, 3 Apr 2023 16:48:27 +0200
Subject: [PATCH 5/6] doc string fox

---
 crates/re_renderer/src/draw_phases/picking_layer.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crates/re_renderer/src/draw_phases/picking_layer.rs b/crates/re_renderer/src/draw_phases/picking_layer.rs
index ce6e508adc6c..7999f301b5cb 100644
--- a/crates/re_renderer/src/draw_phases/picking_layer.rs
+++ b/crates/re_renderer/src/draw_phases/picking_layer.rs
@@ -467,7 +467,8 @@ impl DepthReadbackWorkaround {
     /// There's two layers of workarounds here:
     /// * WebGL (via spec) not being able to read back depth textures
     /// * unclear behavior for any readback that isn't RGBA
-    ///     Furthermore, integer textures also seemed to be problematic, but it seems to work fine for RgbaU32 which we use for our picking ID
+    ///     Furthermore, integer textures also seemed to be problematic,
+    ///     but it seems to work fine for [`wgpu::TextureFormat::Rgba32Uint`] which we use for our picking ID
     ///     Details see [wgpu#3644](https://github.com/gfx-rs/wgpu/issues/3644)
     const READBACK_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba32Float;
 

From 97785881e66bf19592ddb47e591d6fa8362afd8a Mon Sep 17 00:00:00 2001
From: Andreas Reich <r_andreas2@web.de>
Date: Tue, 4 Apr 2023 09:59:48 +0200
Subject: [PATCH 6/6] improve comment, rename

---
 crates/re_renderer/src/draw_phases/picking_layer.rs   | 8 ++++----
 crates/re_viewer/src/ui/view_spatial/scene/picking.rs | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/crates/re_renderer/src/draw_phases/picking_layer.rs b/crates/re_renderer/src/draw_phases/picking_layer.rs
index 7999f301b5cb..69b125529b2d 100644
--- a/crates/re_renderer/src/draw_phases/picking_layer.rs
+++ b/crates/re_renderer/src/draw_phases/picking_layer.rs
@@ -35,16 +35,16 @@ pub struct PickingResult<T: 'static + Send + Sync> {
 
     /// Picking id data for the requested rectangle.
     ///
-    /// GPU internal row padding has already been removed.
-    /// Data is stored row wise, left to right, top to bottom.
+    /// GPU internal row padding has already been removed from this buffer.
+    /// Pixel data is stored in the normal fashion - row wise, left to right, top to bottom.
     pub picking_id_data: Vec<PickingLayerId>,
 
     /// Picking depth data for the requested rectangle.
     ///
     /// Use [`PickingResult::picked_world_position`] for easy interpretation of the data.
     ///
-    /// GPU internal row padding has already been removed.
-    /// Data is stored row wise, left to right, top to bottom.
+    /// GPU internal row padding has already been removed from this buffer.
+    /// Pixel data is stored in the normal fashion - row wise, left to right, top to bottom.
     pub picking_depth_data: Vec<f32>,
 
     /// Transforms a NDC position on the picking rect to a world position.
diff --git a/crates/re_viewer/src/ui/view_spatial/scene/picking.rs b/crates/re_viewer/src/ui/view_spatial/scene/picking.rs
index 84179c62fb81..ea813f460c91 100644
--- a/crates/re_viewer/src/ui/view_spatial/scene/picking.rs
+++ b/crates/re_viewer/src/ui/view_spatial/scene/picking.rs
@@ -207,15 +207,15 @@ pub fn picking(
     if state.closest_opaque_pick.instance_path_hash == InstancePathHash::NONE {
         if let Some(gpu_picking_result) = gpu_picking_result {
             // TODO(andreas): Pick middle pixel for now. But we soon want to snap to the closest object using a bigger picking rect.
-            let picking_rect_position = gpu_picking_result.rect.extent / 2;
-            let picked_id = gpu_picking_result.picked_id(picking_rect_position);
+            let pos_on_picking_rect = gpu_picking_result.rect.extent / 2;
+            let picked_id = gpu_picking_result.picked_id(pos_on_picking_rect);
             let picked_object = instance_path_hash_from_picking_layer_id(picked_id);
 
             // It is old data, the object might be gone by now!
             if picked_object.is_some() {
                 // TODO(andreas): Once this is the primary path we should not awkwardly reconstruct the ray_t here. It's entirely correct either!
                 state.closest_opaque_pick.ray_t = gpu_picking_result
-                    .picked_world_position(picking_rect_position)
+                    .picked_world_position(pos_on_picking_rect)
                     .distance(context.ray_in_world.origin);
                 state.closest_opaque_pick.instance_path_hash = picked_object;
                 state.closest_opaque_pick.used_gpu_picking = true;