From 64679896483966abb77348c9079b0fcbd0209a61 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Thu, 20 Jan 2022 19:59:25 -0500 Subject: [PATCH] hal/dx12: improve RowPitch computation (#2409) --- wgpu-core/src/command/clear.rs | 3 +- wgpu-core/src/device/queue.rs | 3 +- wgpu-core/src/lib.rs | 8 ---- wgpu-hal/examples/halmark/main.rs | 19 +++----- wgpu-hal/src/auxil/mod.rs | 11 +++++ wgpu-hal/src/dx12/command.rs | 72 +++++++++++++++---------------- 6 files changed, 54 insertions(+), 62 deletions(-) diff --git a/wgpu-core/src/command/clear.rs b/wgpu-core/src/command/clear.rs index 1328c55771..e9ff242b30 100644 --- a/wgpu-core/src/command/clear.rs +++ b/wgpu-core/src/command/clear.rs @@ -3,7 +3,6 @@ use std::{num::NonZeroU32, ops::Range}; #[cfg(feature = "trace")] use crate::device::trace::Command as TraceCommand; use crate::{ - align_to, command::CommandBuffer, device::Device, get_lowest_common_denom, @@ -14,7 +13,7 @@ use crate::{ track::{ResourceTracker, TextureSelector, TextureState}, }; -use hal::CommandEncoder as _; +use hal::{auxil::align_to, CommandEncoder as _}; use thiserror::Error; use wgt::{BufferAddress, BufferSize, BufferUsages, ImageSubresourceRange, TextureAspect}; diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index ff3d640321..fc27278eae 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -1,7 +1,6 @@ #[cfg(feature = "trace")] use crate::device::trace::Action; use crate::{ - align_to, command::{ extract_texture_selector, validate_linear_texture_data, validate_texture_copy_range, ClearError, CommandBuffer, CopySide, ImageCopyTexture, TransferError, @@ -415,7 +414,7 @@ impl Global { device.alignments.buffer_copy_pitch.get() as u32, format_desc.block_size as u32, ); - let stage_bytes_per_row = align_to( + let stage_bytes_per_row = hal::auxil::align_to( format_desc.block_size as u32 * width_blocks, bytes_per_row_alignment, ); diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs index 565cc99ef6..3a3ff84763 100644 --- a/wgpu-core/src/lib.rs +++ b/wgpu-core/src/lib.rs @@ -252,14 +252,6 @@ pub(crate) fn get_greatest_common_divisor(mut a: u32, mut b: u32) -> u32 { } } -#[inline] -pub(crate) fn align_to(value: u32, alignment: u32) -> u32 { - match value % alignment { - 0 => value, - other => value - other + alignment, - } -} - #[test] fn test_lcd() { assert_eq!(get_lowest_common_denom(2, 2), 2); diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index cdaf6797f8..ed97e53b83 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -67,7 +67,7 @@ struct Example { pipeline: A::RenderPipeline, bunnies: Vec, local_buffer: A::Buffer, - local_alignment: wgt::BufferAddress, + local_alignment: u32, global_buffer: A::Buffer, sampler: A::Sampler, texture: A::Texture, @@ -376,22 +376,13 @@ impl Example { buffer }; - fn align_to( - value: wgt::BufferAddress, - alignment: wgt::BufferAddress, - ) -> wgt::BufferAddress { - match value % alignment { - 0 => value, - other => value - other + alignment, - } - } - let local_alignment = align_to( - mem::size_of::() as _, - capabilities.limits.min_uniform_buffer_offset_alignment as _, + let local_alignment = hal::auxil::align_to( + mem::size_of::() as u32, + capabilities.limits.min_uniform_buffer_offset_alignment, ); let local_buffer_desc = hal::BufferDescriptor { label: Some("local"), - size: (MAX_BUNNIES as wgt::BufferAddress) * local_alignment, + size: (MAX_BUNNIES as wgt::BufferAddress) * (local_alignment as wgt::BufferAddress), usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::UNIFORM, memory_flags: hal::MemoryFlags::PREFER_COHERENT, }; diff --git a/wgpu-hal/src/auxil/mod.rs b/wgpu-hal/src/auxil/mod.rs index a157e80565..4d8cd0d2fa 100644 --- a/wgpu-hal/src/auxil/mod.rs +++ b/wgpu-hal/src/auxil/mod.rs @@ -29,6 +29,17 @@ pub fn map_naga_stage(stage: naga::ShaderStage) -> wgt::ShaderStages { } } +pub fn align_to(value: u32, alignment: u32) -> u32 { + if alignment.is_power_of_two() { + (value + alignment - 1) & !(alignment - 1) + } else { + match value % alignment { + 0 => value, + other => value - other + alignment, + } + } +} + impl crate::CopyExtent { pub fn min(&self, other: &Self) -> Self { Self { diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 48f72377b0..673ccccdf1 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -13,6 +13,40 @@ fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> d3d12::D3D12_BO } } +impl crate::BufferTextureCopy { + fn to_subresource_footprint( + &self, + format: wgt::TextureFormat, + ) -> d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { + let desc = format.describe(); + d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { + Offset: self.buffer_layout.offset, + Footprint: d3d12::D3D12_SUBRESOURCE_FOOTPRINT { + Format: conv::map_texture_format(format), + Width: self.size.width, + Height: self + .buffer_layout + .rows_per_image + .map_or(self.size.height, |count| { + count.get() * desc.block_dimensions.1 as u32 + }), + Depth: self.size.depth, + RowPitch: { + let actual = match self.buffer_layout.bytes_per_row { + Some(count) => count.get(), + // this may happen for single-line updates + None => { + (self.size.width / desc.block_dimensions.0 as u32) + * desc.block_size as u32 + } + }; + crate::auxil::align_to(actual, d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) + }, + }, + } + } +} + impl super::Temp { fn prepare_marker(&mut self, marker: &str) -> (&[u16], u32) { self.marker.clear(); @@ -458,28 +492,10 @@ impl crate::CommandEncoder for super::CommandEncoder { Type: d3d12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, u: mem::zeroed(), }; - let raw_format = conv::map_texture_format(dst.format); - - let block_size = dst.format.describe().block_dimensions.0 as u32; for r in regions { let src_box = make_box(&wgt::Origin3d::ZERO, &r.size); - *src_location.u.PlacedFootprint_mut() = d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { - Offset: r.buffer_layout.offset, - Footprint: d3d12::D3D12_SUBRESOURCE_FOOTPRINT { - Format: raw_format, - Width: r.size.width, - Height: r - .buffer_layout - .rows_per_image - .map_or(r.size.height, |count| count.get() * block_size), - Depth: r.size.depth, - RowPitch: r.buffer_layout.bytes_per_row.map_or(0, |count| { - count.get().max(d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) - }), - }, - }; + *src_location.u.PlacedFootprint_mut() = r.to_subresource_footprint(dst.format); *dst_location.u.SubresourceIndex_mut() = dst.calc_subresource_for_copy(&r.texture_base); - list.CopyTextureRegion( &dst_location, r.texture_base.origin.x, @@ -511,26 +527,10 @@ impl crate::CommandEncoder for super::CommandEncoder { Type: d3d12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, u: mem::zeroed(), }; - let raw_format = conv::map_texture_format(src.format); - - let block_size = src.format.describe().block_dimensions.0 as u32; for r in regions { let src_box = make_box(&r.texture_base.origin, &r.size); *src_location.u.SubresourceIndex_mut() = src.calc_subresource_for_copy(&r.texture_base); - *dst_location.u.PlacedFootprint_mut() = d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { - Offset: r.buffer_layout.offset, - Footprint: d3d12::D3D12_SUBRESOURCE_FOOTPRINT { - Format: raw_format, - Width: r.size.width, - Height: r - .buffer_layout - .rows_per_image - .map_or(r.size.height, |count| count.get() * block_size), - Depth: r.size.depth, - RowPitch: r.buffer_layout.bytes_per_row.map_or(0, |count| count.get()), - }, - }; - + *dst_location.u.PlacedFootprint_mut() = r.to_subresource_footprint(src.format); list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box); } }