Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix extractBits and insertBits When offset + count Overflows the Bit Width #5305

Merged
merged 2 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ Bottom level categories:
- Fix timeout when presenting a surface where no work has been done. By @waywardmonkeys in [#5200](https://github.com/gfx-rs/wgpu/pull/5200)
- Simplify and speed up the allocation of internal IDs. By @nical in [#5229](https://github.com/gfx-rs/wgpu/pull/5229)
- Fix an issue where command encoders weren't properly freed if an error occurred during command encoding. By @ErichDonGubler in [#5251](https://github.com/gfx-rs/wgpu/pull/5251).
- Fix behavior of `extractBits` and `insertBits` when `offset + count` overflows the bit width. By @cwfitzgerald in [#5305](https://github.com/gfx-rs/wgpu/pull/5305)
- Fix registry leaks with de-duplicated resources. By @nical in [#5244](https://github.com/gfx-rs/wgpu/pull/5244)
- Fix behavior of integer `clamp` when `min` argument > `max` argument. By @cwfitzgerald in [#5300](https://github.com/gfx-rs/wgpu/pull/5300).
- Fix missing validation for `Device::clear_buffer` where `offset + size buffer.size` was not checked when `size` was omitted. By @ErichDonGubler in [#5282](https://github.com/gfx-rs/wgpu/pull/5282).
Expand Down
72 changes: 69 additions & 3 deletions naga/src/back/glsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1290,7 +1290,14 @@ impl<'a, W: Write> Writer<'a, W> {

let inner = expr_info.ty.inner_with(&self.module.types);

if let Expression::Math { fun, arg, arg1, .. } = *expr {
if let Expression::Math {
fun,
arg,
arg1,
arg2,
..
} = *expr
{
match fun {
crate::MathFunction::Dot => {
// if the expression is a Dot product with integer arguments,
Expand All @@ -1305,6 +1312,14 @@ impl<'a, W: Write> Writer<'a, W> {
}
}
}
crate::MathFunction::ExtractBits => {
// Only argument 1 is re-used.
self.need_bake_expressions.insert(arg1.unwrap());
}
crate::MathFunction::InsertBits => {
// Only argument 2 is re-used.
self.need_bake_expressions.insert(arg2.unwrap());
}
crate::MathFunction::CountLeadingZeros => {
if let Some(crate::ScalarKind::Sint) = inner.scalar_kind() {
self.need_bake_expressions.insert(arg);
Expand Down Expand Up @@ -3375,8 +3390,59 @@ impl<'a, W: Write> Writer<'a, W> {
}
Mf::CountOneBits => "bitCount",
Mf::ReverseBits => "bitfieldReverse",
Mf::ExtractBits => "bitfieldExtract",
Mf::InsertBits => "bitfieldInsert",
Mf::ExtractBits => {
// The behavior of ExtractBits is undefined when offset + count > bit_width. We need
// to first sanitize the offset and count first. If we don't do this, AMD and Intel chips
// will return out-of-spec values if the extracted range is not within the bit width.
//
// This encodes the exact formula specified by the wgsl spec, without temporary values:
// https://gpuweb.github.io/gpuweb/wgsl/#extractBits-unsigned-builtin
//
// w = sizeof(x) * 8
// o = min(offset, w)
// c = min(count, w - o)
//
// bitfieldExtract(x, o, c)
//
// extract_bits(e, min(offset, w), min(count, w - min(offset, w))))
let scalar_bits = ctx
.resolve_type(arg, &self.module.types)
.scalar_width()
.unwrap();

write!(self.out, "bitfieldExtract(")?;
self.write_expr(arg, ctx)?;
write!(self.out, ", int(min(")?;
self.write_expr(arg1.unwrap(), ctx)?;
write!(self.out, ", {scalar_bits}u)), int(min(",)?;
self.write_expr(arg2.unwrap(), ctx)?;
write!(self.out, ", {scalar_bits}u - min(")?;
self.write_expr(arg1.unwrap(), ctx)?;
write!(self.out, ", {scalar_bits}u))))")?;

return Ok(());
}
Mf::InsertBits => {
// InsertBits has the same considerations as ExtractBits above
let scalar_bits = ctx
.resolve_type(arg, &self.module.types)
.scalar_width()
.unwrap();

write!(self.out, "bitfieldInsert(")?;
self.write_expr(arg, ctx)?;
write!(self.out, ", ")?;
self.write_expr(arg1.unwrap(), ctx)?;
write!(self.out, ", int(min(")?;
self.write_expr(arg2.unwrap(), ctx)?;
write!(self.out, ", {scalar_bits}u)), int(min(",)?;
self.write_expr(arg3.unwrap(), ctx)?;
write!(self.out, ", {scalar_bits}u - min(")?;
self.write_expr(arg2.unwrap(), ctx)?;
write!(self.out, ", {scalar_bits}u))))")?;

return Ok(());
}
Mf::FindLsb => "findLSB",
Mf::FindMsb => "findMSB",
// data packing
Expand Down
150 changes: 149 additions & 1 deletion naga/src/back/hlsl/help.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@ int dim_1d = NagaDimensions1D(image_1d);
```
*/

use super::{super::FunctionCtx, BackendResult};
use super::{
super::FunctionCtx,
writer::{EXTRACT_BITS_FUNCTION, INSERT_BITS_FUNCTION},
BackendResult,
};
use crate::{arena::Handle, proc::NameKey};
use std::fmt::Write;

Expand Down Expand Up @@ -59,6 +63,13 @@ pub(super) struct WrappedMatCx2 {
pub(super) columns: crate::VectorSize,
}

#[derive(Clone, Copy, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)]
pub(super) struct WrappedMath {
pub(super) fun: crate::MathFunction,
pub(super) scalar: crate::Scalar,
pub(super) components: Option<u32>,
}

/// HLSL backend requires its own `ImageQuery` enum.
///
/// It is used inside `WrappedImageQuery` and should be unique per ImageQuery function.
Expand Down Expand Up @@ -851,12 +862,149 @@ impl<'a, W: Write> super::Writer<'a, W> {
Ok(())
}

pub(super) fn write_wrapped_math_functions(
&mut self,
module: &crate::Module,
func_ctx: &FunctionCtx,
) -> BackendResult {
for (_, expression) in func_ctx.expressions.iter() {
if let crate::Expression::Math {
fun,
arg,
arg1: _arg1,
arg2: _arg2,
arg3: _arg3,
} = *expression
{
match fun {
crate::MathFunction::ExtractBits => {
// The behavior of our extractBits polyfill is undefined if offset + count > bit_width. We need
// to first sanitize the offset and count first. If we don't do this, we will get out-of-spec
// values if the extracted range is not within the bit width.
//
// This encodes the exact formula specified by the wgsl spec:
// https://gpuweb.github.io/gpuweb/wgsl/#extractBits-unsigned-builtin
//
// w = sizeof(x) * 8
// o = min(offset, w)
// c = min(count, w - o)
//
// bitfieldExtract(x, o, c)
let arg_ty = func_ctx.resolve_type(arg, &module.types);
let scalar = arg_ty.scalar().unwrap();
let components = arg_ty.components();

let wrapped = WrappedMath {
fun,
scalar,
components,
};

if !self.wrapped.math.insert(wrapped) {
continue;
}

// Write return type
self.write_value_type(module, arg_ty)?;

let scalar_width: u8 = scalar.width * 8;

// Write function name and parameters
writeln!(self.out, " {EXTRACT_BITS_FUNCTION}(")?;
write!(self.out, " ")?;
self.write_value_type(module, arg_ty)?;
writeln!(self.out, " e,")?;
writeln!(self.out, " uint offset,")?;
writeln!(self.out, " uint count")?;
writeln!(self.out, ") {{")?;

// Write function body
writeln!(self.out, " uint w = {scalar_width};")?;
writeln!(self.out, " uint o = min(offset, w);")?;
writeln!(self.out, " uint c = min(count, w - o);")?;
writeln!(
self.out,
" return (c == 0 ? 0 : (e << (w - c - o)) >> (w - c));"
)?;

// End of function body
writeln!(self.out, "}}")?;
}
crate::MathFunction::InsertBits => {
// The behavior of our insertBits polyfill has the same constraints as the extractBits polyfill.

let arg_ty = func_ctx.resolve_type(arg, &module.types);
let scalar = arg_ty.scalar().unwrap();
let components = arg_ty.components();

let wrapped = WrappedMath {
fun,
scalar,
components,
};

if !self.wrapped.math.insert(wrapped) {
continue;
}

// Write return type
self.write_value_type(module, arg_ty)?;

let scalar_width: u8 = scalar.width * 8;
let scalar_max: u64 = match scalar.width {
1 => 0xFF,
2 => 0xFFFF,
4 => 0xFFFFFFFF,
8 => 0xFFFFFFFFFFFFFFFF,
_ => unreachable!(),
};

// Write function name and parameters
writeln!(self.out, " {INSERT_BITS_FUNCTION}(")?;
write!(self.out, " ")?;
self.write_value_type(module, arg_ty)?;
writeln!(self.out, " e,")?;
write!(self.out, " ")?;
self.write_value_type(module, arg_ty)?;
writeln!(self.out, " newbits,")?;
writeln!(self.out, " uint offset,")?;
writeln!(self.out, " uint count")?;
writeln!(self.out, ") {{")?;

// Write function body
writeln!(self.out, " uint w = {scalar_width}u;")?;
writeln!(self.out, " uint o = min(offset, w);")?;
writeln!(self.out, " uint c = min(count, w - o);")?;

// The `u` suffix on the literals is _extremely_ important. Otherwise it will use
// i32 shifting instead of the intended u32 shifting.
writeln!(
self.out,
" uint mask = (({scalar_max}u >> ({scalar_width}u - c)) << o);"
)?;
writeln!(
self.out,
" return (c == 0 ? e : ((e & ~mask) | ((newbits << o) & mask)));"
)?;

// End of function body
writeln!(self.out, "}}")?;
}
_ => {}
}
}
}

Ok(())
}

/// Helper function that writes various wrapped functions
pub(super) fn write_wrapped_functions(
&mut self,
module: &crate::Module,
func_ctx: &FunctionCtx,
) -> BackendResult {
self.write_wrapped_math_functions(module, func_ctx)?;
self.write_wrapped_compose_functions(module, func_ctx.expressions)?;

for (handle, _) in func_ctx.expressions.iter() {
Expand Down
2 changes: 2 additions & 0 deletions naga/src/back/hlsl/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -817,6 +817,8 @@ pub const RESERVED: &[&str] = &[
// Naga utilities
super::writer::MODF_FUNCTION,
super::writer::FREXP_FUNCTION,
super::writer::EXTRACT_BITS_FUNCTION,
super::writer::INSERT_BITS_FUNCTION,
];

// DXC scalar types, from https://github.com/microsoft/DirectXShaderCompiler/blob/18c9e114f9c314f93e68fbc72ce207d4ed2e65ae/tools/clang/lib/AST/ASTContextHLSL.cpp#L48-L254
Expand Down
2 changes: 2 additions & 0 deletions naga/src/back/hlsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ struct Wrapped {
constructors: crate::FastHashSet<help::WrappedConstructor>,
struct_matrix_access: crate::FastHashSet<help::WrappedStructMatrixAccess>,
mat_cx2s: crate::FastHashSet<help::WrappedMatCx2>,
math: crate::FastHashSet<help::WrappedMath>,
}

impl Wrapped {
Expand All @@ -265,6 +266,7 @@ impl Wrapped {
self.constructors.clear();
self.struct_matrix_access.clear();
self.mat_cx2s.clear();
self.math.clear();
}
}

Expand Down
Loading
Loading