Commit: improve test

teoxoy committed Nov 7, 2022
1 parent 37237a9 commit ee65e5b
Showing 2 changed files with 74 additions and 48 deletions.
98 changes: 57 additions & 41 deletions wgpu/tests/shader/zero_init_workgroup_mem.rs
@@ -1,8 +1,11 @@
use std::num::NonZeroU64;

use wgpu::{
include_wgsl, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor,
BindGroupLayoutEntry, BindingType, BufferBindingType, BufferDescriptor, BufferUsages,
CommandEncoderDescriptor, ComputePassDescriptor, ComputePipelineDescriptor, DownlevelFlags,
Limits, Maintain, MapMode, PipelineLayoutDescriptor, ShaderStages,
BindGroupLayoutEntry, BindingResource, BindingType, BufferBinding, BufferBindingType,
BufferDescriptor, BufferUsages, CommandEncoderDescriptor, ComputePassDescriptor,
ComputePipelineDescriptor, DownlevelFlags, Limits, Maintain, MapMode, PipelineLayoutDescriptor,
ShaderStages,
};

use crate::common::{initialize_test, TestParameters, TestingContext};
@@ -17,12 +20,19 @@ fn zero_init_workgroup_mem() {
);
}

/// Increases iterations and writes random data to workgroup memory before reading it each iteration.
const TRY_TO_FAIL: bool = false;
const DISPATCH_SIZE: (u32, u32, u32) = (64, 64, 64);
const TOTAL_WORK_GROUPS: u32 = DISPATCH_SIZE.0 * DISPATCH_SIZE.1 * DISPATCH_SIZE.2;

/// Number of bytes of workgroup memory used by the shader
const SHADER_WORKGROUP_MEMORY: u32 = 512 * 4 + 4;
// assume the device has this much workgroup memory (~2 GiB)
const MAX_DEVICE_WORKGROUP_MEMORY: u32 = i32::MAX as u32;
const NR_OF_DISPATCHES: u32 =
MAX_DEVICE_WORKGROUP_MEMORY / (SHADER_WORKGROUP_MEMORY * TOTAL_WORK_GROUPS) + 1; // TODO: use div_ceil once stabilized

const ARR_SIZE: usize = 512;
const BUFFER_SIZE: u64 = 4 * (ARR_SIZE as u64);
const ITERATIONS: u32 = if TRY_TO_FAIL { 100 } else { 1 };
const OUTPUT_ARRAY_SIZE: u32 = TOTAL_WORK_GROUPS * NR_OF_DISPATCHES;
const BUFFER_SIZE: u64 = OUTPUT_ARRAY_SIZE as u64 * 4;
const BUFFER_BINDING_SIZE: u32 = TOTAL_WORK_GROUPS * 4;
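// Back-of-the-envelope check of the constants above: TOTAL_WORK_GROUPS = 64 * 64 * 64
// = 262_144, so a single dispatch touches 262_144 * 2_052 B ≈ 538 MB of workgroup
// memory; covering the assumed ~2 GiB takes NR_OF_DISPATCHES = 4, giving
// OUTPUT_ARRAY_SIZE = 1_048_576 u32 entries and BUFFER_SIZE = 4 MiB.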

fn zero_init_workgroup_mem_impl(ctx: TestingContext) {
let bgl = ctx
@@ -34,7 +44,7 @@ fn zero_init_workgroup_mem_impl(ctx: TestingContext) {
visibility: ShaderStages::COMPUTE,
ty: BindingType::Buffer {
ty: BufferBindingType::Storage { read_only: false },
has_dynamic_offset: false,
has_dynamic_offset: true,
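// the byte offset for each dispatch is supplied later via set_bind_group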
min_binding_size: None,
},
count: None,
@@ -60,7 +70,11 @@ fn zero_init_workgroup_mem_impl(ctx: TestingContext) {
layout: &bgl,
entries: &[BindGroupEntry {
binding: 0,
resource: output_buffer.as_entire_binding(),
resource: BindingResource::Buffer(BufferBinding {
buffer: &output_buffer,
offset: 0,
size: Some(NonZeroU64::new(BUFFER_BINDING_SIZE as u64).unwrap()),
}),
}],
});

@@ -96,7 +110,7 @@ fn zero_init_workgroup_mem_impl(ctx: TestingContext) {

// -- Initializing data --

let output_pre_init_data = [1; ARR_SIZE];
let output_pre_init_data = vec![1; OUTPUT_ARRAY_SIZE as usize];
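// non-zero pre-fill: any entry that read() does not overwrite with 0 counts as a failure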
ctx.queue.write_buffer(
&output_buffer,
0,
@@ -105,46 +119,48 @@ fn zero_init_workgroup_mem_impl(ctx: TestingContext) {

// -- Run test --

for i in 0..ITERATIONS {
let mut encoder = ctx
.device
.create_command_encoder(&CommandEncoderDescriptor::default());
let mut encoder = ctx
.device
.create_command_encoder(&CommandEncoderDescriptor::default());

let mut cpass = encoder.begin_compute_pass(&ComputePassDescriptor::default());

let mut cpass = encoder.begin_compute_pass(&ComputePassDescriptor::default());
if TRY_TO_FAIL {
cpass.set_pipeline(&pipeline_write);
cpass.dispatch_workgroups(64, 64, 64);
}
cpass.set_pipeline(&pipeline_write);
for _ in 0..NR_OF_DISPATCHES {
cpass.dispatch_workgroups(DISPATCH_SIZE.0, DISPATCH_SIZE.1, DISPATCH_SIZE.2);
}

cpass.set_pipeline(&pipeline_read);
cpass.set_bind_group(0, &bg, &[]);
cpass.dispatch_workgroups(1, 1, 1);
drop(cpass);
cpass.set_pipeline(&pipeline_read);
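// each read dispatch binds its own BUFFER_BINDING_SIZE-byte window of the
// output buffer via the dynamic offset, so results from all dispatches land
// in distinct regions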
for i in 0..NR_OF_DISPATCHES {
cpass.set_bind_group(0, &bg, &[i * BUFFER_BINDING_SIZE]);
cpass.dispatch_workgroups(DISPATCH_SIZE.0, DISPATCH_SIZE.1, DISPATCH_SIZE.2);
}
drop(cpass);

// -- Pulldown data --
// -- Pulldown data --

encoder.copy_buffer_to_buffer(&output_buffer, 0, &mapping_buffer, 0, BUFFER_SIZE);
encoder.copy_buffer_to_buffer(&output_buffer, 0, &mapping_buffer, 0, BUFFER_SIZE);

ctx.queue.submit(Some(encoder.finish()));
ctx.queue.submit(Some(encoder.finish()));

mapping_buffer.slice(..).map_async(MapMode::Read, |_| ());
ctx.device.poll(Maintain::Wait);
mapping_buffer.slice(..).map_async(MapMode::Read, |_| ());
ctx.device.poll(Maintain::Wait);

let mapped = mapping_buffer.slice(..).get_mapped_range();
let mapped = mapping_buffer.slice(..).get_mapped_range();

let typed: &[u32] = bytemuck::cast_slice(&*mapped);
let typed: &[u32] = bytemuck::cast_slice(&*mapped);

// -- Check results --
// -- Check results --

let expected = [0; ARR_SIZE];
let num_dispatches_failed = typed.iter().filter(|&&res| res != 0).count();
let ratio = (num_dispatches_failed as f32 / OUTPUT_ARRAY_SIZE as f32) * 100.;

assert!(
typed == expected,
"Zero-initialization of workgroup memory failed (in iteration: {}).",
i
);
assert!(
num_dispatches_failed == 0,
"Zero-initialization of workgroup memory failed ({:.0}% of dispatches failed).",
ratio
);

drop(mapped);
mapping_buffer.unmap();
}
drop(mapped);
mapping_buffer.unmap();
}
24 changes: 17 additions & 7 deletions wgpu/tests/shader/zero_init_workgroup_mem.wgsl
@@ -1,21 +1,31 @@
let array_size = 512u;

struct WStruct {
arr: array<i32, 512>,
atom: atomic<i32>
arr: array<u32, array_size>,
atom: atomic<u32>
}

var<workgroup> w_mem: WStruct;

@group(0) @binding(0)
var<storage, read_write> output: array<i32, 512>;
var<storage, read_write> output: array<u32>;

@compute @workgroup_size(1)
fn read() {
output = w_mem.arr;
fn read(@builtin(workgroup_id) wgid: vec3<u32>, @builtin(num_workgroups) num_workgroups: vec3<u32>) {
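// each workgroup checks that its workgroup memory reads back as zero and
// records the result in its own slot of the output array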
var is_zero = true;
for(var i = 0u; i < array_size; i++) {
is_zero &= w_mem.arr[i] == 0u;
}
is_zero &= atomicLoad(&w_mem.atom) == 0u;

let idx = wgid.x + (wgid.y * num_workgroups.x) + (wgid.z * num_workgroups.x * num_workgroups.y);
output[idx] = u32(!is_zero);
}

@compute @workgroup_size(64)
@compute @workgroup_size(1)
fn write() {
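// dirty the workgroup memory so stale data is observable if zero-initialization is skipped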
for(var i: i32 = 0; i < 512; i++) {
for(var i = 0u; i < array_size; i++) {
w_mem.arr[i] = i;
}
atomicStore(&w_mem.atom, 3u);
}
