diff --git a/docs/backends.rst b/docs/backends.rst
index ec3bafcd..fb23323c 100644
--- a/docs/backends.rst
+++ b/docs/backends.rst
@@ -59,6 +59,103 @@ The wgpu_native backend provides a few extra functionalities:
:return: Device
:rtype: wgpu.GPUDevice
+The wgpu_native backend provides support for push constants.
+Since WebGPU does not support this feature, documentation on its use is hard to find.
+A full explanation of push constants and their use in Vulkan can be found
+`here `_.
+Using push constants in WGPU closely follows the Vulkan model.
+
+The advantage of push constants is that they are typically faster to update than uniform buffers.
+Modifications to push constants are included in the command encoder; updating a uniform
+buffer involves sending a separate command to the GPU.
+The disadvantage of push constants is that their size limit is much smaller. The limit
+is guaranteed to be at least 128 bytes, and 256 bytes is typical.
+
+Given an adapter, first determine if it supports push constants::
+
+ >>> "push-constants" in adapter.features
+ True
+
+If push constants are supported, determine the maximum number of bytes that can
+be allocated for push constants::
+
+ >>> adapter.limits["max-push-constant-size"]
+ 256
+
+You must tell the adapter to create a device that supports push constants,
+and you must tell it the number of bytes of push constants that you are using.
+Overestimating is okay::
+
+ device = adapter.request_device(
+ required_features=["push-constants"],
+ required_limits={"max-push-constant-size": 256},
+ )
+
+Creating a push constant in your shader code is similar to the way you would create
+a uniform buffer.
+The fields that are only used in the ``@vertex`` shader should be separated from the fields
+that are only used in the ``@fragment`` shader which should be separated from the fields
+used in both shaders::
+
+ struct PushConstants {
+ // vertex shader
+ vertex_transform: mat4x4f,
+ // fragment shader
+ fragment_transform: mat4x4f,
+ // used in both
+ generic_transform: mat4x4f,
+ }
+ var<push_constant> push_constants: PushConstants;
+
+To create the pipeline layout for this shader, use
+``wgpu.backends.wgpu_native.create_pipeline_layout`` instead of
+``device.create_pipeline_layout``. It takes an additional argument,
+``push_constant_layouts``, describing
+the layout of the push constants. For the struct shown above::
+
+ push_constant_layouts = [
+ {"visibility": ShaderStage.VERTEX, "start": 0, "end": 64},
+ {"visibility": ShaderStage.FRAGMENT, "start": 64, "end": 128},
+ {"visibility": ShaderStage.VERTEX | ShaderStage.FRAGMENT, "start": 128, "end": 192},
+ ]
+
+Finally, you set the value of the push constant by using
+``wgpu.backends.wgpu_native.set_push_constants``::
+
+ set_push_constants(this_pass, ShaderStage.VERTEX, 0, 64, <64 bytes>)
+ set_push_constants(this_pass, ShaderStage.FRAGMENT, 64, 64, <64 bytes>)
+ set_push_constants(this_pass, ShaderStage.VERTEX | ShaderStage.FRAGMENT, 128, 64, <64 bytes>)
+
+Bytes must be set separately for each of the three visibility ranges. If the push constant has
+already been set, on the next use you only need to call ``set_push_constants`` on those
+bytes you wish to change.
+
+.. py:function:: wgpu.backends.wgpu_native.create_pipeline_layout(device, *, label="", bind_group_layouts, push_constant_layouts=[])
+
+ This method provides the same functionality as :func:`wgpu.GPUDevice.create_pipeline_layout`,
+ but provides an extra `push_constant_layouts` argument.
+ When using push constants, this argument is a list of dictionaries, where each
+ dictionary has three fields: `visibility`, `start`, and `end`.
+
+ :param device: The device on which we are creating the pipeline layout
+ :param label: An optional label
+ :param bind_group_layouts: The bind group layouts, as for :func:`wgpu.GPUDevice.create_pipeline_layout`
+ :param push_constant_layouts: Described above.
+
+.. py:function:: wgpu.backends.wgpu_native.set_push_constants(render_pass_encoder, visibility, offset, size_in_bytes, data, data_offset=0)
+
+ This function requires that the device was created with the `push-constants` feature.
+ These push constants are a buffer of bytes available to the `fragment` and `vertex`
+ shaders. They are similar to a bound buffer, but the buffer is set using this
+ function call.
+
+ :param render_pass_encoder: The render pass encoder to which we are pushing constants.
+ :param visibility: The stages (vertex, fragment, or both) to which these constants are visible
+ :param offset: The offset into the push constants at which the bytes are to be written
+ :param size_in_bytes: The number of bytes to copy from the data
+ :param data: The data to copy to the buffer
+ :param data_offset: The starting offset in the data at which to begin copying.
+
The js_webgpu backend
---------------------
diff --git a/tests/test_set_constant.py b/tests/test_set_constant.py
new file mode 100644
index 00000000..1252feef
--- /dev/null
+++ b/tests/test_set_constant.py
@@ -0,0 +1,164 @@
+import numpy as np
+import pytest
+
+import wgpu.utils
+from tests.testutils import can_use_wgpu_lib, run_tests
+from wgpu import TextureFormat
+from wgpu.backends.wgpu_native.extras import create_pipeline_layout, set_push_constants
+
+if not can_use_wgpu_lib:
+ pytest.skip("Skipping tests that need the wgpu lib", allow_module_level=True)
+
+
+"""
+This code is an amazingly slow way of adding together two 10-element arrays of 32-bit
+integers defined by push constants and store them into an output buffer.
+
+The first number of the addition is purposely pulled using the vertex stage, and the
+second number from the fragment stage, so that we can ensure that we are
+using stage-separated push constants correctly.
+
+The source code assumes the topology is POINT-LIST, so that each call to vertexMain
+corresponds with one call to fragmentMain.
+"""
+COUNT = 10
+
+SHADER_SOURCE = (
+ f"""
+ const COUNT = {COUNT}u;
+"""
+ """
+ // Put the results here
+ @group(0) @binding(0) var data: array;
+
+ struct PushConstants {
+ values1: array, // VERTEX constants
+ values2: array, // FRAGMENT constants
+ }
+ var push_constants: PushConstants;
+
+ struct VertexOutput {
+ @location(0) index: u32,
+ @location(1) value: u32,
+ @builtin(position) position: vec4f,
+ }
+
+ @vertex
+ fn vertexMain(
+ @builtin(vertex_index) index: u32,
+ ) -> VertexOutput {
+ return VertexOutput(index, push_constants.values1[index], vec4f(0, 0, 0, 1));
+ }
+
+ @fragment
+ fn fragmentMain(@location(0) index: u32,
+ @location(1) value: u32
+ ) -> @location(0) vec4f {
+ data[index] = value + push_constants.values2[index];
+ return vec4f();
+ }
+"""
+)
+
+BIND_GROUP_ENTRIES = [  # binding 0: a storage buffer visible to the fragment stage
+ {"binding": 0, "visibility": "FRAGMENT", "buffer": {"type": "storage"}},
+]
+
+
+def setup_pipeline():  # returns (device, pipeline, render_pass_descriptor) shared by the tests
+ adapter = wgpu.gpu.request_adapter(power_preference="high-performance")
+ device = adapter.request_device(
+ required_features=["push-constants"],
+ required_limits={"max-push-constant-size": 128},  # the layouts below use COUNT * 4 * 2 bytes
+ )
+ output_texture = device.create_texture(
+ # Actual size is immaterial. Could just be 1x1
+ size=[128, 128],
+ format=TextureFormat.rgba8unorm,
+ usage="RENDER_ATTACHMENT|COPY_SRC",
+ )
+ shader = device.create_shader_module(code=SHADER_SOURCE)
+ bind_group_layout = device.create_bind_group_layout(entries=BIND_GROUP_ENTRIES)
+ render_pipeline_layout = create_pipeline_layout(
+ device,
+ bind_group_layouts=[bind_group_layout],
+ push_constant_layouts=[
+ {"visibility": "VERTEX", "start": 0, "end": COUNT * 4},  # bytes backing values1
+ {"visibility": "FRAGMENT", "start": COUNT * 4, "end": COUNT * 4 * 2},  # bytes backing values2
+ ],
+ )
+ pipeline = device.create_render_pipeline(
+ layout=render_pipeline_layout,
+ vertex={
+ "module": shader,
+ "entry_point": "vertexMain",
+ },
+ fragment={
+ "module": shader,
+ "entry_point": "fragmentMain",
+ "targets": [{"format": output_texture.format}],
+ },
+ primitive={
+ "topology": "point-list",
+ },
+ )
+ render_pass_descriptor = {
+ "color_attachments": [
+ {
+ "clear_value": (0, 0, 0, 0), # only first value matters
+ "load_op": "clear",
+ "store_op": "store",
+ "view": output_texture.create_view(),
+ }
+ ],
+ }
+
+ return device, pipeline, render_pass_descriptor
+
+
+def test_normal_push_constants():  # the shader must add the two halves of `buffer` element-wise
+ device, pipeline, render_pass_descriptor = setup_pipeline()
+ vertex_call_buffer = device.create_buffer(size=COUNT * 4, usage="STORAGE|COPY_SRC")  # bound as `data`
+ bind_group = device.create_bind_group(
+ layout=pipeline.get_bind_group_layout(0),
+ entries=[
+ {"binding": 0, "resource": {"buffer": vertex_call_buffer}},
+ ],
+ )
+
+ encoder = device.create_command_encoder()
+ this_pass = encoder.begin_render_pass(**render_pass_descriptor)
+ this_pass.set_pipeline(pipeline)
+ this_pass.set_bind_group(0, bind_group)
+
+ buffer = np.random.randint(0, 1_000_000, size=(2 * COUNT), dtype=np.uint32)
+ set_push_constants(this_pass, "VERTEX", 0, COUNT * 4, buffer)  # first COUNT values
+ set_push_constants(this_pass, "FRAGMENT", COUNT * 4, COUNT * 4, buffer, COUNT * 4)  # last COUNT values, via data_offset
+ this_pass.draw(COUNT)  # COUNT points, see the module note on POINT-LIST
+ this_pass.end()
+ device.queue.submit([encoder.finish()])
+ info_view = device.queue.read_buffer(vertex_call_buffer)
+ result = np.frombuffer(info_view, dtype=np.uint32)
+ expected_result = buffer[0:COUNT] + buffer[COUNT:]
+ assert all(result == expected_result)
+
+
+def test_bad_set_push_constants():  # out-of-range requests must raise ValueError
+ device, pipeline, render_pass_descriptor = setup_pipeline()
+ encoder = device.create_command_encoder()
+ this_pass = encoder.begin_render_pass(**render_pass_descriptor)
+
+ def zeros(n):
+ return np.zeros(n, dtype=np.uint32)
+
+ with pytest.raises(ValueError):
+ # Buffer is too short: COUNT * 4 bytes requested, only (COUNT - 1) * 4 supplied
+ set_push_constants(this_pass, "VERTEX", 0, COUNT * 4, zeros(COUNT - 1))
+
+ with pytest.raises(ValueError):
+ # Buffer is too short once data_offset is applied: 8 + COUNT * 4 > (COUNT + 1) * 4
+ set_push_constants(this_pass, "VERTEX", 0, COUNT * 4, zeros(COUNT + 1), 8)
+
+
+if __name__ == "__main__":
+ run_tests(globals())
diff --git a/tests/test_wgpu_native_basics.py b/tests/test_wgpu_native_basics.py
index 6d455b11..f5ff4d73 100644
--- a/tests/test_wgpu_native_basics.py
+++ b/tests/test_wgpu_native_basics.py
@@ -424,11 +424,11 @@ def test_features_are_legal():
)
# We can also use underscore
assert are_features_wgpu_legal(["push_constants", "vertex_writable_storage"])
+ # We can also use camel case
+ assert are_features_wgpu_legal(["PushConstants", "VertexWritableStorage"])
def test_features_are_illegal():
- # not camel Case
- assert not are_features_wgpu_legal(["pushConstants"])
# writable is misspelled
assert not are_features_wgpu_legal(
["multi-draw-indirect", "vertex-writeable-storage"]
@@ -436,6 +436,36 @@ def test_features_are_illegal():
assert not are_features_wgpu_legal(["my-made-up-feature"])
+def are_limits_wgpu_legal(limits):
+ """Returns true if the dict of limits uses legal limit names. Determining whether a
+ specific limit value is supported on a particular device would make the tests fragile,
+ so we only verify that the names are legal limit names."""
+ adapter = wgpu.gpu.request_adapter(power_preference="high-performance")
+ try:
+ adapter.request_device(required_limits=limits)
+ return True
+ except RuntimeError as e:
+ assert "Unsupported features were requested" in str(e)  # NOTE(review): message names features, not limits - confirm
+ return True
+ except KeyError:  # raised for an unknown limit name
+ return False
+
+
+def test_limits_are_legal():
+ # A standard limit. Probably exists
+ assert are_limits_wgpu_legal({"max-bind-groups": 8})
+ # A limit that belongs to the push-constants extension
+ assert are_limits_wgpu_legal({"max-push-constant-size": 128})
+ # We can also use underscore
+ assert are_limits_wgpu_legal({"max_bind_groups": 8, "max_push_constant_size": 128})
+ # We can also use camel case
+ assert are_limits_wgpu_legal({"maxBindGroups": 8, "maxPushConstantSize": 128})
+
+
+def test_limits_are_not_legal():
+ assert not are_limits_wgpu_legal({"max-bind-group": 8})  # singular "group" is not a limit name
+
+
if __name__ == "__main__":
run_tests(globals())
diff --git a/tests_mem/testutils.py b/tests_mem/testutils.py
index b71d7eb6..451ec217 100644
--- a/tests_mem/testutils.py
+++ b/tests_mem/testutils.py
@@ -145,7 +145,40 @@ def ob_name_from_test_func(func):
def create_and_release(create_objects_func):
- """Decorator."""
+ """
+ This wrapper goes around a test that takes a single argument n. That test should
+ be a generator function that yields a descriptor followed by
+ n different objects corresponding to the name of the test function. Hence
+ a test named `test_release_foo_bar` would yield a descriptor followed by
+ n FooBar objects.
+
+ The descriptor is a dictionary with three fields, each optional.
+ In a typical situation, there will be `n` FooBar objects after the test, and after
+ releasing, there will be zero. However, sometimes there are auxiliary objects,
+ in which case it is necessary to provide one or more fields.
+
+ The keys "expected_counts_after_create" and "expected_counts_after_release" each have
+ as their value a sub-dictionary giving the number of still-alive WGPU objects.
+ The key "expected_counts_after_create" gives the expected state after the
+ n objects have been created and put into a list; "expected_counts_after_release"
+ gives the state after the n objects have been released.
+
+ These sub-dictionaries have as their keys the names of WGPU object types, and
+ their value is a tuple of two integers: the first is the number of Python objects
+ expected to exist and the second is the number of native objects. Any type not in
+ the subdictionary has an implied value of (0, 0).
+
+ The key "ignore" has as its value a collection of object types that we should ignore
+ in this test. Ideally we should not use this, but currently there are a few cases where
+ we cannot reliably predict the number of objects in wgpu-native.
+
+ If the descriptor doesn't contain an "expected_counts_after_create", then the default
+ is {"FooBar": (n, n)}, where "FooBar" is derived from the name of the test.
+
+ If the descriptor doesn't contain an "expected_counts_after_release", then the
+ default is {}, indicating that creating and removing the objects should completely
+ clean itself up.
+ """
def core_test_func():
"""The core function that does the testing."""
diff --git a/wgpu/backends/wgpu_native/_api.py b/wgpu/backends/wgpu_native/_api.py
index eb8dc3d5..d082fa3f 100644
--- a/wgpu/backends/wgpu_native/_api.py
+++ b/wgpu/backends/wgpu_native/_api.py
@@ -32,7 +32,6 @@
get_memoryview_from_address,
get_memoryview_and_address,
to_snake_case,
- to_camel_case,
ErrorHandler,
SafeLibCalls,
)
@@ -203,6 +202,70 @@ def check_struct(struct_name, d):
raise ValueError(f"Invalid keys in {struct_name}: {invalid_keys}")
+def _get_limits(id: int, device: bool = False, adapter: bool = False) -> dict:
+ """Gets the limits for a device or an adapter, as a dict sorted by dash-separated limit name"""
+ assert device + adapter == 1 # exactly one is set
+
+ # H: chain: WGPUChainedStructOut, limits: WGPUNativeLimits
+ c_supported_limits_extras = new_struct_p(
+ "WGPUSupportedLimitsExtras *",
+ # not used: chain
+ # not used: limits
+ )
+ c_supported_limits_extras.chain.sType = lib.WGPUSType_SupportedLimitsExtras
+ # H: nextInChain: WGPUChainedStructOut *, limits: WGPULimits
+ c_supported_limits = new_struct_p(
+ "WGPUSupportedLimits *",
+ nextInChain=ffi.cast("WGPUChainedStructOut *", c_supported_limits_extras),  # link the extras struct into the query
+ # not used: limits
+ )
+ if adapter:
+ # H: WGPUBool f(WGPUAdapter adapter, WGPUSupportedLimits * limits)
+ libf.wgpuAdapterGetLimits(id, c_supported_limits)
+ else:
+ # H: WGPUBool f(WGPUDevice device, WGPUSupportedLimits * limits)
+ libf.wgpuDeviceGetLimits(id, c_supported_limits)
+
+ key_value_pairs = [
+ (to_snake_case(name, "-"), getattr(c_limits, name))
+ for c_limits in (c_supported_limits.limits, c_supported_limits_extras.limits)  # standard limits, then the extras
+ for name in dir(c_limits)
+ ]
+ limits = dict(sorted(key_value_pairs))
+ return limits
+
+
+def _get_features(id: int, device: bool = False, adapter: bool = False) -> set:
+ """Gets the set of feature names (standard and native) reported for a device or an adapter"""
+ assert device + adapter == 1 # exactly one of them is set
+
+ if adapter:
+ # H: WGPUBool f(WGPUAdapter adapter, WGPUFeatureName feature)
+ has_feature = lambda feature: libf.wgpuAdapterHasFeature(id, feature) # noqa
+ else:
+ # H: WGPUBool f(WGPUDevice device, WGPUFeatureName feature)
+ has_feature = lambda feature: libf.wgpuDeviceHasFeature(id, feature) # noqa
+
+ features = set()
+
+ # Standard features
+ for f in sorted(enums.FeatureName):
+ if f in [
+ "clip-distances",
+ "dual-source-blending",
+ "texture-compression-bc-sliced-3d",
+ ]:
+ continue # not supported by wgpu-native yet
+ if has_feature(enummap[f"FeatureName.{f}"]):
+ features.add(f)
+
+ # Native features
+ for name, feature_id in enum_str2int["NativeFeature"].items():  # name -> native feature id
+ if has_feature(feature_id):
+ features.add(name)
+ return features
+
+
error_handler = ErrorHandler(logger)
libf = SafeLibCalls(lib, error_handler)
@@ -367,44 +430,11 @@ def to_py_str(key):
# H: void f(WGPUAdapterInfo adapterInfo)
libf.wgpuAdapterInfoFreeMembers(c_info[0])
- # ----- Get adapter limits
-
- # H: nextInChain: WGPUChainedStructOut *, limits: WGPULimits
- c_supported_limits = new_struct_p(
- "WGPUSupportedLimits *",
- # not used: nextInChain
- # not used: limits
- )
- c_limits = c_supported_limits.limits
- # H: WGPUBool f(WGPUAdapter adapter, WGPUSupportedLimits * limits)
- libf.wgpuAdapterGetLimits(adapter_id, c_supported_limits)
- limits = {to_snake_case(k): getattr(c_limits, k) for k in sorted(dir(c_limits))}
-
- # ----- Get adapter features
-
- # WebGPU features
- features = set()
- for f in sorted(enums.FeatureName):
- if f in [
- "clip-distances",
- "dual-source-blending",
- "texture-compression-bc-sliced-3d",
- ]:
- continue # not supported by wgpu-native yet
- key = f"FeatureName.{f}"
- i = enummap[key]
- # H: WGPUBool f(WGPUAdapter adapter, WGPUFeatureName feature)
- if libf.wgpuAdapterHasFeature(adapter_id, i):
- features.add(f)
-
- # Native features
- for name, i in enum_str2int["NativeFeature"].items():
- # H: WGPUBool f(WGPUAdapter adapter, WGPUFeatureName feature)
- if libf.wgpuAdapterHasFeature(adapter_id, i):
- features.add(name)
+ # ----- Get adapter limits and features
+ limits = _get_limits(adapter_id, adapter=True)
+ features = _get_features(adapter_id, adapter=True)
# ----- Done
-
return GPUAdapter(adapter_id, features, limits, adapter_info)
@@ -815,6 +845,7 @@ def _request_device(
for f in required_features:
if isinstance(f, str):
f = f.replace("_", "-")
+ f = to_snake_case(f, "-")
i = enummap.get(f"FeatureName.{f}", None)
if i is None:
i = enum_str2int["NativeFeature"].get(f, None)
@@ -828,24 +859,54 @@ def _request_device(
# ----- Set limits
+ # H: chain: WGPUChainedStruct, limits: WGPUNativeLimits
+ c_required_limits_extras = new_struct_p(
+ "WGPURequiredLimitsExtras *",
+ # not used: chain
+ # not used: limits
+ )
+ c_required_limits_extras.chain.sType = lib.WGPUSType_RequiredLimitsExtras
# H: nextInChain: WGPUChainedStruct *, limits: WGPULimits
c_required_limits = new_struct_p(
"WGPURequiredLimits *",
- # not used: nextInChain
+ nextInChain=ffi.cast("WGPUChainedStruct*", c_required_limits_extras),
# not used: limits
)
c_limits = c_required_limits.limits
-
- # Set all limits to the adapter default
- # This is important, because zero does NOT mean default, and a limit of zero
- # for a specific limit may break a lot of applications.
- for key, val in self.limits.items():
- setattr(c_limits, to_camel_case(key), val)
-
- # Overload with any set limits
- required_limits = required_limits or {}
- for key, val in required_limits.items():
- setattr(c_limits, to_camel_case(key), val)
+ c_limits_extras = c_required_limits_extras.limits
+
+ def canonicalize_limit_name(name):
+ if name in self._limits:
+ return name
+ if "_" in name:
+ alt_name = name.replace("_", "-")
+ if alt_name in self._limits:
+ return alt_name
+ alt_name = to_snake_case(name, "-")
+ if alt_name in self._limits:
+ return alt_name
+ raise KeyError(f"Unknown limit name '{name}'")
+
+ if required_limits:
+ assert isinstance(required_limits, dict)
+ required_limits = {
+ canonicalize_limit_name(key): value
+ for key, value in required_limits.items()
+ }
+ else:
+ # If required_limits isn't set, set it to self._limits. This is the same as
+ # setting it to {}, but the loop below goes just a little bit faster.
+ required_limits = self._limits
+
+ for limit in (c_limits, c_limits_extras):
+ for key in dir(limit):
+ snake_key = to_snake_case(key, "-")
+ # Use the value in required_limits if it exists. Otherwise, the old value
+ try:
+ value = required_limits[snake_key]
+ except KeyError:
+ value = self._limits[snake_key]
+ setattr(limit, key, value)
# ---- Set queue descriptor
@@ -939,41 +1000,9 @@ def callback(status, result, message, userdata):
error_msg = error_msg or "Could not obtain new device id."
raise RuntimeError(error_msg)
- # ----- Get device limits
-
- # H: nextInChain: WGPUChainedStructOut *, limits: WGPULimits
- c_supported_limits = new_struct_p(
- "WGPUSupportedLimits *",
- # not used: nextInChain
- # not used: limits
- )
- c_limits = c_supported_limits.limits
- # H: WGPUBool f(WGPUDevice device, WGPUSupportedLimits * limits)
- libf.wgpuDeviceGetLimits(device_id, c_supported_limits)
- limits = {to_snake_case(k): getattr(c_limits, k) for k in dir(c_limits)}
-
- # ----- Get device features
-
- # WebGPU features
- features = set()
- for f in sorted(enums.FeatureName):
- if f in [
- "clip-distances",
- "dual-source-blending",
- "texture-compression-bc-sliced-3d",
- ]:
- continue # not supported by wgpu-native yet
- key = f"FeatureName.{f}"
- i = enummap[key]
- # H: WGPUBool f(WGPUDevice device, WGPUFeatureName feature)
- if libf.wgpuDeviceHasFeature(device_id, i):
- features.add(f)
-
- # Native features
- for name, i in enum_str2int["NativeFeature"].items():
- # H: WGPUBool f(WGPUDevice device, WGPUFeatureName feature)
- if libf.wgpuDeviceHasFeature(device_id, i):
- features.add(name)
+ # ----- Get device limits and features
+ limits = _get_limits(device_id, device=True)
+ features = _get_features(device_id, device=True)
# ---- Get queue
@@ -1353,16 +1382,43 @@ def create_bind_group(
def create_pipeline_layout(
self, *, label="", bind_group_layouts: "List[GPUBindGroupLayout]"
):
+ return self._create_pipeline_layout(label, bind_group_layouts, [])
+
+ def _create_pipeline_layout(self, label, bind_group_layouts, push_constant_layouts):
bind_group_layouts_ids = [x._internal for x in bind_group_layouts]
c_layout_array = ffi.new("WGPUBindGroupLayout []", bind_group_layouts_ids)
+ next_in_chain = ffi.NULL
+ if push_constant_layouts:
+ count = len(push_constant_layouts)
+ c_push_constant_ranges = ffi.new("WGPUPushConstantRange[]", count)
+ for layout, c_push_constant_range in zip(
+ push_constant_layouts, c_push_constant_ranges
+ ):
+ visibility = layout["visibility"]
+ if isinstance(visibility, str):
+ visibility = str_flag_to_int(flags.ShaderStage, visibility)
+ c_push_constant_range.stages = visibility
+ c_push_constant_range.start = layout["start"]
+ c_push_constant_range.end = layout["end"]
+
+ # H: chain: WGPUChainedStruct, pushConstantRangeCount: int, pushConstantRanges: WGPUPushConstantRange *
+ c_pipeline_layout_extras = new_struct_p(
+ "WGPUPipelineLayoutExtras *",
+ pushConstantRangeCount=count,
+ pushConstantRanges=c_push_constant_ranges,
+ # not used: chain
+ )
+ c_pipeline_layout_extras.chain.sType = lib.WGPUSType_PipelineLayoutExtras
+ next_in_chain = ffi.cast("WGPUChainedStruct *", c_pipeline_layout_extras)
+
# H: nextInChain: WGPUChainedStruct *, label: char *, bindGroupLayoutCount: int, bindGroupLayouts: WGPUBindGroupLayout *
struct = new_struct_p(
"WGPUPipelineLayoutDescriptor *",
label=to_c_label(label),
bindGroupLayouts=c_layout_array,
bindGroupLayoutCount=len(bind_group_layouts),
- # not used: nextInChain
+ nextInChain=next_in_chain,
)
# H: WGPUPipelineLayout f(WGPUDevice device, WGPUPipelineLayoutDescriptor const * descriptor)
@@ -1776,7 +1832,6 @@ def create_render_bundle_encoder(
self._internal, render_bundle_encoder_descriptor
)
return GPURenderBundleEncoder(label, render_bundle_id, self)
- # Note: also enable the coresponing memtest when implementing this!
def create_query_set(self, *, label="", type: "enums.QueryType", count: int):
# H: nextInChain: WGPUChainedStruct *, label: char *, type: WGPUQueryType, count: int
@@ -2751,6 +2806,7 @@ def finish(self, *, label=""):
)
# H: WGPUCommandBuffer f(WGPUCommandEncoder commandEncoder, WGPUCommandBufferDescriptor const * descriptor)
id = libf.wgpuCommandEncoderFinish(self._internal, struct)
+
return GPUCommandBuffer(label, id, self._device)
def resolve_query_set(
@@ -2912,6 +2968,35 @@ def end_occlusion_query(self):
# H: void f(WGPURenderPassEncoder renderPassEncoder)
libf.wgpuRenderPassEncoderEndOcclusionQuery(self._internal)
+ def _set_push_constants(self, visibility, offset, size_in_bytes, data, data_offset):
+ # Implementation of set_push_constant. The public API is in extras.py since
+ # this is a wgpu extension.
+
+ # We support anything that memoryview supports, i.e. anything
+ # that implements the buffer protocol, including, bytes,
+ # bytearray, ctypes arrays, numpy arrays, etc.
+ m, address = get_memoryview_and_address(data)
+
+ # Deal with offset and size
+ offset = int(offset)
+ data_offset = int(data_offset)
+ size = int(size_in_bytes)
+ if isinstance(visibility, str):
+ visibility = str_flag_to_int(flags.ShaderStage, visibility)
+
+ if not (0 <= size_in_bytes <= m.nbytes):
+ raise ValueError("Invalid size_in_bytes")
+ if not (0 <= size_in_bytes <= m.nbytes):
+ raise ValueError("Invalid data_offset")
+ if size_in_bytes + data_offset > m.nbytes:
+ raise ValueError("size_in_bytes + data_offset is too large")
+
+ c_data = ffi.cast("void *", address) # do we want to add data_offset?
+ # H: void f(WGPURenderPassEncoder encoder, WGPUShaderStageFlags stages, uint32_t offset, uint32_t sizeBytes, void const * data)
+ libf.wgpuRenderPassEncoderSetPushConstants(
+ self._internal, int(visibility), offset, size, c_data + data_offset
+ )
+
def _release(self):
if self._internal is not None and libf is not None:
self._internal, internal = None, self._internal
diff --git a/wgpu/backends/wgpu_native/_helpers.py b/wgpu/backends/wgpu_native/_helpers.py
index 43b58495..2c214dbe 100644
--- a/wgpu/backends/wgpu_native/_helpers.py
+++ b/wgpu/backends/wgpu_native/_helpers.py
@@ -202,15 +202,17 @@ def get_surface_id_from_canvas(canvas):
# The functions below are copied from codegen/utils.py
-def to_snake_case(name):
+def to_snake_case(name, separator="_"):
"""Convert a name from camelCase to snake_case. Names that already are
snake_case remain the same.
"""
name2 = ""
for c in name:
c2 = c.lower()
- if c2 != c and len(name2) > 0 and name2[-1] not in "_123":
- name2 += "_"
+ if c2 != c and len(name2) > 0:
+ prev = name2[-1]
+ if prev not in "123" and prev != separator:
+ name2 += separator
name2 += c2
return name2
diff --git a/wgpu/backends/wgpu_native/extras.py b/wgpu/backends/wgpu_native/extras.py
index 3f7306ce..b54d44fb 100644
--- a/wgpu/backends/wgpu_native/extras.py
+++ b/wgpu/backends/wgpu_native/extras.py
@@ -1,7 +1,7 @@
import os
-from ._api import structs, enums, Dict, logger
-
+from ._api import GPUBindGroupLayout, structs, enums, Dict, logger
+from typing import List
# NOTE: these functions represent backend-specific extra API.
# NOTE: changes to this module must be reflected in docs/backends.rst.
@@ -33,3 +33,32 @@ def request_device_tracing(
return adapter._request_device(
label, required_features, required_limits, default_queue, trace_path
)
+
+
+def create_pipeline_layout(
+ device,
+ *,
+ label="",
+ bind_group_layouts: "List[GPUBindGroupLayout]",
+ push_constant_layouts: "List[Dict]" = [],
+):
+ return device._create_pipeline_layout(
+ label, bind_group_layouts, push_constant_layouts
+ )
+
+
+def set_push_constants(
+ render_pass_encoder, visibility, offset, size_in_bytes, data, data_offset=0
+):
+ """
+ Set push-constant data for subsequent draw calls.
+
+ Writes size_in_bytes bytes of data, read starting data_offset bytes into data,
+ to push-constant storage starting at the specified offset. These bytes are visible
+ to the pipeline stages indicated by the visibility argument.
+ """
+
+ # Actual implementation is hidden in _api.py
+ render_pass_encoder._set_push_constants(
+ visibility, offset, size_in_bytes, data, data_offset
+ )
diff --git a/wgpu/resources/codegen_report.md b/wgpu/resources/codegen_report.md
index 7c72d1fc..a61155dc 100644
--- a/wgpu/resources/codegen_report.md
+++ b/wgpu/resources/codegen_report.md
@@ -20,7 +20,7 @@
* Diffs for GPUQueue: add read_buffer, add read_texture, hide copy_external_image_to_texture
* Validated 37 classes, 112 methods, 45 properties
### Patching API for backends/wgpu_native/_api.py
-* Validated 37 classes, 112 methods, 0 properties
+* Validated 37 classes, 114 methods, 0 properties
## Validating backends/wgpu_native/_api.py
* Enum field FeatureName.texture-compression-bc-sliced-3d missing in wgpu.h
* Enum field FeatureName.clip-distances missing in wgpu.h
@@ -35,6 +35,6 @@
* Enum CanvasAlphaMode missing in wgpu.h
* Enum CanvasToneMappingMode missing in wgpu.h
* Wrote 236 enum mappings and 47 struct-field mappings to wgpu_native/_mappings.py
-* Validated 132 C function calls
-* Not using 73 C functions
-* Validated 78 C structs
+* Validated 131 C function calls
+* Not using 72 C functions
+* Validated 80 C structs