diff --git a/.deny.toml b/.deny.toml index a8b6db974e..7e000d6f82 100644 --- a/.deny.toml +++ b/.deny.toml @@ -2,12 +2,15 @@ multiple-versions = "deny" skip-tree = [ { name = "windows-sys", version = "0.45" }, - { name = "winit", version = "0.27.5" }, + { name = "winit", version = "0.27" }, + { name = "winit", version = "0.29" }, { name = "rustc_version", version = "0.2.3" }, { name = "sourcemap", version = "7.1.1" }, ] skip = [ { name = "hlsl-snapshots", version = "0.1.0" }, + # Strum uses an old version + { name = "heck", version = "0.4.0" }, ] wildcards = "deny" allow-wildcard-paths = true @@ -20,6 +23,7 @@ allow = [ "BSD-3-Clause", "CC0-1.0", "ISC", + "MPL-2.0", "MIT", "MIT-0", "Unicode-DFS-2016", diff --git a/.envrc b/.envrc new file mode 100644 index 0000000000..1d953f4bd7 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use nix diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 972d02caff..dba0cd1228 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -78,6 +78,7 @@ jobs: # runtime is normally 2-8 minutes # # currently high due to documentation time problems on mac. + # https://github.com/rust-lang/rust/issues/114891 timeout-minutes: 30 strategy: @@ -229,6 +230,14 @@ jobs: # build docs cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --all-features --no-deps + - name: check private item docs + if: matrix.kind == 'native' + shell: bash + run: | + set -e + + # wgpu_core package + cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --all-features --no-deps --package wgpu-core --package wgpu-hal --document-private-items # We run minimal checks on the MSRV of the core crates, ensuring that # its dependency tree does not cause issues for firefox. @@ -614,7 +623,7 @@ jobs: cargo fmt --manifest-path xtask/Cargo.toml -- --check - name: Check for typos - uses: crate-ci/typos@v1.20.8 + uses: crate-ci/typos@v1.20.10 check-cts-runner: # runtime is normally 2 minutes diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index ceecbb703f..9017220fe5 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -41,7 +41,7 @@ jobs: if: ${{ failure() }} - name: Deploy the docs - uses: JamesIves/github-pages-deploy-action@v4.5.0 + uses: JamesIves/github-pages-deploy-action@v4.6.0 if: github.ref == 'refs/heads/trunk' with: token: ${{ secrets.WEB_DEPLOY }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 6dfed56f6a..258c788a4e 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -41,7 +41,7 @@ jobs: run: cargo xtask run-wasm --no-serve - name: Deploy WebGPU examples - uses: JamesIves/github-pages-deploy-action@v4.5.0 + uses: JamesIves/github-pages-deploy-action@v4.6.0 if: github.ref == 'refs/heads/trunk' with: token: ${{ secrets.WEB_DEPLOY }} diff --git a/CHANGELOG.md b/CHANGELOG.md index cb7c17a6b9..538546e4c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,100 +41,174 @@ Bottom level categories: ### Major Changes -### Documentation +#### Querying shader compilation errors + +Wgpu now supports querying [shader compilation info](https://www.w3.org/TR/webgpu/#dom-gpushadermodule-getcompilationinfo). + +This allows you to get more structured information about compilation errors, warnings and info: +```rust +... 
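+// `ctx` is assumed to be an application object that exposes the `wgpu::Device` as `ctx.device`.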
+let lighting_shader = ctx.device.create_shader_module(include_wgsl!("lighting.wgsl"));
+let compilation_info = lighting_shader.get_compilation_info().await;
+for message in compilation_info
+    .messages
+    .iter()
+    .filter(|m| m.message_type == wgpu::CompilationMessageType::Error)
+{
+    let line = message.location.map(|l| l.line_number).unwrap_or(1);
+    println!("Compile error at line {line}");
+}
+```
+
+By @stefnotch in [#5410](https://github.com/gfx-rs/wgpu/pull/5410)
+
-- Add mention of primitive restart in the description of `PrimitiveState::strip_index_format`. By @cpsdqs in [#5350](https://github.com/gfx-rs/wgpu/pull/5350)
-- Document precise behaviour of `SourceLocation`. By @stefnotch in [#5386](https://github.com/gfx-rs/wgpu/pull/5386)
-- Give short example of WGSL `push_constant` syntax. By @waywardmonkeys in [#5393](https://github.com/gfx-rs/wgpu/pull/5393)

### New features

#### General

-- Implemented the `Unorm10_10_10_2` VertexFormat.
-- Many numeric built-ins have had a constant evaluation implementation added for them, which allows them to be used in a `const` context:
-  - [#4879](https://github.com/gfx-rs/wgpu/pull/4879) by @ErichDonGubler:
-    - `abs`
-    - `acos`
-    - `acosh`
-    - `asin`
-    - `asinh`
-    - `atan`
-    - `atanh`
-    - `cos`
-    - `cosh`
-    - `round`
-    - `saturate`
-    - `sin`
-    - `sinh`
-    - `sqrt`
-    - `step`
-    - `tan`
-    - `tanh`
-  - [#5098](https://github.com/gfx-rs/wgpu/pull/5098) by @ErichDonGubler:
-    - `ceil`
-    - `countLeadingZeros`
-    - `countOneBits`
-    - `countTrailingZeros`
-    - `degrees`
-    - `exp`
-    - `exp2`
-    - `floor`
-    - `fract`
-    - `fma`
-    - `inverseSqrt`
-    - `log`
-    - `log2`
-    - `max`
-    - `min`
-    - `radians`
-    - `reverseBits`
-    - `sign`
-    - `trunc`
-- Eager release of GPU resources comes from device.trackers. By @bradwerth in [#5075](https://github.com/gfx-rs/wgpu/pull/5075)
+#### Naga
+
+### Bug Fixes
+
+## v0.20.0 (2024-04-28)
+
+### Major Changes
+
+#### Pipeline overridable constants
+
+Wgpu now supports [pipeline-overridable constants](https://www.w3.org/TR/webgpu/#dom-gpuprogrammablestage-constants).
+
+This allows you to define constants in wgsl like this:
+```wgsl
+override some_factor: f32 = 42.1337; // Specifies a default of 42.1337 if it's not set.
+```
+You can then set them at runtime on the pipeline that consumes this shader:
+```rust
+// ...
+fragment: Some(wgpu::FragmentState {
+    compilation_options: wgpu::PipelineCompilationOptions {
+        constants: &[("some_factor".to_owned(), 0.1234)].into(), // Sets `some_factor` to 0.1234.
+        ..Default::default()
+    },
+    // ...
+}),
+// ...
+```
+
+By @teoxoy & @jimblandy in [#5500](https://github.com/gfx-rs/wgpu/pull/5500)
+
+#### Changed feature requirements for timestamps
+
+Due to a specification change, `write_timestamp` is no longer supported on WebGPU.
+`wgpu::CommandEncoder::write_timestamp` now requires the new `wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS` feature, which is available on all native backends but not on WebGPU.
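+
+For example, with both `wgpu::Features::TIMESTAMP_QUERY` and `wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS` enabled at device creation, encoder-level timestamp writes keep working on native (a minimal sketch; `device`, `encoder`, and the query-set label are illustrative):
+```rust
+// A query set with room for a start and an end timestamp.
+let query_set = device.create_query_set(&wgpu::QuerySetDescriptor {
+    label: Some("encoder timing"),
+    ty: wgpu::QueryType::Timestamp,
+    count: 2,
+});
+
+// Timestamps written directly on the encoder (rather than inside a pass)
+// are what now require TIMESTAMP_QUERY_INSIDE_ENCODERS.
+encoder.write_timestamp(&query_set, 0);
+// ... encode the work to be timed ...
+encoder.write_timestamp(&query_set, 1);
+```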
+
+By @wumpf in [#5188](https://github.com/gfx-rs/wgpu/pull/5188)
+
+
+#### Wgsl const evaluation for many more built-ins
+
+Many numeric built-ins have had a constant evaluation implementation added for them, which allows them to be used in a `const` context:
+
+`abs`, `acos`, `acosh`, `asin`, `asinh`, `atan`, `atanh`, `cos`, `cosh`, `round`, `saturate`, `sin`, `sinh`, `sqrt`, `step`, `tan`, `tanh`, `ceil`, `countLeadingZeros`, `countOneBits`, `countTrailingZeros`, `degrees`, `exp`, `exp2`, `floor`, `fract`, `fma`, `inverseSqrt`, `log`, `log2`, `max`, `min`, `radians`, `reverseBits`, `sign`, `trunc`
+
+By @ErichDonGubler in [#4879](https://github.com/gfx-rs/wgpu/pull/4879), [#5098](https://github.com/gfx-rs/wgpu/pull/5098)
+
+#### New **native-only** wgsl features
+
+##### Subgroup operations
+
+The following subgroup operations are now available in wgsl:
+
+`subgroupBallot`, `subgroupAll`, `subgroupAny`, `subgroupAdd`, `subgroupMul`, `subgroupMin`, `subgroupMax`, `subgroupAnd`, `subgroupOr`, `subgroupXor`, `subgroupExclusiveAdd`, `subgroupExclusiveMul`, `subgroupInclusiveAdd`, `subgroupInclusiveMul`, `subgroupBroadcastFirst`, `subgroupBroadcast`, `subgroupShuffle`, `subgroupShuffleDown`, `subgroupShuffleUp`, `subgroupShuffleXor`
+
+Availability is governed by the following feature flags:
+* `wgpu::Features::SUBGROUP`, for all operations except `subgroupBarrier` in fragment & compute shaders, supported on Vulkan, DX12 and Metal.
+* `wgpu::Features::SUBGROUP_VERTEX`, for all operations except `subgroupBarrier` in vertex shaders, supported on Vulkan.
+* `wgpu::Features::SUBGROUP_BARRIER`, for support of the `subgroupBarrier` operation, supported on Vulkan & Metal.
+
+Note that there are currently [some differences](https://github.com/gfx-rs/wgpu/issues/5555) between wgpu's native-only implementation and the [open WebGPU proposal](https://github.com/gpuweb/gpuweb/blob/main/proposals/subgroups.md).
+
+By @exrook and @lichtso in [#5301](https://github.com/gfx-rs/wgpu/pull/5301)
+
+##### Signed and unsigned 64 bit integer support in shaders
+
+`wgpu::Features::SHADER_INT64` enables signed and unsigned 64 bit integer variables in wgsl (`i64` and `u64` respectively).
+Supported on Vulkan, DX12 (requires DXC) and Metal (with MSL 2.3+ support).
+
+By @atlv24 and @cwfitzgerald in [#5154](https://github.com/gfx-rs/wgpu/pull/5154)
+
+### New features
+
+#### General
+
+- Implemented the `Unorm10_10_10_2` VertexFormat. By @McMackety in [#5477](https://github.com/gfx-rs/wgpu/pull/5477)
- `wgpu-types`'s `trace` and `replay` features have been replaced by the `serde` feature. By @KirmesBude in [#5149](https://github.com/gfx-rs/wgpu/pull/5149)
- `wgpu-core`'s `serial-pass` feature has been removed. Use `serde` instead. By @KirmesBude in [#5149](https://github.com/gfx-rs/wgpu/pull/5149)
-- Added `InstanceFlags::GPU_BASED_VALIDATION`, which enables GPU-based validation for shaders. This is currently only supported on the DX12 and Vulkan backends; other platforms ignore this flag, for now.
+- Added `InstanceFlags::GPU_BASED_VALIDATION`, which enables GPU-based validation for shaders. This is currently only supported on the DX12 and Vulkan backends; other platforms ignore this flag, for now. By @ErichDonGubler in [#5146](https://github.com/gfx-rs/wgpu/pull/5146), [#5046](https://github.com/gfx-rs/wgpu/pull/5046).
  - When set, this flag implies `InstanceFlags::VALIDATION`.
  - This has been added to the set of flags set by `InstanceFlags::advanced_debugging`. Since the overhead is potentially very large, the flag is not enabled by default in debug builds when using `InstanceFlags::from_build_config`.
  - As with other instance flags, this flag can be changed in calls to `InstanceFlags::with_env` with the new `WGPU_GPU_BASED_VALIDATION` environment variable.
-  - By @ErichDonGubler in [#5146](https://github.com/gfx-rs/wgpu/pull/5146), [#5046](https://github.com/gfx-rs/wgpu/pull/5046).
-- Signed and unsigned 64 bit integer support in shaders. By @rodolphito and @cwfitzgerald in [#5154](https://github.com/gfx-rs/wgpu/pull/5154)
- `wgpu::Instance` can now report which `wgpu::Backends` are available based on the build configuration. By @wumpf [#5167](https://github.com/gfx-rs/wgpu/pull/5167)
  ```diff
-wgpu::Instance::any_backend_feature_enabled()
+!wgpu::Instance::enabled_backend_features().is_empty()
  ```
-
-- `wgpu::CommandEncoder::write_timestamp` requires now the new `wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS` feature which is available on all native backends but not on WebGPU (due to a spec change `write_timestamp` is no longer supported on WebGPU). By @wumpf in [#5188](https://github.com/gfx-rs/wgpu/pull/5188)
- Breaking change: [`wgpu_core::pipeline::ProgrammableStageDescriptor`](https://docs.rs/wgpu-core/latest/wgpu_core/pipeline/struct.ProgrammableStageDescriptor.html#structfield.entry_point) is now optional. By @ErichDonGubler in [#5305](https://github.com/gfx-rs/wgpu/pull/5305).
- `Features::downlevel{_webgl2,}_features` was made const by @MultisampledNight in [#5343](https://github.com/gfx-rs/wgpu/pull/5343)
-
+- Breaking change: [`wgpu_core::pipeline::ShaderError`](https://docs.rs/wgpu-core/latest/wgpu_core/pipeline/struct.ShaderError.html) has been moved to `naga`. By @stefnotch in [#5410](https://github.com/gfx-rs/wgpu/pull/5410)
- More as_hal methods and improvements by @JMS55 in [#5452](https://github.com/gfx-rs/wgpu/pull/5452)
  - Added `wgpu::CommandEncoder::as_hal_mut`
  - Added `wgpu::TextureView::as_hal`
  - `wgpu::Texture::as_hal` now returns a user-defined type to match the other as_hal functions
-- Added support for pipeline-overridable constants. By @teoxoy & @jimblandy in [#5500](https://github.com/gfx-rs/wgpu/pull/5500)
-
-#### GLES
-
-- Log an error when GLES texture format heuristics fail. By @PolyMeilex in [#5266](https://github.com/gfx-rs/wgpu/issues/5266)
-- Cache the sample count to keep `get_texture_format_features` cheap. By @Dinnerbone in [#5346](https://github.com/gfx-rs/wgpu/pull/5346)
-- Mark `DEPTH32FLOAT_STENCIL8` as supported in GLES. By @Dinnerbone in [#5370](https://github.com/gfx-rs/wgpu/pull/5370)
-
#### Naga

- Allow user to select which MSL version to use via `--metal-version` with Naga CLI. By @pcleavelin in [#5392](https://github.com/gfx-rs/wgpu/pull/5392)
- Support `arrayLength` for runtime-sized arrays inside binding arrays (for WGSL input and SPIR-V output). By @kvark in [#5428](https://github.com/gfx-rs/wgpu/pull/5428)
+- Added `--shader-stage` and `--input-kind` options to naga-cli for specifying vertex/fragment/compute shaders, and the input frontend. By @ratmice in [#5411](https://github.com/gfx-rs/wgpu/pull/5411)
+- Added a `create_validator` function to wgpu_core `Device` to create naga `Validator`s. By @atlv24 in [#5606](https://github.com/gfx-rs/wgpu/pull/5606)

#### WebGPU

- Implement the `device_set_device_lost_callback` method for `ContextWebGpu`. By @suti in [#5438](https://github.com/gfx-rs/wgpu/pull/5438)
- Add support for storage texture access modes `ReadOnly` and `ReadWrite`. By @JolifantoBambla in [#5434](https://github.com/gfx-rs/wgpu/pull/5434)

+#### GLES / OpenGL
+
+- Log an error when GLES texture format heuristics fail. By @PolyMeilex in [#5266](https://github.com/gfx-rs/wgpu/issues/5266)
+- Cache the sample count to keep `get_texture_format_features` cheap. By @Dinnerbone in [#5346](https://github.com/gfx-rs/wgpu/pull/5346)
+- Mark `DEPTH32FLOAT_STENCIL8` as supported in GLES. By @Dinnerbone in [#5370](https://github.com/gfx-rs/wgpu/pull/5370)
+- Desktop GL now also supports `TEXTURE_COMPRESSION_ETC2`. By @Valaphee in [#5568](https://github.com/gfx-rs/wgpu/pull/5568)
+- Don't create a program for shader-clearing if that workaround isn't required. By @Dinnerbone in [#5348](https://github.com/gfx-rs/wgpu/pull/5348).
+- OpenGL will now be preferred over OpenGL ES on EGL, making it consistent with WGL. By @valaphee in [#5482](https://github.com/gfx-rs/wgpu/pull/5482)
+- Fill out `driver` and `driver_info` with the OpenGL flavor and version, similar to Vulkan. By @valaphee in [#5482](https://github.com/gfx-rs/wgpu/pull/5482)
+
+#### Metal
+
+- Metal 3.0 and 3.1 detection. By @atlv24 in [#5497](https://github.com/gfx-rs/wgpu/pull/5497)
+
+#### DX12
+
+- Shader Model 6.1-6.7 detection. By @atlv24 in [#5498](https://github.com/gfx-rs/wgpu/pull/5498)
+
+### Other performance improvements
+
+- Simplify and speed up the allocation of internal IDs. By @nical in [#5229](https://github.com/gfx-rs/wgpu/pull/5229)
+- Use memory pooling for UsageScopes to avoid frequent large allocations. By @robtfm in [#5414](https://github.com/gfx-rs/wgpu/pull/5414)
+- Eager release of GPU resources now comes from `device.trackers`. By @bradwerth in [#5075](https://github.com/gfx-rs/wgpu/pull/5075)
+- Support disabling zero-initialization of workgroup local memory in compute shaders. By @DJMcNab in [#5508](https://github.com/gfx-rs/wgpu/pull/5508)
+
+### Documentation
+
+- Improved `wgpu_hal` documentation. By @jimblandy in [#5516](https://github.com/gfx-rs/wgpu/pull/5516), [#5524](https://github.com/gfx-rs/wgpu/pull/5524), [#5562](https://github.com/gfx-rs/wgpu/pull/5562), [#5563](https://github.com/gfx-rs/wgpu/pull/5563), [#5566](https://github.com/gfx-rs/wgpu/pull/5566), [#5617](https://github.com/gfx-rs/wgpu/pull/5617), [#5618](https://github.com/gfx-rs/wgpu/pull/5618)
+- Add mention of primitive restart in the description of `PrimitiveState::strip_index_format`. By @cpsdqs in [#5350](https://github.com/gfx-rs/wgpu/pull/5350)
+- Document and tweak precise behaviour of `SourceLocation`. By @stefnotch in [#5386](https://github.com/gfx-rs/wgpu/pull/5386) and [#5410](https://github.com/gfx-rs/wgpu/pull/5410)
+- Give short example of WGSL `push_constant` syntax. By @waywardmonkeys in [#5393](https://github.com/gfx-rs/wgpu/pull/5393)
+- Fix incorrect documentation of `Limits::max_compute_workgroup_storage_size` default value. By @atlv24 in [#5601](https://github.com/gfx-rs/wgpu/pull/5601)
+
### Bug Fixes

#### General
@@ -143,48 +217,54 @@ Bottom level categories:
- Fix panic when creating a surface while no backend is available. By @wumpf [#5166](https://github.com/gfx-rs/wgpu/pull/5166)
- Correctly compute minimum buffer size for array-typed `storage` and `uniform` vars. By @jimblandy [#5222](https://github.com/gfx-rs/wgpu/pull/5222)
- Fix timeout when presenting a surface where no work has been done. By @waywardmonkeys in [#5200](https://github.com/gfx-rs/wgpu/pull/5200)
-- Simplify and speed up the allocation of internal IDs. By @nical in [#5229](https://github.com/gfx-rs/wgpu/pull/5229)
-- Fix behavior of `extractBits` and `insertBits` when `offset + count` overflows the bit width. By @cwfitzgerald in [#5305](https://github.com/gfx-rs/wgpu/pull/5305)
- Fix registry leaks with de-duplicated resources. By @nical in [#5244](https://github.com/gfx-rs/wgpu/pull/5244)
-- Fix behavior of integer `clamp` when `min` argument > `max` argument. By @cwfitzgerald in [#5300](https://github.com/gfx-rs/wgpu/pull/5300).
- Fix linking when targeting android. By @ashdnazg in [#5326](https://github.com/gfx-rs/wgpu/pull/5326).
-- fix resource leak for buffers/textures dropped while having pending writes. By @robtfm in [#5413](https://github.com/gfx-rs/wgpu/pull/5413)
- Failing to set the device lost closure will call the closure before returning. By @bradwerth in [#5358](https://github.com/gfx-rs/wgpu/pull/5358).
-- Use memory pooling for UsageScopes to avoid frequent large allocations. by @robtfm in [#5414](https://github.com/gfx-rs/wgpu/pull/5414)
- Fix deadlocks caused by recursive read-write lock acquisitions [#5426](https://github.com/gfx-rs/wgpu/pull/5426).
+- Remove exposed C symbols (`extern "C"` + `#[no_mangle]`) from RenderPass & ComputePass recording. By @wumpf in [#5409](https://github.com/gfx-rs/wgpu/pull/5409).
+- Fix surfaces being only compatible with the first backend enabled on an instance, causing failures when manually specifying an adapter. By @Wumpf in [#5535](https://github.com/gfx-rs/wgpu/pull/5535).

#### Naga

-- In spv-in, remove unnecessary "gl_PerVertex" name check so unused builtins will always be skipped. By @Imberflur in [#5227](https://github.com/gfx-rs/wgpu/pull/5227).
-- GLSL 410 does not support layout(binding = ...), enable only for GLSL 420. By @bes in [#5357](https://github.com/gfx-rs/wgpu/pull/5357)
+
+- In spv-in, remove unnecessary "gl_PerVertex" name check so unused builtins will always be skipped. Prevents validation errors caused by capability requirements of these builtins [#4915](https://github.com/gfx-rs/wgpu/issues/4915). By @Imberflur in [#5227](https://github.com/gfx-rs/wgpu/pull/5227).
- In spv-out, check for acceleration and ray-query types when enabling ray-query extension to prevent validation error. By @Vecvec in [#5463](https://github.com/gfx-rs/wgpu/pull/5463)
- Add a limit for curly brace nesting in WGSL parsing, plus a note about stack size requirements. By @ErichDonGubler in [#5447](https://github.com/gfx-rs/wgpu/pull/5447).
+- In hlsl-out, fix accesses on zero value expressions by generating helper functions for `Expression::ZeroValue`. By @Imberflur in [#5587](https://github.com/gfx-rs/wgpu/pull/5587).
+- Fix behavior of `extractBits` and `insertBits` when `offset + count` overflows the bit width. By @cwfitzgerald in [#5305](https://github.com/gfx-rs/wgpu/pull/5305)
+- Fix behavior of integer `clamp` when `min` argument > `max` argument. By @cwfitzgerald in [#5300](https://github.com/gfx-rs/wgpu/pull/5300).
+- Fix `TypeInner::scalar_width` to be consistent with the rest of the codebase and return values in bytes, not bits. By @atlv24 in [#5532](https://github.com/gfx-rs/wgpu/pull/5532).

-#### Tests
-
-- Fix intermittent crashes on Linux in the `multithreaded_compute` test. By @jimblandy in [#5129](https://github.com/gfx-rs/wgpu/pull/5129).
-- Refactor tests to read feature flags by name instead of a hardcoded hexadecimal u64. By @rodolphito in [#5155](https://github.com/gfx-rs/wgpu/pull/5155).
-- Add test that verifies that we can drop the queue before using the device to create a command encoder. By @Davidster in [#5211](https://github.com/gfx-rs/wgpu/pull/5211)
-
-#### GLES
+#### GLES / OpenGL

+- GLSL 410 does not support layout(binding = ...); enable it only for GLSL 420. By @bes in [#5357](https://github.com/gfx-rs/wgpu/pull/5357)
- Fixes for being able to use an OpenGL 4.1 core context provided by macOS with wgpu. By @bes in [#5331](https://github.com/gfx-rs/wgpu/pull/5331).
-- Don't create a program for shader-clearing if that workaround isn't required. By @Dinnerbone in [#5348](https://github.com/gfx-rs/wgpu/pull/5348).
- Fix crash when holding multiple devices on wayland/surfaceless. By @ashdnazg in [#5351](https://github.com/gfx-rs/wgpu/pull/5351).
-- Don't depend on bind group and bind group layout entry order in HAL. This caused incorrect severely incorrect command execution and, in some cases, crashes. By @ErichDonGubler in [#5421](https://github.com/gfx-rs/wgpu/pull/5421).
+- Fix `first_instance` being ignored in indexed draws when the `ARB_shader_draw_parameters` feature is present and `base_vertex` is 0. By @valaphee in [#5482](https://github.com/gfx-rs/wgpu/pull/5482)

#### Vulkan

- Set object labels when the DEBUG flag is set, even if the VALIDATION flag is disabled. By @DJMcNab in [#5345](https://github.com/gfx-rs/wgpu/pull/5345).
+- Add safety check to `wgpu_hal::vulkan::CommandEncoder` to make sure `discard_encoding` is not called in the closed state. By @villuna in [#5557](https://github.com/gfx-rs/wgpu/pull/5557)
+- Fix SPIR-V type capability requests to not depend on `LocalType` caching. By @atlv24 in [#5590](https://github.com/gfx-rs/wgpu/pull/5590)

-#### Metal
+#### Tests

-- Don't depend on bind group and bind group layout entry order in HAL. This caused incorrect severely incorrect command execution and, in some cases, crashes. By @ErichDonGubler in [#5421](https://github.com/gfx-rs/wgpu/pull/5421).
-- Metal 3.0 and 3.1 detection. By @atlv24 in [#5497](https://github.com/gfx-rs/wgpu/pull/5497)
+- Fix intermittent crashes on Linux in the `multithreaded_compute` test. By @jimblandy in [#5129](https://github.com/gfx-rs/wgpu/pull/5129).
+- Refactor tests to read feature flags by name instead of a hardcoded hexadecimal u64. By @atlv24 in [#5155](https://github.com/gfx-rs/wgpu/pull/5155).
+- Add test that verifies that we can drop the queue before using the device to create a command encoder. By @Davidster in [#5211](https://github.com/gfx-rs/wgpu/pull/5211)

-#### DX12
+## v0.19.4 (2024-04-17)
+
+### Bug Fixes

-- Don't depend on bind group and bind group layout entry order in HAL. This caused incorrect severely incorrect command execution and, in some cases, crashes. By @ErichDonGubler in [#5421](https://github.com/gfx-rs/wgpu/pull/5421).
-- Shader Model 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, and 6.7 detection. By @atlv24 in [#5498](https://github.com/gfx-rs/wgpu/pull/5498)
+#### General
+
+- Don't depend on bind group and bind group layout entry order in backends. This caused severely incorrect command execution and, in some cases, crashes. By @ErichDonGubler in [#5421](https://github.com/gfx-rs/wgpu/pull/5421).
+- Properly clean up all `write_buffer`/`write_texture` temporary resources. By @robtfm in [#5413](https://github.com/gfx-rs/wgpu/pull/5413).
+- Fix deadlock in certain situations when mapping buffers using `wgpu-profiler`. By @cwfitzgerald in [#5517](https://github.com/gfx-rs/wgpu/pull/5517)
+
+#### WebGPU
+
+- Correctly pass through timestamp queries to WebGPU. 
By @cwfitzgerald in [#5527](https://github.com/gfx-rs/wgpu/pull/5527). ## v0.19.3 (2024-03-01) diff --git a/Cargo.lock b/Cargo.lock index 71bc0211e7..9d2c2baa56 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "ab_glyph" -version = "0.2.24" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e08104bebc65a46f8bc7aa733d39ea6874bfa7156f41a46b805785e3af1587d" +checksum = "6f90148830dac590fac7ccfe78ec4a8ea404c60f75a24e16407a71f0f40de775" dependencies = [ "ab_glyph_rasterizer", "owned_ttf_parser", @@ -57,9 +57,9 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.16" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" [[package]] name = "android-activity" @@ -153,9 +153,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.81" +version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" +checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" [[package]] name = "arbitrary" @@ -185,7 +185,7 @@ dependencies = [ "argh_shared", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -235,13 +235,13 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.79" +version = "0.1.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" +checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -384,7 +384,7 @@ checksum = "4da9a32f3fed317401fa3c862968128267c3106685286e15d5aaa3d7389c2f60" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -447,12 +447,13 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.0.92" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2678b2e3449475e95b0aa6f9b506a28e61b3dc8996592b983695e8ebb58a8b41" +checksum = "d32a725bc159af97c3e629873bb9f88fb8cf8a4867175f76dc987815ea07c83b" dependencies = [ "jobserver", "libc", + "once_cell", ] [[package]] @@ -511,9 +512,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.0" +version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80c21025abd42669a92efc996ef13cfb2c5c627858421ea58d5c3b331a6c134f" +checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" dependencies = [ "clap_builder", "clap_derive", @@ -521,9 +522,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.0" +version = "4.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "458bf1f341769dfcf849846f65dffdf9146daa56bcd2a47cb4e1de9915567c99" +checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" dependencies = [ "anstream", "anstyle", @@ -533,14 +534,14 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.0" +version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47" +checksum = 
"528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -659,9 +660,9 @@ dependencies = [ [[package]] name = "combine" -version = "4.6.6" +version = "4.6.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" dependencies = [ "bytes", "memchr", @@ -669,9 +670,9 @@ dependencies = [ [[package]] name = "concurrent-queue" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d16048cd947b08fa32c24458a22f5dc5e835264f689f4f5653210c69fd107363" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" dependencies = [ "crossbeam-utils", ] @@ -880,12 +881,12 @@ checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" [[package]] name = "ctor" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad291aa74992b9b7a7e88c38acbbf6ad7e107f1d90ee8775b7bc1fc3394f485c" +checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -916,7 +917,7 @@ checksum = "96a6ac251f4a2aca6b3f91340350eab87ae57c3f127ffeb585e92bd336717991" [[package]] name = "d3d12" -version = "0.19.0" +version = "0.20.0" dependencies = [ "bitflags 2.5.0", "libloading 0.8.3", @@ -960,9 +961,9 @@ dependencies = [ [[package]] name = "data-encoding" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e962a19be5cfc3f3bf6dd8f61eb50107f356ad6270fbb3ed41476571db78be5" +checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" [[package]] name = "debugid" @@ -1032,15 +1033,15 @@ dependencies = [ "quote", "strum", "strum_macros", - "syn 2.0.58", + "syn 2.0.60", "thiserror", ] [[package]] name = "deno_unsync" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30dff7e03584dbae188dae96a0f1876740054809b2ad0cf7c9fc5d361f20e739" +checksum = "e3d79c7af81e0a5ac75cff7b2fff4d1896e2bff694c688258edf21ef8a519736" dependencies = [ "tokio", ] @@ -1080,7 +1081,7 @@ name = "deno_webgpu" version = "0.110.0" dependencies = [ "deno_core", - "raw-window-handle 0.6.0", + "raw-window-handle 0.6.1", "serde", "tokio", "wgpu-core", @@ -1105,7 +1106,7 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1173,15 +1174,15 @@ dependencies = [ [[package]] name = "either" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" +checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" [[package]] name = "encase" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95ed933078d2e659745df651f4c180511cd582e5b9414ff896e7d50d207e3103" +checksum = "5a9299a95fa5671ddf29ecc22b00e121843a65cb9ff24911e394b4ae556baf36" dependencies = [ "const_panic", "encase_derive", @@ -1191,22 +1192,22 @@ dependencies = [ [[package]] name = "encase_derive" -version = "0.7.0" +version = "0.8.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4ce1449c7d19eba6cc0abd231150ad81620a8dce29601d7f8d236e5d431d72a" +checksum = "07e09decb3beb1fe2db6940f598957b2e1f7df6206a804d438ff6cb2a9cddc10" dependencies = [ "encase_derive_impl", ] [[package]] name = "encase_derive_impl" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92959a9e8d13eaa13b8ae8c7b583c3bf1669ca7a8e7708a088d12587ba86effc" +checksum = "fd31dbbd9743684d339f907a87fe212cb7b51d75b9e8e74181fe363199ee9b47" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1269,9 +1270,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.0.2" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" [[package]] name = "fdeflate" @@ -1299,9 +1300,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flate2" -version = "1.0.28" +version = "1.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +checksum = "4556222738635b7a3417ae6130d8f52201e45a0c4d1a907f0826383adb5f85e7" dependencies = [ "crc32fast", "miniz_oxide", @@ -1352,7 +1353,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1477,7 +1478,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1552,9 +1553,9 @@ dependencies = [ [[package]] name = "glam" -version = "0.25.0" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "151665d9be52f9bb40fc7966565d39666f2d1e69233571b71b87791c7e0528b3" +checksum = "9e05e7e6723e3455f4818c7b26e855439f7546cf617ef669d1adedb8669e5cb9" [[package]] name = "glow" @@ -1676,9 +1677,9 @@ dependencies = [ [[package]] name = "gpu-descriptor" -version = "0.2.4" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc11df1ace8e7e564511f53af41f3e42ddc95b56fd07b3f4445d2a6048bc682c" +checksum = "9c08c1f623a8d0b722b8b99f821eb0ba672a1618f0d3b16ddbee1cedd2dd8557" dependencies = [ "bitflags 2.5.0", "gpu-descriptor-types", @@ -1687,9 +1688,9 @@ dependencies = [ [[package]] name = "gpu-descriptor-types" -version = "0.1.2" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bf0b36e6f090b7e1d8a4b49c0cb81c1f8376f72198c65dd3ad9ff3556b8b78c" +checksum = "fdf242682df893b86f33a73828fb09ca4b2d3bb6cc95249707fc684d27484b91" dependencies = [ "bitflags 2.5.0", ] @@ -1715,9 +1716,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.14.3" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ "ahash", "allocator-api2", @@ -1744,6 +1745,12 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "heck" +version = 
"0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hermit-abi" version = "0.3.9" @@ -1899,9 +1906,9 @@ checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" [[package]] name = "jobserver" -version = "0.1.28" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" +checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" dependencies = [ "libc", ] @@ -1981,7 +1988,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.5", ] [[package]] @@ -2020,9 +2027,9 @@ checksum = "b4ce301924b7887e9d637144fdade93f9dfff9b60981d4ac161db09720d39aa5" [[package]] name = "lock_api" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" dependencies = [ "autocfg", "scopeguard", @@ -2087,8 +2094,9 @@ dependencies = [ [[package]] name = "metal" -version = "0.27.0" -source = "git+https://github.com/gfx-rs/metal-rs?rev=ff8fd3d6dc7792852f8a015458d7e6d42d7fb352#ff8fd3d6dc7792852f8a015458d7e6d42d7fb352" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5637e166ea14be6063a3f8ba5ccb9a4159df7d8f6d61c02fc3d480b1f90dcfcb" dependencies = [ "bitflags 2.5.0", "block", @@ -2123,7 +2131,7 @@ dependencies = [ [[package]] name = "naga" -version = "0.19.2" +version = "0.20.0" dependencies = [ "arbitrary", "arrayvec 0.7.4", @@ -2153,8 +2161,9 @@ dependencies = [ [[package]] name = "naga-cli" -version = "0.19.0" +version = "0.20.0" dependencies = [ + "anyhow", "argh", "bincode", "codespan-reporting", @@ -2221,7 +2230,7 @@ dependencies = [ "log", "ndk-sys 0.5.0+25.2.9519653", "num_enum 0.7.2", - "raw-window-handle 0.6.0", + "raw-window-handle 0.6.1", "thiserror", ] @@ -2392,7 +2401,7 @@ dependencies = [ "proc-macro-crate 3.1.0", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -2430,9 +2439,9 @@ dependencies = [ [[package]] name = "objc-sys" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7c71324e4180d0899963fc83d9d241ac39e699609fc1025a850aadac8257459" +checksum = "da284c198fb9b7b0603f8635185e85fbd5b64ee154b1ed406d489077de2d6d60" [[package]] name = "objc2" @@ -2518,9 +2527,9 @@ checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" [[package]] name = "parking_lot" -version = "0.12.1" +version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +checksum = "7e4af0ca4f6caed20e900d564c242b8e5d4903fdacf31d3daf527b66fe6f42fb" dependencies = [ "lock_api", "parking_lot_core", @@ -2528,18 +2537,18 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.9" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ 
"backtrace", "cfg-if", "libc", "petgraph", - "redox_syscall 0.4.1", + "redox_syscall 0.5.1", "smallvec", "thread-id", - "windows-targets 0.48.5", + "windows-targets 0.52.5", ] [[package]] @@ -2587,7 +2596,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -2610,11 +2619,11 @@ checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] name = "player" -version = "0.19.3" +version = "0.20.0" dependencies = [ "env_logger", "log", - "raw-window-handle 0.6.0", + "raw-window-handle 0.6.1", "ron", "serde", "wgpu-core", @@ -2665,9 +2674,9 @@ dependencies = [ [[package]] name = "polling" -version = "3.6.0" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c976a60b2d7e99d6f229e414670a9b85d13ac305cc6d1e9c134de58c5aaaf6" +checksum = "645493cf344456ef24219d02a768cf1fb92ddf8c92161679ae3d91b91a637be3" dependencies = [ "cfg-if", "concurrent-queue", @@ -2726,7 +2735,7 @@ checksum = "07c277e4e643ef00c1233393c673f655e3672cf7eb3ba08a00bdd0ea59139b5f" dependencies = [ "proc-macro-rules-macros", "proc-macro2", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -2738,14 +2747,14 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] name = "proc-macro2" -version = "1.0.79" +version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba" dependencies = [ "unicode-ident", ] @@ -2767,9 +2776,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.35" +version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] @@ -2821,9 +2830,9 @@ checksum = "f2ff9a1f06a88b01621b7ae906ef0211290d1c8a168a15542486a8f61c0833b9" [[package]] name = "raw-window-handle" -version = "0.6.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42a9830a0e1b9fb145ebb365b8bc4ccd75f290f98c0247deafbbe2c75cefb544" +checksum = "8cc3bcbdb1ddfc11e700e62968e6b4cc9c75bb466464ad28fb61c5b2c964418b" [[package]] name = "rayon" @@ -2863,6 +2872,15 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "redox_syscall" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" +dependencies = [ + "bitflags 2.5.0", +] + [[package]] name = "regex" version = "1.10.4" @@ -2951,9 +2969,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.32" +version = "0.38.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" dependencies = [ "bitflags 2.5.0", "errno", @@ -3052,29 +3070,29 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" -version = "1.0.197" +version = "1.0.199" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +checksum = 
"0c9f6e76df036c77cd94996771fb40db98187f096dd0b9af39c6c6e452ba966a" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.197" +version = "1.0.199" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +checksum = "11bd257a6541e141e42ca6d24ae26f7714887b47e89aa739099104c7e4d3b7fc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] name = "serde_json" -version = "1.0.115" +version = "1.0.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" +checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813" dependencies = [ "indexmap", "itoa", @@ -3129,9 +3147,9 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.4.1" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" dependencies = [ "libc", ] @@ -3321,11 +3339,11 @@ version = "0.25.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" dependencies = [ - "heck", + "heck 0.4.1", "proc-macro2", "quote", "rustversion", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -3341,9 +3359,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.58" +version = "2.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" +checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3" dependencies = [ "proc-macro2", "quote", @@ -3361,22 +3379,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.58" +version = "1.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" +checksum = "f0126ad08bff79f29fc3ae6a55cc72352056dfff61e3ff8bb7129476d44b23aa" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.58" +version = "1.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" +checksum = "d1cd413b5d558b4c5bf3680e324a6fa5014e7b7c067a51e69dbdf47eb7148b66" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -3500,7 +3518,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -3629,9 +3647,9 @@ checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" [[package]] name = "unicode-width" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" +checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" [[package]] name = "unicode-xid" @@ -3750,7 +3768,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", "wasm-bindgen-shared", ] @@ -3784,7 +3802,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", 
"wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3817,7 +3835,7 @@ checksum = "b7f89739351a2e03cb94beb799d47fb2cac01759b40ec441f7de39b00cbf7ef0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -4034,7 +4052,7 @@ dependencies = [ [[package]] name = "wgpu" -version = "0.19.3" +version = "0.20.0" dependencies = [ "arrayvec 0.7.4", "cfg-if", @@ -4045,7 +4063,7 @@ dependencies = [ "naga", "parking_lot", "profiling", - "raw-window-handle 0.6.0", + "raw-window-handle 0.6.1", "serde", "smallvec", "static_assertions", @@ -4059,14 +4077,13 @@ dependencies = [ [[package]] name = "wgpu-core" -version = "0.19.3" +version = "0.20.0" dependencies = [ "arrayvec 0.7.4", "bit-vec", "bitflags 2.5.0", "bytemuck", "cfg_aliases", - "codespan-reporting", "document-features", "indexmap", "log", @@ -4074,7 +4091,7 @@ dependencies = [ "once_cell", "parking_lot", "profiling", - "raw-window-handle 0.6.0", + "raw-window-handle 0.6.1", "ron", "rustc-hash", "serde", @@ -4087,7 +4104,7 @@ dependencies = [ [[package]] name = "wgpu-examples" -version = "0.19.3" +version = "0.20.0" dependencies = [ "bytemuck", "cfg-if", @@ -4120,7 +4137,7 @@ dependencies = [ [[package]] name = "wgpu-hal" -version = "0.19.3" +version = "0.20.0" dependencies = [ "android_system_properties", "arrayvec 0.7.4", @@ -4144,7 +4161,7 @@ dependencies = [ "js-sys", "khronos-egl", "libc", - "libloading 0.7.4", + "libloading 0.8.3", "log", "metal", "naga", @@ -4154,7 +4171,7 @@ dependencies = [ "parking_lot", "profiling", "range-alloc", - "raw-window-handle 0.6.0", + "raw-window-handle 0.6.1", "renderdoc-sys", "rustc-hash", "smallvec", @@ -4168,7 +4185,7 @@ dependencies = [ [[package]] name = "wgpu-info" -version = "0.19.3" +version = "0.20.0" dependencies = [ "anyhow", "bitflags 2.5.0", @@ -4182,16 +4199,16 @@ dependencies = [ [[package]] name = "wgpu-macros" -version = "0.19.3" +version = "0.20.0" dependencies = [ - "heck", + "heck 0.5.0", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] name = "wgpu-test" -version = "0.19.3" +version = "0.20.0" dependencies = [ "anyhow", "arrayvec 0.7.4", @@ -4203,7 +4220,7 @@ dependencies = [ "env_logger", "futures-lite", "glam", - "heck", + "heck 0.5.0", "image", "js-sys", "libtest-mimic", @@ -4214,7 +4231,7 @@ dependencies = [ "png", "pollster", "profiling", - "raw-window-handle 0.6.0", + "raw-window-handle 0.6.1", "serde", "serde_json", "wasm-bindgen", @@ -4228,7 +4245,7 @@ dependencies = [ [[package]] name = "wgpu-types" -version = "0.19.2" +version = "0.20.0" dependencies = [ "bitflags 2.5.0", "js-sys", @@ -4274,11 +4291,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" dependencies = [ - "winapi", + "windows-sys 0.52.0", ] [[package]] @@ -4294,7 +4311,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" dependencies = [ "windows-core", - "windows-targets 0.52.4", + "windows-targets 0.52.5", ] [[package]] @@ -4303,7 +4320,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.4", + 
"windows-targets 0.52.5", ] [[package]] @@ -4343,7 +4360,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.4", + "windows-targets 0.52.5", ] [[package]] @@ -4378,17 +4395,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" dependencies = [ - "windows_aarch64_gnullvm 0.52.4", - "windows_aarch64_msvc 0.52.4", - "windows_i686_gnu 0.52.4", - "windows_i686_msvc 0.52.4", - "windows_x86_64_gnu 0.52.4", - "windows_x86_64_gnullvm 0.52.4", - "windows_x86_64_msvc 0.52.4", + "windows_aarch64_gnullvm 0.52.5", + "windows_aarch64_msvc 0.52.5", + "windows_i686_gnu 0.52.5", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.5", + "windows_x86_64_gnu 0.52.5", + "windows_x86_64_gnullvm 0.52.5", + "windows_x86_64_msvc 0.52.5", ] [[package]] @@ -4405,9 +4423,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" [[package]] name = "windows_aarch64_msvc" @@ -4429,9 +4447,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" [[package]] name = "windows_i686_gnu" @@ -4453,9 +4471,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.4" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" [[package]] name = "windows_i686_msvc" @@ -4477,9 +4501,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" [[package]] name = "windows_x86_64_gnu" @@ -4501,9 +4525,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" 
[[package]] name = "windows_x86_64_gnullvm" @@ -4519,9 +4543,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" [[package]] name = "windows_x86_64_msvc" @@ -4543,9 +4567,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" [[package]] name = "winit" @@ -4607,7 +4631,7 @@ dependencies = [ "once_cell", "orbclient", "percent-encoding", - "raw-window-handle 0.6.0", + "raw-window-handle 0.6.1", "redox_syscall 0.3.5", "rustix", "sctk-adwaita 0.8.1", @@ -4726,5 +4750,5 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] diff --git a/Cargo.toml b/Cargo.toml index c992222cf4..fbc0dba87c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,30 +45,30 @@ keywords = ["graphics"] license = "MIT OR Apache-2.0" homepage = "https://wgpu.rs/" repository = "https://github.com/gfx-rs/wgpu" -version = "0.19.3" +version = "0.20.0" authors = ["gfx-rs developers"] [workspace.dependencies.wgc] package = "wgpu-core" path = "./wgpu-core" -version = "0.19.3" +version = "0.20.0" [workspace.dependencies.wgt] package = "wgpu-types" path = "./wgpu-types" -version = "0.19.2" +version = "0.20.0" [workspace.dependencies.hal] package = "wgpu-hal" path = "./wgpu-hal" -version = "0.19.3" +version = "0.20.0" [workspace.dependencies.naga] path = "./naga" -version = "0.19.2" +version = "0.20.0" [workspace.dependencies] -anyhow = "1.0" +anyhow = "1.0.23" arrayvec = "0.7" bit-vec = "0.6" bitflags = "2" @@ -78,14 +78,14 @@ cfg-if = "1" codespan-reporting = "0.11" ctor = "0.2" document-features = "0.2.8" -encase = "0.7" +encase = "0.8" env_logger = "0.11" fern = "0.6" flume = "0.11" futures-lite = "2" getrandom = "0.2" -glam = "0.25" -heck = "0.4.0" +glam = "0.27" +heck = "0.5.0" image = { version = "0.24", default-features = false, features = ["png"] } ktx2 = "0.3" libc = "0.2" @@ -114,29 +114,29 @@ renderdoc-sys = "1.1.0" ron = "0.8" rustc-hash = "1.1.0" serde = "1" -serde_json = "1.0.115" +serde_json = "1.0.116" smallvec = "1" static_assertions = "1.1.0" thiserror = "1" -wgpu = { version = "0.19.3", path = "./wgpu" } -wgpu-core = { version = "0.19.3", path = "./wgpu-core" } -wgpu-example = { version = "0.19.0", path = "./examples/common" } -wgpu-macros = { version = "0.19.0", path = "./wgpu-macros" } -wgpu-test = { version = "0.19.0", path = "./tests" } -wgpu-types = { version = "0.19.2", path = "./wgpu-types" } +wgpu = { version = "0.20.0", path = "./wgpu" } +wgpu-core = { version = "0.20.0", path = "./wgpu-core" } +wgpu-example = { version = "0.20.0", path = "./examples/common" } +wgpu-macros = { version = "0.20.0", path = "./wgpu-macros" } +wgpu-test = { version = "0.20.0", path = "./tests" } +wgpu-types = { version = "0.20.0", path = "./wgpu-types" } winit = { version = "0.29", features = ["android-native-activity"] } # Metal dependencies block = "0.1" 
core-graphics-types = "0.1" -metal = { version = "0.27.0", git = "https://github.com/gfx-rs/metal-rs", rev = "ff8fd3d6dc7792852f8a015458d7e6d42d7fb352" } +metal = { version = "0.28.0" } objc = "0.2.5" # Vulkan dependencies android_system_properties = "0.1.1" ash = "0.37.3" gpu-alloc = "0.6" -gpu-descriptor = "0.2" +gpu-descriptor = "0.3" # DX dependencies bit-set = "0.5" @@ -144,7 +144,7 @@ gpu-allocator = { version = "0.25", default_features = false, features = [ "d3d12", "public-winapi", ] } -d3d12 = { version = "0.7.0", path = "./d3d12/" } +d3d12 = { version = "0.20.0", path = "./d3d12/" } range-alloc = "0.1" winapi = "0.3" hassle-rs = "0.11.0" diff --git a/README.md b/README.md index bc0f01b302..c1635042f0 100644 --- a/README.md +++ b/README.md @@ -199,7 +199,7 @@ To run a given set of tests: ``` # Must be inside the `cts` folder we just checked out, else this will fail -cargo run --manifest-path ../Cargo.toml --bin cts_runner -- ./tools/run_deno --verbose "" +cargo run --manifest-path ../Cargo.toml -p cts_runner --bin cts_runner -- ./tools/run_deno --verbose "" ``` To find the full list of tests, go to the [online cts viewer](https://gpuweb.github.io/cts/standalone/?runnow=0&worker=0&debug=0&q=webgpu:*). diff --git a/d3d12/Cargo.toml b/d3d12/Cargo.toml index 44f5dc35e2..2c3f721525 100644 --- a/d3d12/Cargo.toml +++ b/d3d12/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "d3d12" -version = "0.19.0" +version = "0.20.0" authors = ["gfx-rs developers"] description = "Low level D3D12 API wrapper" repository = "https://github.com/gfx-rs/wgpu/tree/trunk/d3d12" diff --git a/deno_webgpu/01_webgpu.js b/deno_webgpu/01_webgpu.js index f1916e81ee..369d1cd9b9 100644 --- a/deno_webgpu/01_webgpu.js +++ b/deno_webgpu/01_webgpu.js @@ -92,7 +92,7 @@ const { ArrayBuffer, ArrayBufferPrototypeGetByteLength, ArrayIsArray, - ArrayPrototypeFilter, + ArrayPrototypeFindLast, ArrayPrototypeMap, ArrayPrototypePop, ArrayPrototypePush, @@ -103,12 +103,9 @@ const { ObjectHasOwn, ObjectPrototypeIsPrototypeOf, Promise, - PromisePrototypeCatch, - PromisePrototypeThen, PromiseReject, PromiseResolve, SafeArrayIterator, - SafePromiseAll, SafeSet, SafeWeakRef, SetPrototypeHas, @@ -908,7 +905,7 @@ function GPUObjectBaseMixin(name, type) { /** * @typedef ErrorScope * @property {string} filter - * @property {Promise[]} operations + * @property {GPUError[]} errors */ /** @@ -964,114 +961,47 @@ class InnerGPUDevice { ArrayPrototypePush(this.resources, new SafeWeakRef(resource)); } - /** @param {{ type: string, value: string | null } | undefined} err */ - pushError(err) { - this.pushErrorPromise(PromiseResolve(err)); - } - - /** @param {Promise<{ type: string, value: string | null } | undefined>} promise */ - pushErrorPromise(promise) { - const operation = PromisePrototypeThen(promise, (err) => { - if (err) { - switch (err.type) { - case "lost": - this.isLost = true; - this.resolveLost( - createGPUDeviceLostInfo(undefined, "device was lost"), - ); - break; - case "validation": - return PromiseReject( - new GPUValidationError(err.value ?? 
"validation error"), - ); - case "out-of-memory": - return PromiseReject(new GPUOutOfMemoryError()); - case "internal": - return PromiseReject(new GPUInternalError()); - } - } - }); + // Ref: https://gpuweb.github.io/gpuweb/#abstract-opdef-dispatch-error + /** @param {{ type: string, value: string | null } | undefined} error */ + pushError(error) { + if (!error) { + return; + } - const validationStack = ArrayPrototypeFilter( - this.errorScopeStack, - ({ filter }) => filter == "validation", - ); - const validationScope = validationStack[validationStack.length - 1]; - const validationFilteredPromise = PromisePrototypeCatch( - operation, - (err) => { - if (ObjectPrototypeIsPrototypeOf(GPUValidationErrorPrototype, err)) { - return PromiseReject(err); - } - return PromiseResolve(); - }, - ); - if (validationScope) { - ArrayPrototypePush( - validationScope.operations, - validationFilteredPromise, - ); - } else { - PromisePrototypeCatch(validationFilteredPromise, (err) => { - this.device.dispatchEvent( - new GPUUncapturedErrorEvent("uncapturederror", { - error: err, - }), + let constructedError; + switch (error.type) { + case "lost": + this.isLost = true; + this.resolveLost( + createGPUDeviceLostInfo(undefined, "device was lost"), ); - }); + return; + case "validation": + constructedError = new GPUValidationError(error.value ?? "validation error"); + break; + case "out-of-memory": + constructedError = new GPUOutOfMemoryError(); + break; + case "internal": + constructedError = new GPUInternalError(); + break; } - // prevent uncaptured promise rejections - PromisePrototypeCatch(validationFilteredPromise, (_err) => {}); - const oomStack = ArrayPrototypeFilter( - this.errorScopeStack, - ({ filter }) => filter == "out-of-memory", - ); - const oomScope = oomStack[oomStack.length - 1]; - const oomFilteredPromise = PromisePrototypeCatch(operation, (err) => { - if (ObjectPrototypeIsPrototypeOf(GPUOutOfMemoryErrorPrototype, err)) { - return PromiseReject(err); - } - return PromiseResolve(); - }); - if (oomScope) { - ArrayPrototypePush(oomScope.operations, oomFilteredPromise); - } else { - PromisePrototypeCatch(oomFilteredPromise, (err) => { - this.device.dispatchEvent( - new GPUUncapturedErrorEvent("uncapturederror", { - error: err, - }), - ); - }); + if (this.isLost) { + return; } - // prevent uncaptured promise rejections - PromisePrototypeCatch(oomFilteredPromise, (_err) => {}); - const internalStack = ArrayPrototypeFilter( + const scope = ArrayPrototypeFindLast( this.errorScopeStack, - ({ filter }) => filter == "internal", + ({ filter }) => filter === error.type, ); - const internalScope = internalStack[internalStack.length - 1]; - const internalFilteredPromise = PromisePrototypeCatch(operation, (err) => { - if (ObjectPrototypeIsPrototypeOf(GPUInternalErrorPrototype, err)) { - return PromiseReject(err); - } - return PromiseResolve(); - }); - if (internalScope) { - ArrayPrototypePush(internalScope.operations, internalFilteredPromise); + if (scope) { + scope.errors.push(constructedError); } else { - PromisePrototypeCatch(internalFilteredPromise, (err) => { - this.device.dispatchEvent( - new GPUUncapturedErrorEvent("uncapturederror", { - error: err, - }), - ); - }); + this.device.dispatchEvent(new GPUUncapturedErrorEvent("uncapturederror", { + error: constructedError, + })); } - // prevent uncaptured promise rejections - PromisePrototypeCatch(internalFilteredPromise, (_err) => {}); } } @@ -1359,11 +1289,6 @@ class GPUDevice extends EventTarget { const resource = entry.resource; if 
(ObjectPrototypeIsPrototypeOf(GPUSamplerPrototype, resource)) { const rid = assertResource(resource, prefix, context); - assertDeviceMatch(device, resource, { - prefix, - resourceContext: context, - selfContext: "this", - }); return { binding: entry.binding, kind: "GPUSampler", @@ -1374,11 +1299,6 @@ class GPUDevice extends EventTarget { ) { const rid = assertResource(resource, prefix, context); assertResource(resource[_texture], prefix, context); - assertDeviceMatch(device, resource[_texture], { - prefix, - resourceContext: context, - selfContext: "this", - }); return { binding: entry.binding, kind: "GPUTextureView", @@ -1388,11 +1308,6 @@ class GPUDevice extends EventTarget { // deno-lint-ignore prefer-primordials const rid = assertResource(resource.buffer, prefix, context); // deno-lint-ignore prefer-primordials - assertDeviceMatch(device, resource.buffer, { - prefix, - resourceContext: context, - selfContext: "this", - }); return { binding: entry.binding, kind: "GPUBufferBinding", @@ -1856,7 +1771,7 @@ class GPUDevice extends EventTarget { webidl.requiredArguments(arguments.length, 1, prefix); filter = webidl.converters.GPUErrorFilter(filter, prefix, "Argument 1"); const device = assertDevice(this, prefix, "this"); - ArrayPrototypePush(device.errorScopeStack, { filter, operations: [] }); + ArrayPrototypePush(device.errorScopeStack, { filter, errors: [] }); } /** @@ -1877,12 +1792,7 @@ class GPUDevice extends EventTarget { "OperationError", ); } - const operations = SafePromiseAll(scope.operations); - return PromisePrototypeThen( - operations, - () => PromiseResolve(null), - (err) => PromiseResolve(err), - ); + return PromiseResolve(scope.errors[0] ?? null); } [SymbolFor("Deno.privateCustomInspect")](inspect, inspectOptions) { @@ -2284,19 +2194,15 @@ class GPUBuffer { this[_mapMode] = mode; this[_state] = "pending"; - const promise = PromisePrototypeThen( - op_webgpu_buffer_get_map_async( - bufferRid, - device.rid, - mode, - offset, - rangeSize, - ), - ({ err }) => err, + const { err } = await op_webgpu_buffer_get_map_async( + bufferRid, + device.rid, + mode, + offset, + rangeSize, ); - device.pushErrorPromise(promise); - const err = await promise; if (err) { + device.pushError(err); throw new DOMException("validation error occurred", "OperationError"); } this[_state] = "mapped"; diff --git a/deno_webgpu/Cargo.toml b/deno_webgpu/Cargo.toml index 586eb90c85..cf05e00f96 100644 --- a/deno_webgpu/Cargo.toml +++ b/deno_webgpu/Cargo.toml @@ -24,7 +24,15 @@ raw-window-handle = { workspace = true } [target.'cfg(not(target_arch = "wasm32"))'.dependencies.wgpu-core] workspace = true -features = ["raw-window-handle", "trace", "replay", "serde", "strict_asserts", "wgsl", "gles"] +features = [ + "raw-window-handle", + "trace", + "replay", + "serde", + "strict_asserts", + "wgsl", + "gles", +] # We want the wgpu-core Metal backend on macOS and iOS. 
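Note on the `deno_webgpu/01_webgpu.js` hunks above: error handling moves from tracking a promise per operation to the spec's synchronous dispatch-error algorithm. An error is constructed eagerly, captured by the innermost scope whose filter matches its type, and `popErrorScope` now resolves with the first captured error (or `null`). The sketch below is a minimal Rust model of that bookkeeping, written only to illustrate the control flow; the types and names are illustrative, not wgpu or Deno APIs, and it omits spec details such as device-lost handling.

```rust
// Minimal model of the new error-scope bookkeeping (illustrative only).
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq)]
enum GpuError {
    Validation(String),
    OutOfMemory,
    Internal,
}

struct ErrorScope {
    filter: &'static str,
    errors: Vec<GpuError>,
}

struct Device {
    scopes: Vec<ErrorScope>,
}

impl Device {
    fn push_error_scope(&mut self, filter: &'static str) {
        self.scopes.push(ErrorScope { filter, errors: Vec::new() });
    }

    // Walk the stack from the top; the innermost scope with a matching
    // filter captures the error, otherwise it is "uncaptured".
    fn dispatch_error(&mut self, kind: &'static str, error: GpuError) {
        match self.scopes.iter_mut().rev().find(|s| s.filter == kind) {
            Some(scope) => scope.errors.push(error),
            None => eprintln!("uncapturederror: {error:?}"),
        }
    }

    // Like the rewritten popErrorScope: resolve with the first captured
    // error, if any.
    fn pop_error_scope(&mut self) -> Option<GpuError> {
        self.scopes.pop().and_then(|s| s.errors.into_iter().next())
    }
}

fn main() {
    let mut device = Device { scopes: Vec::new() };
    device.push_error_scope("validation");
    device.dispatch_error("validation", GpuError::Validation("bad bind group".into()));
    assert!(matches!(device.pop_error_scope(), Some(GpuError::Validation(_))));
}
```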
[target.'cfg(any(target_os = "macos", target_os = "ios"))'.dependencies.wgpu-core] @@ -37,7 +45,7 @@ workspace = true features = ["dx12"] [target.'cfg(windows)'.dependencies.wgpu-hal] -version = "0.19.0" +version = "0.20.0" path = "../wgpu-hal" features = ["windows_rs"] diff --git a/deno_webgpu/compute_pass.rs b/deno_webgpu/compute_pass.rs index 65ac93d632..2cdea2c8f2 100644 --- a/deno_webgpu/compute_pass.rs +++ b/deno_webgpu/compute_pass.rs @@ -31,7 +31,7 @@ pub fn op_webgpu_compute_pass_set_pipeline( .resource_table .get::(compute_pass_rid)?; - wgpu_core::command::compute_ffi::wgpu_compute_pass_set_pipeline( + wgpu_core::command::compute_commands::wgpu_compute_pass_set_pipeline( &mut compute_pass_resource.0.borrow_mut(), compute_pipeline_resource.1, ); @@ -52,7 +52,7 @@ pub fn op_webgpu_compute_pass_dispatch_workgroups( .resource_table .get::(compute_pass_rid)?; - wgpu_core::command::compute_ffi::wgpu_compute_pass_dispatch_workgroups( + wgpu_core::command::compute_commands::wgpu_compute_pass_dispatch_workgroups( &mut compute_pass_resource.0.borrow_mut(), x, y, @@ -77,7 +77,7 @@ pub fn op_webgpu_compute_pass_dispatch_workgroups_indirect( .resource_table .get::(compute_pass_rid)?; - wgpu_core::command::compute_ffi::wgpu_compute_pass_dispatch_workgroups_indirect( + wgpu_core::command::compute_commands::wgpu_compute_pass_dispatch_workgroups_indirect( &mut compute_pass_resource.0.borrow_mut(), buffer_resource.1, indirect_offset, @@ -137,17 +137,12 @@ pub fn op_webgpu_compute_pass_set_bind_group( let dynamic_offsets_data: &[u32] = &dynamic_offsets_data[start..start + len]; - // SAFETY: the raw pointer and length are of the same slice, and that slice - // lives longer than the below function invocation. - unsafe { - wgpu_core::command::compute_ffi::wgpu_compute_pass_set_bind_group( - &mut compute_pass_resource.0.borrow_mut(), - index, - bind_group_resource.1, - dynamic_offsets_data.as_ptr(), - dynamic_offsets_data.len(), - ); - } + wgpu_core::command::compute_commands::wgpu_compute_pass_set_bind_group( + &mut compute_pass_resource.0.borrow_mut(), + index, + bind_group_resource.1, + dynamic_offsets_data, + ); Ok(WebGpuResult::empty()) } @@ -163,16 +158,11 @@ pub fn op_webgpu_compute_pass_push_debug_group( .resource_table .get::(compute_pass_rid)?; - let label = std::ffi::CString::new(group_label).unwrap(); - // SAFETY: the string the raw pointer points to lives longer than the below - // function invocation. - unsafe { - wgpu_core::command::compute_ffi::wgpu_compute_pass_push_debug_group( - &mut compute_pass_resource.0.borrow_mut(), - label.as_ptr(), - 0, // wgpu#975 - ); - } + wgpu_core::command::compute_commands::wgpu_compute_pass_push_debug_group( + &mut compute_pass_resource.0.borrow_mut(), + group_label, + 0, // wgpu#975 + ); Ok(WebGpuResult::empty()) } @@ -187,7 +177,7 @@ pub fn op_webgpu_compute_pass_pop_debug_group( .resource_table .get::(compute_pass_rid)?; - wgpu_core::command::compute_ffi::wgpu_compute_pass_pop_debug_group( + wgpu_core::command::compute_commands::wgpu_compute_pass_pop_debug_group( &mut compute_pass_resource.0.borrow_mut(), ); @@ -205,16 +195,11 @@ pub fn op_webgpu_compute_pass_insert_debug_marker( .resource_table .get::(compute_pass_rid)?; - let label = std::ffi::CString::new(marker_label).unwrap(); - // SAFETY: the string the raw pointer points to lives longer than the below - // function invocation. 
- unsafe { - wgpu_core::command::compute_ffi::wgpu_compute_pass_insert_debug_marker( - &mut compute_pass_resource.0.borrow_mut(), - label.as_ptr(), - 0, // wgpu#975 - ); - } + wgpu_core::command::compute_commands::wgpu_compute_pass_insert_debug_marker( + &mut compute_pass_resource.0.borrow_mut(), + marker_label, + 0, // wgpu#975 + ); Ok(WebGpuResult::empty()) } diff --git a/deno_webgpu/pipeline.rs b/deno_webgpu/pipeline.rs index 3031287607..e8b5a71cf0 100644 --- a/deno_webgpu/pipeline.rs +++ b/deno_webgpu/pipeline.rs @@ -113,6 +113,7 @@ pub fn op_webgpu_create_compute_pipeline( module: compute_shader_module_resource.1, entry_point: compute.entry_point.map(Cow::from), constants: Cow::Owned(compute.constants), + zero_initialize_workgroup_memory: true, }, }; let implicit_pipelines = match layout { @@ -359,6 +360,8 @@ pub fn op_webgpu_create_render_pipeline( module: fragment_shader_module_resource.1, entry_point: Some(Cow::from(fragment.entry_point)), constants: Cow::Owned(fragment.constants), + // Required to be true for WebGPU + zero_initialize_workgroup_memory: true, }, targets: Cow::Owned(fragment.targets), }) @@ -382,6 +385,8 @@ pub fn op_webgpu_create_render_pipeline( module: vertex_shader_module_resource.1, entry_point: Some(Cow::Owned(args.vertex.entry_point)), constants: Cow::Owned(args.vertex.constants), + // Required to be true for WebGPU + zero_initialize_workgroup_memory: true, }, buffers: Cow::Owned(vertex_buffers), }, diff --git a/deno_webgpu/render_pass.rs b/deno_webgpu/render_pass.rs index 11b2f22865..5a5ecdbadc 100644 --- a/deno_webgpu/render_pass.rs +++ b/deno_webgpu/render_pass.rs @@ -41,7 +41,7 @@ pub fn op_webgpu_render_pass_set_viewport( .resource_table .get::(args.render_pass_rid)?; - wgpu_core::command::render_ffi::wgpu_render_pass_set_viewport( + wgpu_core::command::render_commands::wgpu_render_pass_set_viewport( &mut render_pass_resource.0.borrow_mut(), args.x, args.y, @@ -68,7 +68,7 @@ pub fn op_webgpu_render_pass_set_scissor_rect( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_ffi::wgpu_render_pass_set_scissor_rect( + wgpu_core::command::render_commands::wgpu_render_pass_set_scissor_rect( &mut render_pass_resource.0.borrow_mut(), x, y, @@ -90,7 +90,7 @@ pub fn op_webgpu_render_pass_set_blend_constant( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_ffi::wgpu_render_pass_set_blend_constant( + wgpu_core::command::render_commands::wgpu_render_pass_set_blend_constant( &mut render_pass_resource.0.borrow_mut(), &color, ); @@ -109,7 +109,7 @@ pub fn op_webgpu_render_pass_set_stencil_reference( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_ffi::wgpu_render_pass_set_stencil_reference( + wgpu_core::command::render_commands::wgpu_render_pass_set_stencil_reference( &mut render_pass_resource.0.borrow_mut(), reference, ); @@ -128,7 +128,7 @@ pub fn op_webgpu_render_pass_begin_occlusion_query( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_ffi::wgpu_render_pass_begin_occlusion_query( + wgpu_core::command::render_commands::wgpu_render_pass_begin_occlusion_query( &mut render_pass_resource.0.borrow_mut(), query_index, ); @@ -146,7 +146,7 @@ pub fn op_webgpu_render_pass_end_occlusion_query( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_ffi::wgpu_render_pass_end_occlusion_query( + wgpu_core::command::render_commands::wgpu_render_pass_end_occlusion_query( &mut render_pass_resource.0.borrow_mut(), ); @@ -174,15 +174,10 @@ pub fn op_webgpu_render_pass_execute_bundles( 
.resource_table .get::(render_pass_rid)?; - // SAFETY: the raw pointer and length are of the same slice, and that slice - // lives longer than the below function invocation. - unsafe { - wgpu_core::command::render_ffi::wgpu_render_pass_execute_bundles( - &mut render_pass_resource.0.borrow_mut(), - bundles.as_ptr(), - bundles.len(), - ); - } + wgpu_core::command::render_commands::wgpu_render_pass_execute_bundles( + &mut render_pass_resource.0.borrow_mut(), + &bundles, + ); Ok(WebGpuResult::empty()) } @@ -235,17 +230,12 @@ pub fn op_webgpu_render_pass_set_bind_group( let dynamic_offsets_data: &[u32] = &dynamic_offsets_data[start..start + len]; - // SAFETY: the raw pointer and length are of the same slice, and that slice - // lives longer than the below function invocation. - unsafe { - wgpu_core::command::render_ffi::wgpu_render_pass_set_bind_group( - &mut render_pass_resource.0.borrow_mut(), - index, - bind_group_resource.1, - dynamic_offsets_data.as_ptr(), - dynamic_offsets_data.len(), - ); - } + wgpu_core::command::render_commands::wgpu_render_pass_set_bind_group( + &mut render_pass_resource.0.borrow_mut(), + index, + bind_group_resource.1, + dynamic_offsets_data, + ); Ok(WebGpuResult::empty()) } @@ -261,16 +251,11 @@ pub fn op_webgpu_render_pass_push_debug_group( .resource_table .get::(render_pass_rid)?; - let label = std::ffi::CString::new(group_label).unwrap(); - // SAFETY: the string the raw pointer points to lives longer than the below - // function invocation. - unsafe { - wgpu_core::command::render_ffi::wgpu_render_pass_push_debug_group( - &mut render_pass_resource.0.borrow_mut(), - label.as_ptr(), - 0, // wgpu#975 - ); - } + wgpu_core::command::render_commands::wgpu_render_pass_push_debug_group( + &mut render_pass_resource.0.borrow_mut(), + group_label, + 0, // wgpu#975 + ); Ok(WebGpuResult::empty()) } @@ -285,7 +270,7 @@ pub fn op_webgpu_render_pass_pop_debug_group( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_ffi::wgpu_render_pass_pop_debug_group( + wgpu_core::command::render_commands::wgpu_render_pass_pop_debug_group( &mut render_pass_resource.0.borrow_mut(), ); @@ -303,16 +288,11 @@ pub fn op_webgpu_render_pass_insert_debug_marker( .resource_table .get::(render_pass_rid)?; - let label = std::ffi::CString::new(marker_label).unwrap(); - // SAFETY: the string the raw pointer points to lives longer than the below - // function invocation. 
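The `compute_pass.rs` and `render_pass.rs` hunks all follow one pattern: wgpu-core's `command::*_ffi` entry points (raw pointer plus length, `CString` labels, `unsafe` at every call site) are replaced by `command::*_commands` functions that take `&[u32]` slices and `&str` labels directly. The standalone sketch below shows why the slice form needs no `unsafe` on the caller's side; the signatures are simplified stand-ins, not wgpu's actual functions.

```rust
// Old shape: a raw pointer and a length the caller must guarantee match,
// so the function is `unsafe`.
unsafe fn set_bind_group_ffi(offsets_ptr: *const u32, offsets_len: usize) -> u64 {
    // SAFETY: the caller promises `offsets_ptr` points to `offsets_len` u32s.
    let offsets = std::slice::from_raw_parts(offsets_ptr, offsets_len);
    offsets.iter().map(|&o| u64::from(o)).sum()
}

// New shape: a slice carries its own length, so this is plain safe Rust.
fn set_bind_group_safe(offsets: &[u32]) -> u64 {
    offsets.iter().map(|&o| u64::from(o)).sum()
}

fn main() {
    let offsets = [256u32, 512];
    let a = unsafe { set_bind_group_ffi(offsets.as_ptr(), offsets.len()) };
    let b = set_bind_group_safe(&offsets);
    assert_eq!(a, b);
}
```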
- unsafe { - wgpu_core::command::render_ffi::wgpu_render_pass_insert_debug_marker( - &mut render_pass_resource.0.borrow_mut(), - label.as_ptr(), - 0, // wgpu#975 - ); - } + wgpu_core::command::render_commands::wgpu_render_pass_insert_debug_marker( + &mut render_pass_resource.0.borrow_mut(), + marker_label, + 0, // wgpu#975 + ); Ok(WebGpuResult::empty()) } @@ -331,7 +311,7 @@ pub fn op_webgpu_render_pass_set_pipeline( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_ffi::wgpu_render_pass_set_pipeline( + wgpu_core::command::render_commands::wgpu_render_pass_set_pipeline( &mut render_pass_resource.0.borrow_mut(), render_pipeline_resource.1, ); @@ -401,7 +381,7 @@ pub fn op_webgpu_render_pass_set_vertex_buffer( None }; - wgpu_core::command::render_ffi::wgpu_render_pass_set_vertex_buffer( + wgpu_core::command::render_commands::wgpu_render_pass_set_vertex_buffer( &mut render_pass_resource.0.borrow_mut(), slot, buffer_resource.1, @@ -426,7 +406,7 @@ pub fn op_webgpu_render_pass_draw( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_ffi::wgpu_render_pass_draw( + wgpu_core::command::render_commands::wgpu_render_pass_draw( &mut render_pass_resource.0.borrow_mut(), vertex_count, instance_count, @@ -452,7 +432,7 @@ pub fn op_webgpu_render_pass_draw_indexed( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_ffi::wgpu_render_pass_draw_indexed( + wgpu_core::command::render_commands::wgpu_render_pass_draw_indexed( &mut render_pass_resource.0.borrow_mut(), index_count, instance_count, @@ -479,7 +459,7 @@ pub fn op_webgpu_render_pass_draw_indirect( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_ffi::wgpu_render_pass_draw_indirect( + wgpu_core::command::render_commands::wgpu_render_pass_draw_indirect( &mut render_pass_resource.0.borrow_mut(), buffer_resource.1, indirect_offset, @@ -503,7 +483,7 @@ pub fn op_webgpu_render_pass_draw_indexed_indirect( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_ffi::wgpu_render_pass_draw_indexed_indirect( + wgpu_core::command::render_commands::wgpu_render_pass_draw_indexed_indirect( &mut render_pass_resource.0.borrow_mut(), buffer_resource.1, indirect_offset, diff --git a/examples/src/boids/mod.rs b/examples/src/boids/mod.rs index 02846beeae..6c8bb6e76c 100644 --- a/examples/src/boids/mod.rs +++ b/examples/src/boids/mod.rs @@ -132,7 +132,7 @@ impl crate::framework::Example for Example { vertex: wgpu::VertexState { module: &draw_shader, entry_point: "main_vs", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[ wgpu::VertexBufferLayout { array_stride: 4 * 4, @@ -149,7 +149,7 @@ impl crate::framework::Example for Example { fragment: Some(wgpu::FragmentState { module: &draw_shader, entry_point: "main_fs", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), primitive: wgpu::PrimitiveState::default(), @@ -165,7 +165,7 @@ impl crate::framework::Example for Example { layout: Some(&compute_pipeline_layout), module: &compute_shader, entry_point: "main", - constants: &Default::default(), + compilation_options: Default::default(), }); // buffer for the three 2d triangle vertices of each instance diff --git a/examples/src/bunnymark/mod.rs b/examples/src/bunnymark/mod.rs index be09478071..679fc5014a 100644 --- a/examples/src/bunnymark/mod.rs +++ b/examples/src/bunnymark/mod.rs @@ -203,13 +203,13 @@ impl crate::framework::Example for Example { vertex: 
wgpu::VertexState { module: &shader, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: config.view_formats[0], blend: Some(wgpu::BlendState::ALPHA_BLENDING), diff --git a/examples/src/conservative_raster/mod.rs b/examples/src/conservative_raster/mod.rs index 12cdaa399d..89500a798f 100644 --- a/examples/src/conservative_raster/mod.rs +++ b/examples/src/conservative_raster/mod.rs @@ -97,13 +97,13 @@ impl crate::framework::Example for Example { vertex: wgpu::VertexState { module: &shader_triangle_and_lines, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader_triangle_and_lines, entry_point: "fs_main_red", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(RENDER_TARGET_FORMAT.into())], }), primitive: wgpu::PrimitiveState { @@ -122,13 +122,13 @@ impl crate::framework::Example for Example { vertex: wgpu::VertexState { module: &shader_triangle_and_lines, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader_triangle_and_lines, entry_point: "fs_main_blue", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(RENDER_TARGET_FORMAT.into())], }), primitive: wgpu::PrimitiveState::default(), @@ -148,13 +148,13 @@ impl crate::framework::Example for Example { vertex: wgpu::VertexState { module: &shader_triangle_and_lines, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader_triangle_and_lines, entry_point: "fs_main_white", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), primitive: wgpu::PrimitiveState { @@ -211,13 +211,13 @@ impl crate::framework::Example for Example { vertex: wgpu::VertexState { module: &shader, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), primitive: wgpu::PrimitiveState::default(), diff --git a/examples/src/cube/mod.rs b/examples/src/cube/mod.rs index d87193fcfe..9347627812 100644 --- a/examples/src/cube/mod.rs +++ b/examples/src/cube/mod.rs @@ -244,13 +244,13 @@ impl crate::framework::Example for Example { vertex: wgpu::VertexState { module: &shader, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &vertex_buffers, }, fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), primitive: wgpu::PrimitiveState { @@ -272,13 +272,13 @@ impl crate::framework::Example for Example { vertex: wgpu::VertexState { module: &shader, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: 
&vertex_buffers, }, fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_wire", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: config.view_formats[0], blend: Some(wgpu::BlendState { diff --git a/examples/src/hello_compute/mod.rs b/examples/src/hello_compute/mod.rs index 63169662e0..d04aaa4309 100644 --- a/examples/src/hello_compute/mod.rs +++ b/examples/src/hello_compute/mod.rs @@ -109,7 +109,7 @@ async fn execute_gpu_inner( layout: None, module: &cs_module, entry_point: "main", - constants: &Default::default(), + compilation_options: Default::default(), }); // Instantiates the bind group, once again specifying the binding of buffers. diff --git a/examples/src/hello_synchronization/mod.rs b/examples/src/hello_synchronization/mod.rs index 7dc2e6c9c0..0a222fbe54 100644 --- a/examples/src/hello_synchronization/mod.rs +++ b/examples/src/hello_synchronization/mod.rs @@ -103,14 +103,14 @@ async fn execute( layout: Some(&pipeline_layout), module: &shaders_module, entry_point: "patient_main", - constants: &Default::default(), + compilation_options: Default::default(), }); let hasty_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { label: None, layout: Some(&pipeline_layout), module: &shaders_module, entry_point: "hasty_main", - constants: &Default::default(), + compilation_options: Default::default(), }); //---------------------------------------------------------- diff --git a/examples/src/hello_triangle/mod.rs b/examples/src/hello_triangle/mod.rs index 76b7a5a73d..79162a6956 100644 --- a/examples/src/hello_triangle/mod.rs +++ b/examples/src/hello_triangle/mod.rs @@ -60,12 +60,12 @@ async fn run(event_loop: EventLoop<()>, window: Window) { module: &shader, entry_point: "vs_main", buffers: &[], - constants: &Default::default(), + compilation_options: Default::default(), }, fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(swapchain_format.into())], }), primitive: wgpu::PrimitiveState::default(), diff --git a/examples/src/hello_workgroups/mod.rs b/examples/src/hello_workgroups/mod.rs index 5fb0eff6b1..572de36d3e 100644 --- a/examples/src/hello_workgroups/mod.rs +++ b/examples/src/hello_workgroups/mod.rs @@ -110,7 +110,7 @@ async fn run() { layout: Some(&pipeline_layout), module: &shader, entry_point: "main", - constants: &Default::default(), + compilation_options: Default::default(), }); //---------------------------------------------------------- diff --git a/examples/src/mipmap/mod.rs b/examples/src/mipmap/mod.rs index fc40d5d884..0848e94e10 100644 --- a/examples/src/mipmap/mod.rs +++ b/examples/src/mipmap/mod.rs @@ -93,13 +93,13 @@ impl Example { vertex: wgpu::VertexState { module: &shader, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(TEXTURE_FORMAT.into())], }), primitive: wgpu::PrimitiveState { @@ -292,13 +292,13 @@ impl crate::framework::Example for Example { vertex: wgpu::VertexState { module: &shader, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_main", - constants: 
&Default::default(), + compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), primitive: wgpu::PrimitiveState { diff --git a/examples/src/msaa_line/mod.rs b/examples/src/msaa_line/mod.rs index 178968f47b..cd22e75bc4 100644 --- a/examples/src/msaa_line/mod.rs +++ b/examples/src/msaa_line/mod.rs @@ -54,7 +54,7 @@ impl Example { vertex: wgpu::VertexState { module: shader, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[wgpu::VertexBufferLayout { array_stride: std::mem::size_of::() as wgpu::BufferAddress, step_mode: wgpu::VertexStepMode::Vertex, @@ -64,7 +64,7 @@ impl Example { fragment: Some(wgpu::FragmentState { module: shader, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), primitive: wgpu::PrimitiveState { diff --git a/examples/src/render_to_texture/mod.rs b/examples/src/render_to_texture/mod.rs index 0cb2cdea74..5e571dc74e 100644 --- a/examples/src/render_to_texture/mod.rs +++ b/examples/src/render_to_texture/mod.rs @@ -59,13 +59,13 @@ async fn run(_path: Option) { vertex: wgpu::VertexState { module: &shader, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(wgpu::TextureFormat::Rgba8UnormSrgb.into())], }), primitive: wgpu::PrimitiveState::default(), diff --git a/examples/src/repeated_compute/mod.rs b/examples/src/repeated_compute/mod.rs index 0c47055191..55e87eed9a 100644 --- a/examples/src/repeated_compute/mod.rs +++ b/examples/src/repeated_compute/mod.rs @@ -245,7 +245,7 @@ impl WgpuContext { layout: Some(&pipeline_layout), module: &shader, entry_point: "main", - constants: &Default::default(), + compilation_options: Default::default(), }); WgpuContext { diff --git a/examples/src/shadow/mod.rs b/examples/src/shadow/mod.rs index d0a29cc8b0..2cb6d6f3e2 100644 --- a/examples/src/shadow/mod.rs +++ b/examples/src/shadow/mod.rs @@ -500,7 +500,7 @@ impl crate::framework::Example for Example { vertex: wgpu::VertexState { module: &shader, entry_point: "vs_bake", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[vb_desc.clone()], }, fragment: None, @@ -633,7 +633,7 @@ impl crate::framework::Example for Example { vertex: wgpu::VertexState { module: &shader, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[vb_desc], }, fragment: Some(wgpu::FragmentState { @@ -643,7 +643,7 @@ impl crate::framework::Example for Example { } else { "fs_main_without_storage" }, - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), primitive: wgpu::PrimitiveState { diff --git a/examples/src/skybox/mod.rs b/examples/src/skybox/mod.rs index 443c9d41e0..35a4266d20 100644 --- a/examples/src/skybox/mod.rs +++ b/examples/src/skybox/mod.rs @@ -199,13 +199,13 @@ impl crate::framework::Example for Example { vertex: wgpu::VertexState { module: &shader, entry_point: "vs_sky", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_sky", - constants: &Default::default(), + compilation_options: 
Default::default(), targets: &[Some(config.view_formats[0].into())], }), primitive: wgpu::PrimitiveState { @@ -228,7 +228,7 @@ impl crate::framework::Example for Example { vertex: wgpu::VertexState { module: &shader, entry_point: "vs_entity", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[wgpu::VertexBufferLayout { array_stride: std::mem::size_of::() as wgpu::BufferAddress, step_mode: wgpu::VertexStepMode::Vertex, @@ -238,7 +238,7 @@ impl crate::framework::Example for Example { fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_entity", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), primitive: wgpu::PrimitiveState { diff --git a/examples/src/srgb_blend/mod.rs b/examples/src/srgb_blend/mod.rs index fdff310c31..f701aff989 100644 --- a/examples/src/srgb_blend/mod.rs +++ b/examples/src/srgb_blend/mod.rs @@ -131,13 +131,13 @@ impl crate::framework::Example for Example { vertex: wgpu::VertexState { module: &shader, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &vertex_buffers, }, fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: config.view_formats[0], blend: Some(wgpu::BlendState::ALPHA_BLENDING), diff --git a/examples/src/stencil_triangles/mod.rs b/examples/src/stencil_triangles/mod.rs index 07b8e3ec51..e0f495177f 100644 --- a/examples/src/stencil_triangles/mod.rs +++ b/examples/src/stencil_triangles/mod.rs @@ -74,13 +74,13 @@ impl crate::framework::Example for Example { vertex: wgpu::VertexState { module: &shader, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &vertex_buffers, }, fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: config.view_formats[0], blend: None, @@ -114,13 +114,13 @@ impl crate::framework::Example for Example { vertex: wgpu::VertexState { module: &shader, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &vertex_buffers, }, fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), primitive: Default::default(), diff --git a/examples/src/storage_texture/mod.rs b/examples/src/storage_texture/mod.rs index f83f61967d..02900c8918 100644 --- a/examples/src/storage_texture/mod.rs +++ b/examples/src/storage_texture/mod.rs @@ -100,7 +100,7 @@ async fn run(_path: Option) { layout: Some(&pipeline_layout), module: &shader, entry_point: "main", - constants: &Default::default(), + compilation_options: Default::default(), }); log::info!("Wgpu context set up."); diff --git a/examples/src/texture_arrays/mod.rs b/examples/src/texture_arrays/mod.rs index c786b0efee..dd7b4ec89a 100644 --- a/examples/src/texture_arrays/mod.rs +++ b/examples/src/texture_arrays/mod.rs @@ -321,7 +321,7 @@ impl crate::framework::Example for Example { vertex: wgpu::VertexState { module: &base_shader_module, entry_point: "vert_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: 
&[wgpu::VertexBufferLayout { array_stride: vertex_size as wgpu::BufferAddress, step_mode: wgpu::VertexStepMode::Vertex, @@ -331,7 +331,7 @@ impl crate::framework::Example for Example { fragment: Some(wgpu::FragmentState { module: fragment_shader_module, entry_point: fragment_entry_point, - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), primitive: wgpu::PrimitiveState { diff --git a/examples/src/timestamp_queries/mod.rs b/examples/src/timestamp_queries/mod.rs index 58952c76c0..7042d60fe9 100644 --- a/examples/src/timestamp_queries/mod.rs +++ b/examples/src/timestamp_queries/mod.rs @@ -298,7 +298,7 @@ fn compute_pass( layout: None, module, entry_point: "main_cs", - constants: &Default::default(), + compilation_options: Default::default(), }); let bind_group_layout = compute_pipeline.get_bind_group_layout(0); let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { @@ -353,13 +353,13 @@ fn render_pass( vertex: wgpu::VertexState { module, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(format.into())], }), primitive: wgpu::PrimitiveState::default(), diff --git a/examples/src/uniform_values/mod.rs b/examples/src/uniform_values/mod.rs index 1ddee03e9f..932c7aaeec 100644 --- a/examples/src/uniform_values/mod.rs +++ b/examples/src/uniform_values/mod.rs @@ -179,13 +179,13 @@ impl WgpuContext { vertex: wgpu::VertexState { module: &shader, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(swapchain_format.into())], }), primitive: wgpu::PrimitiveState::default(), diff --git a/examples/src/water/mod.rs b/examples/src/water/mod.rs index 6bc3824e73..94f12895a8 100644 --- a/examples/src/water/mod.rs +++ b/examples/src/water/mod.rs @@ -512,7 +512,7 @@ impl crate::framework::Example for Example { vertex: wgpu::VertexState { module: &water_module, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), // Layout of our vertices. This should match the structs // which are uploaded to the GPU. This should also be // ensured by tagging on either a `#[repr(C)]` onto a @@ -528,7 +528,7 @@ impl crate::framework::Example for Example { fragment: Some(wgpu::FragmentState { module: &water_module, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), // Describes how the colour will be interpolated // and assigned to the output attachment. 
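Every pipeline descriptor in the examples changes the same way: the old `constants: &Default::default()` field on `VertexState`, `FragmentState`, and `ComputePipelineDescriptor` becomes `compilation_options: Default::default()`, with per-constant overrides now living inside `PipelineCompilationOptions`. Below is a minimal runnable sketch of the migrated descriptor shape; it assumes wgpu 0.20 plus the `pollster` crate for blocking, and is not taken from the examples above.

```rust
// Minimal compute pipeline showing the renamed `compilation_options` field.
fn main() {
    pollster::block_on(async {
        let instance = wgpu::Instance::default();
        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions::default())
            .await
            .expect("no adapter");
        let (device, _queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: None,
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::downlevel_defaults(),
                },
                None,
            )
            .await
            .expect("no device");
        let module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: None,
            source: wgpu::ShaderSource::Wgsl(
                "@compute @workgroup_size(1) fn main() {}".into(),
            ),
        });
        let _pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: None,
            layout: None,
            module: &module,
            entry_point: "main",
            // Replaces the old `constants: &Default::default()` field.
            compilation_options: Default::default(),
        });
    });
}
```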
targets: &[Some(wgpu::ColorTargetState { @@ -583,7 +583,7 @@ impl crate::framework::Example for Example { vertex: wgpu::VertexState { module: &terrain_module, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[wgpu::VertexBufferLayout { array_stride: terrain_vertex_size as wgpu::BufferAddress, step_mode: wgpu::VertexStepMode::Vertex, @@ -593,7 +593,7 @@ impl crate::framework::Example for Example { fragment: Some(wgpu::FragmentState { module: &terrain_module, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), primitive: wgpu::PrimitiveState { diff --git a/naga-cli/Cargo.toml b/naga-cli/Cargo.toml index 1f35499589..dc03fc96c4 100644 --- a/naga-cli/Cargo.toml +++ b/naga-cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "naga-cli" -version = "0.19.0" +version = "0.20.0" authors = ["gfx-rs developers"] edition = "2021" description = "Shader translation command line tool" @@ -23,9 +23,10 @@ log = "0.4" codespan-reporting = "0.11" env_logger = "0.11" argh = "0.1.5" +anyhow.workspace = true [dependencies.naga] -version = "0.19" +version = "0.20.0" path = "../naga" features = [ "compact", diff --git a/naga-cli/src/bin/naga.rs b/naga-cli/src/bin/naga.rs index eaa37b8fc3..7ff086d3f7 100644 --- a/naga-cli/src/bin/naga.rs +++ b/naga-cli/src/bin/naga.rs @@ -1,4 +1,5 @@ #![allow(clippy::manual_strip)] +use anyhow::{anyhow, Context as _}; #[allow(unused_imports)] use std::fs; use std::{error::Error, fmt, io::Read, path::Path, str::FromStr}; @@ -62,6 +63,16 @@ struct Args { #[argh(option)] shader_model: Option, + /// the shader stage, for example 'frag', 'vert', or 'compute'. + /// if the shader stage is unspecified it will be derived from + /// the file extension. + #[argh(option)] + shader_stage: Option, + + /// the kind of input, e.g. 'glsl', 'wgsl', 'spv', or 'bin'. + #[argh(option)] + input_kind: Option, + /// the metal version to use, for example, 1.0, 1.1, 1.2, etc. #[argh(option)] metal_version: Option, @@ -170,6 +181,46 @@ impl FromStr for ShaderModelArg { } } +/// Newtype so we can implement [`FromStr`] for `ShaderStage`. +#[derive(Debug, Clone, Copy)] +struct ShaderStage(naga::ShaderStage); + +impl FromStr for ShaderStage { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + use naga::ShaderStage; + Ok(Self(match s.to_lowercase().as_str() { + "frag" | "fragment" => ShaderStage::Fragment, + "comp" | "compute" => ShaderStage::Compute, + "vert" | "vertex" => ShaderStage::Vertex, + _ => return Err(anyhow!("Invalid shader stage: {s}")), + })) + } +} + +/// Input kind/file extension mapping +#[derive(Debug, Clone, Copy)] +enum InputKind { + Bincode, + Glsl, + SpirV, + Wgsl, +} +impl FromStr for InputKind { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + Ok(match s.to_lowercase().as_str() { + "bin" => InputKind::Bincode, + "glsl" => InputKind::Glsl, + "spv" => InputKind::SpirV, + "wgsl" => InputKind::Wgsl, + _ => return Err(anyhow!("Invalid value for --input-kind: {s}")), + }) + } +} + /// Newtype so we can implement [`FromStr`] for [`naga::back::glsl::Version`]. 
#[derive(Clone, Debug)] struct GlslProfileArg(naga::back::glsl::Version); @@ -247,6 +298,8 @@ struct Parameters<'a> { msl: naga::back::msl::Options, glsl: naga::back::glsl::Options, hlsl: naga::back::hlsl::Options, + input_kind: Option, + shader_stage: Option, } trait PrettyResult { @@ -300,7 +353,7 @@ impl fmt::Display for CliError { } impl std::error::Error for CliError {} -fn run() -> Result<(), Box> { +fn run() -> anyhow::Result<()> { env_logger::init(); // Parse commandline arguments @@ -381,6 +434,9 @@ fn run() -> Result<(), Box> { return Err(CliError("Input file path is not specified").into()); }; + params.input_kind = args.input_kind; + params.shader_stage = args.shader_stage; + let Parsed { mut module, input_text, @@ -424,6 +480,8 @@ fn run() -> Result<(), Box> { // Validate the IR before compaction. let info = match naga::valid::Validator::new(params.validation_flags, validation_caps) + .subgroup_stages(naga::valid::ShaderStages::all()) + .subgroup_operations(naga::valid::SubgroupOperationSet::all()) .validate(&module) { Ok(info) => Some(info), @@ -498,67 +556,70 @@ struct Parsed { input_text: Option, } -fn parse_input( - input_path: &Path, - input: Vec, - params: &Parameters, -) -> Result> { - let (module, input_text) = match Path::new(&input_path) - .extension() - .ok_or(CliError("Input filename has no extension"))? - .to_str() - .ok_or(CliError("Input filename not valid unicode"))? - { - "bin" => (bincode::deserialize(&input)?, None), - "spv" => naga::front::spv::parse_u8_slice(&input, ¶ms.spv_in).map(|m| (m, None))?, - "wgsl" => { +fn parse_input(input_path: &Path, input: Vec, params: &Parameters) -> anyhow::Result { + let input_kind = match params.input_kind { + Some(kind) => kind, + None => input_path + .extension() + .context("Input filename has no extension")? + .to_str() + .context("Input filename not valid unicode")? + .parse() + .context("Unable to determine --input-kind from filename")?, + }; + + let (module, input_text) = match input_kind { + InputKind::Bincode => (bincode::deserialize(&input)?, None), + InputKind::SpirV => { + naga::front::spv::parse_u8_slice(&input, ¶ms.spv_in).map(|m| (m, None))? + } + InputKind::Wgsl => { let input = String::from_utf8(input)?; let result = naga::front::wgsl::parse_str(&input); match result { Ok(v) => (v, Some(input)), Err(ref e) => { - let message = format!( + let message = anyhow!( "Could not parse WGSL:\n{}", e.emit_to_string_with_path(&input, input_path) ); - return Err(message.into()); + return Err(message); } } } - ext @ ("vert" | "frag" | "comp" | "glsl") => { + InputKind::Glsl => { + let shader_stage = match params.shader_stage { + Some(shader_stage) => shader_stage, + None => { + // filename.shader_stage.glsl -> filename.shader_stage + let file_stem = input_path + .file_stem() + .context("Unable to determine file stem from input filename.")?; + // filename.shader_stage -> shader_stage + let inner_ext = Path::new(file_stem) + .extension() + .context("Unable to determine inner extension from input filename.")? + .to_str() + .context("Input filename not valid unicode")?; + inner_ext.parse().context("from input filename")? 
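Two CLI-facing changes land in the `naga.rs` hunks: `--shader-stage` and `--input-kind` let the stage and input language be given explicitly rather than inferred from file extensions, and validation now opts into the new subgroup support via the chained `subgroup_stages`/`subgroup_operations` calls on the naga 0.20 validator. A hedged standalone sketch of that validator setup follows; it assumes naga 0.20 with the `wgsl-in` feature, and the trivial WGSL module stands in for the CLI's real input handling.

```rust
// Validate a parsed module with subgroup support enabled, as the CLI now does.
fn main() {
    let module = naga::front::wgsl::parse_str(
        "@compute @workgroup_size(64) fn main() {}",
    )
    .expect("parse");
    let info = naga::valid::Validator::new(
        naga::valid::ValidationFlags::all(),
        naga::valid::Capabilities::all(),
    )
    // New in 0.20: subgroup usage is validated per stage and operation set.
    .subgroup_stages(naga::valid::ShaderStages::all())
    .subgroup_operations(naga::valid::SubgroupOperationSet::all())
    .validate(&module)
    .expect("validate");
    let _ = info;
}
```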
+ } + }; let input = String::from_utf8(input)?; let mut parser = naga::front::glsl::Frontend::default(); - ( parser .parse( &naga::front::glsl::Options { - stage: match ext { - "vert" => naga::ShaderStage::Vertex, - "frag" => naga::ShaderStage::Fragment, - "comp" => naga::ShaderStage::Compute, - "glsl" => { - let internal_name = input_path.to_string_lossy(); - match Path::new(&internal_name[..internal_name.len()-5]) - .extension() - .ok_or(CliError("Input filename ending with .glsl has no internal extension"))? - .to_str() - .ok_or(CliError("Input filename not valid unicode"))? - { - "vert" => naga::ShaderStage::Vertex, - "frag" => naga::ShaderStage::Fragment, - "comp" => naga::ShaderStage::Compute, - _ => unreachable!(), - } - }, - _ => unreachable!(), - }, + stage: shader_stage.0, defines: Default::default(), }, &input, ) .unwrap_or_else(|error| { - let filename = input_path.file_name().and_then(std::ffi::OsStr::to_str).unwrap_or("glsl"); + let filename = input_path + .file_name() + .and_then(std::ffi::OsStr::to_str) + .unwrap_or("glsl"); let mut writer = StandardStream::stderr(ColorChoice::Auto); error.emit_to_writer_with_path(&mut writer, &input, filename); std::process::exit(1); @@ -566,7 +627,6 @@ fn parse_input( Some(input), ) } - _ => return Err(CliError("Unknown input file extension").into()), }; Ok(Parsed { module, input_text }) @@ -577,7 +637,7 @@ fn write_output( info: &Option, params: &Parameters, output_path: &str, -) -> Result<(), Box> { +) -> anyhow::Result<()> { match Path::new(&output_path) .extension() .ok_or(CliError("Output filename has no extension"))? @@ -742,7 +802,7 @@ fn write_output( Ok(()) } -fn bulk_validate(args: Args, params: &Parameters) -> Result<(), Box> { +fn bulk_validate(args: Args, params: &Parameters) -> anyhow::Result<()> { let mut invalid = vec![]; for input_path in args.files { let path = Path::new(&input_path); @@ -760,6 +820,8 @@ fn bulk_validate(args: Args, params: &Parameters) -> Result<(), Box Result<(), Box "RayQueryTerminate", } } + S::SubgroupBallot { result, predicate } => { + if let Some(predicate) = predicate { + self.dependencies.push((id, predicate, "predicate")); + } + self.emits.push((id, result)); + "SubgroupBallot" + } + S::SubgroupCollectiveOperation { + op, + collective_op, + argument, + result, + } => { + self.dependencies.push((id, argument, "arg")); + self.emits.push((id, result)); + match (collective_op, op) { + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::All) => { + "SubgroupAll" + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Any) => { + "SubgroupAny" + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Add) => { + "SubgroupAdd" + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Mul) => { + "SubgroupMul" + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Max) => { + "SubgroupMax" + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Min) => { + "SubgroupMin" + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::And) => { + "SubgroupAnd" + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Or) => { + "SubgroupOr" + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Xor) => { + "SubgroupXor" + } + ( + crate::CollectiveOperation::ExclusiveScan, + crate::SubgroupOperation::Add, + ) => "SubgroupExclusiveAdd", + ( + crate::CollectiveOperation::ExclusiveScan, + crate::SubgroupOperation::Mul, + ) => "SubgroupExclusiveMul", + ( + crate::CollectiveOperation::InclusiveScan, + 
crate::SubgroupOperation::Add, + ) => "SubgroupInclusiveAdd", + ( + crate::CollectiveOperation::InclusiveScan, + crate::SubgroupOperation::Mul, + ) => "SubgroupInclusiveMul", + _ => unimplemented!(), + } + } + S::SubgroupGather { + mode, + argument, + result, + } => { + match mode { + crate::GatherMode::BroadcastFirst => {} + crate::GatherMode::Broadcast(index) + | crate::GatherMode::Shuffle(index) + | crate::GatherMode::ShuffleDown(index) + | crate::GatherMode::ShuffleUp(index) + | crate::GatherMode::ShuffleXor(index) => { + self.dependencies.push((id, index, "index")) + } + } + self.dependencies.push((id, argument, "arg")); + self.emits.push((id, result)); + match mode { + crate::GatherMode::BroadcastFirst => "SubgroupBroadcastFirst", + crate::GatherMode::Broadcast(_) => "SubgroupBroadcast", + crate::GatherMode::Shuffle(_) => "SubgroupShuffle", + crate::GatherMode::ShuffleDown(_) => "SubgroupShuffleDown", + crate::GatherMode::ShuffleUp(_) => "SubgroupShuffleUp", + crate::GatherMode::ShuffleXor(_) => "SubgroupShuffleXor", + } + } }; // Set the last node to the merge node last_node = merge_id; @@ -587,6 +675,8 @@ fn write_function_expressions( let ty = if committed { "Committed" } else { "Candidate" }; (format!("rayQueryGet{}Intersection", ty).into(), 4) } + E::SubgroupBallotResult => ("SubgroupBallotResult".into(), 4), + E::SubgroupOperationResult { .. } => ("SubgroupOperationResult".into(), 4), }; // give uniform expressions an outline diff --git a/naga/src/back/glsl/features.rs b/naga/src/back/glsl/features.rs index 99c128c6d9..e5a43f3e02 100644 --- a/naga/src/back/glsl/features.rs +++ b/naga/src/back/glsl/features.rs @@ -50,6 +50,8 @@ bitflags::bitflags! { const INSTANCE_INDEX = 1 << 22; /// Sample specific LODs of cube / array shadow textures const TEXTURE_SHADOW_LOD = 1 << 23; + /// Subgroup operations + const SUBGROUP_OPERATIONS = 1 << 24; } } @@ -117,6 +119,7 @@ impl FeaturesManager { check_feature!(SAMPLE_VARIABLES, 400, 300); check_feature!(DYNAMIC_ARRAY_SIZE, 430, 310); check_feature!(DUAL_SOURCE_BLENDING, 330, 300 /* with extension */); + check_feature!(SUBGROUP_OPERATIONS, 430, 310); match version { Version::Embedded { is_webgl: true, .. } => check_feature!(MULTI_VIEW, 140, 300), _ => check_feature!(MULTI_VIEW, 140, 310), @@ -259,6 +262,22 @@ impl FeaturesManager { writeln!(out, "#extension GL_EXT_texture_shadow_lod : require")?; } + if self.0.contains(Features::SUBGROUP_OPERATIONS) { + // https://registry.khronos.org/OpenGL/extensions/KHR/KHR_shader_subgroup.txt + writeln!(out, "#extension GL_KHR_shader_subgroup_basic : require")?; + writeln!(out, "#extension GL_KHR_shader_subgroup_vote : require")?; + writeln!( + out, + "#extension GL_KHR_shader_subgroup_arithmetic : require" + )?; + writeln!(out, "#extension GL_KHR_shader_subgroup_ballot : require")?; + writeln!(out, "#extension GL_KHR_shader_subgroup_shuffle : require")?; + writeln!( + out, + "#extension GL_KHR_shader_subgroup_shuffle_relative : require" + )?; + } + Ok(()) } } @@ -518,6 +537,10 @@ impl<'a, W> Writer<'a, W> { } } } + Expression::SubgroupBallotResult | + Expression::SubgroupOperationResult { .. } => { + features.request(Features::SUBGROUP_OPERATIONS) + } _ => {} } } diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs index bede79610a..c8c7ea557d 100644 --- a/naga/src/back/glsl/mod.rs +++ b/naga/src/back/glsl/mod.rs @@ -2390,6 +2390,125 @@ impl<'a, W: Write> Writer<'a, W> { writeln!(self.out, ");")?; } Statement::RayQuery { .. 
} => unreachable!(), + Statement::SubgroupBallot { result, predicate } => { + write!(self.out, "{level}")?; + let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); + let res_ty = ctx.info[result].ty.inner_with(&self.module.types); + self.write_value_type(res_ty)?; + write!(self.out, " {res_name} = ")?; + self.named_expressions.insert(result, res_name); + + write!(self.out, "subgroupBallot(")?; + match predicate { + Some(predicate) => self.write_expr(predicate, ctx)?, + None => write!(self.out, "true")?, + } + writeln!(self.out, ");")?; + } + Statement::SubgroupCollectiveOperation { + op, + collective_op, + argument, + result, + } => { + write!(self.out, "{level}")?; + let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); + let res_ty = ctx.info[result].ty.inner_with(&self.module.types); + self.write_value_type(res_ty)?; + write!(self.out, " {res_name} = ")?; + self.named_expressions.insert(result, res_name); + + match (collective_op, op) { + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::All) => { + write!(self.out, "subgroupAll(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Any) => { + write!(self.out, "subgroupAny(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Add) => { + write!(self.out, "subgroupAdd(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Mul) => { + write!(self.out, "subgroupMul(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Max) => { + write!(self.out, "subgroupMax(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Min) => { + write!(self.out, "subgroupMin(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::And) => { + write!(self.out, "subgroupAnd(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Or) => { + write!(self.out, "subgroupOr(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Xor) => { + write!(self.out, "subgroupXor(")? + } + (crate::CollectiveOperation::ExclusiveScan, crate::SubgroupOperation::Add) => { + write!(self.out, "subgroupExclusiveAdd(")? + } + (crate::CollectiveOperation::ExclusiveScan, crate::SubgroupOperation::Mul) => { + write!(self.out, "subgroupExclusiveMul(")? + } + (crate::CollectiveOperation::InclusiveScan, crate::SubgroupOperation::Add) => { + write!(self.out, "subgroupInclusiveAdd(")? + } + (crate::CollectiveOperation::InclusiveScan, crate::SubgroupOperation::Mul) => { + write!(self.out, "subgroupInclusiveMul(")? 
+ } + _ => unimplemented!(), + } + self.write_expr(argument, ctx)?; + writeln!(self.out, ");")?; + } + Statement::SubgroupGather { + mode, + argument, + result, + } => { + write!(self.out, "{level}")?; + let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); + let res_ty = ctx.info[result].ty.inner_with(&self.module.types); + self.write_value_type(res_ty)?; + write!(self.out, " {res_name} = ")?; + self.named_expressions.insert(result, res_name); + + match mode { + crate::GatherMode::BroadcastFirst => { + write!(self.out, "subgroupBroadcastFirst(")?; + } + crate::GatherMode::Broadcast(_) => { + write!(self.out, "subgroupBroadcast(")?; + } + crate::GatherMode::Shuffle(_) => { + write!(self.out, "subgroupShuffle(")?; + } + crate::GatherMode::ShuffleDown(_) => { + write!(self.out, "subgroupShuffleDown(")?; + } + crate::GatherMode::ShuffleUp(_) => { + write!(self.out, "subgroupShuffleUp(")?; + } + crate::GatherMode::ShuffleXor(_) => { + write!(self.out, "subgroupShuffleXor(")?; + } + } + self.write_expr(argument, ctx)?; + match mode { + crate::GatherMode::BroadcastFirst => {} + crate::GatherMode::Broadcast(index) + | crate::GatherMode::Shuffle(index) + | crate::GatherMode::ShuffleDown(index) + | crate::GatherMode::ShuffleUp(index) + | crate::GatherMode::ShuffleXor(index) => { + write!(self.out, ", ")?; + self.write_expr(index, ctx)?; + } + } + writeln!(self.out, ");")?; + } } Ok(()) @@ -3418,7 +3537,8 @@ impl<'a, W: Write> Writer<'a, W> { let scalar_bits = ctx .resolve_type(arg, &self.module.types) .scalar_width() - .unwrap(); + .unwrap() + * 8; write!(self.out, "bitfieldExtract(")?; self.write_expr(arg, ctx)?; @@ -3437,7 +3557,8 @@ impl<'a, W: Write> Writer<'a, W> { let scalar_bits = ctx .resolve_type(arg, &self.module.types) .scalar_width() - .unwrap(); + .unwrap() + * 8; write!(self.out, "bitfieldInsert(")?; self.write_expr(arg, ctx)?; @@ -3656,7 +3777,9 @@ impl<'a, W: Write> Writer<'a, W> { Expression::CallResult(_) | Expression::AtomicResult { .. } | Expression::RayQueryProceedResult - | Expression::WorkGroupUniformLoadResult { .. } => unreachable!(), + | Expression::WorkGroupUniformLoadResult { .. } + | Expression::SubgroupOperationResult { .. } + | Expression::SubgroupBallotResult => unreachable!(), // `ArrayLength` is written as `expr.length()` and we convert it to a uint Expression::ArrayLength(expr) => { write!(self.out, "uint(")?; @@ -4225,6 +4348,9 @@ impl<'a, W: Write> Writer<'a, W> { if flags.contains(crate::Barrier::WORK_GROUP) { writeln!(self.out, "{level}memoryBarrierShared();")?; } + if flags.contains(crate::Barrier::SUB_GROUP) { + writeln!(self.out, "{level}subgroupMemoryBarrier();")?; + } writeln!(self.out, "{level}barrier();")?; Ok(()) } @@ -4494,6 +4620,11 @@ const fn glsl_built_in(built_in: crate::BuiltIn, options: VaryingOptions) -> &'s Bi::WorkGroupId => "gl_WorkGroupID", Bi::WorkGroupSize => "gl_WorkGroupSize", Bi::NumWorkGroups => "gl_NumWorkGroups", + // subgroup + Bi::NumSubgroups => "gl_NumSubgroups", + Bi::SubgroupId => "gl_SubgroupID", + Bi::SubgroupSize => "gl_SubgroupSize", + Bi::SubgroupInvocationId => "gl_SubgroupInvocationID", } } diff --git a/naga/src/back/hlsl/conv.rs b/naga/src/back/hlsl/conv.rs index 2a6db35db8..7d15f43f6c 100644 --- a/naga/src/back/hlsl/conv.rs +++ b/naga/src/back/hlsl/conv.rs @@ -179,6 +179,11 @@ impl crate::BuiltIn { // to this field will get replaced with references to `SPECIAL_CBUF_VAR` // in `Writer::write_expr`. 
Self::NumWorkGroups => "SV_GroupID", + // These builtins map to functions + Self::SubgroupSize + | Self::SubgroupInvocationId + | Self::NumSubgroups + | Self::SubgroupId => unreachable!(), Self::BaseInstance | Self::BaseVertex | Self::WorkGroupSize => { return Err(Error::Unimplemented(format!("builtin {self:?}"))) } diff --git a/naga/src/back/hlsl/help.rs b/naga/src/back/hlsl/help.rs index 4dd9ea5987..d3bb1ce7f5 100644 --- a/naga/src/back/hlsl/help.rs +++ b/naga/src/back/hlsl/help.rs @@ -70,6 +70,11 @@ pub(super) struct WrappedMath { pub(super) components: Option, } +#[derive(Clone, Copy, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)] +pub(super) struct WrappedZeroValue { + pub(super) ty: Handle, +} + /// HLSL backend requires its own `ImageQuery` enum. /// /// It is used inside `WrappedImageQuery` and should be unique per ImageQuery function. @@ -359,7 +364,7 @@ impl<'a, W: Write> super::Writer<'a, W> { } /// Helper function that write wrapped function for `Expression::Compose` for structures. - pub(super) fn write_wrapped_constructor_function( + fn write_wrapped_constructor_function( &mut self, module: &crate::Module, constructor: WrappedConstructor, @@ -862,6 +867,25 @@ impl<'a, W: Write> super::Writer<'a, W> { Ok(()) } + // TODO: we could merge this with iteration in write_wrapped_compose_functions... + // + /// Helper function that writes zero value wrapped functions + pub(super) fn write_wrapped_zero_value_functions( + &mut self, + module: &crate::Module, + expressions: &crate::Arena, + ) -> BackendResult { + for (handle, _) in expressions.iter() { + if let crate::Expression::ZeroValue(ty) = expressions[handle] { + let zero_value = WrappedZeroValue { ty }; + if self.wrapped.zero_values.insert(zero_value) { + self.write_wrapped_zero_value_function(module, zero_value)?; + } + } + } + Ok(()) + } + pub(super) fn write_wrapped_math_functions( + &mut self, + module: &crate::Module, @@ -1006,6 +1030,7 @@ impl<'a, W: Write> super::Writer<'a, W> { ) -> BackendResult { self.write_wrapped_math_functions(module, func_ctx)?; self.write_wrapped_compose_functions(module, func_ctx.expressions)?; + self.write_wrapped_zero_value_functions(module, func_ctx.expressions)?; for (handle, _) in func_ctx.expressions.iter() { match func_ctx.expressions[handle] { @@ -1283,4 +1308,71 @@ impl<'a, W: Write> super::Writer<'a, W> { Ok(()) } + + pub(super) fn write_wrapped_zero_value_function_name( + &mut self, + module: &crate::Module, + zero_value: WrappedZeroValue, + ) -> BackendResult { + let name = crate::TypeInner::hlsl_type_id(zero_value.ty, module.to_ctx(), &self.names)?; + write!(self.out, "ZeroValue{name}")?; + Ok(()) + } + + /// Helper function that writes a wrapped function for `Expression::ZeroValue` + /// + /// This is necessary since we might have a member access after the zero value expression, e.g. + /// `.y` (in practice this can come up when consuming SPIRV that's been produced by glslc). + /// + /// So we can't just write `(float4)0` since `(float4)0.y` won't parse correctly. + /// + /// Parenthesizing the expression like `((float4)0).y` would work... 
except DXC can't handle + /// cases like: + /// + /// ```ignore + /// tests\out\hlsl\access.hlsl:183:41: error: cannot compile this l-value expression yet + /// t_1.am = (__mat4x2[2])((float4x2[2])0); + /// ^ + /// ``` + fn write_wrapped_zero_value_function( + &mut self, + module: &crate::Module, + zero_value: WrappedZeroValue, + ) -> BackendResult { + use crate::back::INDENT; + + const RETURN_VARIABLE_NAME: &str = "ret"; + + // Write function return type and name + if let crate::TypeInner::Array { base, size, .. } = module.types[zero_value.ty].inner { + write!(self.out, "typedef ")?; + self.write_type(module, zero_value.ty)?; + write!(self.out, " ret_")?; + self.write_wrapped_zero_value_function_name(module, zero_value)?; + self.write_array_size(module, base, size)?; + writeln!(self.out, ";")?; + + write!(self.out, "ret_")?; + self.write_wrapped_zero_value_function_name(module, zero_value)?; + } else { + self.write_type(module, zero_value.ty)?; + } + write!(self.out, " ")?; + self.write_wrapped_zero_value_function_name(module, zero_value)?; + + // Write function parameters (none) and start function body + writeln!(self.out, "() {{")?; + + // Write `ZeroValue` function. + write!(self.out, "{INDENT}return ")?; + self.write_default_init(module, zero_value.ty)?; + writeln!(self.out, ";")?; + + // End of function body + writeln!(self.out, "}}")?; + // Write extra new line + writeln!(self.out)?; + + Ok(()) + } } diff --git a/naga/src/back/hlsl/mod.rs b/naga/src/back/hlsl/mod.rs index fe9740a2f4..28edbf70e1 100644 --- a/naga/src/back/hlsl/mod.rs +++ b/naga/src/back/hlsl/mod.rs @@ -267,6 +267,7 @@ pub enum Error { #[derive(Default)] struct Wrapped { + zero_values: crate::FastHashSet, array_lengths: crate::FastHashSet, image_queries: crate::FastHashSet, constructors: crate::FastHashSet, diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index d4c6097eb3..86d8f89035 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -1,5 +1,8 @@ use super::{ - help::{WrappedArrayLength, WrappedConstructor, WrappedImageQuery, WrappedStructMatrixAccess}, + help::{ + WrappedArrayLength, WrappedConstructor, WrappedImageQuery, WrappedStructMatrixAccess, + WrappedZeroValue, + }, storage::StoreValue, BackendResult, Error, Options, }; @@ -77,6 +80,19 @@ enum Io { Output, } +const fn is_subgroup_builtin_binding(binding: &Option) -> bool { + let &Some(crate::Binding::BuiltIn(builtin)) = binding else { + return false; + }; + matches!( + builtin, + crate::BuiltIn::SubgroupSize + | crate::BuiltIn::SubgroupInvocationId + | crate::BuiltIn::NumSubgroups + | crate::BuiltIn::SubgroupId + ) +} + impl<'a, W: fmt::Write> super::Writer<'a, W> { pub fn new(out: W, options: &'a Options) -> Self { Self { @@ -161,6 +177,19 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } } } + for statement in func.body.iter() { + match *statement { + crate::Statement::SubgroupCollectiveOperation { + op: _, + collective_op: crate::CollectiveOperation::InclusiveScan, + argument, + result: _, + } => { + self.need_bake_expressions.insert(argument); + } + _ => {} + } + } } pub fn write( @@ -238,6 +267,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.write_special_functions(module)?; self.write_wrapped_compose_functions(module, &module.global_expressions)?; + self.write_wrapped_zero_value_functions(module, &module.global_expressions)?; // Write all named constants let mut constants = module @@ -401,31 +431,32 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { // if they are struct, so that the `stage` 
argument here could be omitted. fn write_semantic( &mut self, - binding: &crate::Binding, + binding: &Option, stage: Option<(ShaderStage, Io)>, ) -> BackendResult { match *binding { - crate::Binding::BuiltIn(builtin) => { + Some(crate::Binding::BuiltIn(builtin)) if !is_subgroup_builtin_binding(binding) => { let builtin_str = builtin.to_hlsl_str()?; write!(self.out, " : {builtin_str}")?; } - crate::Binding::Location { + Some(crate::Binding::Location { second_blend_source: true, .. - } => { + }) => { write!(self.out, " : SV_Target1")?; } - crate::Binding::Location { + Some(crate::Binding::Location { location, second_blend_source: false, .. - } => { + }) => { if stage == Some((crate::ShaderStage::Fragment, Io::Output)) { write!(self.out, " : SV_Target{location}")?; } else { write!(self.out, " : {LOCATION_SEMANTIC}{location}")?; } } + _ => {} } Ok(()) @@ -446,17 +477,30 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { write!(self.out, "struct {struct_name}")?; writeln!(self.out, " {{")?; for m in members.iter() { + if is_subgroup_builtin_binding(&m.binding) { + continue; + } write!(self.out, "{}", back::INDENT)?; if let Some(ref binding) = m.binding { self.write_modifier(binding)?; } self.write_type(module, m.ty)?; write!(self.out, " {}", &m.name)?; - if let Some(ref binding) = m.binding { - self.write_semantic(binding, Some(shader_stage))?; - } + self.write_semantic(&m.binding, Some(shader_stage))?; writeln!(self.out, ";")?; } + if members.iter().any(|arg| { + matches!( + arg.binding, + Some(crate::Binding::BuiltIn(crate::BuiltIn::SubgroupId)) + ) + }) { + writeln!( + self.out, + "{}uint __local_invocation_index : SV_GroupIndex;", + back::INDENT + )?; + } writeln!(self.out, "}};")?; writeln!(self.out)?; @@ -557,8 +601,8 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } /// Writes special interface structures for an entry point. The special structures have - /// all the fields flattened into them and sorted by binding. They are only needed for - /// VS outputs and FS inputs, so that these interfaces match. + /// all the fields flattened into them and sorted by binding. They are needed to emulate + /// subgroup built-ins and to make the interfaces between VS outputs and FS inputs match. fn write_ep_interface( &mut self, module: &Module, @@ -567,7 +611,13 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { ep_name: &str, ) -> Result { Ok(EntryPointInterface { - input: if !func.arguments.is_empty() && stage == ShaderStage::Fragment { + input: if !func.arguments.is_empty() + && (stage == ShaderStage::Fragment + || func + .arguments + .iter() + .any(|arg| is_subgroup_builtin_binding(&arg.binding))) + { Some(self.write_ep_input_struct(module, func, stage, ep_name)?) } else { None @@ -581,6 +631,38 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { }) } + fn write_ep_argument_initialization( + &mut self, + ep: &crate::EntryPoint, + ep_input: &EntryPointBinding, + fake_member: &EpStructMember, + ) -> BackendResult { + match fake_member.binding { + Some(crate::Binding::BuiltIn(crate::BuiltIn::SubgroupSize)) => { + write!(self.out, "WaveGetLaneCount()")? + } + Some(crate::Binding::BuiltIn(crate::BuiltIn::SubgroupInvocationId)) => { + write!(self.out, "WaveGetLaneIndex()")? 
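The `write_ep_argument_initialization` arms beginning here emulate subgroup built-ins that HLSL has no input semantics for: `SubgroupSize` and `SubgroupInvocationId` map directly to `WaveGetLaneCount()`/`WaveGetLaneIndex()`, while the arms just below derive `NumSubgroups` and `SubgroupId` arithmetically from the workgroup size and the flattened `__local_invocation_index` field added to the input struct. A minimal sketch of that arithmetic (free functions here are illustrative, not naga's API):

```rust
// Sketch of the arithmetic the HLSL backend emits for the emulated
// subgroup built-ins (illustrative names, not naga's API).
fn num_subgroups(workgroup_size: [u32; 3], wave_lane_count: u32) -> u32 {
    let invocations = workgroup_size[0] * workgroup_size[1] * workgroup_size[2];
    // Matches the emitted `(N + WaveGetLaneCount() - 1u) / WaveGetLaneCount()`:
    // a ceiling division, since the last subgroup may be partially filled.
    (invocations + wave_lane_count - 1) / wave_lane_count
}

fn subgroup_id(local_invocation_index: u32, wave_lane_count: u32) -> u32 {
    // Matches `__local_invocation_index / WaveGetLaneCount()` in the emitted HLSL.
    local_invocation_index / wave_lane_count
}

fn main() {
    // A 64-invocation workgroup on 32-wide waves splits into 2 subgroups.
    assert_eq!(num_subgroups([8, 8, 1], 32), 2);
    assert_eq!(subgroup_id(37, 32), 1);
}
```

The ceiling division accounts for a final, partially filled subgroup whenever the workgroup size is not a multiple of the wave width.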
+ } + Some(crate::Binding::BuiltIn(crate::BuiltIn::NumSubgroups)) => write!( + self.out, + "({}u + WaveGetLaneCount() - 1u) / WaveGetLaneCount()", + ep.workgroup_size[0] * ep.workgroup_size[1] * ep.workgroup_size[2] + )?, + Some(crate::Binding::BuiltIn(crate::BuiltIn::SubgroupId)) => { + write!( + self.out, + "{}.__local_invocation_index / WaveGetLaneCount()", + ep_input.arg_name + )?; + } + _ => { + write!(self.out, "{}.{}", ep_input.arg_name, fake_member.name)?; + } + } + Ok(()) + } + /// Write an entry point preface that initializes the arguments as specified in IR. fn write_ep_arguments_initialization( &mut self, @@ -588,6 +670,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { func: &crate::Function, ep_index: u16, ) -> BackendResult { + let ep = &module.entry_points[ep_index as usize]; let ep_input = match self.entry_point_io[ep_index as usize].input.take() { Some(ep_input) => ep_input, None => return Ok(()), @@ -601,8 +684,13 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { match module.types[arg.ty].inner { TypeInner::Array { base, size, .. } => { self.write_array_size(module, base, size)?; - let fake_member = fake_iter.next().unwrap(); - writeln!(self.out, " = {}.{};", ep_input.arg_name, fake_member.name)?; + write!(self.out, " = ")?; + self.write_ep_argument_initialization( + ep, + &ep_input, + fake_iter.next().unwrap(), + )?; + writeln!(self.out, ";")?; } TypeInner::Struct { ref members, .. } => { write!(self.out, " = {{ ")?; @@ -610,14 +698,22 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { if index != 0 { write!(self.out, ", ")?; } - let fake_member = fake_iter.next().unwrap(); - write!(self.out, "{}.{}", ep_input.arg_name, fake_member.name)?; + self.write_ep_argument_initialization( + ep, + &ep_input, + fake_iter.next().unwrap(), + )?; } writeln!(self.out, " }};")?; } _ => { - let fake_member = fake_iter.next().unwrap(); - writeln!(self.out, " = {}.{};", ep_input.arg_name, fake_member.name)?; + write!(self.out, " = ")?; + self.write_ep_argument_initialization( + ep, + &ep_input, + fake_iter.next().unwrap(), + )?; + writeln!(self.out, ";")?; } } } @@ -932,9 +1028,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } } - if let Some(ref binding) = member.binding { - self.write_semantic(binding, shader_stage)?; - }; + self.write_semantic(&member.binding, shader_stage)?; writeln!(self.out, ";")?; } @@ -1147,7 +1241,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } back::FunctionType::EntryPoint(ep_index) => { if let Some(ref ep_input) = self.entry_point_io[ep_index as usize].input { - write!(self.out, "{} {}", ep_input.ty_name, ep_input.arg_name,)?; + write!(self.out, "{} {}", ep_input.ty_name, ep_input.arg_name)?; } else { let stage = module.entry_points[ep_index as usize].stage; for (index, arg) in func.arguments.iter().enumerate() { @@ -1164,17 +1258,16 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.write_array_size(module, base, size)?; } - if let Some(ref binding) = arg.binding { - self.write_semantic(binding, Some((stage, Io::Input)))?; - } + self.write_semantic(&arg.binding, Some((stage, Io::Input)))?; } - - if need_workgroup_variables_initialization { - if !func.arguments.is_empty() { - write!(self.out, ", ")?; - } - write!(self.out, "uint3 __local_invocation_id : SV_GroupThreadID")?; + } + if need_workgroup_variables_initialization { + if self.entry_point_io[ep_index as usize].input.is_some() + || !func.arguments.is_empty() + { + write!(self.out, ", ")?; } + write!(self.out, "uint3 __local_invocation_id : SV_GroupThreadID")?; } } } @@ -1184,11 +1277,7 @@ 
impl<'a, W: fmt::Write> super::Writer<'a, W> { // Write semantic if it present if let back::FunctionType::EntryPoint(index) = func_ctx.ty { let stage = module.entry_points[index as usize].stage; - if let Some(crate::FunctionResult { - binding: Some(ref binding), - .. - }) = func.result - { + if let Some(crate::FunctionResult { ref binding, .. }) = func.result { self.write_semantic(binding, Some((stage, Io::Output)))?; } } @@ -1988,6 +2077,129 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { writeln!(self.out, "{level}}}")? } Statement::RayQuery { .. } => unreachable!(), + Statement::SubgroupBallot { result, predicate } => { + write!(self.out, "{level}")?; + let name = format!("{}{}", back::BAKE_PREFIX, result.index()); + write!(self.out, "const uint4 {name} = ")?; + self.named_expressions.insert(result, name); + + write!(self.out, "WaveActiveBallot(")?; + match predicate { + Some(predicate) => self.write_expr(module, predicate, func_ctx)?, + None => write!(self.out, "true")?, + } + writeln!(self.out, ");")?; + } + Statement::SubgroupCollectiveOperation { + op, + collective_op, + argument, + result, + } => { + write!(self.out, "{level}")?; + write!(self.out, "const ")?; + let name = format!("{}{}", back::BAKE_PREFIX, result.index()); + match func_ctx.info[result].ty { + proc::TypeResolution::Handle(handle) => self.write_type(module, handle)?, + proc::TypeResolution::Value(ref value) => { + self.write_value_type(module, value)? + } + }; + write!(self.out, " {name} = ")?; + self.named_expressions.insert(result, name); + + match (collective_op, op) { + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::All) => { + write!(self.out, "WaveActiveAllTrue(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Any) => { + write!(self.out, "WaveActiveAnyTrue(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Add) => { + write!(self.out, "WaveActiveSum(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Mul) => { + write!(self.out, "WaveActiveProduct(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Max) => { + write!(self.out, "WaveActiveMax(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Min) => { + write!(self.out, "WaveActiveMin(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::And) => { + write!(self.out, "WaveActiveBitAnd(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Or) => { + write!(self.out, "WaveActiveBitOr(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Xor) => { + write!(self.out, "WaveActiveBitXor(")? + } + (crate::CollectiveOperation::ExclusiveScan, crate::SubgroupOperation::Add) => { + write!(self.out, "WavePrefixSum(")? + } + (crate::CollectiveOperation::ExclusiveScan, crate::SubgroupOperation::Mul) => { + write!(self.out, "WavePrefixProduct(")? 
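HLSL exposes only exclusive prefix intrinsics (`WavePrefixSum`, `WavePrefixProduct`), so the `InclusiveScan` arms that follow rebuild an inclusive scan as `arg + WavePrefixSum(arg)` (and analogously for products). This is also why `need_bake_expressions` registers the argument of every `InclusiveScan` earlier in this file: the argument expression is written twice. A minimal sketch of the identity on plain slices (illustrative code, not part of naga):

```rust
// Exclusive scan: lane i receives the sum of lanes 0..i.
fn exclusive_prefix_sum(lanes: &[u32]) -> Vec<u32> {
    let mut acc = 0u32;
    lanes
        .iter()
        .map(|&v| {
            let out = acc;
            acc += v;
            out
        })
        .collect()
}

// Inclusive scan rebuilt the way the HLSL backend emits it:
// `arg + WavePrefixSum(arg)`, lane by lane.
fn inclusive_prefix_sum(lanes: &[u32]) -> Vec<u32> {
    lanes
        .iter()
        .zip(exclusive_prefix_sum(lanes))
        .map(|(&v, e)| v + e)
        .collect()
}

fn main() {
    let lanes = [1, 2, 3, 4];
    assert_eq!(exclusive_prefix_sum(&lanes), vec![0, 1, 3, 6]);
    assert_eq!(inclusive_prefix_sum(&lanes), vec![1, 3, 6, 10]);
}
```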
+ } + (crate::CollectiveOperation::InclusiveScan, crate::SubgroupOperation::Add) => { + self.write_expr(module, argument, func_ctx)?; + write!(self.out, " + WavePrefixSum(")?; + } + (crate::CollectiveOperation::InclusiveScan, crate::SubgroupOperation::Mul) => { + self.write_expr(module, argument, func_ctx)?; + write!(self.out, " * WavePrefixProduct(")?; + } + _ => unimplemented!(), + } + self.write_expr(module, argument, func_ctx)?; + writeln!(self.out, ");")?; + } + Statement::SubgroupGather { + mode, + argument, + result, + } => { + write!(self.out, "{level}")?; + write!(self.out, "const ")?; + let name = format!("{}{}", back::BAKE_PREFIX, result.index()); + match func_ctx.info[result].ty { + proc::TypeResolution::Handle(handle) => self.write_type(module, handle)?, + proc::TypeResolution::Value(ref value) => { + self.write_value_type(module, value)? + } + }; + write!(self.out, " {name} = ")?; + self.named_expressions.insert(result, name); + + if matches!(mode, crate::GatherMode::BroadcastFirst) { + write!(self.out, "WaveReadLaneFirst(")?; + self.write_expr(module, argument, func_ctx)?; + } else { + write!(self.out, "WaveReadLaneAt(")?; + self.write_expr(module, argument, func_ctx)?; + write!(self.out, ", ")?; + match mode { + crate::GatherMode::BroadcastFirst => unreachable!(), + crate::GatherMode::Broadcast(index) | crate::GatherMode::Shuffle(index) => { + self.write_expr(module, index, func_ctx)?; + } + crate::GatherMode::ShuffleDown(index) => { + write!(self.out, "WaveGetLaneIndex() + ")?; + self.write_expr(module, index, func_ctx)?; + } + crate::GatherMode::ShuffleUp(index) => { + write!(self.out, "WaveGetLaneIndex() - ")?; + self.write_expr(module, index, func_ctx)?; + } + crate::GatherMode::ShuffleXor(index) => { + write!(self.out, "WaveGetLaneIndex() ^ ")?; + self.write_expr(module, index, func_ctx)?; + } + } + } + writeln!(self.out, ");")?; + } } Ok(()) @@ -2043,7 +2255,10 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.write_const_expression(module, constant.init)?; } } - Expression::ZeroValue(ty) => self.write_default_init(module, ty)?, + Expression::ZeroValue(ty) => { + self.write_wrapped_zero_value_function_name(module, WrappedZeroValue { ty })?; + write!(self.out, "()")?; + } Expression::Compose { ty, ref components } => { match module.types[ty].inner { TypeInner::Struct { .. } | TypeInner::Array { .. } => { @@ -2593,7 +2808,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { true } None => { - if inner.scalar_width() == Some(64) { + if inner.scalar_width() == Some(8) { false } else { write!(self.out, "{}(", kind.to_hlsl_cast(),)?; @@ -3134,7 +3349,9 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { Expression::CallResult(_) | Expression::AtomicResult { .. } | Expression::WorkGroupUniformLoadResult { .. } - | Expression::RayQueryProceedResult => {} + | Expression::RayQueryProceedResult + | Expression::SubgroupBallotResult + | Expression::SubgroupOperationResult { .. } => {} } if !closing_bracket.is_empty() { @@ -3184,7 +3401,11 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } /// Helper function that write default zero initialization - fn write_default_init(&mut self, module: &Module, ty: Handle) -> BackendResult { + pub(super) fn write_default_init( + &mut self, + module: &Module, + ty: Handle, + ) -> BackendResult { write!(self.out, "(")?; self.write_type(module, ty)?; if let TypeInner::Array { base, size, .. 
} = module.types[ty].inner { @@ -3201,6 +3422,9 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { if barrier.contains(crate::Barrier::WORK_GROUP) { writeln!(self.out, "{level}GroupMemoryBarrierWithGroupSync();")?; } + if barrier.contains(crate::Barrier::SUB_GROUP) { + // Does not exist in DirectX + } Ok(()) } } diff --git a/naga/src/back/msl/mod.rs b/naga/src/back/msl/mod.rs index 2c7cdea6af..8b03e20376 100644 --- a/naga/src/back/msl/mod.rs +++ b/naga/src/back/msl/mod.rs @@ -436,6 +436,11 @@ impl ResolvedBinding { Bi::WorkGroupId => "threadgroup_position_in_grid", Bi::WorkGroupSize => "dispatch_threads_per_threadgroup", Bi::NumWorkGroups => "threadgroups_per_grid", + // subgroup + Bi::NumSubgroups => "simdgroups_per_threadgroup", + Bi::SubgroupId => "simdgroup_index_in_threadgroup", + Bi::SubgroupSize => "threads_per_simdgroup", + Bi::SubgroupInvocationId => "thread_index_in_simdgroup", Bi::CullDistance | Bi::ViewIndex => { return Err(Error::UnsupportedBuiltIn(built_in)) } @@ -538,3 +543,21 @@ fn test_error_size() { use std::mem::size_of; assert_eq!(size_of::(), 32); } + +impl crate::AtomicFunction { + fn to_msl(self) -> Result<&'static str, Error> { + Ok(match self { + Self::Add => "fetch_add", + Self::Subtract => "fetch_sub", + Self::And => "fetch_and", + Self::InclusiveOr => "fetch_or", + Self::ExclusiveOr => "fetch_xor", + Self::Min => "fetch_min", + Self::Max => "fetch_max", + Self::Exchange { compare: None } => "exchange", + Self::Exchange { compare: Some(_) } => Err(Error::FeatureNotImplemented( + "atomic CompareExchange".to_string(), + ))?, + }) + } +} diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index 0d0f651665..e250d0b72c 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -1131,21 +1131,10 @@ impl Writer { Ok(()) } - fn put_atomic_fetch( - &mut self, - pointer: Handle, - key: &str, - value: Handle, - context: &ExpressionContext, - ) -> BackendResult { - self.put_atomic_operation(pointer, "fetch_", key, value, context) - } - fn put_atomic_operation( &mut self, pointer: Handle, - key1: &str, - key2: &str, + key: &str, value: Handle, context: &ExpressionContext, ) -> BackendResult { @@ -1163,7 +1152,7 @@ impl Writer { write!( self.out, - "{NAMESPACE}::atomic_{key1}{key2}_explicit({ATOMIC_REFERENCE}" + "{NAMESPACE}::atomic_{key}_explicit({ATOMIC_REFERENCE}" )?; self.put_access_chain(pointer, policy, context)?; write!(self.out, ", ")?; @@ -1945,7 +1934,7 @@ impl Writer { // // extract_bits(e, min(offset, w), min(count, w - min(offset, w)))) - let scalar_bits = context.resolve_type(arg).scalar_width().unwrap(); + let scalar_bits = context.resolve_type(arg).scalar_width().unwrap() * 8; write!(self.out, "{NAMESPACE}::extract_bits(")?; self.put_expression(arg, context, true)?; @@ -1961,7 +1950,7 @@ impl Writer { // // insertBits(e, newBits, min(offset, w), min(count, w - min(offset, w)))) - let scalar_bits = context.resolve_type(arg).scalar_width().unwrap(); + let scalar_bits = context.resolve_type(arg).scalar_width().unwrap() * 8; write!(self.out, "{NAMESPACE}::insert_bits(")?; self.put_expression(arg, context, true)?; @@ -2042,6 +2031,8 @@ impl Writer { crate::Expression::CallResult(_) | crate::Expression::AtomicResult { .. } | crate::Expression::WorkGroupUniformLoadResult { .. } + | crate::Expression::SubgroupBallotResult + | crate::Expression::SubgroupOperationResult { .. 
} | crate::Expression::RayQueryProceedResult => { unreachable!() } @@ -2995,43 +2986,8 @@ impl Writer { let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); self.start_baking_expression(result, &context.expression, &res_name)?; self.named_expressions.insert(result, res_name); - match *fun { - crate::AtomicFunction::Add => { - self.put_atomic_fetch(pointer, "add", value, &context.expression)?; - } - crate::AtomicFunction::Subtract => { - self.put_atomic_fetch(pointer, "sub", value, &context.expression)?; - } - crate::AtomicFunction::And => { - self.put_atomic_fetch(pointer, "and", value, &context.expression)?; - } - crate::AtomicFunction::InclusiveOr => { - self.put_atomic_fetch(pointer, "or", value, &context.expression)?; - } - crate::AtomicFunction::ExclusiveOr => { - self.put_atomic_fetch(pointer, "xor", value, &context.expression)?; - } - crate::AtomicFunction::Min => { - self.put_atomic_fetch(pointer, "min", value, &context.expression)?; - } - crate::AtomicFunction::Max => { - self.put_atomic_fetch(pointer, "max", value, &context.expression)?; - } - crate::AtomicFunction::Exchange { compare: None } => { - self.put_atomic_operation( - pointer, - "exchange", - "", - value, - &context.expression, - )?; - } - crate::AtomicFunction::Exchange { .. } => { - return Err(Error::FeatureNotImplemented( - "atomic CompareExchange".to_string(), - )); - } - } + let fun_str = fun.to_msl()?; + self.put_atomic_operation(pointer, fun_str, value, &context.expression)?; // done writeln!(self.out, ";")?; } @@ -3145,6 +3101,121 @@ impl Writer { } } } + crate::Statement::SubgroupBallot { result, predicate } => { + write!(self.out, "{level}")?; + let name = self.namer.call(""); + self.start_baking_expression(result, &context.expression, &name)?; + self.named_expressions.insert(result, name); + write!(self.out, "uint4((uint64_t){NAMESPACE}::simd_ballot(")?; + if let Some(predicate) = predicate { + self.put_expression(predicate, &context.expression, true)?; + } else { + write!(self.out, "true")?; + } + writeln!(self.out, "), 0, 0, 0);")?; + } + crate::Statement::SubgroupCollectiveOperation { + op, + collective_op, + argument, + result, + } => { + write!(self.out, "{level}")?; + let name = self.namer.call(""); + self.start_baking_expression(result, &context.expression, &name)?; + self.named_expressions.insert(result, name); + match (collective_op, op) { + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::All) => { + write!(self.out, "{NAMESPACE}::simd_all(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Any) => { + write!(self.out, "{NAMESPACE}::simd_any(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Add) => { + write!(self.out, "{NAMESPACE}::simd_sum(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Mul) => { + write!(self.out, "{NAMESPACE}::simd_product(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Max) => { + write!(self.out, "{NAMESPACE}::simd_max(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Min) => { + write!(self.out, "{NAMESPACE}::simd_min(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::And) => { + write!(self.out, "{NAMESPACE}::simd_and(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Or) => { + write!(self.out, "{NAMESPACE}::simd_or(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Xor) => { + write!(self.out, "{NAMESPACE}::simd_xor(")? 
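Earlier in this hunk, `Statement::SubgroupBallot` lowers to Metal's `simd_ballot`, widened into the `uint4` mask that WGSL's `subgroupBallot` returns: one bit per subgroup invocation, with invocation `i` stored at bit `i % 32` of component `i / 32`. A minimal sketch of that layout (illustrative, not naga's API):

```rust
// Sketch of the vec4<u32> ballot layout: invocation `i` maps to
// bit `i % 32` of component `i / 32`, for up to 128 invocations.
fn ballot_from_lanes(active: &[bool]) -> [u32; 4] {
    let mut ballot = [0u32; 4];
    for (i, &on) in active.iter().enumerate().take(128) {
        if on {
            ballot[i / 32] |= 1u32 << (i % 32);
        }
    }
    ballot
}

fn main() {
    // Lanes 0 and 33 active: bit 0 of x, bit 1 of y.
    let mut lanes = [false; 64];
    lanes[0] = true;
    lanes[33] = true;
    assert_eq!(ballot_from_lanes(&lanes), [1, 2, 0, 0]);
}
```

Since Metal's ballot is at most 64 bits wide, the upper components of the emitted `uint4` are simply zero-filled.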
+ } + ( + crate::CollectiveOperation::ExclusiveScan, + crate::SubgroupOperation::Add, + ) => write!(self.out, "{NAMESPACE}::simd_prefix_exclusive_sum(")?, + ( + crate::CollectiveOperation::ExclusiveScan, + crate::SubgroupOperation::Mul, + ) => write!(self.out, "{NAMESPACE}::simd_prefix_exclusive_product(")?, + ( + crate::CollectiveOperation::InclusiveScan, + crate::SubgroupOperation::Add, + ) => write!(self.out, "{NAMESPACE}::simd_prefix_inclusive_sum(")?, + ( + crate::CollectiveOperation::InclusiveScan, + crate::SubgroupOperation::Mul, + ) => write!(self.out, "{NAMESPACE}::simd_prefix_inclusive_product(")?, + _ => unimplemented!(), + } + self.put_expression(argument, &context.expression, true)?; + writeln!(self.out, ");")?; + } + crate::Statement::SubgroupGather { + mode, + argument, + result, + } => { + write!(self.out, "{level}")?; + let name = self.namer.call(""); + self.start_baking_expression(result, &context.expression, &name)?; + self.named_expressions.insert(result, name); + match mode { + crate::GatherMode::BroadcastFirst => { + write!(self.out, "{NAMESPACE}::simd_broadcast_first(")?; + } + crate::GatherMode::Broadcast(_) => { + write!(self.out, "{NAMESPACE}::simd_broadcast(")?; + } + crate::GatherMode::Shuffle(_) => { + write!(self.out, "{NAMESPACE}::simd_shuffle(")?; + } + crate::GatherMode::ShuffleDown(_) => { + write!(self.out, "{NAMESPACE}::simd_shuffle_down(")?; + } + crate::GatherMode::ShuffleUp(_) => { + write!(self.out, "{NAMESPACE}::simd_shuffle_up(")?; + } + crate::GatherMode::ShuffleXor(_) => { + write!(self.out, "{NAMESPACE}::simd_shuffle_xor(")?; + } + } + self.put_expression(argument, &context.expression, true)?; + match mode { + crate::GatherMode::BroadcastFirst => {} + crate::GatherMode::Broadcast(index) + | crate::GatherMode::Shuffle(index) + | crate::GatherMode::ShuffleDown(index) + | crate::GatherMode::ShuffleUp(index) + | crate::GatherMode::ShuffleXor(index) => { + write!(self.out, ", ")?; + self.put_expression(index, &context.expression, true)?; + } + } + writeln!(self.out, ");")?; + } } } @@ -4492,6 +4563,12 @@ impl Writer { "{level}{NAMESPACE}::threadgroup_barrier({NAMESPACE}::mem_flags::mem_threadgroup);", )?; } + if flags.contains(crate::Barrier::SUB_GROUP) { + writeln!( + self.out, + "{level}{NAMESPACE}::simdgroup_barrier({NAMESPACE}::mem_flags::mem_threadgroup);", + )?; + } Ok(()) } } @@ -4762,8 +4839,8 @@ fn test_stack_size() { } let stack_size = addresses_end - addresses_start; // check the size (in debug only) - // last observed macOS value: 19152 (CI) - if !(9000..=20000).contains(&stack_size) { + // last observed macOS value: 22256 (CI) + if !(15000..=25000).contains(&stack_size) { panic!("`put_block` stack size {stack_size} has changed!"); } } diff --git a/naga/src/back/pipeline_constants.rs b/naga/src/back/pipeline_constants.rs index 50a6a3d57a..0dbe9cf4e8 100644 --- a/naga/src/back/pipeline_constants.rs +++ b/naga/src/back/pipeline_constants.rs @@ -129,8 +129,10 @@ pub fn process_overrides<'a>( Expression::Constant(c_h) } Expression::Constant(c_h) => { - adjusted_constant_initializers.insert(c_h); - module.constants[c_h].init = adjusted_global_expressions[c_h.index()]; + if adjusted_constant_initializers.insert(c_h) { + let init = &mut module.constants[c_h].init; + *init = adjusted_global_expressions[init.index()]; + } expr } expr => expr, @@ -522,7 +524,9 @@ fn adjust_expr(new_pos: &[Handle], expr: &mut Expression) { ty: _, comparison: _, } - | Expression::WorkGroupUniformLoadResult { ty: _ } => {} + | Expression::WorkGroupUniformLoadResult { 
ty: _ } + | Expression::SubgroupBallotResult + | Expression::SubgroupOperationResult { .. } => {} } } @@ -637,6 +641,41 @@ fn adjust_stmt(new_pos: &[Handle], stmt: &mut Statement) { adjust(pointer); adjust(result); } + Statement::SubgroupBallot { + ref mut result, + ref mut predicate, + } => { + if let Some(ref mut predicate) = *predicate { + adjust(predicate); + } + adjust(result); + } + Statement::SubgroupCollectiveOperation { + ref mut argument, + ref mut result, + .. + } => { + adjust(argument); + adjust(result); + } + Statement::SubgroupGather { + ref mut mode, + ref mut argument, + ref mut result, + } => { + match *mode { + crate::GatherMode::BroadcastFirst => {} + crate::GatherMode::Broadcast(ref mut index) + | crate::GatherMode::Shuffle(ref mut index) + | crate::GatherMode::ShuffleDown(ref mut index) + | crate::GatherMode::ShuffleUp(ref mut index) + | crate::GatherMode::ShuffleXor(ref mut index) => { + adjust(index); + } + } + adjust(argument); + adjust(result) + } Statement::Call { ref mut arguments, ref mut result, diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 9b8430e861..120d60fc40 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -1073,7 +1073,7 @@ impl<'w> BlockContext<'w> { // // bitfieldExtract(x, o, c) - let bit_width = arg_ty.scalar_width().unwrap(); + let bit_width = arg_ty.scalar_width().unwrap() * 8; let width_constant = self .writer .get_constant_scalar(crate::Literal::U32(bit_width as u32)); @@ -1129,7 +1129,7 @@ impl<'w> BlockContext<'w> { Mf::InsertBits => { // The behavior of InsertBits has the same undefined behavior as ExtractBits. - let bit_width = arg_ty.scalar_width().unwrap(); + let bit_width = arg_ty.scalar_width().unwrap() * 8; let width_constant = self .writer .get_constant_scalar(crate::Literal::U32(bit_width as u32)); @@ -1185,7 +1185,7 @@ impl<'w> BlockContext<'w> { } Mf::FindLsb => MathOp::Ext(spirv::GLOp::FindILsb), Mf::FindMsb => { - if arg_ty.scalar_width() == Some(32) { + if arg_ty.scalar_width() == Some(4) { let thing = match arg_scalar_kind { Some(crate::ScalarKind::Uint) => spirv::GLOp::FindUMsb, Some(crate::ScalarKind::Sint) => spirv::GLOp::FindSMsb, @@ -1279,7 +1279,9 @@ impl<'w> BlockContext<'w> { crate::Expression::CallResult(_) | crate::Expression::AtomicResult { .. } | crate::Expression::WorkGroupUniformLoadResult { .. } - | crate::Expression::RayQueryProceedResult => self.cached[expr_handle], + | crate::Expression::RayQueryProceedResult + | crate::Expression::SubgroupBallotResult + | crate::Expression::SubgroupOperationResult { .. 
} => self.cached[expr_handle], crate::Expression::As { expr, kind, @@ -2490,6 +2492,27 @@ impl<'w> BlockContext<'w> { crate::Statement::RayQuery { query, ref fun } => { self.write_ray_query_function(query, fun, &mut block); } + crate::Statement::SubgroupBallot { + result, + ref predicate, + } => { + self.write_subgroup_ballot(predicate, result, &mut block)?; + } + crate::Statement::SubgroupCollectiveOperation { + ref op, + ref collective_op, + argument, + result, + } => { + self.write_subgroup_operation(op, collective_op, argument, result, &mut block)?; + } + crate::Statement::SubgroupGather { + ref mode, + argument, + result, + } => { + self.write_subgroup_gather(mode, argument, result, &mut block)?; + } } } diff --git a/naga/src/back/spv/instructions.rs b/naga/src/back/spv/instructions.rs index f3acf01d6c..df2774ab9c 100644 --- a/naga/src/back/spv/instructions.rs +++ b/naga/src/back/spv/instructions.rs @@ -1073,6 +1073,73 @@ impl super::Instruction { instruction.add_operand(semantics_id); instruction } + + // Group Instructions + + pub(super) fn group_non_uniform_ballot( + result_type_id: Word, + id: Word, + exec_scope_id: Word, + predicate: Word, + ) -> Self { + let mut instruction = Self::new(Op::GroupNonUniformBallot); + instruction.set_type(result_type_id); + instruction.set_result(id); + instruction.add_operand(exec_scope_id); + instruction.add_operand(predicate); + + instruction + } + pub(super) fn group_non_uniform_broadcast_first( + result_type_id: Word, + id: Word, + exec_scope_id: Word, + value: Word, + ) -> Self { + let mut instruction = Self::new(Op::GroupNonUniformBroadcastFirst); + instruction.set_type(result_type_id); + instruction.set_result(id); + instruction.add_operand(exec_scope_id); + instruction.add_operand(value); + + instruction + } + pub(super) fn group_non_uniform_gather( + op: Op, + result_type_id: Word, + id: Word, + exec_scope_id: Word, + value: Word, + index: Word, + ) -> Self { + let mut instruction = Self::new(op); + instruction.set_type(result_type_id); + instruction.set_result(id); + instruction.add_operand(exec_scope_id); + instruction.add_operand(value); + instruction.add_operand(index); + + instruction + } + pub(super) fn group_non_uniform_arithmetic( + op: Op, + result_type_id: Word, + id: Word, + exec_scope_id: Word, + group_op: Option, + value: Word, + ) -> Self { + let mut instruction = Self::new(op); + instruction.set_type(result_type_id); + instruction.set_result(id); + instruction.add_operand(exec_scope_id); + if let Some(group_op) = group_op { + instruction.add_operand(group_op as u32); + } + instruction.add_operand(value); + + instruction + } } impl From for spirv::ImageFormat { diff --git a/naga/src/back/spv/mod.rs b/naga/src/back/spv/mod.rs index 8626bb104d..38a87049e6 100644 --- a/naga/src/back/spv/mod.rs +++ b/naga/src/back/spv/mod.rs @@ -13,6 +13,7 @@ mod layout; mod ray; mod recyclable; mod selection; +mod subgroup; mod writer; pub use spirv::Capability; @@ -247,7 +248,7 @@ impl LocalImageType { /// this, by converting everything possible to a `LocalType` before inspecting /// it. /// -/// ## `Localtype` equality and SPIR-V `OpType` uniqueness +/// ## `LocalType` equality and SPIR-V `OpType` uniqueness /// /// The definition of `Eq` on `LocalType` is carefully chosen to help us follow /// certain SPIR-V rules. 
SPIR-V §2.8 requires some classes of `OpType...` @@ -456,7 +457,7 @@ impl recyclable::Recyclable for CachedExpressions { #[derive(Eq, Hash, PartialEq)] enum CachedConstant { - Literal(crate::Literal), + Literal(crate::proc::HashableLiteral), Composite { ty: LookupType, constituent_ids: Vec, diff --git a/naga/src/back/spv/subgroup.rs b/naga/src/back/spv/subgroup.rs new file mode 100644 index 0000000000..c952cb11a7 --- /dev/null +++ b/naga/src/back/spv/subgroup.rs @@ -0,0 +1,207 @@ +use super::{Block, BlockContext, Error, Instruction}; +use crate::{ + arena::Handle, + back::spv::{LocalType, LookupType}, + TypeInner, +}; + +impl<'w> BlockContext<'w> { + pub(super) fn write_subgroup_ballot( + &mut self, + predicate: &Option>, + result: Handle, + block: &mut Block, + ) -> Result<(), Error> { + self.writer.require_any( + "GroupNonUniformBallot", + &[spirv::Capability::GroupNonUniformBallot], + )?; + let vec4_u32_type_id = self.get_type_id(LookupType::Local(LocalType::Value { + vector_size: Some(crate::VectorSize::Quad), + scalar: crate::Scalar::U32, + pointer_space: None, + })); + let exec_scope_id = self.get_index_constant(spirv::Scope::Subgroup as u32); + let predicate = if let Some(predicate) = *predicate { + self.cached[predicate] + } else { + self.writer.get_constant_scalar(crate::Literal::Bool(true)) + }; + let id = self.gen_id(); + block.body.push(Instruction::group_non_uniform_ballot( + vec4_u32_type_id, + id, + exec_scope_id, + predicate, + )); + self.cached[result] = id; + Ok(()) + } + pub(super) fn write_subgroup_operation( + &mut self, + op: &crate::SubgroupOperation, + collective_op: &crate::CollectiveOperation, + argument: Handle, + result: Handle, + block: &mut Block, + ) -> Result<(), Error> { + use crate::SubgroupOperation as sg; + match *op { + sg::All | sg::Any => { + self.writer.require_any( + "GroupNonUniformVote", + &[spirv::Capability::GroupNonUniformVote], + )?; + } + _ => { + self.writer.require_any( + "GroupNonUniformArithmetic", + &[spirv::Capability::GroupNonUniformArithmetic], + )?; + } + } + + let id = self.gen_id(); + let result_ty = &self.fun_info[result].ty; + let result_type_id = self.get_expression_type_id(result_ty); + let result_ty_inner = result_ty.inner_with(&self.ir_module.types); + + let (is_scalar, scalar) = match *result_ty_inner { + TypeInner::Scalar(kind) => (true, kind), + TypeInner::Vector { scalar: kind, .. 
} => (false, kind), + _ => unimplemented!(), + }; + + use crate::ScalarKind as sk; + let spirv_op = match (scalar.kind, *op) { + (sk::Bool, sg::All) if is_scalar => spirv::Op::GroupNonUniformAll, + (sk::Bool, sg::Any) if is_scalar => spirv::Op::GroupNonUniformAny, + (_, sg::All | sg::Any) => unimplemented!(), + + (sk::Sint | sk::Uint, sg::Add) => spirv::Op::GroupNonUniformIAdd, + (sk::Float, sg::Add) => spirv::Op::GroupNonUniformFAdd, + (sk::Sint | sk::Uint, sg::Mul) => spirv::Op::GroupNonUniformIMul, + (sk::Float, sg::Mul) => spirv::Op::GroupNonUniformFMul, + (sk::Sint, sg::Max) => spirv::Op::GroupNonUniformSMax, + (sk::Uint, sg::Max) => spirv::Op::GroupNonUniformUMax, + (sk::Float, sg::Max) => spirv::Op::GroupNonUniformFMax, + (sk::Sint, sg::Min) => spirv::Op::GroupNonUniformSMin, + (sk::Uint, sg::Min) => spirv::Op::GroupNonUniformUMin, + (sk::Float, sg::Min) => spirv::Op::GroupNonUniformFMin, + (_, sg::Add | sg::Mul | sg::Min | sg::Max) => unimplemented!(), + + (sk::Sint | sk::Uint, sg::And) => spirv::Op::GroupNonUniformBitwiseAnd, + (sk::Sint | sk::Uint, sg::Or) => spirv::Op::GroupNonUniformBitwiseOr, + (sk::Sint | sk::Uint, sg::Xor) => spirv::Op::GroupNonUniformBitwiseXor, + (sk::Bool, sg::And) => spirv::Op::GroupNonUniformLogicalAnd, + (sk::Bool, sg::Or) => spirv::Op::GroupNonUniformLogicalOr, + (sk::Bool, sg::Xor) => spirv::Op::GroupNonUniformLogicalXor, + (_, sg::And | sg::Or | sg::Xor) => unimplemented!(), + }; + + let exec_scope_id = self.get_index_constant(spirv::Scope::Subgroup as u32); + + use crate::CollectiveOperation as c; + let group_op = match *op { + sg::All | sg::Any => None, + _ => Some(match *collective_op { + c::Reduce => spirv::GroupOperation::Reduce, + c::InclusiveScan => spirv::GroupOperation::InclusiveScan, + c::ExclusiveScan => spirv::GroupOperation::ExclusiveScan, + }), + }; + + let arg_id = self.cached[argument]; + block.body.push(Instruction::group_non_uniform_arithmetic( + spirv_op, + result_type_id, + id, + exec_scope_id, + group_op, + arg_id, + )); + self.cached[result] = id; + Ok(()) + } + pub(super) fn write_subgroup_gather( + &mut self, + mode: &crate::GatherMode, + argument: Handle, + result: Handle, + block: &mut Block, + ) -> Result<(), Error> { + self.writer.require_any( + "GroupNonUniformBallot", + &[spirv::Capability::GroupNonUniformBallot], + )?; + match *mode { + crate::GatherMode::BroadcastFirst | crate::GatherMode::Broadcast(_) => { + self.writer.require_any( + "GroupNonUniformBallot", + &[spirv::Capability::GroupNonUniformBallot], + )?; + } + crate::GatherMode::Shuffle(_) | crate::GatherMode::ShuffleXor(_) => { + self.writer.require_any( + "GroupNonUniformShuffle", + &[spirv::Capability::GroupNonUniformShuffle], + )?; + } + crate::GatherMode::ShuffleDown(_) | crate::GatherMode::ShuffleUp(_) => { + self.writer.require_any( + "GroupNonUniformShuffleRelative", + &[spirv::Capability::GroupNonUniformShuffleRelative], + )?; + } + } + + let id = self.gen_id(); + let result_ty = &self.fun_info[result].ty; + let result_type_id = self.get_expression_type_id(result_ty); + + let exec_scope_id = self.get_index_constant(spirv::Scope::Subgroup as u32); + + let arg_id = self.cached[argument]; + match *mode { + crate::GatherMode::BroadcastFirst => { + block + .body + .push(Instruction::group_non_uniform_broadcast_first( + result_type_id, + id, + exec_scope_id, + arg_id, + )); + } + crate::GatherMode::Broadcast(index) + | crate::GatherMode::Shuffle(index) + | crate::GatherMode::ShuffleDown(index) + | crate::GatherMode::ShuffleUp(index) + | 
crate::GatherMode::ShuffleXor(index) => { + let index_id = self.cached[index]; + let op = match *mode { + crate::GatherMode::BroadcastFirst => unreachable!(), + // Use shuffle to emit broadcast to allow the index to + // be dynamically uniform on Vulkan 1.1. The argument to + // OpGroupNonUniformBroadcast must be a constant pre SPIR-V + // 1.5 (vulkan 1.2) + crate::GatherMode::Broadcast(_) => spirv::Op::GroupNonUniformShuffle, + crate::GatherMode::Shuffle(_) => spirv::Op::GroupNonUniformShuffle, + crate::GatherMode::ShuffleDown(_) => spirv::Op::GroupNonUniformShuffleDown, + crate::GatherMode::ShuffleUp(_) => spirv::Op::GroupNonUniformShuffleUp, + crate::GatherMode::ShuffleXor(_) => spirv::Op::GroupNonUniformShuffleXor, + }; + block.body.push(Instruction::group_non_uniform_gather( + op, + result_type_id, + id, + exec_scope_id, + arg_id, + index_id, + )); + } + } + self.cached[result] = id; + Ok(()) + } +} diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index cf96fa59b4..73a16c273e 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -970,6 +970,11 @@ impl Writer { handle: Handle, ) -> Result { let ty = &arena[handle]; + // If it's a type that needs SPIR-V capabilities, request them now. + // This needs to happen regardless of the LocalType lookup succeeding, + // because some types which map to the same LocalType have different + // capability requirements. See https://github.com/gfx-rs/wgpu/issues/5569 + self.request_type_capabilities(&ty.inner)?; let id = if let Some(local) = make_local(&ty.inner) { // This type can be represented as a `LocalType`, so check if we've // already written an instruction for it. If not, do so now, with @@ -985,10 +990,6 @@ impl Writer { self.write_type_declaration_local(id, local); - // If it's a type that needs SPIR-V capabilities, request them now, - // so write_type_declaration_local can stay infallible. 
- self.request_type_capabilities(&ty.inner)?; - id } } @@ -1150,7 +1151,7 @@ impl Writer { } pub(super) fn get_constant_scalar(&mut self, value: crate::Literal) -> Word { - let scalar = CachedConstant::Literal(value); + let scalar = CachedConstant::Literal(value.into()); if let Some(&id) = self.cached_constants.get(&scalar) { return id; } @@ -1310,7 +1311,11 @@ impl Writer { spirv::MemorySemantics::WORKGROUP_MEMORY, flags.contains(crate::Barrier::WORK_GROUP), ); - let exec_scope_id = self.get_index_constant(spirv::Scope::Workgroup as u32); + let exec_scope_id = if flags.contains(crate::Barrier::SUB_GROUP) { + self.get_index_constant(spirv::Scope::Subgroup as u32) + } else { + self.get_index_constant(spirv::Scope::Workgroup as u32) + }; let mem_scope_id = self.get_index_constant(memory_scope as u32); let semantics_id = self.get_index_constant(semantics.bits()); block.body.push(Instruction::control_barrier( @@ -1585,6 +1590,41 @@ impl Writer { Bi::WorkGroupId => BuiltIn::WorkgroupId, Bi::WorkGroupSize => BuiltIn::WorkgroupSize, Bi::NumWorkGroups => BuiltIn::NumWorkgroups, + // Subgroup + Bi::NumSubgroups => { + self.require_any( + "`num_subgroups` built-in", + &[spirv::Capability::GroupNonUniform], + )?; + BuiltIn::NumSubgroups + } + Bi::SubgroupId => { + self.require_any( + "`subgroup_id` built-in", + &[spirv::Capability::GroupNonUniform], + )?; + BuiltIn::SubgroupId + } + Bi::SubgroupSize => { + self.require_any( + "`subgroup_size` built-in", + &[ + spirv::Capability::GroupNonUniform, + spirv::Capability::SubgroupBallotKHR, + ], + )?; + BuiltIn::SubgroupSize + } + Bi::SubgroupInvocationId => { + self.require_any( + "`subgroup_invocation_id` built-in", + &[ + spirv::Capability::GroupNonUniform, + spirv::Capability::SubgroupBallotKHR, + ], + )?; + BuiltIn::SubgroupLocalInvocationId + } }; self.decorate(id, Decoration::BuiltIn, &[built_in as u32]); diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs index b63e16da3b..789f6f62bf 100644 --- a/naga/src/back/wgsl/writer.rs +++ b/naga/src/back/wgsl/writer.rs @@ -924,8 +924,124 @@ impl Writer { if barrier.contains(crate::Barrier::WORK_GROUP) { writeln!(self.out, "{level}workgroupBarrier();")?; } + + if barrier.contains(crate::Barrier::SUB_GROUP) { + writeln!(self.out, "{level}subgroupBarrier();")?; + } } Statement::RayQuery { .. } => unreachable!(), + Statement::SubgroupBallot { result, predicate } => { + write!(self.out, "{level}")?; + let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); + self.start_named_expr(module, result, func_ctx, &res_name)?; + self.named_expressions.insert(result, res_name); + + write!(self.out, "subgroupBallot(")?; + if let Some(predicate) = predicate { + self.write_expr(module, predicate, func_ctx)?; + } + writeln!(self.out, ");")?; + } + Statement::SubgroupCollectiveOperation { + op, + collective_op, + argument, + result, + } => { + write!(self.out, "{level}")?; + let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); + self.start_named_expr(module, result, func_ctx, &res_name)?; + self.named_expressions.insert(result, res_name); + + match (collective_op, op) { + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::All) => { + write!(self.out, "subgroupAll(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Any) => { + write!(self.out, "subgroupAny(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Add) => { + write!(self.out, "subgroupAdd(")? 
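Across the WGSL writer below (and the GLSL/HLSL/MSL writers earlier in this patch), every subgroup statement bakes its result into a named local built from `back::BAKE_PREFIX` plus the result expression's index, and the `named_expressions` map makes later reads reference that name instead of re-emitting the call. A minimal sketch of the naming scheme, assuming the prefix is `"_e"` as in naga's backend (the rest is illustrative):

```rust
// Assumed value of naga's `back::BAKE_PREFIX`; the helper is illustrative.
const BAKE_PREFIX: &str = "_e";

fn baked_name(expr_index: usize) -> String {
    format!("{BAKE_PREFIX}{expr_index}")
}

fn main() {
    // A ballot whose result is expression #7 would be emitted in WGSL as:
    //     let _e7 = subgroupBallot(pred);
    // and every later use of that expression prints `_e7`.
    assert_eq!(baked_name(7), "_e7");
}
```

Baking is what keeps side-effecting subgroup intrinsics single-use: the call appears exactly once in the output, no matter how many IR expressions consume its result.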
+ } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Mul) => { + write!(self.out, "subgroupMul(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Max) => { + write!(self.out, "subgroupMax(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Min) => { + write!(self.out, "subgroupMin(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::And) => { + write!(self.out, "subgroupAnd(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Or) => { + write!(self.out, "subgroupOr(")? + } + (crate::CollectiveOperation::Reduce, crate::SubgroupOperation::Xor) => { + write!(self.out, "subgroupXor(")? + } + (crate::CollectiveOperation::ExclusiveScan, crate::SubgroupOperation::Add) => { + write!(self.out, "subgroupExclusiveAdd(")? + } + (crate::CollectiveOperation::ExclusiveScan, crate::SubgroupOperation::Mul) => { + write!(self.out, "subgroupExclusiveMul(")? + } + (crate::CollectiveOperation::InclusiveScan, crate::SubgroupOperation::Add) => { + write!(self.out, "subgroupInclusiveAdd(")? + } + (crate::CollectiveOperation::InclusiveScan, crate::SubgroupOperation::Mul) => { + write!(self.out, "subgroupInclusiveMul(")? + } + _ => unimplemented!(), + } + self.write_expr(module, argument, func_ctx)?; + writeln!(self.out, ");")?; + } + Statement::SubgroupGather { + mode, + argument, + result, + } => { + write!(self.out, "{level}")?; + let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); + self.start_named_expr(module, result, func_ctx, &res_name)?; + self.named_expressions.insert(result, res_name); + + match mode { + crate::GatherMode::BroadcastFirst => { + write!(self.out, "subgroupBroadcastFirst(")?; + } + crate::GatherMode::Broadcast(_) => { + write!(self.out, "subgroupBroadcast(")?; + } + crate::GatherMode::Shuffle(_) => { + write!(self.out, "subgroupShuffle(")?; + } + crate::GatherMode::ShuffleDown(_) => { + write!(self.out, "subgroupShuffleDown(")?; + } + crate::GatherMode::ShuffleUp(_) => { + write!(self.out, "subgroupShuffleUp(")?; + } + crate::GatherMode::ShuffleXor(_) => { + write!(self.out, "subgroupShuffleXor(")?; + } + } + self.write_expr(module, argument, func_ctx)?; + match mode { + crate::GatherMode::BroadcastFirst => {} + crate::GatherMode::Broadcast(index) + | crate::GatherMode::Shuffle(index) + | crate::GatherMode::ShuffleDown(index) + | crate::GatherMode::ShuffleUp(index) + | crate::GatherMode::ShuffleXor(index) => { + write!(self.out, ", ")?; + self.write_expr(module, index, func_ctx)?; + } + } + writeln!(self.out, ");")?; + } } Ok(()) @@ -1698,6 +1814,8 @@ impl Writer { Expression::CallResult(_) | Expression::AtomicResult { .. } | Expression::RayQueryProceedResult + | Expression::SubgroupBallotResult + | Expression::SubgroupOperationResult { .. } | Expression::WorkGroupUniformLoadResult { .. 
} => {} } @@ -1799,6 +1917,10 @@ fn builtin_str(built_in: crate::BuiltIn) -> Result<&'static str, Error> { Bi::SampleMask => "sample_mask", Bi::PrimitiveIndex => "primitive_index", Bi::ViewIndex => "view_index", + Bi::NumSubgroups => "num_subgroups", + Bi::SubgroupId => "subgroup_id", + Bi::SubgroupSize => "subgroup_size", + Bi::SubgroupInvocationId => "subgroup_invocation_id", Bi::BaseInstance | Bi::BaseVertex | Bi::ClipDistance diff --git a/naga/src/compact/expressions.rs b/naga/src/compact/expressions.rs index 0f2d8b1a02..a418bde301 100644 --- a/naga/src/compact/expressions.rs +++ b/naga/src/compact/expressions.rs @@ -72,6 +72,7 @@ impl<'tracer> ExpressionTracer<'tracer> { | Ex::GlobalVariable(_) | Ex::LocalVariable(_) | Ex::CallResult(_) + | Ex::SubgroupBallotResult | Ex::RayQueryProceedResult => {} Ex::Constant(handle) => { @@ -192,6 +193,7 @@ impl<'tracer> ExpressionTracer<'tracer> { Ex::AtomicResult { ty, comparison: _ } => self.types_used.insert(ty), Ex::WorkGroupUniformLoadResult { ty } => self.types_used.insert(ty), Ex::ArrayLength(expr) => self.expressions_used.insert(expr), + Ex::SubgroupOperationResult { ty } => self.types_used.insert(ty), Ex::RayQueryGetIntersection { query, committed: _, @@ -223,6 +225,7 @@ impl ModuleMap { | Ex::GlobalVariable(_) | Ex::LocalVariable(_) | Ex::CallResult(_) + | Ex::SubgroupBallotResult | Ex::RayQueryProceedResult => {} // All overrides are retained, so their handles never change. @@ -353,6 +356,7 @@ impl ModuleMap { comparison: _, } => self.types.adjust(ty), Ex::WorkGroupUniformLoadResult { ref mut ty } => self.types.adjust(ty), + Ex::SubgroupOperationResult { ref mut ty } => self.types.adjust(ty), Ex::ArrayLength(ref mut expr) => adjust(expr), Ex::RayQueryGetIntersection { ref mut query, diff --git a/naga/src/compact/statements.rs b/naga/src/compact/statements.rs index 0698b57258..a124281bc1 100644 --- a/naga/src/compact/statements.rs +++ b/naga/src/compact/statements.rs @@ -97,6 +97,39 @@ impl FunctionTracer<'_> { self.expressions_used.insert(query); self.trace_ray_query_function(fun); } + St::SubgroupBallot { result, predicate } => { + if let Some(predicate) = predicate { + self.expressions_used.insert(predicate) + } + self.expressions_used.insert(result) + } + St::SubgroupCollectiveOperation { + op: _, + collective_op: _, + argument, + result, + } => { + self.expressions_used.insert(argument); + self.expressions_used.insert(result) + } + St::SubgroupGather { + mode, + argument, + result, + } => { + match mode { + crate::GatherMode::BroadcastFirst => {} + crate::GatherMode::Broadcast(index) + | crate::GatherMode::Shuffle(index) + | crate::GatherMode::ShuffleDown(index) + | crate::GatherMode::ShuffleUp(index) + | crate::GatherMode::ShuffleXor(index) => { + self.expressions_used.insert(index) + } + } + self.expressions_used.insert(argument); + self.expressions_used.insert(result) + } // Trivial statements. 
St::Break @@ -250,6 +283,40 @@ impl FunctionMap { adjust(query); self.adjust_ray_query_function(fun); } + St::SubgroupBallot { + ref mut result, + ref mut predicate, + } => { + if let Some(ref mut predicate) = *predicate { + adjust(predicate); + } + adjust(result); + } + St::SubgroupCollectiveOperation { + op: _, + collective_op: _, + ref mut argument, + ref mut result, + } => { + adjust(argument); + adjust(result); + } + St::SubgroupGather { + ref mut mode, + ref mut argument, + ref mut result, + } => { + match *mode { + crate::GatherMode::BroadcastFirst => {} + crate::GatherMode::Broadcast(ref mut index) + | crate::GatherMode::Shuffle(ref mut index) + | crate::GatherMode::ShuffleDown(ref mut index) + | crate::GatherMode::ShuffleUp(ref mut index) + | crate::GatherMode::ShuffleXor(ref mut index) => adjust(index), + } + adjust(argument); + adjust(result); + } // Trivial statements. St::Break diff --git a/naga/src/error.rs b/naga/src/error.rs new file mode 100644 index 0000000000..5f2e28360b --- /dev/null +++ b/naga/src/error.rs @@ -0,0 +1,74 @@ +use std::{error::Error, fmt}; + +#[derive(Clone, Debug)] +pub struct ShaderError { + /// The source code of the shader. + pub source: String, + pub label: Option, + pub inner: Box, +} + +#[cfg(feature = "wgsl-in")] +impl fmt::Display for ShaderError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let label = self.label.as_deref().unwrap_or_default(); + let string = self.inner.emit_to_string(&self.source); + write!(f, "\nShader '{label}' parsing {string}") + } +} +#[cfg(feature = "glsl-in")] +impl fmt::Display for ShaderError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let label = self.label.as_deref().unwrap_or_default(); + let string = self.inner.emit_to_string(&self.source); + write!(f, "\nShader '{label}' parsing {string}") + } +} +#[cfg(feature = "spv-in")] +impl fmt::Display for ShaderError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let label = self.label.as_deref().unwrap_or_default(); + let string = self.inner.emit_to_string(&self.source); + write!(f, "\nShader '{label}' parsing {string}") + } +} +impl fmt::Display for ShaderError> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use codespan_reporting::{ + diagnostic::{Diagnostic, Label}, + files::SimpleFile, + term, + }; + + let label = self.label.as_deref().unwrap_or_default(); + let files = SimpleFile::new(label, &self.source); + let config = term::Config::default(); + let mut writer = term::termcolor::NoColor::new(Vec::new()); + + let diagnostic = Diagnostic::error().with_labels( + self.inner + .spans() + .map(|&(span, ref desc)| { + Label::primary((), span.to_range().unwrap()).with_message(desc.to_owned()) + }) + .collect(), + ); + + term::emit(&mut writer, &config, &files, &diagnostic).expect("cannot write error"); + + write!( + f, + "\nShader validation {}", + String::from_utf8_lossy(&writer.into_inner()) + ) + } +} +impl Error for ShaderError +where + ShaderError: fmt::Display, + E: Error + 'static, +{ + fn source(&self) -> Option<&(dyn Error + 'static)> { + Some(&self.inner) + } +} diff --git a/naga/src/front/glsl/error.rs b/naga/src/front/glsl/error.rs index bd16ee30bc..e0771437e6 100644 --- a/naga/src/front/glsl/error.rs +++ b/naga/src/front/glsl/error.rs @@ -1,4 +1,5 @@ use super::token::TokenValue; +use crate::SourceLocation; use crate::{proc::ConstantEvaluatorError, Span}; use codespan_reporting::diagnostic::{Diagnostic, Label}; use codespan_reporting::files::SimpleFile; @@ -137,14 +138,21 @@ pub struct 
Error { pub meta: Span, } +impl Error { + /// Returns a [`SourceLocation`] for the error message. + pub fn location(&self, source: &str) -> Option { + Some(self.meta.location(source)) + } +} + /// A collection of errors returned during shader parsing. #[derive(Clone, Debug)] #[cfg_attr(test, derive(PartialEq))] -pub struct ParseError { +pub struct ParseErrors { pub errors: Vec, } -impl ParseError { +impl ParseErrors { pub fn emit_to_writer(&self, writer: &mut impl WriteColor, source: &str) { self.emit_to_writer_with_path(writer, source, "glsl"); } @@ -172,19 +180,19 @@ impl ParseError { } } -impl std::fmt::Display for ParseError { +impl std::fmt::Display for ParseErrors { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { self.errors.iter().try_for_each(|e| write!(f, "{e:?}")) } } -impl std::error::Error for ParseError { +impl std::error::Error for ParseErrors { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { None } } -impl From> for ParseError { +impl From> for ParseErrors { fn from(errors: Vec) -> Self { Self { errors } } diff --git a/naga/src/front/glsl/mod.rs b/naga/src/front/glsl/mod.rs index 75f3929db4..ea202b2445 100644 --- a/naga/src/front/glsl/mod.rs +++ b/naga/src/front/glsl/mod.rs @@ -13,7 +13,7 @@ To begin, take a look at the documentation for the [`Frontend`]. */ pub use ast::{Precision, Profile}; -pub use error::{Error, ErrorKind, ExpectedToken, ParseError}; +pub use error::{Error, ErrorKind, ExpectedToken, ParseErrors}; pub use token::TokenValue; use crate::{proc::Layouter, FastHashMap, FastHashSet, Handle, Module, ShaderStage, Span, Type}; @@ -196,7 +196,7 @@ impl Frontend { &mut self, options: &Options, source: &str, - ) -> std::result::Result { + ) -> std::result::Result { self.reset(options.stage); let lexer = lex::Lexer::new(source, &options.defines); diff --git a/naga/src/front/glsl/parser_tests.rs b/naga/src/front/glsl/parser_tests.rs index c065dc15d6..135765ca58 100644 --- a/naga/src/front/glsl/parser_tests.rs +++ b/naga/src/front/glsl/parser_tests.rs @@ -1,7 +1,7 @@ use super::{ ast::Profile, error::ExpectedToken, - error::{Error, ErrorKind, ParseError}, + error::{Error, ErrorKind, ParseErrors}, token::TokenValue, Frontend, Options, Span, }; @@ -21,7 +21,7 @@ fn version() { ) .err() .unwrap(), - ParseError { + ParseErrors { errors: vec![Error { kind: ErrorKind::InvalidVersion(99000), meta: Span::new(9, 14) @@ -37,7 +37,7 @@ fn version() { ) .err() .unwrap(), - ParseError { + ParseErrors { errors: vec![Error { kind: ErrorKind::InvalidVersion(449), meta: Span::new(9, 12) @@ -53,7 +53,7 @@ fn version() { ) .err() .unwrap(), - ParseError { + ParseErrors { errors: vec![Error { kind: ErrorKind::InvalidProfile("smart".into()), meta: Span::new(13, 18), @@ -69,7 +69,7 @@ fn version() { ) .err() .unwrap(), - ParseError { + ParseErrors { errors: vec![ Error { kind: ErrorKind::PreprocessorError(PreprocessorError::UnexpectedHash,), @@ -455,7 +455,7 @@ fn functions() { ) .err() .unwrap(), - ParseError { + ParseErrors { errors: vec![Error { kind: ErrorKind::SemanticError("Function already defined".into()), meta: Span::new(134, 152), @@ -634,7 +634,7 @@ fn implicit_conversions() { ) .err() .unwrap(), - ParseError { + ParseErrors { errors: vec![Error { kind: ErrorKind::SemanticError("Unknown function \'test\'".into()), meta: Span::new(156, 165), @@ -658,7 +658,7 @@ fn implicit_conversions() { ) .err() .unwrap(), - ParseError { + ParseErrors { errors: vec![Error { kind: ErrorKind::SemanticError("Ambiguous best function for \'test\'".into()), meta: 
Span::new(158, 165), diff --git a/naga/src/front/spv/convert.rs b/naga/src/front/spv/convert.rs index f0a714fbeb..a6bf0e0451 100644 --- a/naga/src/front/spv/convert.rs +++ b/naga/src/front/spv/convert.rs @@ -153,6 +153,11 @@ pub(super) fn map_builtin(word: spirv::Word, invariant: bool) -> Result crate::BuiltIn::WorkGroupId, Some(Bi::WorkgroupSize) => crate::BuiltIn::WorkGroupSize, Some(Bi::NumWorkgroups) => crate::BuiltIn::NumWorkGroups, + // subgroup + Some(Bi::NumSubgroups) => crate::BuiltIn::NumSubgroups, + Some(Bi::SubgroupId) => crate::BuiltIn::SubgroupId, + Some(Bi::SubgroupSize) => crate::BuiltIn::SubgroupSize, + Some(Bi::SubgroupLocalInvocationId) => crate::BuiltIn::SubgroupInvocationId, _ => return Err(Error::UnsupportedBuiltIn(word)), }) } diff --git a/naga/src/front/spv/error.rs b/naga/src/front/spv/error.rs index 2825a44a00..44beadce98 100644 --- a/naga/src/front/spv/error.rs +++ b/naga/src/front/spv/error.rs @@ -5,7 +5,7 @@ use codespan_reporting::files::SimpleFile; use codespan_reporting::term; use termcolor::{NoColor, WriteColor}; -#[derive(Debug, thiserror::Error)] +#[derive(Clone, Debug, thiserror::Error)] pub enum Error { #[error("invalid header")] InvalidHeader, @@ -58,6 +58,8 @@ pub enum Error { UnknownBinaryOperator(spirv::Op), #[error("unknown relational function {0:?}")] UnknownRelationalFunction(spirv::Op), + #[error("unsupported group operation %{0}")] + UnsupportedGroupOperation(spirv::Word), #[error("invalid parameter {0:?}")] InvalidParameter(spirv::Op), #[error("invalid operand count {1} for {0:?}")] diff --git a/naga/src/front/spv/mod.rs b/naga/src/front/spv/mod.rs index 2ad40677fb..7ac5a18cd6 100644 --- a/naga/src/front/spv/mod.rs +++ b/naga/src/front/spv/mod.rs @@ -3700,6 +3700,254 @@ impl> Frontend { }, ); } + Op::GroupNonUniformBallot => { + inst.expect(5)?; + block.extend(emitter.finish(ctx.expressions)); + let result_type_id = self.next()?; + let result_id = self.next()?; + let exec_scope_id = self.next()?; + let predicate_id = self.next()?; + + let exec_scope_const = self.lookup_constant.lookup(exec_scope_id)?; + let _exec_scope = resolve_constant(ctx.gctx(), &exec_scope_const.inner) + .filter(|exec_scope| *exec_scope == spirv::Scope::Subgroup as u32) + .ok_or(Error::InvalidBarrierScope(exec_scope_id))?; + + let predicate = if self + .lookup_constant + .lookup(predicate_id) + .ok() + .filter(|predicate_const| match predicate_const.inner { + Constant::Constant(constant) => matches!( + ctx.gctx().global_expressions[ctx.gctx().constants[constant].init], + crate::Expression::Literal(crate::Literal::Bool(true)), + ), + Constant::Override(_) => false, + }) + .is_some() + { + None + } else { + let predicate_lookup = self.lookup_expression.lookup(predicate_id)?; + let predicate_handle = get_expr_handle!(predicate_id, predicate_lookup); + Some(predicate_handle) + }; + + let result_handle = ctx + .expressions + .append(crate::Expression::SubgroupBallotResult, span); + self.lookup_expression.insert( + result_id, + LookupExpression { + handle: result_handle, + type_id: result_type_id, + block_id, + }, + ); + + block.push( + crate::Statement::SubgroupBallot { + result: result_handle, + predicate, + }, + span, + ); + emitter.start(ctx.expressions); + } + spirv::Op::GroupNonUniformAll + | spirv::Op::GroupNonUniformAny + | spirv::Op::GroupNonUniformIAdd + | spirv::Op::GroupNonUniformFAdd + | spirv::Op::GroupNonUniformIMul + | spirv::Op::GroupNonUniformFMul + | spirv::Op::GroupNonUniformSMax + | spirv::Op::GroupNonUniformUMax + | spirv::Op::GroupNonUniformFMax + | 
spirv::Op::GroupNonUniformSMin + | spirv::Op::GroupNonUniformUMin + | spirv::Op::GroupNonUniformFMin + | spirv::Op::GroupNonUniformBitwiseAnd + | spirv::Op::GroupNonUniformBitwiseOr + | spirv::Op::GroupNonUniformBitwiseXor + | spirv::Op::GroupNonUniformLogicalAnd + | spirv::Op::GroupNonUniformLogicalOr + | spirv::Op::GroupNonUniformLogicalXor => { + block.extend(emitter.finish(ctx.expressions)); + inst.expect( + if matches!( + inst.op, + spirv::Op::GroupNonUniformAll | spirv::Op::GroupNonUniformAny + ) { + 5 + } else { + 6 + }, + )?; + let result_type_id = self.next()?; + let result_id = self.next()?; + let exec_scope_id = self.next()?; + let collective_op_id = match inst.op { + spirv::Op::GroupNonUniformAll | spirv::Op::GroupNonUniformAny => { + crate::CollectiveOperation::Reduce + } + _ => { + let group_op_id = self.next()?; + match spirv::GroupOperation::from_u32(group_op_id) { + Some(spirv::GroupOperation::Reduce) => { + crate::CollectiveOperation::Reduce + } + Some(spirv::GroupOperation::InclusiveScan) => { + crate::CollectiveOperation::InclusiveScan + } + Some(spirv::GroupOperation::ExclusiveScan) => { + crate::CollectiveOperation::ExclusiveScan + } + _ => return Err(Error::UnsupportedGroupOperation(group_op_id)), + } + } + }; + let argument_id = self.next()?; + + let argument_lookup = self.lookup_expression.lookup(argument_id)?; + let argument_handle = get_expr_handle!(argument_id, argument_lookup); + + let exec_scope_const = self.lookup_constant.lookup(exec_scope_id)?; + let _exec_scope = resolve_constant(ctx.gctx(), &exec_scope_const.inner) + .filter(|exec_scope| *exec_scope == spirv::Scope::Subgroup as u32) + .ok_or(Error::InvalidBarrierScope(exec_scope_id))?; + + let op_id = match inst.op { + spirv::Op::GroupNonUniformAll => crate::SubgroupOperation::All, + spirv::Op::GroupNonUniformAny => crate::SubgroupOperation::Any, + spirv::Op::GroupNonUniformIAdd | spirv::Op::GroupNonUniformFAdd => { + crate::SubgroupOperation::Add + } + spirv::Op::GroupNonUniformIMul | spirv::Op::GroupNonUniformFMul => { + crate::SubgroupOperation::Mul + } + spirv::Op::GroupNonUniformSMax + | spirv::Op::GroupNonUniformUMax + | spirv::Op::GroupNonUniformFMax => crate::SubgroupOperation::Max, + spirv::Op::GroupNonUniformSMin + | spirv::Op::GroupNonUniformUMin + | spirv::Op::GroupNonUniformFMin => crate::SubgroupOperation::Min, + spirv::Op::GroupNonUniformBitwiseAnd + | spirv::Op::GroupNonUniformLogicalAnd => crate::SubgroupOperation::And, + spirv::Op::GroupNonUniformBitwiseOr + | spirv::Op::GroupNonUniformLogicalOr => crate::SubgroupOperation::Or, + spirv::Op::GroupNonUniformBitwiseXor + | spirv::Op::GroupNonUniformLogicalXor => crate::SubgroupOperation::Xor, + _ => unreachable!(), + }; + + let result_type = self.lookup_type.lookup(result_type_id)?; + + let result_handle = ctx.expressions.append( + crate::Expression::SubgroupOperationResult { + ty: result_type.handle, + }, + span, + ); + self.lookup_expression.insert( + result_id, + LookupExpression { + handle: result_handle, + type_id: result_type_id, + block_id, + }, + ); + + block.push( + crate::Statement::SubgroupCollectiveOperation { + result: result_handle, + op: op_id, + collective_op: collective_op_id, + argument: argument_handle, + }, + span, + ); + emitter.start(ctx.expressions); + } + Op::GroupNonUniformBroadcastFirst + | Op::GroupNonUniformBroadcast + | Op::GroupNonUniformShuffle + | Op::GroupNonUniformShuffleDown + | Op::GroupNonUniformShuffleUp + | Op::GroupNonUniformShuffleXor => { + inst.expect( + if matches!(inst.op, 
spirv::Op::GroupNonUniformBroadcastFirst) { + 5 + } else { + 6 + }, + )?; + block.extend(emitter.finish(ctx.expressions)); + let result_type_id = self.next()?; + let result_id = self.next()?; + let exec_scope_id = self.next()?; + let argument_id = self.next()?; + + let argument_lookup = self.lookup_expression.lookup(argument_id)?; + let argument_handle = get_expr_handle!(argument_id, argument_lookup); + + let exec_scope_const = self.lookup_constant.lookup(exec_scope_id)?; + let _exec_scope = resolve_constant(ctx.gctx(), &exec_scope_const.inner) + .filter(|exec_scope| *exec_scope == spirv::Scope::Subgroup as u32) + .ok_or(Error::InvalidBarrierScope(exec_scope_id))?; + + let mode = if matches!(inst.op, spirv::Op::GroupNonUniformBroadcastFirst) { + crate::GatherMode::BroadcastFirst + } else { + let index_id = self.next()?; + let index_lookup = self.lookup_expression.lookup(index_id)?; + let index_handle = get_expr_handle!(index_id, index_lookup); + match inst.op { + spirv::Op::GroupNonUniformBroadcast => { + crate::GatherMode::Broadcast(index_handle) + } + spirv::Op::GroupNonUniformShuffle => { + crate::GatherMode::Shuffle(index_handle) + } + spirv::Op::GroupNonUniformShuffleDown => { + crate::GatherMode::ShuffleDown(index_handle) + } + spirv::Op::GroupNonUniformShuffleUp => { + crate::GatherMode::ShuffleUp(index_handle) + } + spirv::Op::GroupNonUniformShuffleXor => { + crate::GatherMode::ShuffleXor(index_handle) + } + _ => unreachable!(), + } + }; + + let result_type = self.lookup_type.lookup(result_type_id)?; + + let result_handle = ctx.expressions.append( + crate::Expression::SubgroupOperationResult { + ty: result_type.handle, + }, + span, + ); + self.lookup_expression.insert( + result_id, + LookupExpression { + handle: result_handle, + type_id: result_type_id, + block_id, + }, + ); + + block.push( + crate::Statement::SubgroupGather { + result: result_handle, + mode, + argument: argument_handle, + }, + span, + ); + emitter.start(ctx.expressions); + } _ => return Err(Error::UnsupportedInstruction(self.state, inst.op)), } }; @@ -3824,7 +4072,10 @@ impl> Frontend { | S::Store { .. } | S::ImageStore { .. } | S::Atomic { .. } - | S::RayQuery { .. } => {} + | S::RayQuery { .. } + | S::SubgroupBallot { .. } + | S::SubgroupCollectiveOperation { .. } + | S::SubgroupGather { .. } => {} S::Call { function: ref mut callee, ref arguments, diff --git a/naga/src/front/wgsl/error.rs b/naga/src/front/wgsl/error.rs index 24e6c9f8c5..dc1339521c 100644 --- a/naga/src/front/wgsl/error.rs +++ b/naga/src/front/wgsl/error.rs @@ -13,6 +13,7 @@ use thiserror::Error; #[derive(Clone, Debug)] pub struct ParseError { message: String, + // The first span should be the primary span, and the other ones should be complementary. 
labels: Vec<(Span, Cow<'static, str>)>, notes: Vec, } diff --git a/naga/src/front/wgsl/lower/mod.rs b/naga/src/front/wgsl/lower/mod.rs index 77212f2086..e7cce17723 100644 --- a/naga/src/front/wgsl/lower/mod.rs +++ b/naga/src/front/wgsl/lower/mod.rs @@ -874,6 +874,29 @@ impl Texture { } } +enum SubgroupGather { + BroadcastFirst, + Broadcast, + Shuffle, + ShuffleDown, + ShuffleUp, + ShuffleXor, +} + +impl SubgroupGather { + pub fn map(word: &str) -> Option { + Some(match word { + "subgroupBroadcastFirst" => Self::BroadcastFirst, + "subgroupBroadcast" => Self::Broadcast, + "subgroupShuffle" => Self::Shuffle, + "subgroupShuffleDown" => Self::ShuffleDown, + "subgroupShuffleUp" => Self::ShuffleUp, + "subgroupShuffleXor" => Self::ShuffleXor, + _ => return None, + }) + } +} + pub struct Lowerer<'source, 'temp> { index: &'temp Index<'source>, layouter: Layouter, @@ -2054,6 +2077,16 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { } } else if let Some(fun) = Texture::map(function.name) { self.texture_sample_helper(fun, arguments, span, ctx)? + } else if let Some((op, cop)) = conv::map_subgroup_operation(function.name) { + return Ok(Some( + self.subgroup_operation_helper(span, op, cop, arguments, ctx)?, + )); + } else if let Some(mode) = SubgroupGather::map(function.name) { + return Ok(Some( + self.subgroup_gather_helper(span, mode, arguments, ctx)?, + )); + } else if let Some(fun) = crate::AtomicFunction::map(function.name) { + return Ok(Some(self.atomic_helper(span, fun, arguments, ctx)?)); } else { match function.name { "select" => { @@ -2099,70 +2132,6 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { .push(crate::Statement::Store { pointer, value }, span); return Ok(None); } - "atomicAdd" => { - return Ok(Some(self.atomic_helper( - span, - crate::AtomicFunction::Add, - arguments, - ctx, - )?)) - } - "atomicSub" => { - return Ok(Some(self.atomic_helper( - span, - crate::AtomicFunction::Subtract, - arguments, - ctx, - )?)) - } - "atomicAnd" => { - return Ok(Some(self.atomic_helper( - span, - crate::AtomicFunction::And, - arguments, - ctx, - )?)) - } - "atomicOr" => { - return Ok(Some(self.atomic_helper( - span, - crate::AtomicFunction::InclusiveOr, - arguments, - ctx, - )?)) - } - "atomicXor" => { - return Ok(Some(self.atomic_helper( - span, - crate::AtomicFunction::ExclusiveOr, - arguments, - ctx, - )?)) - } - "atomicMin" => { - return Ok(Some(self.atomic_helper( - span, - crate::AtomicFunction::Min, - arguments, - ctx, - )?)) - } - "atomicMax" => { - return Ok(Some(self.atomic_helper( - span, - crate::AtomicFunction::Max, - arguments, - ctx, - )?)) - } - "atomicExchange" => { - return Ok(Some(self.atomic_helper( - span, - crate::AtomicFunction::Exchange { compare: None }, - arguments, - ctx, - )?)) - } "atomicCompareExchangeWeak" => { let mut args = ctx.prepare_args(arguments, 3, span); @@ -2221,6 +2190,14 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { .push(crate::Statement::Barrier(crate::Barrier::WORK_GROUP), span); return Ok(None); } + "subgroupBarrier" => { + ctx.prepare_args(arguments, 0, span).finish()?; + + let rctx = ctx.runtime_expression_ctx(span)?; + rctx.block + .push(crate::Statement::Barrier(crate::Barrier::SUB_GROUP), span); + return Ok(None); + } "workgroupUniformLoad" => { let mut args = ctx.prepare_args(arguments, 1, span); let expr = args.next()?; @@ -2428,6 +2405,22 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { )?; return Ok(Some(handle)); } + "subgroupBallot" => { + let mut args = ctx.prepare_args(arguments, 0, span); + let predicate = if arguments.len() == 1 { + 
Some(self.expression(args.next()?, ctx)?) + } else { + None + }; + args.finish()?; + + let result = ctx + .interrupt_emitter(crate::Expression::SubgroupBallotResult, span)?; + let rctx = ctx.runtime_expression_ctx(span)?; + rctx.block + .push(crate::Statement::SubgroupBallot { result, predicate }, span); + return Ok(Some(result)); + } _ => return Err(Error::UnknownIdent(function.span, function.name)), } }; @@ -2619,6 +2612,80 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { }) } + fn subgroup_operation_helper( + &mut self, + span: Span, + op: crate::SubgroupOperation, + collective_op: crate::CollectiveOperation, + arguments: &[Handle<ast::Expression<'source>>], + ctx: &mut ExpressionContext<'source, '_, '_>, + ) -> Result<Handle<crate::Expression>, Error<'source>> { + let mut args = ctx.prepare_args(arguments, 1, span); + + let argument = self.expression(args.next()?, ctx)?; + args.finish()?; + + let ty = ctx.register_type(argument)?; + + let result = + ctx.interrupt_emitter(crate::Expression::SubgroupOperationResult { ty }, span)?; + let rctx = ctx.runtime_expression_ctx(span)?; + rctx.block.push( + crate::Statement::SubgroupCollectiveOperation { + op, + collective_op, + argument, + result, + }, + span, + ); + Ok(result) + } + + fn subgroup_gather_helper( + &mut self, + span: Span, + mode: SubgroupGather, + arguments: &[Handle<ast::Expression<'source>>], + ctx: &mut ExpressionContext<'source, '_, '_>, + ) -> Result<Handle<crate::Expression>, Error<'source>> { + let mut args = ctx.prepare_args(arguments, 2, span); + + let argument = self.expression(args.next()?, ctx)?; + + use SubgroupGather as Sg; + let mode = if let Sg::BroadcastFirst = mode { + crate::GatherMode::BroadcastFirst + } else { + let index = self.expression(args.next()?, ctx)?; + match mode { + Sg::Broadcast => crate::GatherMode::Broadcast(index), + Sg::Shuffle => crate::GatherMode::Shuffle(index), + Sg::ShuffleDown => crate::GatherMode::ShuffleDown(index), + Sg::ShuffleUp => crate::GatherMode::ShuffleUp(index), + Sg::ShuffleXor => crate::GatherMode::ShuffleXor(index), + Sg::BroadcastFirst => unreachable!(), + } + }; + + args.finish()?; + + let ty = ctx.register_type(argument)?; + + let result = + ctx.interrupt_emitter(crate::Expression::SubgroupOperationResult { ty }, span)?; + let rctx = ctx.runtime_expression_ctx(span)?; + rctx.block.push( + crate::Statement::SubgroupGather { + mode, + argument, + result, + }, + span, + ); + Ok(result) + } + fn r#struct( &mut self, s: &ast::Struct<'source>, @@ -2877,3 +2944,19 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { } } } + +impl crate::AtomicFunction { + pub fn map(word: &str) -> Option<Self> { + Some(match word { + "atomicAdd" => crate::AtomicFunction::Add, + "atomicSub" => crate::AtomicFunction::Subtract, + "atomicAnd" => crate::AtomicFunction::And, + "atomicOr" => crate::AtomicFunction::InclusiveOr, + "atomicXor" => crate::AtomicFunction::ExclusiveOr, + "atomicMin" => crate::AtomicFunction::Min, + "atomicMax" => crate::AtomicFunction::Max, + "atomicExchange" => crate::AtomicFunction::Exchange { compare: None }, + _ => return None, + }) + } +} diff --git a/naga/src/front/wgsl/parse/conv.rs b/naga/src/front/wgsl/parse/conv.rs index 1a4911a3bd..207f0eda41 100644 --- a/naga/src/front/wgsl/parse/conv.rs +++ b/naga/src/front/wgsl/parse/conv.rs @@ -35,6 +35,11 @@ pub fn map_built_in(word: &str, span: Span) -> Result<crate::BuiltIn, Error<'_>> "local_invocation_index" => crate::BuiltIn::LocalInvocationIndex, "workgroup_id" => crate::BuiltIn::WorkGroupId, "num_workgroups" => crate::BuiltIn::NumWorkGroups, + // subgroup + "num_subgroups" => crate::BuiltIn::NumSubgroups, + "subgroup_id" => crate::BuiltIn::SubgroupId, 
+ "subgroup_size" => crate::BuiltIn::SubgroupSize, + "subgroup_invocation_id" => crate::BuiltIn::SubgroupInvocationId, _ => return Err(Error::UnknownBuiltin(span)), }) } @@ -260,3 +265,26 @@ pub fn map_conservative_depth( _ => Err(Error::UnknownConservativeDepth(span)), } } + +pub fn map_subgroup_operation( + word: &str, +) -> Option<(crate::SubgroupOperation, crate::CollectiveOperation)> { + use crate::CollectiveOperation as co; + use crate::SubgroupOperation as sg; + Some(match word { + "subgroupAll" => (sg::All, co::Reduce), + "subgroupAny" => (sg::Any, co::Reduce), + "subgroupAdd" => (sg::Add, co::Reduce), + "subgroupMul" => (sg::Mul, co::Reduce), + "subgroupMin" => (sg::Min, co::Reduce), + "subgroupMax" => (sg::Max, co::Reduce), + "subgroupAnd" => (sg::And, co::Reduce), + "subgroupOr" => (sg::Or, co::Reduce), + "subgroupXor" => (sg::Xor, co::Reduce), + "subgroupExclusiveAdd" => (sg::Add, co::ExclusiveScan), + "subgroupExclusiveMul" => (sg::Mul, co::ExclusiveScan), + "subgroupInclusiveAdd" => (sg::Add, co::InclusiveScan), + "subgroupInclusiveMul" => (sg::Mul, co::InclusiveScan), + _ => return None, + }) +} diff --git a/naga/src/lib.rs b/naga/src/lib.rs index ceb7e55b7b..24e1b02c76 100644 --- a/naga/src/lib.rs +++ b/naga/src/lib.rs @@ -274,6 +274,7 @@ pub mod back; mod block; #[cfg(feature = "compact")] pub mod compact; +pub mod error; pub mod front; pub mod keywords; pub mod proc; @@ -431,6 +432,11 @@ pub enum BuiltIn { WorkGroupId, WorkGroupSize, NumWorkGroups, + // subgroup + NumSubgroups, + SubgroupId, + SubgroupSize, + SubgroupInvocationId, } /// Number of bytes per scalar. @@ -866,7 +872,7 @@ pub enum TypeInner { BindingArray { base: Handle, size: ArraySize }, } -#[derive(Debug, Clone, Copy, PartialOrd)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] #[cfg_attr(feature = "serialize", derive(Serialize))] #[cfg_attr(feature = "deserialize", derive(Deserialize))] #[cfg_attr(feature = "arbitrary", derive(Arbitrary))] @@ -1277,6 +1283,51 @@ pub enum SwizzleComponent { W = 3, } +#[derive(Clone, Copy, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)] +#[cfg_attr(feature = "serialize", derive(Serialize))] +#[cfg_attr(feature = "deserialize", derive(Deserialize))] +#[cfg_attr(feature = "arbitrary", derive(Arbitrary))] +pub enum GatherMode { + /// All gather from the active lane with the smallest index + BroadcastFirst, + /// All gather from the same lane at the index given by the expression + Broadcast(Handle), + /// Each gathers from a different lane at the index given by the expression + Shuffle(Handle), + /// Each gathers from their lane plus the shift given by the expression + ShuffleDown(Handle), + /// Each gathers from their lane minus the shift given by the expression + ShuffleUp(Handle), + /// Each gathers from their lane xored with the given by the expression + ShuffleXor(Handle), +} + +#[derive(Clone, Copy, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)] +#[cfg_attr(feature = "serialize", derive(Serialize))] +#[cfg_attr(feature = "deserialize", derive(Deserialize))] +#[cfg_attr(feature = "arbitrary", derive(Arbitrary))] +pub enum SubgroupOperation { + All = 0, + Any = 1, + Add = 2, + Mul = 3, + Min = 4, + Max = 5, + And = 6, + Or = 7, + Xor = 8, +} + +#[derive(Clone, Copy, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)] +#[cfg_attr(feature = "serialize", derive(Serialize))] +#[cfg_attr(feature = "deserialize", derive(Deserialize))] +#[cfg_attr(feature = "arbitrary", derive(Arbitrary))] +pub enum CollectiveOperation { + Reduce = 0, + InclusiveScan = 1, + ExclusiveScan = 2, +} + 
bitflags::bitflags! { /// Memory barrier flags. #[cfg_attr(feature = "serialize", derive(Serialize))] @@ -1285,9 +1336,11 @@ bitflags::bitflags! { #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub struct Barrier: u32 { /// Barrier affects all `AddressSpace::Storage` accesses. - const STORAGE = 0x1; + const STORAGE = 1 << 0; /// Barrier affects all `AddressSpace::WorkGroup` accesses. - const WORK_GROUP = 0x2; + const WORK_GROUP = 1 << 1; + /// Barrier synchronizes execution across all invocations within a subgroup that execute this instruction. + const SUB_GROUP = 1 << 2; } } @@ -1588,6 +1641,15 @@ pub enum Expression { query: Handle<Expression>, committed: bool, }, + /// Result of a [`SubgroupBallot`] statement. + /// + /// [`SubgroupBallot`]: Statement::SubgroupBallot + SubgroupBallotResult, + /// Result of a [`SubgroupCollectiveOperation`] or [`SubgroupGather`] statement. + /// + /// [`SubgroupCollectiveOperation`]: Statement::SubgroupCollectiveOperation + /// [`SubgroupGather`]: Statement::SubgroupGather + SubgroupOperationResult { ty: Handle<Type> }, } pub use block::Block; @@ -1872,6 +1934,39 @@ pub enum Statement { /// The specific operation we're performing on `query`. fun: RayQueryFunction, }, + /// Calculate a bitmask using a boolean from each active thread in the subgroup + SubgroupBallot { + /// The [`SubgroupBallotResult`] expression representing this ballot's result. + /// + /// [`SubgroupBallotResult`]: Expression::SubgroupBallotResult + result: Handle<Expression>, + /// The value from this thread to store in the ballot + predicate: Option<Handle<Expression>>, + }, + /// Gather a value from another active thread in the subgroup + SubgroupGather { + /// Specifies which thread to gather from + mode: GatherMode, + /// The value to gather from the selected lane + argument: Handle<Expression>, + /// The [`SubgroupOperationResult`] expression representing this gather's result. + /// + /// [`SubgroupOperationResult`]: Expression::SubgroupOperationResult + result: Handle<Expression>, + }, + /// Compute a collective operation across all active threads in the subgroup + SubgroupCollectiveOperation { + /// What operation to compute + op: SubgroupOperation, + /// How to combine the results + collective_op: CollectiveOperation, + /// The value to compute over + argument: Handle<Expression>, + /// The [`SubgroupOperationResult`] expression representing this operation's result. + /// + /// [`SubgroupOperationResult`]: Expression::SubgroupOperationResult + result: Handle<Expression>, + }, } /// A function argument. diff --git a/naga/src/proc/constant_evaluator.rs b/naga/src/proc/constant_evaluator.rs index 547fbbc652..ead3d00980 100644 --- a/naga/src/proc/constant_evaluator.rs +++ b/naga/src/proc/constant_evaluator.rs @@ -476,6 +476,8 @@ pub enum ConstantEvaluatorError { ImageExpression, #[error("Constants don't support ray query expressions")] RayQueryExpression, + #[error("Constants don't support subgroup expressions")] + SubgroupExpression, #[error("Cannot access the type")] InvalidAccessBase, #[error("Cannot access at the index")] @@ -884,6 +886,12 @@ impl<'a> ConstantEvaluator<'a> { Expression::RayQueryProceedResult | Expression::RayQueryGetIntersection { .. } => { Err(ConstantEvaluatorError::RayQueryExpression) } + Expression::SubgroupBallotResult { .. } => { + Err(ConstantEvaluatorError::SubgroupExpression) + } + Expression::SubgroupOperationResult { .. 
} => { + Err(ConstantEvaluatorError::SubgroupExpression) + } } } @@ -942,10 +950,10 @@ impl<'a> ConstantEvaluator<'a> { pattern: [crate::SwizzleComponent; 4], ) -> Result, ConstantEvaluatorError> { let mut get_dst_ty = |ty| match self.types[ty].inner { - crate::TypeInner::Vector { size: _, scalar } => Ok(self.types.insert( + TypeInner::Vector { size: _, scalar } => Ok(self.types.insert( Type { name: None, - inner: crate::TypeInner::Vector { size, scalar }, + inner: TypeInner::Vector { size, scalar }, }, span, )), @@ -1236,13 +1244,11 @@ impl<'a> ConstantEvaluator<'a> { Expression::ZeroValue(ty) | Expression::Compose { ty, .. } => { match self.types[ty].inner { TypeInner::Array { size, .. } => match size { - crate::ArraySize::Constant(len) => { + ArraySize::Constant(len) => { let expr = Expression::Literal(Literal::U32(len.get())); self.register_evaluated_expr(expr, span) } - crate::ArraySize::Dynamic => { - Err(ConstantEvaluatorError::ArrayLengthDynamic) - } + ArraySize::Dynamic => Err(ConstantEvaluatorError::ArrayLengthDynamic), }, _ => Err(ConstantEvaluatorError::InvalidArrayLengthArg), } @@ -1305,7 +1311,7 @@ impl<'a> ConstantEvaluator<'a> { Expression::ZeroValue(ty) if matches!( self.types[ty].inner, - crate::TypeInner::Scalar(crate::Scalar { + TypeInner::Scalar(crate::Scalar { kind: ScalarKind::Uint, .. }) @@ -1620,7 +1626,7 @@ impl<'a> ConstantEvaluator<'a> { return self.cast(expr, target, span); }; - let crate::TypeInner::Array { + let TypeInner::Array { base: _, size, stride: _, diff --git a/naga/src/proc/index.rs b/naga/src/proc/index.rs index af3221c0fe..e2c3de8eb0 100644 --- a/naga/src/proc/index.rs +++ b/naga/src/proc/index.rs @@ -239,7 +239,7 @@ pub enum GuardedIndex { pub fn find_checked_indexes( module: &crate::Module, function: &crate::Function, - info: &crate::valid::FunctionInfo, + info: &valid::FunctionInfo, policies: BoundsCheckPolicies, ) -> BitSet { use crate::Expression as Ex; @@ -321,7 +321,7 @@ pub fn access_needs_check( mut index: GuardedIndex, module: &crate::Module, function: &crate::Function, - info: &crate::valid::FunctionInfo, + info: &valid::FunctionInfo, ) -> Option { let base_inner = info[base].ty.inner_with(&module.types); // Unwrap safety: `Err` here indicates unindexable base types and invalid diff --git a/naga/src/proc/mod.rs b/naga/src/proc/mod.rs index 0e89f29032..93aac5b3e5 100644 --- a/naga/src/proc/mod.rs +++ b/naga/src/proc/mod.rs @@ -153,56 +153,31 @@ impl super::Scalar { } } -impl PartialEq for crate::Literal { - fn eq(&self, other: &Self) -> bool { - match (*self, *other) { - (Self::F64(a), Self::F64(b)) => a.to_bits() == b.to_bits(), - (Self::F32(a), Self::F32(b)) => a.to_bits() == b.to_bits(), - (Self::U32(a), Self::U32(b)) => a == b, - (Self::I32(a), Self::I32(b)) => a == b, - (Self::U64(a), Self::U64(b)) => a == b, - (Self::I64(a), Self::I64(b)) => a == b, - (Self::Bool(a), Self::Bool(b)) => a == b, - _ => false, - } - } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum HashableLiteral { + F64(u64), + F32(u32), + U32(u32), + I32(i32), + U64(u64), + I64(i64), + Bool(bool), + AbstractInt(i64), + AbstractFloat(u64), } -impl Eq for crate::Literal {} -impl std::hash::Hash for crate::Literal { - fn hash(&self, hasher: &mut H) { - match *self { - Self::F64(v) | Self::AbstractFloat(v) => { - hasher.write_u8(0); - v.to_bits().hash(hasher); - } - Self::F32(v) => { - hasher.write_u8(1); - v.to_bits().hash(hasher); - } - Self::U32(v) => { - hasher.write_u8(2); - v.hash(hasher); - } - Self::I32(v) => { - hasher.write_u8(3); - 
v.hash(hasher); - } - Self::Bool(v) => { - hasher.write_u8(4); - v.hash(hasher); - } - Self::I64(v) => { - hasher.write_u8(5); - v.hash(hasher); - } - Self::U64(v) => { - hasher.write_u8(6); - v.hash(hasher); - } - Self::AbstractInt(v) => { - hasher.write_u8(7); - v.hash(hasher); - } + +impl From<crate::Literal> for HashableLiteral { + fn from(l: crate::Literal) -> Self { + match l { + crate::Literal::F64(v) => Self::F64(v.to_bits()), + crate::Literal::F32(v) => Self::F32(v.to_bits()), + crate::Literal::U32(v) => Self::U32(v), + crate::Literal::I32(v) => Self::I32(v), + crate::Literal::U64(v) => Self::U64(v), + crate::Literal::I64(v) => Self::I64(v), + crate::Literal::Bool(v) => Self::Bool(v), + crate::Literal::AbstractInt(v) => Self::AbstractInt(v), + crate::Literal::AbstractFloat(v) => Self::AbstractFloat(v.to_bits()), + } + } +}
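The `HashableLiteral` mirror type above exists because `f32`/`f64` are neither `Eq` nor `Hash` (`NaN != NaN`), so floats are routed through `to_bits` to get well-defined bitwise equality. A small sketch of the intended use (hypothetical helper; assumes `HashableLiteral` stays exported from `naga::proc`):

```rust
use std::collections::HashSet;

use naga::proc::HashableLiteral;

// Count distinct literals by bit pattern; two NaNs with the same bits
// now hash and compare equal, which the raw float values would not.
fn count_distinct(literals: &[naga::Literal]) -> usize {
    let set: HashSet<HashableLiteral> = literals.iter().map(|&l| l.into()).collect();
    set.len()
}
```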
@@ -279,8 +254,9 @@ impl super::TypeInner { self.scalar().map(|scalar| scalar.kind) } + /// Returns the scalar width in bytes pub fn scalar_width(&self) -> Option<u8> { - self.scalar().map(|scalar| scalar.width * 8) + self.scalar().map(|scalar| scalar.width) } pub const fn pointer_space(&self) -> Option<AddressSpace> { @@ -532,6 +508,7 @@ impl crate::Expression { match *self { Self::Literal(_) | Self::Constant(_) + | Self::Override(_) | Self::ZeroValue(_) | Self::FunctionArgument(_) | Self::GlobalVariable(_) diff --git a/naga/src/proc/terminator.rs b/naga/src/proc/terminator.rs index a5239d4eca..5edf55cb73 100644 --- a/naga/src/proc/terminator.rs +++ b/naga/src/proc/terminator.rs @@ -37,6 +37,9 @@ pub fn ensure_block_returns(block: &mut crate::Block) { | S::RayQuery { .. } | S::Atomic { .. } | S::WorkGroupUniformLoad { .. } + | S::SubgroupBallot { .. } + | S::SubgroupCollectiveOperation { .. } + | S::SubgroupGather { .. } | S::Barrier(_)), ) | None => block.push(S::Return { value: None }, Default::default()), diff --git a/naga/src/proc/typifier.rs b/naga/src/proc/typifier.rs index 845b35cb4d..3936e7efbe 100644 --- a/naga/src/proc/typifier.rs +++ b/naga/src/proc/typifier.rs @@ -598,6 +598,7 @@ impl<'a> ResolveContext<'a> { | crate::BinaryOperator::ShiftRight => past(left)?.clone(), }, crate::Expression::AtomicResult { ty, .. } => TypeResolution::Handle(ty), + crate::Expression::SubgroupOperationResult { ty } => TypeResolution::Handle(ty), crate::Expression::WorkGroupUniformLoadResult { ty } => TypeResolution::Handle(ty), crate::Expression::Select { accept, .. } => past(accept)?.clone(), crate::Expression::Derivative { expr, .. } => past(expr)?.clone(), @@ -885,6 +886,10 @@ impl<'a> ResolveContext<'a> { .ok_or(ResolveError::MissingSpecialType)?; TypeResolution::Handle(result) } + crate::Expression::SubgroupBallotResult => TypeResolution::Value(Ti::Vector { + scalar: crate::Scalar::U32, + size: crate::VectorSize::Quad, + }), }) } } diff --git a/naga/src/span.rs b/naga/src/span.rs index 10744647e9..82cfbe5a4b 100644 --- a/naga/src/span.rs +++ b/naga/src/span.rs @@ -72,8 +72,8 @@ impl Span { pub fn location(&self, source: &str) -> SourceLocation { let prefix = &source[..self.start as usize]; let line_number = prefix.matches('\n').count() as u32 + 1; - let line_start = prefix.rfind('\n').map(|pos| pos + 1).unwrap_or(0); - let line_position = source[line_start..self.start as usize].chars().count() as u32 + 1; + let line_start = prefix.rfind('\n').map(|pos| pos + 1).unwrap_or(0) as u32; + let line_position = self.start - line_start + 1; SourceLocation { line_number, @@ -107,14 +107,14 @@ impl std::ops::Index<Span> for str { /// Roughly corresponds to the positional members of [`GPUCompilationMessage`][gcm] from /// the WebGPU specification, except /// - `offset` and `length` are in bytes (UTF-8 code units), instead of UTF-16 code units. -/// - `line_position` counts entire Unicode code points, instead of UTF-16 code units. +/// - `line_position` is in bytes (UTF-8 code units), instead of UTF-16 code units. /// /// [gcm]: https://www.w3.org/TR/webgpu/#gpucompilationmessage #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub struct SourceLocation { /// 1-based line number. pub line_number: u32, - /// 1-based column of the start of this span, counted in Unicode code points. + /// 1-based column of the start of the span, counted in bytes (UTF-8 code units). pub line_position: u32, /// 0-based offset in code units (in bytes) of the start of the span. pub offset: u32, @@ -136,7 +136,7 @@ impl<E> fmt::Display for WithSpan<E> where E: fmt::Display, { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.inner.fmt(f) } } @@ -304,7 +304,7 @@ impl<E> WithSpan<E> { use term::termcolor::NoColor; let files = files::SimpleFile::new(path, source); - let config = codespan_reporting::term::Config::default(); + let config = term::Config::default(); let mut writer = NoColor::new(Vec::new()); term::emit(&mut writer, &config, &files, &self.diagnostic()).expect("cannot write error"); String::from_utf8(writer.into_inner()).unwrap() } diff --git a/naga/src/valid/analyzer.rs b/naga/src/valid/analyzer.rs index d45c25c62e..6799e5db27 100644 --- a/naga/src/valid/analyzer.rs +++ b/naga/src/valid/analyzer.rs @@ -787,6 +787,14 @@ impl FunctionInfo { non_uniform_result: self.add_ref(query), requirements: UniformityRequirements::empty(), }, + E::SubgroupBallotResult => Uniformity { + non_uniform_result: Some(handle), + requirements: UniformityRequirements::empty(), + }, + E::SubgroupOperationResult { .. 
} => Uniformity { + non_uniform_result: Some(handle), + requirements: UniformityRequirements::empty(), + }, }; let ty = resolve_context.resolve(expression, |h| Ok(&self[h].ty))?; @@ -827,7 +835,7 @@ impl FunctionInfo { let req = self.expressions[expr.index()].uniformity.requirements; if self .flags - .contains(super::ValidationFlags::CONTROL_FLOW_UNIFORMITY) + .contains(ValidationFlags::CONTROL_FLOW_UNIFORMITY) && !req.is_empty() { if let Some(cause) = disruptor { @@ -1029,6 +1037,42 @@ impl FunctionInfo { } FunctionUniformity::new() } + S::SubgroupBallot { + result: _, + predicate, + } => { + if let Some(predicate) = predicate { + let _ = self.add_ref(predicate); + } + FunctionUniformity::new() + } + S::SubgroupCollectiveOperation { + op: _, + collective_op: _, + argument, + result: _, + } => { + let _ = self.add_ref(argument); + FunctionUniformity::new() + } + S::SubgroupGather { + mode, + argument, + result: _, + } => { + let _ = self.add_ref(argument); + match mode { + crate::GatherMode::BroadcastFirst => {} + crate::GatherMode::Broadcast(index) + | crate::GatherMode::Shuffle(index) + | crate::GatherMode::ShuffleDown(index) + | crate::GatherMode::ShuffleUp(index) + | crate::GatherMode::ShuffleXor(index) => { + let _ = self.add_ref(index); + } + } + FunctionUniformity::new() + } }; disruptor = disruptor.or(uniformity.exit_disruptor()); diff --git a/naga/src/valid/expression.rs b/naga/src/valid/expression.rs index bf46fd3262..525bd28c17 100644 --- a/naga/src/valid/expression.rs +++ b/naga/src/valid/expression.rs @@ -194,7 +194,7 @@ impl super::Validator { use crate::Expression as E; if !global_expr_kind.is_const_or_override(handle) { - return Err(super::ConstExpressionError::NonConstOrOverride); + return Err(ConstExpressionError::NonConstOrOverride); } match gctx.global_expressions[handle] { @@ -211,10 +211,10 @@ impl super::Validator { } E::Splat { value, .. } => match *mod_info[value].inner_with(gctx.types) { crate::TypeInner::Scalar { .. } => {} - _ => return Err(super::ConstExpressionError::InvalidSplatType(value)), + _ => return Err(ConstExpressionError::InvalidSplatType(value)), }, _ if global_expr_kind.is_const(handle) || !self.allow_overrides => { - return Err(super::ConstExpressionError::NonFullyEvaluatedConst) + return Err(ConstExpressionError::NonFullyEvaluatedConst) } // the constant evaluator will report errors about override-expressions _ => {} @@ -1641,6 +1641,7 @@ impl super::Validator { return Err(ExpressionError::InvalidRayQueryType(query)); } }, + E::SubgroupBallotResult | E::SubgroupOperationResult { .. 
} => self.subgroup_stages, }; Ok(stages) } diff --git a/naga/src/valid/function.rs b/naga/src/valid/function.rs index fe5681449e..71128fc86d 100644 --- a/naga/src/valid/function.rs +++ b/naga/src/valid/function.rs @@ -47,6 +47,19 @@ pub enum AtomicError { ResultTypeMismatch(Handle<crate::Expression>), } +#[derive(Clone, Debug, thiserror::Error)] +#[cfg_attr(test, derive(PartialEq))] +pub enum SubgroupError { + #[error("Operand {0:?} has invalid type.")] + InvalidOperand(Handle<crate::Expression>), + #[error("Result type for {0:?} doesn't match the statement")] + ResultTypeMismatch(Handle<crate::Expression>), + #[error("Support for subgroup operation {0:?} is required")] + UnsupportedOperation(super::SubgroupOperationSet), + #[error("Unknown operation")] + UnknownOperation, +} + #[derive(Clone, Debug, thiserror::Error)] #[cfg_attr(test, derive(PartialEq))] pub enum LocalVariableError { @@ -135,6 +148,8 @@ pub enum FunctionError { InvalidRayDescriptor(Handle<crate::Expression>), #[error("Ray Query {0:?} does not have a matching type")] InvalidRayQueryType(Handle<crate::Type>), + #[error("Shader requires capability {0:?}")] + MissingCapability(super::Capabilities), #[error( "Required uniformity of control flow for {0:?} in {1:?} is not fulfilled because of {2:?}" )] @@ -155,6 +170,8 @@ pub enum FunctionError { WorkgroupUniformLoadExpressionMismatch(Handle<crate::Expression>), #[error("The expression {0:?} is not valid as a WorkGroupUniformLoad argument. It should be a Pointer in Workgroup address space")] WorkgroupUniformLoadInvalidPointer(Handle<crate::Expression>), + #[error("Subgroup operation is invalid")] + InvalidSubgroup(#[from] SubgroupError), } bitflags::bitflags! { @@ -399,6 +416,127 @@ impl super::Validator { } Ok(()) } + fn validate_subgroup_operation( + &mut self, + op: &crate::SubgroupOperation, + collective_op: &crate::CollectiveOperation, + argument: Handle<crate::Expression>, + result: Handle<crate::Expression>, + context: &BlockContext, + ) -> Result<(), WithSpan<FunctionError>> { + let argument_inner = context.resolve_type(argument, &self.valid_expression_set)?; + + let (is_scalar, scalar) = match *argument_inner { + crate::TypeInner::Scalar(scalar) => (true, scalar), + crate::TypeInner::Vector { scalar, .. 
} => (false, scalar), + _ => { + log::error!("Subgroup operand type {:?}", argument_inner); + return Err(SubgroupError::InvalidOperand(argument) + .with_span_handle(argument, context.expressions) + .into_other()); + } + }; + + use crate::ScalarKind as sk; + use crate::SubgroupOperation as sg; + match (scalar.kind, *op) { + (sk::Bool, sg::All | sg::Any) if is_scalar => {} + (sk::Sint | sk::Uint | sk::Float, sg::Add | sg::Mul | sg::Min | sg::Max) => {} + (sk::Sint | sk::Uint, sg::And | sg::Or | sg::Xor) => {} + + (_, _) => { + log::error!("Subgroup operand type {:?}", argument_inner); + return Err(SubgroupError::InvalidOperand(argument) + .with_span_handle(argument, context.expressions) + .into_other()); + } + }; + + use crate::CollectiveOperation as co; + match (*collective_op, *op) { + ( + co::Reduce, + sg::All + | sg::Any + | sg::Add + | sg::Mul + | sg::Min + | sg::Max + | sg::And + | sg::Or + | sg::Xor, + ) => {} + (co::InclusiveScan | co::ExclusiveScan, sg::Add | sg::Mul) => {} + + (_, _) => { + return Err(SubgroupError::UnknownOperation.with_span().into_other()); + } + }; + + self.emit_expression(result, context)?; + match context.expressions[result] { + crate::Expression::SubgroupOperationResult { ty } + if { &context.types[ty].inner == argument_inner } => {} + _ => { + return Err(SubgroupError::ResultTypeMismatch(result) + .with_span_handle(result, context.expressions) + .into_other()) + } + } + Ok(()) + } + fn validate_subgroup_gather( + &mut self, + mode: &crate::GatherMode, + argument: Handle<crate::Expression>, + result: Handle<crate::Expression>, + context: &BlockContext, + ) -> Result<(), WithSpan<FunctionError>> { + match *mode { + crate::GatherMode::BroadcastFirst => {} + crate::GatherMode::Broadcast(index) + | crate::GatherMode::Shuffle(index) + | crate::GatherMode::ShuffleDown(index) + | crate::GatherMode::ShuffleUp(index) + | crate::GatherMode::ShuffleXor(index) => { + let index_ty = context.resolve_type(index, &self.valid_expression_set)?; + match *index_ty { + crate::TypeInner::Scalar(crate::Scalar::U32) => {} + _ => { + log::error!( + "Subgroup gather index type {:?}, expected unsigned int", + index_ty + ); + return Err(SubgroupError::InvalidOperand(argument) + .with_span_handle(index, context.expressions) + .into_other()); + } + } + } + } + let argument_inner = context.resolve_type(argument, &self.valid_expression_set)?; + if !matches!(*argument_inner, + crate::TypeInner::Scalar(scalar, ..) | crate::TypeInner::Vector { scalar, .. 
} + if matches!(scalar.kind, crate::ScalarKind::Uint | crate::ScalarKind::Sint | crate::ScalarKind::Float) + ) { + log::error!("Subgroup gather operand type {:?}", argument_inner); + return Err(SubgroupError::InvalidOperand(argument) + .with_span_handle(argument, context.expressions) + .into_other()); + } + + self.emit_expression(result, context)?; + match context.expressions[result] { + crate::Expression::SubgroupOperationResult { ty } + if { &context.types[ty].inner == argument_inner } => {} + _ => { + return Err(SubgroupError::ResultTypeMismatch(result) + .with_span_handle(result, context.expressions) + .into_other()) + } + } + Ok(()) + } fn validate_block_impl( &mut self, @@ -613,8 +751,30 @@ impl super::Validator { stages &= super::ShaderStages::FRAGMENT; finished = true; } - S::Barrier(_) => { + S::Barrier(barrier) => { stages &= super::ShaderStages::COMPUTE; + if barrier.contains(crate::Barrier::SUB_GROUP) { + if !self.capabilities.contains( + super::Capabilities::SUBGROUP | super::Capabilities::SUBGROUP_BARRIER, + ) { + return Err(FunctionError::MissingCapability( + super::Capabilities::SUBGROUP + | super::Capabilities::SUBGROUP_BARRIER, + ) + .with_span_static(span, "missing capability for this operation")); + } + if !self + .subgroup_operations + .contains(super::SubgroupOperationSet::BASIC) + { + return Err(FunctionError::InvalidSubgroup( + SubgroupError::UnsupportedOperation( + super::SubgroupOperationSet::BASIC, + ), + ) + .with_span_static(span, "support for this operation is not present")); + } + } } S::Store { pointer, value } => { let mut current = pointer; @@ -904,6 +1064,86 @@ impl super::Validator { crate::RayQueryFunction::Terminate => {} } } + S::SubgroupBallot { result, predicate } => { + stages &= self.subgroup_stages; + if !self.capabilities.contains(super::Capabilities::SUBGROUP) { + return Err(FunctionError::MissingCapability( + super::Capabilities::SUBGROUP, + ) + .with_span_static(span, "missing capability for this operation")); + } + if !self + .subgroup_operations + .contains(super::SubgroupOperationSet::BALLOT) + { + return Err(FunctionError::InvalidSubgroup( + SubgroupError::UnsupportedOperation( + super::SubgroupOperationSet::BALLOT, + ), + ) + .with_span_static(span, "support for this operation is not present")); + } + if let Some(predicate) = predicate { + let predicate_inner = + context.resolve_type(predicate, &self.valid_expression_set)?; + if !matches!( + *predicate_inner, + crate::TypeInner::Scalar(crate::Scalar::BOOL,) + ) { + log::error!( + "Subgroup ballot predicate type {:?} expected bool", + predicate_inner + ); + return Err(SubgroupError::InvalidOperand(predicate) + .with_span_handle(predicate, context.expressions) + .into_other()); + } + } + self.emit_expression(result, context)?; + } + S::SubgroupCollectiveOperation { + ref op, + ref collective_op, + argument, + result, + } => { + stages &= self.subgroup_stages; + if !self.capabilities.contains(super::Capabilities::SUBGROUP) { + return Err(FunctionError::MissingCapability( + super::Capabilities::SUBGROUP, + ) + .with_span_static(span, "missing capability for this operation")); + } + let operation = op.required_operations(); + if !self.subgroup_operations.contains(operation) { + return Err(FunctionError::InvalidSubgroup( + SubgroupError::UnsupportedOperation(operation), + ) + .with_span_static(span, "support for this operation is not present")); + } + self.validate_subgroup_operation(op, collective_op, argument, result, context)?; + } + S::SubgroupGather { + ref mode, + argument, + result, + } 
=> { + stages &= self.subgroup_stages; + if !self.capabilities.contains(super::Capabilities::SUBGROUP) { + return Err(FunctionError::MissingCapability( + super::Capabilities::SUBGROUP, + ) + .with_span_static(span, "missing capability for this operation")); + } + let operation = mode.required_operations(); + if !self.subgroup_operations.contains(operation) { + return Err(FunctionError::InvalidSubgroup( + SubgroupError::UnsupportedOperation(operation), + ) + .with_span_static(span, "support for this operation is not present")); + } + self.validate_subgroup_gather(mode, argument, result, context)?; + } } } Ok(BlockInfo { stages, finished }) diff --git a/naga/src/valid/handles.rs b/naga/src/valid/handles.rs index 5d3087a28f..8f78204055 100644 --- a/naga/src/valid/handles.rs +++ b/naga/src/valid/handles.rs @@ -420,6 +420,8 @@ impl super::Validator { } crate::Expression::AtomicResult { .. } | crate::Expression::RayQueryProceedResult + | crate::Expression::SubgroupBallotResult + | crate::Expression::SubgroupOperationResult { .. } | crate::Expression::WorkGroupUniformLoadResult { .. } => (), crate::Expression::ArrayLength(array) => { handle.check_dep(array)?; @@ -565,6 +567,38 @@ impl super::Validator { } Ok(()) } + crate::Statement::SubgroupBallot { result, predicate } => { + validate_expr_opt(predicate)?; + validate_expr(result)?; + Ok(()) + } + crate::Statement::SubgroupCollectiveOperation { + op: _, + collective_op: _, + argument, + result, + } => { + validate_expr(argument)?; + validate_expr(result)?; + Ok(()) + } + crate::Statement::SubgroupGather { + mode, + argument, + result, + } => { + validate_expr(argument)?; + match mode { + crate::GatherMode::BroadcastFirst => {} + crate::GatherMode::Broadcast(index) + | crate::GatherMode::Shuffle(index) + | crate::GatherMode::ShuffleDown(index) + | crate::GatherMode::ShuffleUp(index) + | crate::GatherMode::ShuffleXor(index) => validate_expr(index)?, + } + validate_expr(result)?; + Ok(()) + } crate::Statement::Break | crate::Statement::Continue | crate::Statement::Kill diff --git a/naga/src/valid/interface.rs b/naga/src/valid/interface.rs index 2435b34c29..db890ddbac 100644 --- a/naga/src/valid/interface.rs +++ b/naga/src/valid/interface.rs @@ -77,6 +77,8 @@ pub enum VaryingError { location: u32, attribute: &'static str, }, + #[error("Workgroup size is multi dimensional, @builtin(subgroup_id) and @builtin(subgroup_invocation_id) are not supported.")] + InvalidMultiDimensionalSubgroupBuiltIn, } #[derive(Clone, Debug, thiserror::Error)] @@ -140,6 +142,7 @@ struct VaryingContext<'a> { impl VaryingContext<'_> { fn validate_impl( &mut self, + ep: &crate::EntryPoint, ty: Handle, binding: &crate::Binding, ) -> Result<(), VaryingError> { @@ -167,12 +170,24 @@ impl VaryingContext<'_> { Bi::PrimitiveIndex => Capabilities::PRIMITIVE_INDEX, Bi::ViewIndex => Capabilities::MULTIVIEW, Bi::SampleIndex => Capabilities::MULTISAMPLED_SHADING, + Bi::NumSubgroups + | Bi::SubgroupId + | Bi::SubgroupSize + | Bi::SubgroupInvocationId => Capabilities::SUBGROUP, _ => Capabilities::empty(), }; if !self.capabilities.contains(required) { return Err(VaryingError::UnsupportedCapability(required)); } + if matches!( + built_in, + crate::BuiltIn::SubgroupId | crate::BuiltIn::SubgroupInvocationId + ) && ep.workgroup_size[1..].iter().any(|&s| s > 1) + { + return Err(VaryingError::InvalidMultiDimensionalSubgroupBuiltIn); + } + let (visible, type_good) = match built_in { Bi::BaseInstance | Bi::BaseVertex | Bi::InstanceIndex | Bi::VertexIndex => ( self.stage == St::Vertex && 
!self.output, *ty_inner == Ti::Scalar(crate::Scalar::U32), ), + Bi::NumSubgroups | Bi::SubgroupId => ( + self.stage == St::Compute && !self.output, + *ty_inner == Ti::Scalar(crate::Scalar::U32), + ), + Bi::SubgroupSize | Bi::SubgroupInvocationId => ( + match self.stage { + St::Compute | St::Fragment => !self.output, + St::Vertex => false, + }, + *ty_inner == Ti::Scalar(crate::Scalar::U32), + ), }; if !visible { @@ -354,13 +380,14 @@ impl VaryingContext<'_> { fn validate( &mut self, + ep: &crate::EntryPoint, ty: Handle<crate::Type>, binding: Option<&crate::Binding>, ) -> Result<(), WithSpan<VaryingError>> { let span_context = self.types.get_span_context(ty); match binding { Some(binding) => self - .validate_impl(ty, binding) + .validate_impl(ep, ty, binding) .map_err(|e| e.with_span_context(span_context)), None => { match self.types[ty].inner { @@ -377,7 +404,7 @@ impl VaryingContext<'_> { } } Some(ref binding) => self - .validate_impl(member.ty, binding) + .validate_impl(ep, member.ty, binding) .map_err(|e| e.with_span_context(span_context))?, } } @@ -609,7 +636,7 @@ impl super::Validator { capabilities: self.capabilities, flags: self.flags, }; - ctx.validate(fa.ty, fa.binding.as_ref()) + ctx.validate(ep, fa.ty, fa.binding.as_ref()) .map_err_inner(|e| EntryPointError::Argument(index as u32, e).with_span())?; } @@ -627,7 +654,7 @@ impl super::Validator { capabilities: self.capabilities, flags: self.flags, }; - ctx.validate(fr.ty, fr.binding.as_ref()) + ctx.validate(ep, fr.ty, fr.binding.as_ref()) .map_err_inner(|e| EntryPointError::Result(e).with_span())?; if ctx.second_blend_source { // Only the first location may be used when dual source blending diff --git a/naga/src/valid/mod.rs b/naga/src/valid/mod.rs index f34c0f6f1a..a0057f39ac 100644 --- a/naga/src/valid/mod.rs +++ b/naga/src/valid/mod.rs @@ -77,7 +77,7 @@ bitflags::bitflags! { #[cfg_attr(feature = "serialize", derive(serde::Serialize))] #[cfg_attr(feature = "deserialize", derive(serde::Deserialize))] #[derive(Clone, Copy, Debug, Eq, PartialEq)] - pub struct Capabilities: u16 { + pub struct Capabilities: u32 { /// Support for [`AddressSpace::PushConstant`]. const PUSH_CONSTANT = 0x1; /// Float values with width = 8. @@ -110,6 +110,10 @@ bitflags::bitflags! { const CUBE_ARRAY_TEXTURES = 0x4000; /// Support for 64-bit signed and unsigned integers. const SHADER_INT64 = 0x8000; + /// Support for subgroup operations. + const SUBGROUP = 0x10000; + /// Support for subgroup barriers. + const SUBGROUP_BARRIER = 0x20000; } } @@ -119,6 +123,57 @@ impl Default for Capabilities { } } 
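The capability plumbing above is driven by the embedder: `Capabilities::SUBGROUP`/`SUBGROUP_BARRIER` gate the statements, while the new `subgroup_stages`/`subgroup_operations` builder methods (added just below) narrow which stages and which operation sets validate. A configuration sketch (illustrative only; which flags a given backend should advertise varies):

```rust
use naga::valid::{
    Capabilities, ShaderStages, SubgroupOperationSet, ValidationFlags, Validator,
};

fn subgroup_capable_validator() -> Validator {
    let mut validator = Validator::new(
        ValidationFlags::all(),
        Capabilities::SUBGROUP | Capabilities::SUBGROUP_BARRIER,
    );
    // Both builder methods return `&mut Self`, so the calls chain.
    validator
        .subgroup_stages(ShaderStages::COMPUTE | ShaderStages::FRAGMENT)
        .subgroup_operations(SubgroupOperationSet::all());
    validator
}
```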
+bitflags::bitflags! { + /// Supported subgroup operations + #[cfg_attr(feature = "serialize", derive(serde::Serialize))] + #[cfg_attr(feature = "deserialize", derive(serde::Deserialize))] + #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] + pub struct SubgroupOperationSet: u8 { + /// Elect, Barrier + const BASIC = 1 << 0; + /// Any, All + const VOTE = 1 << 1; + /// reductions, scans + const ARITHMETIC = 1 << 2; + /// ballot, broadcast + const BALLOT = 1 << 3; + /// shuffle, shuffle xor + const SHUFFLE = 1 << 4; + /// shuffle up, down + const SHUFFLE_RELATIVE = 1 << 5; + // We don't support these operations yet + // /// Clustered + // const CLUSTERED = 1 << 6; + // /// Quad supported + // const QUAD_FRAGMENT_COMPUTE = 1 << 7; + // /// Quad supported in all stages + // const QUAD_ALL_STAGES = 1 << 8; + } +} + +impl super::SubgroupOperation { + const fn required_operations(&self) -> SubgroupOperationSet { + use SubgroupOperationSet as S; + match *self { + Self::All | Self::Any => S::VOTE, + Self::Add | Self::Mul | Self::Min | Self::Max | Self::And | Self::Or | Self::Xor => { + S::ARITHMETIC + } + } + } +} + +impl super::GatherMode { + const fn required_operations(&self) -> SubgroupOperationSet { + use SubgroupOperationSet as S; + match *self { + Self::BroadcastFirst | Self::Broadcast(_) => S::BALLOT, + Self::Shuffle(_) | Self::ShuffleXor(_) => S::SHUFFLE, + Self::ShuffleUp(_) | Self::ShuffleDown(_) => S::SHUFFLE_RELATIVE, + } + } +} + bitflags::bitflags! { /// Validation flags. #[cfg_attr(feature = "serialize", derive(serde::Serialize))] @@ -166,6 +221,8 @@ impl ops::Index<Handle<crate::Function>> for ModuleInfo { pub struct Validator { flags: ValidationFlags, capabilities: Capabilities, + subgroup_stages: ShaderStages, + subgroup_operations: SubgroupOperationSet, types: Vec<r#type::TypeInfo>, layouter: Layouter, location_mask: BitSet, @@ -317,6 +374,8 @@ impl Validator { Validator { flags, capabilities, + subgroup_stages: ShaderStages::empty(), + subgroup_operations: SubgroupOperationSet::empty(), types: Vec::new(), layouter: Layouter::default(), location_mask: BitSet::new(), @@ -329,6 +388,16 @@ impl Validator { } } + pub fn subgroup_stages(&mut self, stages: ShaderStages) -> &mut Self { + self.subgroup_stages = stages; + self + } + + pub fn subgroup_operations(&mut self, operations: SubgroupOperationSet) -> &mut Self { + self.subgroup_operations = operations; + self + } + /// Reset the validator internals pub fn reset(&mut self) { self.types.clear(); diff --git a/naga/src/valid/type.rs b/naga/src/valid/type.rs index 03e87fd99b..f5b9856074 100644 --- a/naga/src/valid/type.rs +++ b/naga/src/valid/type.rs @@ -328,7 +328,6 @@ impl super::Validator { TypeFlags::DATA | TypeFlags::SIZED | TypeFlags::COPY - | TypeFlags::HOST_SHAREABLE | TypeFlags::ARGUMENT | TypeFlags::CONSTRUCTIBLE | shareable, diff --git a/naga/tests/in/overrides.wgsl b/naga/tests/in/overrides.wgsl index 6173c3463f..a746ce1c76 100644 --- a/naga/tests/in/overrides.wgsl +++ b/naga/tests/in/overrides.wgsl @@ -14,6 +14,7 @@ override inferred_f32 = 2.718; var gain_x_10: f32 = gain * 10.; +var store_override: f32; @compute @workgroup_size(1) fn main() { @@ -22,4 +23,6 @@ fn main() { var x = a; var gain_x_100 = gain_x_10 * 10.; + + store_override = gain; } diff --git a/naga/tests/in/spv/spec-constants-issue-5598.spv b/naga/tests/in/spv/spec-constants-issue-5598.spv new file mode 100644 index 0000000000..2f32de970d Binary files /dev/null and b/naga/tests/in/spv/spec-constants-issue-5598.spv differ diff --git a/naga/tests/in/spv/spec-constants-issue-5598.spvasm 
b/naga/tests/in/spv/spec-constants-issue-5598.spvasm new file mode 100644 index 0000000000..a1fdbcbdd8 --- /dev/null +++ b/naga/tests/in/spv/spec-constants-issue-5598.spvasm @@ -0,0 +1,96 @@ +; SPIR-V +; Version: 1.5 +; Generator: Google rspirv; 0 +; Bound: 68 +; Schema: 0 + OpCapability Shader + OpCapability VulkanMemoryModel + OpMemoryModel Logical Vulkan + OpEntryPoint Fragment %1 "fragment" %gl_FragCoord %3 + OpEntryPoint Vertex %4 "vertex" %gl_VertexIndex %gl_Position + OpExecutionMode %1 OriginUpperLeft + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %10 SpecId 100 + OpDecorate %3 Location 0 + OpDecorate %_arr_v4float_uint_6 ArrayStride 16 + OpDecorate %gl_VertexIndex BuiltIn VertexIndex + OpDecorate %gl_Position BuiltIn Position + OpDecorate %gl_Position Invariant + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %17 = OpTypeFunction %void +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %10 = OpSpecConstant %uint 2 + %uint_1 = OpConstant %uint 1 + %v2float = OpTypeVector %float 2 +%_ptr_Output_float = OpTypePointer Output %float + %3 = OpVariable %_ptr_Output_v4float Output + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %uint_6 = OpConstant %uint 6 +%_arr_v4float_uint_6 = OpTypeArray %v4float %uint_6 +%_ptr_Function__arr_v4float_uint_6 = OpTypePointer Function %_arr_v4float_uint_6 +%gl_VertexIndex = OpVariable %_ptr_Input_uint Input + %float_n1 = OpConstant %float -1 + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %32 = OpConstantComposite %v4float %float_n1 %float_n1 %float_0 %float_1 + %33 = OpConstantComposite %v4float %float_1 %float_n1 %float_0 %float_1 + %34 = OpConstantComposite %v4float %float_1 %float_1 %float_0 %float_1 + %35 = OpConstantComposite %v4float %float_n1 %float_1 %float_0 %float_1 + %36 = OpConstantComposite %_arr_v4float_uint_6 %32 %33 %34 %34 %35 %32 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%gl_Position = OpVariable %_ptr_Output_v4float Output + %float_0_25 = OpConstant %float 0.25 + %float_0_5 = OpConstant %float 0.5 + %1 = OpFunction %void None %17 + %38 = OpLabel + %39 = OpLoad %v4float %gl_FragCoord + %40 = OpCompositeExtract %float %39 0 + %41 = OpCompositeExtract %float %39 1 + %42 = OpIEqual %bool %10 %uint_1 + OpSelectionMerge %43 None + OpBranchConditional %42 %44 %45 + %44 = OpLabel + %46 = OpFMul %float %40 %float_0_5 + %47 = OpFMul %float %41 %float_0_5 + %48 = OpCompositeConstruct %v2float %46 %47 + OpBranch %43 + %45 = OpLabel + %49 = OpFMul %float %40 %float_0_25 + %50 = OpFMul %float %41 %float_0_25 + %51 = OpCompositeConstruct %v2float %49 %50 + OpBranch %43 + %43 = OpLabel + %52 = OpPhi %v2float %48 %44 %51 %45 + %53 = OpCompositeExtract %float %52 0 + %54 = OpAccessChain %_ptr_Output_float %3 %uint_0 + OpStore %54 %53 + %55 = OpCompositeExtract %float %52 1 + %56 = OpAccessChain %_ptr_Output_float %3 %uint_1 + OpStore %56 %55 + OpReturn + OpFunctionEnd + %4 = OpFunction %void None %17 + %57 = OpLabel + %58 = OpVariable %_ptr_Function__arr_v4float_uint_6 Function + %59 = OpLoad %uint %gl_VertexIndex + OpStore %58 %36 + %60 = OpULessThan %bool %59 %uint_6 + OpSelectionMerge %61 None + OpBranchConditional %60 %62 %63 + %62 = OpLabel + %64 = OpInBoundsAccessChain %_ptr_Function_v4float %58 %59 + %65 = OpLoad %v4float %64 + OpStore %gl_Position %65 + OpBranch %61 + %63 = 
OpLabel + OpBranch %61 + %61 = OpLabel + OpReturn + OpFunctionEnd diff --git a/naga/tests/in/spv/subgroup-operations-s.param.ron b/naga/tests/in/spv/subgroup-operations-s.param.ron new file mode 100644 index 0000000000..122542d1f6 --- /dev/null +++ b/naga/tests/in/spv/subgroup-operations-s.param.ron @@ -0,0 +1,27 @@ +( + god_mode: true, + spv: ( + version: (1, 3), + ), + msl: ( + lang_version: (2, 4), + per_entry_point_map: {}, + inline_samplers: [], + spirv_cross_compatibility: false, + fake_missing_bindings: false, + zero_initialize_workgroup_memory: true, + ), + glsl: ( + version: Desktop(430), + writer_flags: (""), + binding_map: { }, + zero_initialize_workgroup_memory: true, + ), + hlsl: ( + shader_model: V6_0, + binding_map: {}, + fake_missing_bindings: true, + special_constants_binding: None, + zero_initialize_workgroup_memory: true, + ), +) diff --git a/naga/tests/in/spv/subgroup-operations-s.spv b/naga/tests/in/spv/subgroup-operations-s.spv new file mode 100644 index 0000000000..d4bf0191db Binary files /dev/null and b/naga/tests/in/spv/subgroup-operations-s.spv differ diff --git a/naga/tests/in/spv/subgroup-operations-s.spvasm b/naga/tests/in/spv/subgroup-operations-s.spvasm new file mode 100644 index 0000000000..72c68aa46c --- /dev/null +++ b/naga/tests/in/spv/subgroup-operations-s.spvasm @@ -0,0 +1,75 @@ +; SPIR-V +; Version: 1.3 +; Generator: rspirv +; Bound: 54 +OpCapability Shader +OpCapability GroupNonUniform +OpCapability GroupNonUniformBallot +OpCapability GroupNonUniformVote +OpCapability GroupNonUniformArithmetic +OpCapability GroupNonUniformShuffle +OpCapability GroupNonUniformShuffleRelative +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %15 "main" %6 %9 %11 %13 +OpExecutionMode %15 LocalSize 1 1 1 +OpDecorate %6 BuiltIn NumSubgroups +OpDecorate %9 BuiltIn SubgroupId +OpDecorate %11 BuiltIn SubgroupSize +OpDecorate %13 BuiltIn SubgroupLocalInvocationId +%2 = OpTypeVoid +%3 = OpTypeInt 32 0 +%4 = OpTypeBool +%7 = OpTypePointer Input %3 +%6 = OpVariable %7 Input +%9 = OpVariable %7 Input +%11 = OpVariable %7 Input +%13 = OpVariable %7 Input +%16 = OpTypeFunction %2 +%17 = OpConstant %3 1 +%18 = OpConstant %3 0 +%19 = OpConstant %3 4 +%21 = OpConstant %3 3 +%22 = OpConstant %3 2 +%23 = OpConstant %3 8 +%26 = OpTypeVector %3 4 +%28 = OpConstantTrue %4 +%15 = OpFunction %2 None %16 +%5 = OpLabel +%8 = OpLoad %3 %6 +%10 = OpLoad %3 %9 +%12 = OpLoad %3 %11 +%14 = OpLoad %3 %13 +OpBranch %20 +%20 = OpLabel +OpControlBarrier %21 %22 %23 +%24 = OpBitwiseAnd %3 %14 %17 +%25 = OpIEqual %4 %24 %17 +%27 = OpGroupNonUniformBallot %26 %21 %25 +%29 = OpGroupNonUniformBallot %26 %21 %28 +%30 = OpINotEqual %4 %14 %18 +%31 = OpGroupNonUniformAll %4 %21 %30 +%32 = OpIEqual %4 %14 %18 +%33 = OpGroupNonUniformAny %4 %21 %32 +%34 = OpGroupNonUniformIAdd %3 %21 Reduce %14 +%35 = OpGroupNonUniformIMul %3 %21 Reduce %14 +%36 = OpGroupNonUniformUMin %3 %21 Reduce %14 +%37 = OpGroupNonUniformUMax %3 %21 Reduce %14 +%38 = OpGroupNonUniformBitwiseAnd %3 %21 Reduce %14 +%39 = OpGroupNonUniformBitwiseOr %3 %21 Reduce %14 +%40 = OpGroupNonUniformBitwiseXor %3 %21 Reduce %14 +%41 = OpGroupNonUniformIAdd %3 %21 ExclusiveScan %14 +%42 = OpGroupNonUniformIMul %3 %21 ExclusiveScan %14 +%43 = OpGroupNonUniformIAdd %3 %21 InclusiveScan %14 +%44 = OpGroupNonUniformIMul %3 %21 InclusiveScan %14 +%45 = OpGroupNonUniformBroadcastFirst %3 %21 %14 +%46 = OpGroupNonUniformBroadcast %3 %21 %14 %19 +%47 = OpISub %3 %12 %17 +%48 = OpISub %3 %47 %14 +%49 = 
OpGroupNonUniformShuffle %3 %21 %14 %48 +%50 = OpGroupNonUniformShuffleDown %3 %21 %14 %17 +%51 = OpGroupNonUniformShuffleUp %3 %21 %14 %17 +%52 = OpISub %3 %12 %17 +%53 = OpGroupNonUniformShuffleXor %3 %21 %14 %52 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/in/subgroup-operations.param.ron b/naga/tests/in/subgroup-operations.param.ron new file mode 100644 index 0000000000..122542d1f6 --- /dev/null +++ b/naga/tests/in/subgroup-operations.param.ron @@ -0,0 +1,27 @@ +( + god_mode: true, + spv: ( + version: (1, 3), + ), + msl: ( + lang_version: (2, 4), + per_entry_point_map: {}, + inline_samplers: [], + spirv_cross_compatibility: false, + fake_missing_bindings: false, + zero_initialize_workgroup_memory: true, + ), + glsl: ( + version: Desktop(430), + writer_flags: (""), + binding_map: { }, + zero_initialize_workgroup_memory: true, + ), + hlsl: ( + shader_model: V6_0, + binding_map: {}, + fake_missing_bindings: true, + special_constants_binding: None, + zero_initialize_workgroup_memory: true, + ), +) diff --git a/naga/tests/in/subgroup-operations.wgsl b/naga/tests/in/subgroup-operations.wgsl new file mode 100644 index 0000000000..bb6eb47fb5 --- /dev/null +++ b/naga/tests/in/subgroup-operations.wgsl @@ -0,0 +1,37 @@ +struct Structure { + @builtin(num_subgroups) num_subgroups: u32, + @builtin(subgroup_size) subgroup_size: u32, +}; + +@compute @workgroup_size(1) +fn main( + sizes: Structure, + @builtin(subgroup_id) subgroup_id: u32, + @builtin(subgroup_invocation_id) subgroup_invocation_id: u32, +) { + subgroupBarrier(); + + subgroupBallot((subgroup_invocation_id & 1u) == 1u); + subgroupBallot(); + + subgroupAll(subgroup_invocation_id != 0u); + subgroupAny(subgroup_invocation_id == 0u); + subgroupAdd(subgroup_invocation_id); + subgroupMul(subgroup_invocation_id); + subgroupMin(subgroup_invocation_id); + subgroupMax(subgroup_invocation_id); + subgroupAnd(subgroup_invocation_id); + subgroupOr(subgroup_invocation_id); + subgroupXor(subgroup_invocation_id); + subgroupExclusiveAdd(subgroup_invocation_id); + subgroupExclusiveMul(subgroup_invocation_id); + subgroupInclusiveAdd(subgroup_invocation_id); + subgroupInclusiveMul(subgroup_invocation_id); + + subgroupBroadcastFirst(subgroup_invocation_id); + subgroupBroadcast(subgroup_invocation_id, 4u); + subgroupShuffle(subgroup_invocation_id, sizes.subgroup_size - 1u - subgroup_invocation_id); + subgroupShuffleDown(subgroup_invocation_id, 1u); + subgroupShuffleUp(subgroup_invocation_id, 1u); + subgroupShuffleXor(subgroup_invocation_id, sizes.subgroup_size - 1u); +} diff --git a/naga/tests/out/analysis/overrides.info.ron b/naga/tests/out/analysis/overrides.info.ron index 00d8ce1ea8..12fa4b339f 100644 --- a/naga/tests/out/analysis/overrides.info.ron +++ b/naga/tests/out/analysis/overrides.info.ron @@ -16,6 +16,7 @@ sampling_set: [], global_uses: [ ("READ"), + ("WRITE"), ], expressions: [ ( @@ -138,6 +139,27 @@ space: Function, )), ), + ( + uniformity: ( + non_uniform_result: Some(12), + requirements: (""), + ), + ref_count: 1, + assignable_global: Some(2), + ty: Value(Pointer( + base: 2, + space: Private, + )), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Handle(2), + ), ], sampling: [], dual_source_blending: false, diff --git a/naga/tests/out/glsl/overrides.main.Compute.glsl b/naga/tests/out/glsl/overrides.main.Compute.glsl index b6d86d50ba..d1170df962 100644 --- a/naga/tests/out/glsl/overrides.main.Compute.glsl +++ 
b/naga/tests/out/glsl/overrides.main.Compute.glsl @@ -15,6 +15,8 @@ const float inferred_f32_ = 2.718; float gain_x_10_ = 11.0; +float store_override = 0.0; + void main() { float t = 23.0; @@ -23,6 +25,7 @@ void main() { x = true; float _e9 = gain_x_10_; gain_x_100_ = (_e9 * 10.0); + store_override = gain; return; } diff --git a/naga/tests/out/glsl/spec-constants-issue-5598.fragment.Fragment.glsl b/naga/tests/out/glsl/spec-constants-issue-5598.fragment.Fragment.glsl new file mode 100644 index 0000000000..e81d8fa1b1 --- /dev/null +++ b/naga/tests/out/glsl/spec-constants-issue-5598.fragment.Fragment.glsl @@ -0,0 +1,34 @@ +#version 310 es + +precision highp float; +precision highp int; + +vec4 global = vec4(0.0); + +vec4 global_1 = vec4(0.0); + +layout(location = 0) out vec4 _fs2p_location0; + +void function() { + vec2 phi_52_ = vec2(0.0); + vec4 _e7 = global; + if (false) { + phi_52_ = vec2((_e7.x * 0.5), (_e7.y * 0.5)); + } else { + phi_52_ = vec2((_e7.x * 0.25), (_e7.y * 0.25)); + } + vec2 _e20 = phi_52_; + global_1[0u] = _e20.x; + global_1[1u] = _e20.y; + return; +} + +void main() { + vec4 param = gl_FragCoord; + global = param; + function(); + vec4 _e3 = global_1; + _fs2p_location0 = _e3; + return; +} + diff --git a/naga/tests/out/glsl/spec-constants-issue-5598.vertex.Vertex.glsl b/naga/tests/out/glsl/spec-constants-issue-5598.vertex.Vertex.glsl new file mode 100644 index 0000000000..256e9380ac --- /dev/null +++ b/naga/tests/out/glsl/spec-constants-issue-5598.vertex.Vertex.glsl @@ -0,0 +1,34 @@ +#version 310 es + +precision highp float; +precision highp int; + +uint global_2 = 0u; + +vec4 global_3 = vec4(0.0, 0.0, 0.0, 1.0); + +invariant gl_Position; + +void function_1() { + vec4 local[6] = vec4[6](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); + uint _e5 = global_2; + local = vec4[6](vec4(-1.0, -1.0, 0.0, 1.0), vec4(1.0, -1.0, 0.0, 1.0), vec4(1.0, 1.0, 0.0, 1.0), vec4(1.0, 1.0, 0.0, 1.0), vec4(-1.0, 1.0, 0.0, 1.0), vec4(-1.0, -1.0, 0.0, 1.0)); + if ((_e5 < 6u)) { + vec4 _e8 = local[_e5]; + global_3 = _e8; + } + return; +} + +void main() { + uint param_1 = uint(gl_VertexID); + global_2 = param_1; + function_1(); + float _e4 = global_3.y; + global_3.y = -(_e4); + vec4 _e6 = global_3; + gl_Position = _e6; + gl_Position.yz = vec2(-gl_Position.y, gl_Position.z * 2.0 - gl_Position.w); + return; +} + diff --git a/naga/tests/out/glsl/subgroup-operations-s.main.Compute.glsl b/naga/tests/out/glsl/subgroup-operations-s.main.Compute.glsl new file mode 100644 index 0000000000..cc1aac5417 --- /dev/null +++ b/naga/tests/out/glsl/subgroup-operations-s.main.Compute.glsl @@ -0,0 +1,58 @@ +#version 430 core +#extension GL_ARB_compute_shader : require +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_arithmetic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_shuffle : require +#extension GL_KHR_shader_subgroup_shuffle_relative : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +uint num_subgroups_1 = 0u; + +uint subgroup_id_1 = 0u; + +uint subgroup_size_1 = 0u; + +uint subgroup_invocation_id_1 = 0u; + + +void main_1() { + uint _e5 = subgroup_size_1; + uint _e6 = subgroup_invocation_id_1; + uvec4 _e9 = subgroupBallot(((_e6 & 1u) == 1u)); + uvec4 _e10 = subgroupBallot(true); + bool _e12 = subgroupAll((_e6 != 0u)); + bool _e14 = subgroupAny((_e6 == 0u)); + uint _e15 = subgroupAdd(_e6); + uint _e16 = subgroupMul(_e6); + uint _e17 = 
subgroupMin(_e6); + uint _e18 = subgroupMax(_e6); + uint _e19 = subgroupAnd(_e6); + uint _e20 = subgroupOr(_e6); + uint _e21 = subgroupXor(_e6); + uint _e22 = subgroupExclusiveAdd(_e6); + uint _e23 = subgroupExclusiveMul(_e6); + uint _e24 = subgroupInclusiveAdd(_e6); + uint _e25 = subgroupInclusiveMul(_e6); + uint _e26 = subgroupBroadcastFirst(_e6); + uint _e27 = subgroupBroadcast(_e6, 4u); + uint _e30 = subgroupShuffle(_e6, ((_e5 - 1u) - _e6)); + uint _e31 = subgroupShuffleDown(_e6, 1u); + uint _e32 = subgroupShuffleUp(_e6, 1u); + uint _e34 = subgroupShuffleXor(_e6, (_e5 - 1u)); + return; +} + +void main() { + uint num_subgroups = gl_NumSubgroups; + uint subgroup_id = gl_SubgroupID; + uint subgroup_size = gl_SubgroupSize; + uint subgroup_invocation_id = gl_SubgroupInvocationID; + num_subgroups_1 = num_subgroups; + subgroup_id_1 = subgroup_id; + subgroup_size_1 = subgroup_size; + subgroup_invocation_id_1 = subgroup_invocation_id; + main_1(); +} + diff --git a/naga/tests/out/glsl/subgroup-operations.main.Compute.glsl b/naga/tests/out/glsl/subgroup-operations.main.Compute.glsl new file mode 100644 index 0000000000..05ab403565 --- /dev/null +++ b/naga/tests/out/glsl/subgroup-operations.main.Compute.glsl @@ -0,0 +1,45 @@ +#version 430 core +#extension GL_ARB_compute_shader : require +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_arithmetic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_shuffle : require +#extension GL_KHR_shader_subgroup_shuffle_relative : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct Structure { + uint num_subgroups; + uint subgroup_size; +}; + +void main() { + Structure sizes = Structure(gl_NumSubgroups, gl_SubgroupSize); + uint subgroup_id = gl_SubgroupID; + uint subgroup_invocation_id = gl_SubgroupInvocationID; + subgroupMemoryBarrier(); + barrier(); + uvec4 _e7 = subgroupBallot(((subgroup_invocation_id & 1u) == 1u)); + uvec4 _e8 = subgroupBallot(true); + bool _e11 = subgroupAll((subgroup_invocation_id != 0u)); + bool _e14 = subgroupAny((subgroup_invocation_id == 0u)); + uint _e15 = subgroupAdd(subgroup_invocation_id); + uint _e16 = subgroupMul(subgroup_invocation_id); + uint _e17 = subgroupMin(subgroup_invocation_id); + uint _e18 = subgroupMax(subgroup_invocation_id); + uint _e19 = subgroupAnd(subgroup_invocation_id); + uint _e20 = subgroupOr(subgroup_invocation_id); + uint _e21 = subgroupXor(subgroup_invocation_id); + uint _e22 = subgroupExclusiveAdd(subgroup_invocation_id); + uint _e23 = subgroupExclusiveMul(subgroup_invocation_id); + uint _e24 = subgroupInclusiveAdd(subgroup_invocation_id); + uint _e25 = subgroupInclusiveMul(subgroup_invocation_id); + uint _e26 = subgroupBroadcastFirst(subgroup_invocation_id); + uint _e28 = subgroupBroadcast(subgroup_invocation_id, 4u); + uint _e33 = subgroupShuffle(subgroup_invocation_id, ((sizes.subgroup_size - 1u) - subgroup_invocation_id)); + uint _e35 = subgroupShuffleDown(subgroup_invocation_id, 1u); + uint _e37 = subgroupShuffleUp(subgroup_invocation_id, 1u); + uint _e41 = subgroupShuffleXor(subgroup_invocation_id, (sizes.subgroup_size - 1u)); + return; +} + diff --git a/naga/tests/out/hlsl/access.hlsl b/naga/tests/out/hlsl/access.hlsl index 47d9cc24f7..4f0cb4b839 100644 --- a/naga/tests/out/hlsl/access.hlsl +++ b/naga/tests/out/hlsl/access.hlsl @@ -158,10 +158,15 @@ MatCx2InArray ConstructMatCx2InArray(float4x2 arg0[2]) { return ret; } +typedef float4x2 
ret_ZeroValuearray2_float4x2_[2]; +ret_ZeroValuearray2_float4x2_ ZeroValuearray2_float4x2_() { + return (float4x2[2])0; +} + void test_matrix_within_array_within_struct_accesses() { int idx_1 = 1; - MatCx2InArray t_1 = ConstructMatCx2InArray((float4x2[2])0); + MatCx2InArray t_1 = ConstructMatCx2InArray(ZeroValuearray2_float4x2_()); int _expr3 = idx_1; idx_1 = (_expr3 - 1); @@ -180,7 +185,7 @@ void test_matrix_within_array_within_struct_accesses() float l7_ = __get_col_of_mat4x2(nested_mat_cx2_.am[0], _expr46)[_expr48]; int _expr55 = idx_1; idx_1 = (_expr55 + 1); - t_1.am = (__mat4x2[2])(float4x2[2])0; + t_1.am = (__mat4x2[2])ZeroValuearray2_float4x2_(); t_1.am[0] = (__mat4x2)float4x2((8.0).xx, (7.0).xx, (6.0).xx, (5.0).xx); t_1.am[0]._0 = (9.0).xx; int _expr77 = idx_1; @@ -231,6 +236,11 @@ ret_Constructarray5_int_ Constructarray5_int_(int arg0, int arg1, int arg2, int return ret; } +typedef float ret_ZeroValuearray5_array10_float__[5][10]; +ret_ZeroValuearray5_array10_float__ ZeroValuearray5_array10_float__() { + return (float[5][10])0; +} + typedef uint2 ret_Constructarray2_uint2_[2]; ret_Constructarray2_uint2_ Constructarray2_uint2_(uint2 arg0, uint2 arg1) { uint2 ret[2] = { arg0, arg1 }; @@ -262,10 +272,14 @@ float4 foo_vert(uint vi : SV_VertexID) : SV_Position c2_ = Constructarray5_int_(a_1, int(b), 3, 4, 5); c2_[(vi + 1u)] = 42; int value = c2_[vi]; - const float _e47 = test_arr_as_arg((float[5][10])0); + const float _e47 = test_arr_as_arg(ZeroValuearray5_array10_float__()); return float4(mul(float4((value).xxxx), _matrix), 2.0); } +int2 ZeroValueint2() { + return (int2)0; +} + float4 foo_frag() : SV_Target0 { bar.Store(8+16+0, asuint(1.0)); @@ -282,7 +296,7 @@ float4 foo_frag() : SV_Target0 bar.Store2(144+8, asuint(_value2[1])); } bar.Store(0+8+160, asuint(1)); - qux.Store2(0, asuint((int2)0)); + qux.Store2(0, asuint(ZeroValueint2())); return (0.0).xxxx; } diff --git a/naga/tests/out/hlsl/constructors.hlsl b/naga/tests/out/hlsl/constructors.hlsl index 39f3137605..90d8db9a33 100644 --- a/naga/tests/out/hlsl/constructors.hlsl +++ b/naga/tests/out/hlsl/constructors.hlsl @@ -18,17 +18,50 @@ ret_Constructarray4_int_ Constructarray4_int_(int arg0, int arg1, int arg2, int return ret; } +bool ZeroValuebool() { + return (bool)0; +} + +int ZeroValueint() { + return (int)0; +} + +uint ZeroValueuint() { + return (uint)0; +} + +float ZeroValuefloat() { + return (float)0; +} + +uint2 ZeroValueuint2() { + return (uint2)0; +} + +float2x2 ZeroValuefloat2x2() { + return (float2x2)0; +} + +typedef Foo ret_ZeroValuearray3_Foo_[3]; +ret_ZeroValuearray3_Foo_ ZeroValuearray3_Foo_() { + return (Foo[3])0; +} + +Foo ZeroValueFoo() { + return (Foo)0; +} + static const float3 const2_ = float3(0.0, 1.0, 2.0); static const float2x2 const3_ = float2x2(float2(0.0, 1.0), float2(2.0, 3.0)); static const float2x2 const4_[1] = Constructarray1_float2x2_(float2x2(float2(0.0, 1.0), float2(2.0, 3.0))); -static const bool cz0_ = (bool)0; -static const int cz1_ = (int)0; -static const uint cz2_ = (uint)0; -static const float cz3_ = (float)0; -static const uint2 cz4_ = (uint2)0; -static const float2x2 cz5_ = (float2x2)0; -static const Foo cz6_[3] = (Foo[3])0; -static const Foo cz7_ = (Foo)0; +static const bool cz0_ = ZeroValuebool(); +static const int cz1_ = ZeroValueint(); +static const uint cz2_ = ZeroValueuint(); +static const float cz3_ = ZeroValuefloat(); +static const uint2 cz4_ = ZeroValueuint2(); +static const float2x2 cz5_ = ZeroValuefloat2x2(); +static const Foo cz6_[3] = ZeroValuearray3_Foo_(); +static const Foo 
cz7_ = ZeroValueFoo(); static const int cp3_[4] = Constructarray4_int_(0, 1, 2, 3); Foo ConstructFoo(float4 arg0, int arg1) { @@ -38,6 +71,10 @@ Foo ConstructFoo(float4 arg0, int arg1) { return ret; } +float2x3 ZeroValuefloat2x3() { + return (float2x3)0; +} + [numthreads(1, 1, 1)] void main() { diff --git a/naga/tests/out/hlsl/globals.hlsl b/naga/tests/out/hlsl/globals.hlsl index 55faf060d0..adf0b28b89 100644 --- a/naga/tests/out/hlsl/globals.hlsl +++ b/naga/tests/out/hlsl/globals.hlsl @@ -71,6 +71,10 @@ void test_msl_packed_vec3_as_arg(float3 arg) return; } +float3x3 ZeroValuefloat3x3() { + return (float3x3)0; +} + FooStruct ConstructFooStruct(float3 arg0, float arg1) { FooStruct ret = (FooStruct)0; ret.v3_ = arg0; @@ -91,8 +95,8 @@ void test_msl_packed_vec3_() float3 l0_ = data.v3_; float2 l1_ = data.v3_.zx; test_msl_packed_vec3_as_arg(data.v3_); - float3 mvm0_ = mul((float3x3)0, data.v3_); - float3 mvm1_ = mul(data.v3_, (float3x3)0); + float3 mvm0_ = mul(ZeroValuefloat3x3(), data.v3_); + float3 mvm1_ = mul(data.v3_, ZeroValuefloat3x3()); float3 svm0_ = (data.v3_ * 2.0); float3 svm1_ = (2.0 * data.v3_); } diff --git a/naga/tests/out/hlsl/math-functions.hlsl b/naga/tests/out/hlsl/math-functions.hlsl index 61c59f00c1..c1a771c25d 100644 --- a/naga/tests/out/hlsl/math-functions.hlsl +++ b/naga/tests/out/hlsl/math-functions.hlsl @@ -63,6 +63,10 @@ _frexp_result_vec4_f32_ naga_frexp(float4 arg) { return result; } +int2 ZeroValueint2() { + return (int2)0; +} + void main() { float4 v = (0.0).xxxx; @@ -74,7 +78,7 @@ void main() float4 g = refract(v, v, 1.0); int4 sign_b = int4(-1, -1, -1, -1); float4 sign_d = float4(-1.0, -1.0, -1.0, -1.0); - int const_dot = dot((int2)0, (int2)0); + int const_dot = dot(ZeroValueint2(), ZeroValueint2()); uint first_leading_bit_abs = firstbithigh(0u); int flb_a = asint(firstbithigh(-1)); int2 flb_b = asint(firstbithigh((-1).xx)); diff --git a/naga/tests/out/hlsl/operators.hlsl b/naga/tests/out/hlsl/operators.hlsl index 58ec5a170d..eab1a8d9fa 100644 --- a/naga/tests/out/hlsl/operators.hlsl +++ b/naga/tests/out/hlsl/operators.hlsl @@ -55,6 +55,18 @@ void logical() bool4 bitwise_and1_ = ((true).xxxx & (false).xxxx); } +float3x3 ZeroValuefloat3x3() { + return (float3x3)0; +} + +float4x3 ZeroValuefloat4x3() { + return (float4x3)0; +} + +float3x4 ZeroValuefloat3x4() { + return (float3x4)0; +} + void arithmetic() { float neg0_1 = -(1.0); @@ -122,13 +134,13 @@ void arithmetic() float2 rem4_1 = fmod((2.0).xx, (1.0).xx); float2 rem5_1 = fmod((2.0).xx, (1.0).xx); } - float3x3 add = ((float3x3)0 + (float3x3)0); - float3x3 sub = ((float3x3)0 - (float3x3)0); - float3x3 mul_scalar0_ = mul(1.0, (float3x3)0); - float3x3 mul_scalar1_ = mul((float3x3)0, 2.0); - float3 mul_vector0_ = mul((1.0).xxxx, (float4x3)0); - float4 mul_vector1_ = mul((float4x3)0, (2.0).xxx); - float3x3 mul_ = mul((float3x4)0, (float4x3)0); + float3x3 add = (ZeroValuefloat3x3() + ZeroValuefloat3x3()); + float3x3 sub = (ZeroValuefloat3x3() - ZeroValuefloat3x3()); + float3x3 mul_scalar0_ = mul(1.0, ZeroValuefloat3x3()); + float3x3 mul_scalar1_ = mul(ZeroValuefloat3x3(), 2.0); + float3 mul_vector0_ = mul((1.0).xxxx, ZeroValuefloat4x3()); + float4 mul_vector1_ = mul(ZeroValuefloat4x3(), (2.0).xxx); + float3x3 mul_ = mul(ZeroValuefloat3x4(), ZeroValuefloat4x3()); } void bit() @@ -199,10 +211,14 @@ void comparison() bool4 gte5_ = ((2.0).xxxx >= (1.0).xxxx); } +int3 ZeroValueint3() { + return (int3)0; +} + void assignment() { int a_1 = (int)0; - int3 vec0_ = (int3)0; + int3 vec0_ = ZeroValueint3(); a_1 = 1; int _expr5 
= a_1; diff --git a/naga/tests/out/hlsl/overrides.hlsl b/naga/tests/out/hlsl/overrides.hlsl index b0582d544e..a7c49f9ba1 100644 --- a/naga/tests/out/hlsl/overrides.hlsl +++ b/naga/tests/out/hlsl/overrides.hlsl @@ -7,6 +7,7 @@ static const float height = 4.6; static const float inferred_f32_ = 2.718; static float gain_x_10_ = 11.0; +static float store_override = (float)0; [numthreads(1, 1, 1)] void main() @@ -18,5 +19,6 @@ void main() x = true; float _expr9 = gain_x_10_; gain_x_100_ = (_expr9 * 10.0); + store_override = gain; return; } diff --git a/naga/tests/out/hlsl/quad-vert.hlsl b/naga/tests/out/hlsl/quad-vert.hlsl index 4505858a4f..5c4eeb7ecc 100644 --- a/naga/tests/out/hlsl/quad-vert.hlsl +++ b/naga/tests/out/hlsl/quad-vert.hlsl @@ -20,9 +20,14 @@ gl_PerVertex Constructgl_PerVertex(float4 arg0, float arg1, float arg2[1], float return ret; } +typedef float ret_ZeroValuearray1_float_[1]; +ret_ZeroValuearray1_float_ ZeroValuearray1_float_() { + return (float[1])0; +} + static float2 v_uv = (float2)0; static float2 a_uv_1 = (float2)0; -static gl_PerVertex unnamed = Constructgl_PerVertex(float4(0.0, 0.0, 0.0, 1.0), 1.0, (float[1])0, (float[1])0); +static gl_PerVertex unnamed = Constructgl_PerVertex(float4(0.0, 0.0, 0.0, 1.0), 1.0, ZeroValuearray1_float_(), ZeroValuearray1_float_()); static float2 a_pos_1 = (float2)0; struct VertexOutput_main { diff --git a/naga/tests/out/hlsl/subgroup-operations-s.hlsl b/naga/tests/out/hlsl/subgroup-operations-s.hlsl new file mode 100644 index 0000000000..d963e91503 --- /dev/null +++ b/naga/tests/out/hlsl/subgroup-operations-s.hlsl @@ -0,0 +1,50 @@ +static uint num_subgroups_1 = (uint)0; +static uint subgroup_id_1 = (uint)0; +static uint subgroup_size_1 = (uint)0; +static uint subgroup_invocation_id_1 = (uint)0; + +struct ComputeInput_main { + uint __local_invocation_index : SV_GroupIndex; +}; + +void main_1() +{ + uint _expr5 = subgroup_size_1; + uint _expr6 = subgroup_invocation_id_1; + const uint4 _e9 = WaveActiveBallot(((_expr6 & 1u) == 1u)); + const uint4 _e10 = WaveActiveBallot(true); + const bool _e12 = WaveActiveAllTrue((_expr6 != 0u)); + const bool _e14 = WaveActiveAnyTrue((_expr6 == 0u)); + const uint _e15 = WaveActiveSum(_expr6); + const uint _e16 = WaveActiveProduct(_expr6); + const uint _e17 = WaveActiveMin(_expr6); + const uint _e18 = WaveActiveMax(_expr6); + const uint _e19 = WaveActiveBitAnd(_expr6); + const uint _e20 = WaveActiveBitOr(_expr6); + const uint _e21 = WaveActiveBitXor(_expr6); + const uint _e22 = WavePrefixSum(_expr6); + const uint _e23 = WavePrefixProduct(_expr6); + const uint _e24 = _expr6 + WavePrefixSum(_expr6); + const uint _e25 = _expr6 * WavePrefixProduct(_expr6); + const uint _e26 = WaveReadLaneFirst(_expr6); + const uint _e27 = WaveReadLaneAt(_expr6, 4u); + const uint _e30 = WaveReadLaneAt(_expr6, ((_expr5 - 1u) - _expr6)); + const uint _e31 = WaveReadLaneAt(_expr6, WaveGetLaneIndex() + 1u); + const uint _e32 = WaveReadLaneAt(_expr6, WaveGetLaneIndex() - 1u); + const uint _e34 = WaveReadLaneAt(_expr6, WaveGetLaneIndex() ^ (_expr5 - 1u)); + return; +} + +[numthreads(1, 1, 1)] +void main(ComputeInput_main computeinput_main) +{ + uint num_subgroups = (1u + WaveGetLaneCount() - 1u) / WaveGetLaneCount(); + uint subgroup_id = computeinput_main.__local_invocation_index / WaveGetLaneCount(); + uint subgroup_size = WaveGetLaneCount(); + uint subgroup_invocation_id = WaveGetLaneIndex(); + num_subgroups_1 = num_subgroups; + subgroup_id_1 = subgroup_id; + subgroup_size_1 = subgroup_size; + subgroup_invocation_id_1 = 
subgroup_invocation_id; + main_1(); +} diff --git a/naga/tests/out/hlsl/subgroup-operations-s.ron b/naga/tests/out/hlsl/subgroup-operations-s.ron new file mode 100644 index 0000000000..b973fe3da1 --- /dev/null +++ b/naga/tests/out/hlsl/subgroup-operations-s.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"main", + target_profile:"cs_6_0", + ), + ], +) diff --git a/naga/tests/out/hlsl/subgroup-operations.hlsl b/naga/tests/out/hlsl/subgroup-operations.hlsl new file mode 100644 index 0000000000..839b1fa6b2 --- /dev/null +++ b/naga/tests/out/hlsl/subgroup-operations.hlsl @@ -0,0 +1,38 @@ +struct Structure { + uint num_subgroups; + uint subgroup_size; +}; + +struct ComputeInput_main { + uint __local_invocation_index : SV_GroupIndex; +}; + +[numthreads(1, 1, 1)] +void main(ComputeInput_main computeinput_main) +{ + Structure sizes = { (1u + WaveGetLaneCount() - 1u) / WaveGetLaneCount(), WaveGetLaneCount() }; + uint subgroup_id = computeinput_main.__local_invocation_index / WaveGetLaneCount(); + uint subgroup_invocation_id = WaveGetLaneIndex(); + const uint4 _e7 = WaveActiveBallot(((subgroup_invocation_id & 1u) == 1u)); + const uint4 _e8 = WaveActiveBallot(true); + const bool _e11 = WaveActiveAllTrue((subgroup_invocation_id != 0u)); + const bool _e14 = WaveActiveAnyTrue((subgroup_invocation_id == 0u)); + const uint _e15 = WaveActiveSum(subgroup_invocation_id); + const uint _e16 = WaveActiveProduct(subgroup_invocation_id); + const uint _e17 = WaveActiveMin(subgroup_invocation_id); + const uint _e18 = WaveActiveMax(subgroup_invocation_id); + const uint _e19 = WaveActiveBitAnd(subgroup_invocation_id); + const uint _e20 = WaveActiveBitOr(subgroup_invocation_id); + const uint _e21 = WaveActiveBitXor(subgroup_invocation_id); + const uint _e22 = WavePrefixSum(subgroup_invocation_id); + const uint _e23 = WavePrefixProduct(subgroup_invocation_id); + const uint _e24 = subgroup_invocation_id + WavePrefixSum(subgroup_invocation_id); + const uint _e25 = subgroup_invocation_id * WavePrefixProduct(subgroup_invocation_id); + const uint _e26 = WaveReadLaneFirst(subgroup_invocation_id); + const uint _e28 = WaveReadLaneAt(subgroup_invocation_id, 4u); + const uint _e33 = WaveReadLaneAt(subgroup_invocation_id, ((sizes.subgroup_size - 1u) - subgroup_invocation_id)); + const uint _e35 = WaveReadLaneAt(subgroup_invocation_id, WaveGetLaneIndex() + 1u); + const uint _e37 = WaveReadLaneAt(subgroup_invocation_id, WaveGetLaneIndex() - 1u); + const uint _e41 = WaveReadLaneAt(subgroup_invocation_id, WaveGetLaneIndex() ^ (sizes.subgroup_size - 1u)); + return; +} diff --git a/naga/tests/out/hlsl/subgroup-operations.ron b/naga/tests/out/hlsl/subgroup-operations.ron new file mode 100644 index 0000000000..b973fe3da1 --- /dev/null +++ b/naga/tests/out/hlsl/subgroup-operations.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"main", + target_profile:"cs_6_0", + ), + ], +) diff --git a/naga/tests/out/hlsl/unnamed-gl-per-vertex.hlsl b/naga/tests/out/hlsl/unnamed-gl-per-vertex.hlsl index 8270ad4e5d..f0f330e7cc 100644 --- a/naga/tests/out/hlsl/unnamed-gl-per-vertex.hlsl +++ b/naga/tests/out/hlsl/unnamed-gl-per-vertex.hlsl @@ -15,7 +15,12 @@ type_4 Constructtype_4(float4 arg0, float arg1, float arg2[1], float arg3[1]) { return ret; } -static type_4 global = Constructtype_4(float4(0.0, 0.0, 0.0, 1.0), 1.0, (float[1])0, (float[1])0); +typedef float ret_ZeroValuearray1_float_[1]; +ret_ZeroValuearray1_float_ ZeroValuearray1_float_() { + return (float[1])0; +} + 
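+// [editor's note: not part of the upstream patch] Across these HLSL snapshots, inline zero-value casts such as (float[1])0 are now wrapped in named ZeroValue* helper functions like the one above. The produced value is unchanged; wrapping the cast in a function appears to be a workaround for expression positions where the raw cast syntax is not accepted, though the snapshots themselves do not state the motivation.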
+static type_4 global = Constructtype_4(float4(0.0, 0.0, 0.0, 1.0), 1.0, ZeroValuearray1_float_(), ZeroValuearray1_float_()); static int global_1 = (int)0; void function() diff --git a/naga/tests/out/ir/overrides.compact.ron b/naga/tests/out/ir/overrides.compact.ron index bc25af3bce..111a134890 100644 --- a/naga/tests/out/ir/overrides.compact.ron +++ b/naga/tests/out/ir/overrides.compact.ron @@ -73,6 +73,13 @@ ty: 2, init: Some(10), ), + ( + name: Some("store_override"), + space: Private, + binding: None, + ty: 2, + init: None, + ), ], global_expressions: [ Literal(Bool(true)), @@ -147,6 +154,8 @@ right: 9, ), LocalVariable(3), + GlobalVariable(2), + Override(3), ], named_expressions: { 5: "a", @@ -176,6 +185,10 @@ pointer: 11, value: 10, ), + Store( + pointer: 12, + value: 13, + ), Return( value: None, ), diff --git a/naga/tests/out/ir/overrides.ron b/naga/tests/out/ir/overrides.ron index bc25af3bce..111a134890 100644 --- a/naga/tests/out/ir/overrides.ron +++ b/naga/tests/out/ir/overrides.ron @@ -73,6 +73,13 @@ ty: 2, init: Some(10), ), + ( + name: Some("store_override"), + space: Private, + binding: None, + ty: 2, + init: None, + ), ], global_expressions: [ Literal(Bool(true)), @@ -147,6 +154,8 @@ right: 9, ), LocalVariable(3), + GlobalVariable(2), + Override(3), ], named_expressions: { 5: "a", @@ -176,6 +185,10 @@ pointer: 11, value: 10, ), + Store( + pointer: 12, + value: 13, + ), Return( value: None, ), diff --git a/naga/tests/out/msl/overrides.msl b/naga/tests/out/msl/overrides.msl index d9e95d0704..d3638dd4cd 100644 --- a/naga/tests/out/msl/overrides.msl +++ b/naga/tests/out/msl/overrides.msl @@ -15,11 +15,13 @@ constant float inferred_f32_ = 2.718; kernel void main_( ) { float gain_x_10_ = 11.0; + float store_override = {}; float t = 23.0; bool x = {}; float gain_x_100_ = {}; x = true; float _e9 = gain_x_10_; gain_x_100_ = _e9 * 10.0; + store_override = gain; return; } diff --git a/naga/tests/out/msl/subgroup-operations-s.msl b/naga/tests/out/msl/subgroup-operations-s.msl new file mode 100644 index 0000000000..3a6f30231c --- /dev/null +++ b/naga/tests/out/msl/subgroup-operations-s.msl @@ -0,0 +1,55 @@ +// language: metal2.4 +#include <metal_stdlib> +#include <simd/simd.h> + +using metal::uint; + + +void main_1( + thread uint& subgroup_size_1, + thread uint& subgroup_invocation_id_1 +) { + uint _e5 = subgroup_size_1; + uint _e6 = subgroup_invocation_id_1; + metal::uint4 unnamed = uint4((uint64_t)metal::simd_ballot((_e6 & 1u) == 1u), 0, 0, 0); + metal::uint4 unnamed_1 = uint4((uint64_t)metal::simd_ballot(true), 0, 0, 0); + bool unnamed_2 = metal::simd_all(_e6 != 0u); + bool unnamed_3 = metal::simd_any(_e6 == 0u); + uint unnamed_4 = metal::simd_sum(_e6); + uint unnamed_5 = metal::simd_product(_e6); + uint unnamed_6 = metal::simd_min(_e6); + uint unnamed_7 = metal::simd_max(_e6); + uint unnamed_8 = metal::simd_and(_e6); + uint unnamed_9 = metal::simd_or(_e6); + uint unnamed_10 = metal::simd_xor(_e6); + uint unnamed_11 = metal::simd_prefix_exclusive_sum(_e6); + uint unnamed_12 = metal::simd_prefix_exclusive_product(_e6); + uint unnamed_13 = metal::simd_prefix_inclusive_sum(_e6); + uint unnamed_14 = metal::simd_prefix_inclusive_product(_e6); + uint unnamed_15 = metal::simd_broadcast_first(_e6); + uint unnamed_16 = metal::simd_broadcast(_e6, 4u); + uint unnamed_17 = metal::simd_shuffle(_e6, (_e5 - 1u) - _e6); + uint unnamed_18 = metal::simd_shuffle_down(_e6, 1u); + uint unnamed_19 = metal::simd_shuffle_up(_e6, 1u); + uint unnamed_20 = metal::simd_shuffle_xor(_e6, _e5 - 1u); + return; +} + +struct main_Input { +};
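+// [editor's note: not part of the upstream patch] In the kernel below, the WGSL builtins num_subgroups, subgroup_id, subgroup_size and subgroup_invocation_id arrive through Metal's [[simdgroups_per_threadgroup]], [[simdgroup_index_in_threadgroup]], [[threads_per_simdgroup]] and [[thread_index_in_simdgroup]] attributes and are copied into the private globals that main_1 reads.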
+kernel void main_( + uint num_subgroups [[simdgroups_per_threadgroup]] +, uint subgroup_id [[simdgroup_index_in_threadgroup]] +, uint subgroup_size [[threads_per_simdgroup]] +, uint subgroup_invocation_id [[thread_index_in_simdgroup]] +) { + uint num_subgroups_1 = {}; + uint subgroup_id_1 = {}; + uint subgroup_size_1 = {}; + uint subgroup_invocation_id_1 = {}; + num_subgroups_1 = num_subgroups; + subgroup_id_1 = subgroup_id; + subgroup_size_1 = subgroup_size; + subgroup_invocation_id_1 = subgroup_invocation_id; + main_1(subgroup_size_1, subgroup_invocation_id_1); +} diff --git a/naga/tests/out/msl/subgroup-operations.msl b/naga/tests/out/msl/subgroup-operations.msl new file mode 100644 index 0000000000..980dea47f8 --- /dev/null +++ b/naga/tests/out/msl/subgroup-operations.msl @@ -0,0 +1,44 @@ +// language: metal2.4 +#include <metal_stdlib> +#include <simd/simd.h> + +using metal::uint; + +struct Structure { + uint num_subgroups; + uint subgroup_size; +}; + +struct main_Input { +}; +kernel void main_( + uint num_subgroups [[simdgroups_per_threadgroup]] +, uint subgroup_size [[threads_per_simdgroup]] +, uint subgroup_id [[simdgroup_index_in_threadgroup]] +, uint subgroup_invocation_id [[thread_index_in_simdgroup]] +) { + const Structure sizes = { num_subgroups, subgroup_size }; + metal::simdgroup_barrier(metal::mem_flags::mem_threadgroup); + metal::uint4 unnamed = uint4((uint64_t)metal::simd_ballot((subgroup_invocation_id & 1u) == 1u), 0, 0, 0); + metal::uint4 unnamed_1 = uint4((uint64_t)metal::simd_ballot(true), 0, 0, 0); + bool unnamed_2 = metal::simd_all(subgroup_invocation_id != 0u); + bool unnamed_3 = metal::simd_any(subgroup_invocation_id == 0u); + uint unnamed_4 = metal::simd_sum(subgroup_invocation_id); + uint unnamed_5 = metal::simd_product(subgroup_invocation_id); + uint unnamed_6 = metal::simd_min(subgroup_invocation_id); + uint unnamed_7 = metal::simd_max(subgroup_invocation_id); + uint unnamed_8 = metal::simd_and(subgroup_invocation_id); + uint unnamed_9 = metal::simd_or(subgroup_invocation_id); + uint unnamed_10 = metal::simd_xor(subgroup_invocation_id); + uint unnamed_11 = metal::simd_prefix_exclusive_sum(subgroup_invocation_id); + uint unnamed_12 = metal::simd_prefix_exclusive_product(subgroup_invocation_id); + uint unnamed_13 = metal::simd_prefix_inclusive_sum(subgroup_invocation_id); + uint unnamed_14 = metal::simd_prefix_inclusive_product(subgroup_invocation_id); + uint unnamed_15 = metal::simd_broadcast_first(subgroup_invocation_id); + uint unnamed_16 = metal::simd_broadcast(subgroup_invocation_id, 4u); + uint unnamed_17 = metal::simd_shuffle(subgroup_invocation_id, (sizes.subgroup_size - 1u) - subgroup_invocation_id); + uint unnamed_18 = metal::simd_shuffle_down(subgroup_invocation_id, 1u); + uint unnamed_19 = metal::simd_shuffle_up(subgroup_invocation_id, 1u); + uint unnamed_20 = metal::simd_shuffle_xor(subgroup_invocation_id, sizes.subgroup_size - 1u); + return; +} diff --git a/naga/tests/out/spv/overrides.main.spvasm b/naga/tests/out/spv/overrides.main.spvasm index d21eb7c674..5c748a01b2 100644 --- a/naga/tests/out/spv/overrides.main.spvasm +++ b/naga/tests/out/spv/overrides.main.spvasm @@ -1,12 +1,12 @@ ; SPIR-V ; Version: 1.0 ; Generator: rspirv -; Bound: 31 +; Bound: 33 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %18 "main" -OpExecutionMode %18 LocalSize 1 1 1 +OpEntryPoint GLCompute %20 "main" +OpExecutionMode %20 LocalSize 1 1 1 %2 = OpTypeVoid %3 = OpTypeBool %4 = OpTypeFloat 32 @@ -22,22 +22,25 @@ OpExecutionMode %18
LocalSize 1 1 1 %14 = OpConstant %4 11.0 %16 = OpTypePointer Private %4 %15 = OpVariable %16 Private %14 -%19 = OpTypeFunction %2 -%20 = OpConstant %4 23.0 -%22 = OpTypePointer Function %4 -%24 = OpTypePointer Function %3 -%25 = OpConstantNull %3 -%27 = OpConstantNull %4 -%18 = OpFunction %2 None %19 -%17 = OpLabel -%21 = OpVariable %22 Function %20 -%23 = OpVariable %24 Function %25 -%26 = OpVariable %22 Function %27 -OpBranch %28 -%28 = OpLabel -OpStore %23 %5 -%29 = OpLoad %4 %15 -%30 = OpFMul %4 %29 %13 -OpStore %26 %30 +%18 = OpConstantNull %4 +%17 = OpVariable %16 Private %18 +%21 = OpTypeFunction %2 +%22 = OpConstant %4 23.0 +%24 = OpTypePointer Function %4 +%26 = OpTypePointer Function %3 +%27 = OpConstantNull %3 +%29 = OpConstantNull %4 +%20 = OpFunction %2 None %21 +%19 = OpLabel +%23 = OpVariable %24 Function %22 +%25 = OpVariable %26 Function %27 +%28 = OpVariable %24 Function %29 +OpBranch %30 +%30 = OpLabel +OpStore %25 %5 +%31 = OpLoad %4 %15 +%32 = OpFMul %4 %31 %13 +OpStore %28 %32 +OpStore %17 %9 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/subgroup-operations.spvasm b/naga/tests/out/spv/subgroup-operations.spvasm new file mode 100644 index 0000000000..fb60aae5bc --- /dev/null +++ b/naga/tests/out/spv/subgroup-operations.spvasm @@ -0,0 +1,81 @@ +; SPIR-V +; Version: 1.3 +; Generator: rspirv +; Bound: 58 +OpCapability Shader +OpCapability GroupNonUniform +OpCapability GroupNonUniformBallot +OpCapability GroupNonUniformVote +OpCapability GroupNonUniformArithmetic +OpCapability GroupNonUniformShuffle +OpCapability GroupNonUniformShuffleRelative +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %17 "main" %8 %11 %13 %15 +OpExecutionMode %17 LocalSize 1 1 1 +OpMemberDecorate %4 0 Offset 0 +OpMemberDecorate %4 1 Offset 4 +OpDecorate %8 BuiltIn NumSubgroups +OpDecorate %11 BuiltIn SubgroupSize +OpDecorate %13 BuiltIn SubgroupId +OpDecorate %15 BuiltIn SubgroupLocalInvocationId +%2 = OpTypeVoid +%3 = OpTypeInt 32 0 +%4 = OpTypeStruct %3 %3 +%5 = OpTypeBool +%9 = OpTypePointer Input %3 +%8 = OpVariable %9 Input +%11 = OpVariable %9 Input +%13 = OpVariable %9 Input +%15 = OpVariable %9 Input +%18 = OpTypeFunction %2 +%19 = OpConstant %3 1 +%20 = OpConstant %3 0 +%21 = OpConstant %3 4 +%23 = OpConstant %3 3 +%24 = OpConstant %3 2 +%25 = OpConstant %3 8 +%28 = OpTypeVector %3 4 +%30 = OpConstantTrue %5 +%17 = OpFunction %2 None %18 +%6 = OpLabel +%10 = OpLoad %3 %8 +%12 = OpLoad %3 %11 +%7 = OpCompositeConstruct %4 %10 %12 +%14 = OpLoad %3 %13 +%16 = OpLoad %3 %15 +OpBranch %22 +%22 = OpLabel +OpControlBarrier %23 %24 %25 +%26 = OpBitwiseAnd %3 %16 %19 +%27 = OpIEqual %5 %26 %19 +%29 = OpGroupNonUniformBallot %28 %23 %27 +%31 = OpGroupNonUniformBallot %28 %23 %30 +%32 = OpINotEqual %5 %16 %20 +%33 = OpGroupNonUniformAll %5 %23 %32 +%34 = OpIEqual %5 %16 %20 +%35 = OpGroupNonUniformAny %5 %23 %34 +%36 = OpGroupNonUniformIAdd %3 %23 Reduce %16 +%37 = OpGroupNonUniformIMul %3 %23 Reduce %16 +%38 = OpGroupNonUniformUMin %3 %23 Reduce %16 +%39 = OpGroupNonUniformUMax %3 %23 Reduce %16 +%40 = OpGroupNonUniformBitwiseAnd %3 %23 Reduce %16 +%41 = OpGroupNonUniformBitwiseOr %3 %23 Reduce %16 +%42 = OpGroupNonUniformBitwiseXor %3 %23 Reduce %16 +%43 = OpGroupNonUniformIAdd %3 %23 ExclusiveScan %16 +%44 = OpGroupNonUniformIMul %3 %23 ExclusiveScan %16 +%45 = OpGroupNonUniformIAdd %3 %23 InclusiveScan %16 +%46 = OpGroupNonUniformIMul %3 %23 InclusiveScan %16 +%47 = OpGroupNonUniformBroadcastFirst %3 %23 %16 +%48 = 
OpGroupNonUniformShuffle %3 %23 %16 %21 +%49 = OpCompositeExtract %3 %7 1 +%50 = OpISub %3 %49 %19 +%51 = OpISub %3 %50 %16 +%52 = OpGroupNonUniformShuffle %3 %23 %16 %51 +%53 = OpGroupNonUniformShuffleDown %3 %23 %16 %19 +%54 = OpGroupNonUniformShuffleUp %3 %23 %16 %19 +%55 = OpCompositeExtract %3 %7 1 +%56 = OpISub %3 %55 %19 +%57 = OpGroupNonUniformShuffleXor %3 %23 %16 %56 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/wgsl/subgroup-operations-s.wgsl b/naga/tests/out/wgsl/subgroup-operations-s.wgsl new file mode 100644 index 0000000000..c61e2dfc57 --- /dev/null +++ b/naga/tests/out/wgsl/subgroup-operations-s.wgsl @@ -0,0 +1,40 @@ +var<private> num_subgroups_1: u32; +var<private> subgroup_id_1: u32; +var<private> subgroup_size_1: u32; +var<private> subgroup_invocation_id_1: u32; + +fn main_1() { + let _e5 = subgroup_size_1; + let _e6 = subgroup_invocation_id_1; + let _e9 = subgroupBallot(((_e6 & 1u) == 1u)); + let _e10 = subgroupBallot(); + let _e12 = subgroupAll((_e6 != 0u)); + let _e14 = subgroupAny((_e6 == 0u)); + let _e15 = subgroupAdd(_e6); + let _e16 = subgroupMul(_e6); + let _e17 = subgroupMin(_e6); + let _e18 = subgroupMax(_e6); + let _e19 = subgroupAnd(_e6); + let _e20 = subgroupOr(_e6); + let _e21 = subgroupXor(_e6); + let _e22 = subgroupExclusiveAdd(_e6); + let _e23 = subgroupExclusiveMul(_e6); + let _e24 = subgroupInclusiveAdd(_e6); + let _e25 = subgroupInclusiveMul(_e6); + let _e26 = subgroupBroadcastFirst(_e6); + let _e27 = subgroupBroadcast(_e6, 4u); + let _e30 = subgroupShuffle(_e6, ((_e5 - 1u) - _e6)); + let _e31 = subgroupShuffleDown(_e6, 1u); + let _e32 = subgroupShuffleUp(_e6, 1u); + let _e34 = subgroupShuffleXor(_e6, (_e5 - 1u)); + return; +} + +@compute @workgroup_size(1, 1, 1) +fn main(@builtin(num_subgroups) num_subgroups: u32, @builtin(subgroup_id) subgroup_id: u32, @builtin(subgroup_size) subgroup_size: u32, @builtin(subgroup_invocation_id) subgroup_invocation_id: u32) { + num_subgroups_1 = num_subgroups; + subgroup_id_1 = subgroup_id; + subgroup_size_1 = subgroup_size; + subgroup_invocation_id_1 = subgroup_invocation_id; + main_1(); +} diff --git a/naga/tests/out/wgsl/subgroup-operations.wgsl b/naga/tests/out/wgsl/subgroup-operations.wgsl new file mode 100644 index 0000000000..25f713b357 --- /dev/null +++ b/naga/tests/out/wgsl/subgroup-operations.wgsl @@ -0,0 +1,31 @@ +struct Structure { + @builtin(num_subgroups) num_subgroups: u32, + @builtin(subgroup_size) subgroup_size: u32, +} + +@compute @workgroup_size(1, 1, 1) +fn main(sizes: Structure, @builtin(subgroup_id) subgroup_id: u32, @builtin(subgroup_invocation_id) subgroup_invocation_id: u32) { + subgroupBarrier(); + let _e7 = subgroupBallot(((subgroup_invocation_id & 1u) == 1u)); + let _e8 = subgroupBallot(); + let _e11 = subgroupAll((subgroup_invocation_id != 0u)); + let _e14 = subgroupAny((subgroup_invocation_id == 0u)); + let _e15 = subgroupAdd(subgroup_invocation_id); + let _e16 = subgroupMul(subgroup_invocation_id); + let _e17 = subgroupMin(subgroup_invocation_id); + let _e18 = subgroupMax(subgroup_invocation_id); + let _e19 = subgroupAnd(subgroup_invocation_id); + let _e20 = subgroupOr(subgroup_invocation_id); + let _e21 = subgroupXor(subgroup_invocation_id); + let _e22 = subgroupExclusiveAdd(subgroup_invocation_id); + let _e23 = subgroupExclusiveMul(subgroup_invocation_id); + let _e24 = subgroupInclusiveAdd(subgroup_invocation_id); + let _e25 = subgroupInclusiveMul(subgroup_invocation_id); + let _e26 = subgroupBroadcastFirst(subgroup_invocation_id); + let _e28 = subgroupBroadcast(subgroup_invocation_id,
4u); + let _e33 = subgroupShuffle(subgroup_invocation_id, ((sizes.subgroup_size - 1u) - subgroup_invocation_id)); + let _e35 = subgroupShuffleDown(subgroup_invocation_id, 1u); + let _e37 = subgroupShuffleUp(subgroup_invocation_id, 1u); + let _e41 = subgroupShuffleXor(subgroup_invocation_id, (sizes.subgroup_size - 1u)); + return; +} diff --git a/naga/tests/snapshots.rs b/naga/tests/snapshots.rs index 3e45faeb16..ee775a3e63 100644 --- a/naga/tests/snapshots.rs +++ b/naga/tests/snapshots.rs @@ -269,10 +269,18 @@ fn check_targets( let params = input.read_parameters(); let name = &input.file_name; - let capabilities = if params.god_mode { - naga::valid::Capabilities::all() + let (capabilities, subgroup_stages, subgroup_operations) = if params.god_mode { + ( + naga::valid::Capabilities::all(), + naga::valid::ShaderStages::all(), + naga::valid::SubgroupOperationSet::all(), + ) } else { - naga::valid::Capabilities::default() + ( + naga::valid::Capabilities::default(), + naga::valid::ShaderStages::empty(), + naga::valid::SubgroupOperationSet::empty(), + ) }; #[cfg(feature = "serialize")] @@ -285,6 +293,8 @@ fn check_targets( } let info = naga::valid::Validator::new(naga::valid::ValidationFlags::all(), capabilities) + .subgroup_stages(subgroup_stages) + .subgroup_operations(subgroup_operations) .validate(module) .unwrap_or_else(|err| { panic!( @@ -308,6 +318,8 @@ fn check_targets( } naga::valid::Validator::new(naga::valid::ValidationFlags::all(), capabilities) + .subgroup_stages(subgroup_stages) + .subgroup_operations(subgroup_operations) .validate(module) .unwrap_or_else(|err| { panic!( @@ -850,6 +862,10 @@ fn convert_wgsl() { "int64", Targets::SPIRV | Targets::HLSL | Targets::WGSL | Targets::METAL, ), + ( + "subgroup-operations", + Targets::SPIRV | Targets::METAL | Targets::GLSL | Targets::HLSL | Targets::WGSL, + ), ( "overrides", Targets::IR @@ -957,6 +973,12 @@ fn convert_spv_all() { ); convert_spv("builtin-accessed-outside-entrypoint", true, Targets::WGSL); convert_spv("spec-constants", true, Targets::IR); + convert_spv("spec-constants-issue-5598", true, Targets::GLSL); + convert_spv( + "subgroup-operations-s", + false, + Targets::METAL | Targets::GLSL | Targets::HLSL | Targets::WGSL, + ); } #[cfg(feature = "glsl-in")] diff --git a/player/src/lib.rs b/player/src/lib.rs index 0ea491ea20..5777f4d7a6 100644 --- a/player/src/lib.rs +++ b/player/src/lib.rs @@ -99,7 +99,7 @@ impl GlobalPlay for wgc::global::Global { base, timestamp_writes, } => { - self.command_encoder_run_compute_pass_impl::<A>( + self.command_encoder_run_compute_pass_with_unresolved_commands::<A>( encoder, base.as_ref(), timestamp_writes.as_ref(), diff --git a/player/tests/data/bind-group.ron b/player/tests/data/bind-group.ron index 92415e4ff3..9da7abe097 100644 --- a/player/tests/data/bind-group.ron +++ b/player/tests/data/bind-group.ron @@ -58,6 +58,7 @@ module: Id(0, 1, Empty), entry_point: None, constants: {}, + zero_initialize_workgroup_memory: true, ), ), ), diff --git a/player/tests/data/pipeline-statistics-query.ron b/player/tests/data/pipeline-statistics-query.ron index 3c672f4e56..f0f96d42cb 100644 --- a/player/tests/data/pipeline-statistics-query.ron +++ b/player/tests/data/pipeline-statistics-query.ron @@ -31,6 +31,7 @@ module: Id(0, 1, Empty), entry_point: None, constants: {}, + zero_initialize_workgroup_memory: true, ), ), ), diff --git a/player/tests/data/quad.ron b/player/tests/data/quad.ron index 9d6b4a25f6..1a8b4028bb 100644 --- a/player/tests/data/quad.ron +++ b/player/tests/data/quad.ron @@ -59,6 +59,7 @@ module:
Id(0, 1, Empty), entry_point: None, constants: {}, + zero_initialize_workgroup_memory: true, ), buffers: [], ), @@ -67,6 +68,7 @@ module: Id(0, 1, Empty), entry_point: None, constants: {}, + zero_initialize_workgroup_memory: true, ), targets: [ Some(( diff --git a/player/tests/data/zero-init-buffer.ron b/player/tests/data/zero-init-buffer.ron index 5697a2555e..1ce7924ddd 100644 --- a/player/tests/data/zero-init-buffer.ron +++ b/player/tests/data/zero-init-buffer.ron @@ -135,6 +135,7 @@ module: Id(0, 1, Empty), entry_point: None, constants: {}, + zero_initialize_workgroup_memory: true, ), ), ), diff --git a/player/tests/data/zero-init-texture-binding.ron b/player/tests/data/zero-init-texture-binding.ron index 340cb0cfa2..2aeaf22c7d 100644 --- a/player/tests/data/zero-init-texture-binding.ron +++ b/player/tests/data/zero-init-texture-binding.ron @@ -136,6 +136,7 @@ module: Id(0, 1, Empty), entry_point: None, constants: {}, + zero_initialize_workgroup_memory: true, ), ), ), diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000000..6afa344709 --- /dev/null +++ b/shell.nix @@ -0,0 +1,69 @@ +# This file is only relevant for Nix and NixOS users. +# What's actually meant by "Nix" here is not UNIX, but the *package manager* Nix, see https://nixos.org/. +# If you are +# on macOS (and not using nix-darwin) +# or on Windows (and not using Nix in WSL), +# you can safely ignore this file. +# +# Otherwise, if you *do* use Nix the package manager, +# this file declares +# common dependencies +# and some nice tools +# which you'll most likely need when working with wgpu. +# Feel free to copy it into your own project if deemed useful. +# +# To use this file, just run `nix-shell` in this folder, +# which will drop you into a shell +# with all the deps needed for building wgpu available. +# +# Or if you're using direnv (https://direnv.net/), +# use `direnv allow` to automatically use this file +# if you're navigating into this or a subfolder. + +{ pkgs ?
import <nixpkgs> {} }: + +pkgs.mkShell rec { + buildInputs = with pkgs; [ + # necessary for building wgpu in 3rd party packages (in most cases) + libxkbcommon + wayland xorg.libX11 xorg.libXcursor xorg.libXrandr xorg.libXi + alsa-lib + fontconfig freetype + shaderc directx-shader-compiler + pkg-config cmake + mold # could use any linker, needed for rustix (but mold is fast) + + libGL + vulkan-headers vulkan-loader + vulkan-tools vulkan-tools-lunarg + vulkan-extension-layer + vulkan-validation-layers # don't need them *strictly* but immensely helpful + + # necessary for developing (all of) wgpu itself + cargo-nextest cargo-fuzz + + # nice for developing wgpu itself + typos + + # if you don't already have rust installed through other means, + # this shell.nix can do that for you with this below + yq # for tomlq below + rustup + + # nice tools + gdb rr + evcxr + valgrind + renderdoc + ]; + + shellHook = '' + export RUSTC_VERSION="$(tomlq -r .toolchain.channel rust-toolchain.toml)" + export PATH="$PATH:''${CARGO_HOME:-~/.cargo}/bin" + export PATH="$PATH:''${RUSTUP_HOME:-~/.rustup/toolchains/$RUSTC_VERSION-x86_64-unknown-linux/bin}" + export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${builtins.toString (pkgs.lib.makeLibraryPath buildInputs)}"; + + rustup default $RUSTC_VERSION + rustup component add rust-src rust-analyzer + ''; +} diff --git a/tests/src/config.rs b/tests/src/config.rs index fa96adbc1d..62d3e56091 100644 --- a/tests/src/config.rs +++ b/tests/src/config.rs @@ -1,4 +1,4 @@ -use std::{future::Future, pin::Pin, sync::Arc}; +use std::{future::Future, panic::Location, pin::Pin, sync::Arc}; use crate::{TestParameters, TestingContext}; @@ -26,14 +26,17 @@ cfg_if::cfg_if! { #[derive(Clone)] pub struct GpuTestConfiguration { pub(crate) name: String, + pub(crate) location: &'static Location<'static>, pub(crate) params: TestParameters, pub(crate) test: Option<RunTestAsync>, } impl GpuTestConfiguration { + #[track_caller] pub fn new() -> Self { Self { name: String::new(), + location: Location::caller(), params: TestParameters::default(), test: None, } diff --git a/tests/src/image.rs b/tests/src/image.rs index 98310233c9..8996f361cd 100644 --- a/tests/src/image.rs +++ b/tests/src/image.rs @@ -369,7 +369,7 @@ fn copy_via_compute( layout: Some(&pll), module: &sm, entry_point: "copy_texture_to_buffer", - constants: &Default::default(), + compilation_options: Default::default(), }); { diff --git a/tests/src/run.rs b/tests/src/run.rs index f56651b574..82ddb93399 100644 --- a/tests/src/run.rs +++ b/tests/src/run.rs @@ -116,7 +116,10 @@ pub async fn execute_test( // The call to matches_failure will log. if expectations_match_failures(&test_info.failures, failures) == ExpectationMatchResult::Panic { - panic!(); + panic!( + "{}: test {:?} did not behave as expected", + config.location, config.name + ); } // Print the name of the test.
log::info!("TEST FINISHED: {}", config.name); diff --git a/tests/tests/bgra8unorm_storage.rs b/tests/tests/bgra8unorm_storage.rs index c3913e5df8..17082a9ed4 100644 --- a/tests/tests/bgra8unorm_storage.rs +++ b/tests/tests/bgra8unorm_storage.rs @@ -96,7 +96,7 @@ static BGRA8_UNORM_STORAGE: GpuTestConfiguration = GpuTestConfiguration::new() label: None, layout: Some(&pl), entry_point: "main", - constants: &Default::default(), + compilation_options: Default::default(), module: &module, }); diff --git a/tests/tests/bind_group_layout_dedup.rs b/tests/tests/bind_group_layout_dedup.rs index 519cfbda29..3466e1e244 100644 --- a/tests/tests/bind_group_layout_dedup.rs +++ b/tests/tests/bind_group_layout_dedup.rs @@ -90,7 +90,7 @@ async fn bgl_dedupe(ctx: TestingContext) { layout: Some(&pipeline_layout), module: &module, entry_point: "no_resources", - constants: &Default::default(), + compilation_options: Default::default(), }; let pipeline = ctx.device.create_compute_pipeline(&desc); @@ -219,7 +219,7 @@ fn bgl_dedupe_with_dropped_user_handle(ctx: TestingContext) { layout: Some(&pipeline_layout), module: &module, entry_point: "no_resources", - constants: &Default::default(), + compilation_options: Default::default(), }); let mut encoder = ctx.device.create_command_encoder(&Default::default()); @@ -265,7 +265,7 @@ fn bgl_dedupe_derived(ctx: TestingContext) { layout: None, module: &module, entry_point: "resources", - constants: &Default::default(), + compilation_options: Default::default(), }); // We create two bind groups, pulling the bind_group_layout from the pipeline each time. @@ -336,7 +336,7 @@ fn separate_programs_have_incompatible_derived_bgls(ctx: TestingContext) { layout: None, module: &module, entry_point: "resources", - constants: &Default::default(), + compilation_options: Default::default(), }; // Create two pipelines, creating a BG from the second. 
let pipeline1 = ctx.device.create_compute_pipeline(&desc); @@ -398,7 +398,7 @@ fn derived_bgls_incompatible_with_regular_bgls(ctx: TestingContext) { layout: None, module: &module, entry_point: "resources", - constants: &Default::default(), + compilation_options: Default::default(), }); // Create a matching BGL diff --git a/tests/tests/buffer.rs b/tests/tests/buffer.rs index 1622995c35..0693877d00 100644 --- a/tests/tests/buffer.rs +++ b/tests/tests/buffer.rs @@ -224,7 +224,7 @@ static MINIMUM_BUFFER_BINDING_SIZE_LAYOUT: GpuTestConfiguration = GpuTestConfigu layout: Some(&pipeline_layout), module: &shader_module, entry_point: "main", - constants: &Default::default(), + compilation_options: Default::default(), }); }); }); @@ -293,7 +293,7 @@ static MINIMUM_BUFFER_BINDING_SIZE_DISPATCH: GpuTestConfiguration = GpuTestConfi layout: Some(&pipeline_layout), module: &shader_module, entry_point: "main", - constants: &Default::default(), + compilation_options: Default::default(), }); let buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { diff --git a/tests/tests/device.rs b/tests/tests/device.rs index 82e3f71a1c..649a850fa9 100644 --- a/tests/tests/device.rs +++ b/tests/tests/device.rs @@ -480,7 +480,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne vertex: wgpu::VertexState { module: &shader_module, entry_point: "", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[], }, primitive: wgpu::PrimitiveState::default(), @@ -499,7 +499,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne layout: None, module: &shader_module, entry_point: "", - constants: &Default::default(), + compilation_options: Default::default(), }); }); @@ -736,7 +736,7 @@ fn vs_main() -> @builtin(position) vec4<f32> { fragment: Some(wgpu::FragmentState { module: &trivial_shaders_with_some_reversed_bindings, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(wgt::ColorTargetState { format: wgt::TextureFormat::Bgra8Unorm, blend: None, @@ -750,7 +750,7 @@ fn vs_main() -> @builtin(position) vec4<f32> { vertex: wgpu::VertexState { module: &trivial_shaders_with_some_reversed_bindings, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[], }, primitive: wgt::PrimitiveState::default(), diff --git a/tests/tests/mem_leaks.rs b/tests/tests/mem_leaks.rs index 949b4d96ce..7002ebabe0 100644 --- a/tests/tests/mem_leaks.rs +++ b/tests/tests/mem_leaks.rs @@ -97,7 +97,7 @@ async fn draw_test_with_reports( buffers: &[], module: &shader, entry_point: "vs_main_builtin", - constants: &Default::default(), + compilation_options: Default::default(), }, primitive: wgpu::PrimitiveState::default(), depth_stencil: None, @@ -105,7 +105,7 @@ async fn draw_test_with_reports( fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: wgpu::TextureFormat::Rgba8Unorm, blend: None, diff --git a/tests/tests/nv12_texture/mod.rs b/tests/tests/nv12_texture/mod.rs index 0f4ba16f25..70ee849831 100644 --- a/tests/tests/nv12_texture/mod.rs +++ b/tests/tests/nv12_texture/mod.rs @@ -24,13 +24,13 @@ static NV12_TEXTURE_CREATION_SAMPLING: GpuTestConfiguration = GpuTestConfigurati vertex: wgpu::VertexState { module: &shader, entry_point: "vs_main", - constants: &Default::default(), + compilation_options:
Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(target_format.into())], }), primitive: wgpu::PrimitiveState { diff --git a/tests/tests/occlusion_query/mod.rs b/tests/tests/occlusion_query/mod.rs index 2db035bfb2..1a68ecf79d 100644 --- a/tests/tests/occlusion_query/mod.rs +++ b/tests/tests/occlusion_query/mod.rs @@ -37,7 +37,7 @@ static OCCLUSION_QUERY: GpuTestConfiguration = GpuTestConfiguration::new() vertex: wgpu::VertexState { module: &shader, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[], }, fragment: None, diff --git a/tests/tests/partially_bounded_arrays/mod.rs b/tests/tests/partially_bounded_arrays/mod.rs index b93e900a9c..11eee5b207 100644 --- a/tests/tests/partially_bounded_arrays/mod.rs +++ b/tests/tests/partially_bounded_arrays/mod.rs @@ -69,7 +69,7 @@ static PARTIALLY_BOUNDED_ARRAY: GpuTestConfiguration = GpuTestConfiguration::new layout: Some(&pipeline_layout), module: &cs_module, entry_point: "main", - constants: &Default::default(), + compilation_options: Default::default(), }); let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { diff --git a/tests/tests/pipeline.rs b/tests/tests/pipeline.rs index c8814e25f7..a07e158a53 100644 --- a/tests/tests/pipeline.rs +++ b/tests/tests/pipeline.rs @@ -28,7 +28,7 @@ static PIPELINE_DEFAULT_LAYOUT_BAD_MODULE: GpuTestConfiguration = GpuTestConfigu layout: None, module: &module, entry_point: "doesn't exist", - constants: &Default::default(), + compilation_options: Default::default(), }); pipeline.get_bind_group_layout(0); diff --git a/tests/tests/push_constants.rs b/tests/tests/push_constants.rs index d1119476c3..04d9a00f7d 100644 --- a/tests/tests/push_constants.rs +++ b/tests/tests/push_constants.rs @@ -103,7 +103,7 @@ async fn partial_update_test(ctx: TestingContext) { layout: Some(&pipeline_layout), module: &sm, entry_point: "main", - constants: &Default::default(), + compilation_options: Default::default(), }); let mut encoder = ctx diff --git a/tests/tests/regression/issue_3349.rs b/tests/tests/regression/issue_3349.rs index 93b91b9d7b..74c466b45a 100644 --- a/tests/tests/regression/issue_3349.rs +++ b/tests/tests/regression/issue_3349.rs @@ -102,13 +102,13 @@ async fn multi_stage_data_binding_test(ctx: TestingContext) { vertex: wgpu::VertexState { module: &vs_sm, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &fs_sm, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: wgpu::TextureFormat::Rgba8Unorm, blend: None, diff --git a/tests/tests/regression/issue_3457.rs b/tests/tests/regression/issue_3457.rs index 0fca44b0c9..f18d681ae1 100644 --- a/tests/tests/regression/issue_3457.rs +++ b/tests/tests/regression/issue_3457.rs @@ -52,7 +52,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration = vertex: VertexState { module: &module, entry_point: "double_buffer_vert", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[ VertexBufferLayout { array_stride: 16, @@ -72,7 +72,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration = fragment: Some(FragmentState { module: &module, entry_point: "double_buffer_frag", - constants: &Default::default(), + 
compilation_options: Default::default(), targets: &[Some(ColorTargetState { format: TextureFormat::Rgba8Unorm, blend: None, @@ -90,7 +90,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration = vertex: VertexState { module: &module, entry_point: "single_buffer_vert", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[VertexBufferLayout { array_stride: 16, step_mode: VertexStepMode::Vertex, @@ -103,7 +103,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration = fragment: Some(FragmentState { module: &module, entry_point: "single_buffer_frag", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(ColorTargetState { format: TextureFormat::Rgba8Unorm, blend: None, diff --git a/tests/tests/root.rs b/tests/tests/root.rs index ec58927d16..82b74717eb 100644 --- a/tests/tests/root.rs +++ b/tests/tests/root.rs @@ -34,6 +34,7 @@ mod scissor_tests; mod shader; mod shader_primitive_index; mod shader_view_format; +mod subgroup_operations; mod texture_bounds; mod texture_view_creation; mod transfer; diff --git a/tests/tests/scissor_tests/mod.rs b/tests/tests/scissor_tests/mod.rs index efc658501d..15c35644e5 100644 --- a/tests/tests/scissor_tests/mod.rs +++ b/tests/tests/scissor_tests/mod.rs @@ -44,7 +44,7 @@ async fn scissor_test_impl( vertex: wgpu::VertexState { module: &shader, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[], }, primitive: wgpu::PrimitiveState::default(), @@ -53,7 +53,7 @@ async fn scissor_test_impl( fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: wgpu::TextureFormat::Rgba8Unorm, blend: None, diff --git a/tests/tests/shader/compilation_messages/error_shader.wgsl b/tests/tests/shader/compilation_messages/error_shader.wgsl new file mode 100644 index 0000000000..c57bdbe8f0 --- /dev/null +++ b/tests/tests/shader/compilation_messages/error_shader.wgsl @@ -0,0 +1,2 @@ +/*🐈🐈🐈🐈🐈🐈🐈*/? 
+// Expected Error: invalid character found \ No newline at end of file diff --git a/tests/tests/shader/compilation_messages/mod.rs b/tests/tests/shader/compilation_messages/mod.rs new file mode 100644 index 0000000000..09000205a2 --- /dev/null +++ b/tests/tests/shader/compilation_messages/mod.rs @@ -0,0 +1,49 @@ +use wgpu::include_wgsl; + +use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters}; + +#[gpu_test] +static SHADER_COMPILE_SUCCESS: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(TestParameters::default()) + .run_async(|ctx| async move { + let sm = ctx + .device + .create_shader_module(include_wgsl!("successful_shader.wgsl")); + + let compilation_info = sm.get_compilation_info().await; + for message in compilation_info.messages.iter() { + assert!(message.message_type != wgpu::CompilationMessageType::Error); + } + }); + +#[gpu_test] +static SHADER_COMPILE_ERROR: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(TestParameters::default()) + .run_async(|ctx| async move { + ctx.device.push_error_scope(wgpu::ErrorFilter::Validation); + let sm = ctx + .device + .create_shader_module(include_wgsl!("error_shader.wgsl")); + assert!(pollster::block_on(ctx.device.pop_error_scope()).is_some()); + + let compilation_info = sm.get_compilation_info().await; + let error_message = compilation_info + .messages + .iter() + .find(|message| message.message_type == wgpu::CompilationMessageType::Error) + .expect("Expected error message not found"); + let span = error_message.location.expect("Expected span not found"); + assert_eq!( + span.offset, 32, + "Expected the offset to be 32, because we're counting UTF-8 bytes" + ); + assert_eq!(span.length, 1, "Expected length to roughly be 1"); // Could be relaxed, depending on the parser requirements. 
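+ // For reference: "/*" (2 bytes) + seven 4-byte cat emoji (28 bytes) + "*/" (2 bytes) put the invalid `?` at byte offset 32, i.e. column 33 of line 1.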
+ assert_eq!( + span.line_number, 1, + "Expected the line number to be 1, because we're counting lines from 1" + ); + assert_eq!( + span.line_position, 33, + "Expected the column number to be 33, because we're counting columns from 1" + ); + }); diff --git a/tests/tests/shader/compilation_messages/successful_shader.wgsl b/tests/tests/shader/compilation_messages/successful_shader.wgsl new file mode 100644 index 0000000000..638b89edab --- /dev/null +++ b/tests/tests/shader/compilation_messages/successful_shader.wgsl @@ -0,0 +1,31 @@ +const array_size = 512u; + +struct WStruct { + arr: array<u32, array_size>, + atom: atomic<u32> +} + +var<workgroup> w_mem: WStruct; + +@group(0) @binding(0) +var<storage, read_write> output: array<u32>; + +@compute @workgroup_size(1) +fn read(@builtin(workgroup_id) wgid: vec3<u32>, @builtin(num_workgroups) num_workgroups: vec3<u32>) { + var is_zero = true; + for(var i = 0u; i < array_size; i++) { + is_zero &= w_mem.arr[i] == 0u; + } + is_zero &= atomicLoad(&w_mem.atom) == 0u; + + let idx = wgid.x + (wgid.y * num_workgroups.x) + (wgid.z * num_workgroups.x * num_workgroups.y); + output[idx] = u32(!is_zero); +} + +@compute @workgroup_size(1) +fn write() { + for(var i = 0u; i < array_size; i++) { + w_mem.arr[i] = i; + } + atomicStore(&w_mem.atom, 3u); +} diff --git a/tests/tests/shader/mod.rs b/tests/tests/shader/mod.rs index bb93c690e8..6ece08652f 100644 --- a/tests/tests/shader/mod.rs +++ b/tests/tests/shader/mod.rs @@ -15,6 +15,7 @@ use wgpu::{ use wgpu_test::TestingContext; +pub mod compilation_messages; pub mod numeric_builtins; pub mod struct_layout; pub mod zero_init_workgroup_mem; @@ -307,7 +308,7 @@ async fn shader_input_output_test( layout: Some(&pll), module: &sm, entry_point: "cs_main", - constants: &Default::default(), + compilation_options: Default::default(), }); // -- Initializing data -- diff --git a/tests/tests/shader/zero_init_workgroup_mem.rs b/tests/tests/shader/zero_init_workgroup_mem.rs index 2bbcd87d90..cb9f341ee5 100644 --- a/tests/tests/shader/zero_init_workgroup_mem.rs +++ b/tests/tests/shader/zero_init_workgroup_mem.rs @@ -87,7 +87,7 @@ static ZERO_INIT_WORKGROUP_MEMORY: GpuTestConfiguration = GpuTestConfiguration:: layout: Some(&pll), module: &sm, entry_point: "read", - constants: &Default::default(), + compilation_options: Default::default(), }); let pipeline_write = ctx @@ -97,7 +97,7 @@ static ZERO_INIT_WORKGROUP_MEMORY: GpuTestConfiguration = GpuTestConfiguration:: layout: None, module: &sm, entry_point: "write", - constants: &Default::default(), + compilation_options: Default::default(), }); // -- Initializing data -- diff --git a/tests/tests/shader_primitive_index/mod.rs b/tests/tests/shader_primitive_index/mod.rs index fa6bbcfb53..fb43397830 100644 --- a/tests/tests/shader_primitive_index/mod.rs +++ b/tests/tests/shader_primitive_index/mod.rs @@ -122,7 +122,7 @@ async fn pulling_common( vertex: wgpu::VertexState { module: &shader, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[wgpu::VertexBufferLayout { array_stride: 8, step_mode: wgpu::VertexStepMode::Vertex, @@ -139,7 +139,7 @@ async fn pulling_common( fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: wgpu::TextureFormat::Rgba8Unorm, blend: None, diff --git a/tests/tests/shader_view_format/mod.rs b/tests/tests/shader_view_format/mod.rs index 60efa0130f..53c642bf7a 100644 --- a/tests/tests/shader_view_format/mod.rs +++
b/tests/tests/shader_view_format/mod.rs @@ -93,13 +93,13 @@ async fn reinterpret( vertex: wgpu::VertexState { module: shader, entry_point: "vs_main", - constants: &Default::default(), + compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: shader, entry_point: "fs_main", - constants: &Default::default(), + compilation_options: Default::default(), targets: &[Some(src_format.into())], }), primitive: wgpu::PrimitiveState { diff --git a/tests/tests/subgroup_operations/mod.rs b/tests/tests/subgroup_operations/mod.rs new file mode 100644 index 0000000000..2c518a9d93 --- /dev/null +++ b/tests/tests/subgroup_operations/mod.rs @@ -0,0 +1,138 @@ +use std::{borrow::Cow, num::NonZeroU64}; + +use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters}; + +const THREAD_COUNT: u64 = 128; +const TEST_COUNT: u32 = 32; + +#[gpu_test] +static SUBGROUP_OPERATIONS: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(wgpu::Features::SUBGROUP) + .limits(wgpu::Limits::downlevel_defaults()) + // Expect metal to fail on tests involving operations in divergent control flow + // + // Newlines are included in the panic message to ensure that _additional_ failures + // are not matched against. + .expect_fail( + wgpu_test::FailureCase::molten_vk() + // 14.3 doesn't fail test 29 + .panic("thread 0 failed tests: 27,\nthread 1 failed tests: 27, 28,\n") + // Prior versions do. + .panic("thread 0 failed tests: 27, 29,\nthread 1 failed tests: 27, 28, 29,\n"), + ) + .expect_fail( + wgpu_test::FailureCase::backend(wgpu::Backends::METAL) + // 14.3 doesn't fail test 29 + .panic("thread 0 failed tests: 27,\nthread 1 failed tests: 27, 28,\n") + // Prior versions do. + .panic("thread 0 failed tests: 27, 29,\nthread 1 failed tests: 27, 28, 29,\n"), + ), + ) + .run_sync(|ctx| { + let device = &ctx.device; + + let storage_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: THREAD_COUNT * std::mem::size_of::<u32>() as u64, + usage: wgpu::BufferUsages::STORAGE + | wgpu::BufferUsages::COPY_DST + | wgpu::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + + let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("bind group layout"), + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: NonZeroU64::new( + THREAD_COUNT * std::mem::size_of::<u32>() as u64, + ), + }, + count: None, + }], + }); + + let cs_module = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(include_str!("shader.wgsl"))), + }); + + let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("main"), + bind_group_layouts: &[&bind_group_layout], + push_constant_ranges: &[], + }); + + let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: None, + layout: Some(&pipeline_layout), + module: &cs_module, + entry_point: "main", + compilation_options: Default::default(), + }); + + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: storage_buffer.as_entire_binding(), + }], + layout: &bind_group_layout, + label: Some("bind group"), + }); + + let mut encoder =
device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + { + let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + cpass.set_pipeline(&compute_pipeline); + cpass.set_bind_group(0, &bind_group, &[]); + cpass.dispatch_workgroups(1, 1, 1); + } + ctx.queue.submit(Some(encoder.finish())); + + wgpu::util::DownloadBuffer::read_buffer( + device, + &ctx.queue, + &storage_buffer.slice(..), + |mapping_buffer_view| { + let mapping_buffer_view = mapping_buffer_view.unwrap(); + let result: &[u32; THREAD_COUNT as usize] = + bytemuck::from_bytes(&mapping_buffer_view); + let expected_mask = (1u64 << (TEST_COUNT)) - 1; // generate full mask + let expected_array = [expected_mask as u32; THREAD_COUNT as usize]; + if result != &expected_array { + use std::fmt::Write; + let mut msg = String::new(); + writeln!( + &mut msg, + "Got from GPU:\n{:x?}\n expected:\n{:x?}", + result, &expected_array, + ) + .unwrap(); + for (thread, (result, expected)) in result + .iter() + .zip(expected_array) + .enumerate() + .filter(|(_, (r, e))| *r != e) + { + write!(&mut msg, "thread {} failed tests:", thread).unwrap(); + let difference = result ^ expected; + for i in (0..u32::BITS).filter(|i| (difference & (1 << i)) != 0) { + write!(&mut msg, " {},", i).unwrap(); + } + writeln!(&mut msg).unwrap(); + } + panic!("{}", msg); + } + }, + ); + }); diff --git a/tests/tests/subgroup_operations/shader.wgsl b/tests/tests/subgroup_operations/shader.wgsl new file mode 100644 index 0000000000..77cb81ce75 --- /dev/null +++ b/tests/tests/subgroup_operations/shader.wgsl @@ -0,0 +1,161 @@ +@group(0) +@binding(0) +var<storage, read_write> storage_buffer: array<u32>; + +var<workgroup> workgroup_buffer: u32; + +fn add_result_to_mask(mask: ptr<function, u32>, index: u32, value: bool) { + (*mask) |= u32(value) << index; +} + +@compute +@workgroup_size(128) +fn main( + @builtin(global_invocation_id) global_id: vec3<u32>, + @builtin(num_subgroups) num_subgroups: u32, + @builtin(subgroup_id) subgroup_id: u32, + @builtin(subgroup_size) subgroup_size: u32, + @builtin(subgroup_invocation_id) subgroup_invocation_id: u32, +) { + var passed = 0u; + var expected: u32; + + add_result_to_mask(&passed, 0u, num_subgroups == 128u / subgroup_size); + add_result_to_mask(&passed, 1u, subgroup_id == global_id.x / subgroup_size); + add_result_to_mask(&passed, 2u, subgroup_invocation_id == global_id.x % subgroup_size); + + var expected_ballot = vec4<u32>(0u); + for(var i = 0u; i < subgroup_size; i += 1u) { + expected_ballot[i / 32u] |= ((global_id.x - subgroup_invocation_id + i) & 1u) << (i % 32u); + } + add_result_to_mask(&passed, 3u, dot(vec4<u32>(1u), vec4<u32>(subgroupBallot((subgroup_invocation_id & 1u) == 1u) == expected_ballot)) == 4u); + + add_result_to_mask(&passed, 4u, subgroupAll(true)); + add_result_to_mask(&passed, 5u, !subgroupAll(subgroup_invocation_id != 0u)); + + add_result_to_mask(&passed, 6u, subgroupAny(subgroup_invocation_id == 0u)); + add_result_to_mask(&passed, 7u, !subgroupAny(false)); + + expected = 0u; + for(var i = 0u; i < subgroup_size; i += 1u) { + expected += global_id.x - subgroup_invocation_id + i + 1u; + } + add_result_to_mask(&passed, 8u, subgroupAdd(global_id.x + 1u) == expected); + + expected = 1u; + for(var i = 0u; i < subgroup_size; i += 1u) { + expected *= global_id.x - subgroup_invocation_id + i + 1u; + } + add_result_to_mask(&passed, 9u, subgroupMul(global_id.x + 1u) == expected); + + expected = 0u; + for(var i = 0u; i < subgroup_size; i += 1u) { + expected = max(expected, global_id.x - subgroup_invocation_id +
i + 1u); + } + add_result_to_mask(&passed, 10u, subgroupMax(global_id.x + 1u) == expected); + + expected = 0xFFFFFFFFu; + for(var i = 0u; i < subgroup_size; i += 1u) { + expected = min(expected, global_id.x - subgroup_invocation_id + i + 1u); + } + add_result_to_mask(&passed, 11u, subgroupMin(global_id.x + 1u) == expected); + + expected = 0xFFFFFFFFu; + for(var i = 0u; i < subgroup_size; i += 1u) { + expected &= global_id.x - subgroup_invocation_id + i + 1u; + } + add_result_to_mask(&passed, 12u, subgroupAnd(global_id.x + 1u) == expected); + + expected = 0u; + for(var i = 0u; i < subgroup_size; i += 1u) { + expected |= global_id.x - subgroup_invocation_id + i + 1u; + } + add_result_to_mask(&passed, 13u, subgroupOr(global_id.x + 1u) == expected); + + expected = 0u; + for(var i = 0u; i < subgroup_size; i += 1u) { + expected ^= global_id.x - subgroup_invocation_id + i + 1u; + } + add_result_to_mask(&passed, 14u, subgroupXor(global_id.x + 1u) == expected); + + expected = 0u; + for(var i = 0u; i < subgroup_invocation_id; i += 1u) { + expected += global_id.x - subgroup_invocation_id + i + 1u; + } + add_result_to_mask(&passed, 15u, subgroupExclusiveAdd(global_id.x + 1u) == expected); + + expected = 1u; + for(var i = 0u; i < subgroup_invocation_id; i += 1u) { + expected *= global_id.x - subgroup_invocation_id + i + 1u; + } + add_result_to_mask(&passed, 16u, subgroupExclusiveMul(global_id.x + 1u) == expected); + + expected = 0u; + for(var i = 0u; i <= subgroup_invocation_id; i += 1u) { + expected += global_id.x - subgroup_invocation_id + i + 1u; + } + add_result_to_mask(&passed, 17u, subgroupInclusiveAdd(global_id.x + 1u) == expected); + + expected = 1u; + for(var i = 0u; i <= subgroup_invocation_id; i += 1u) { + expected *= global_id.x - subgroup_invocation_id + i + 1u; + } + add_result_to_mask(&passed, 18u, subgroupInclusiveMul(global_id.x + 1u) == expected); + + add_result_to_mask(&passed, 19u, subgroupBroadcastFirst(u32(subgroup_invocation_id != 0u)) == 0u); + add_result_to_mask(&passed, 20u, subgroupBroadcastFirst(u32(subgroup_invocation_id == 0u)) == 1u); + add_result_to_mask(&passed, 21u, subgroupBroadcast(subgroup_invocation_id, 1u) == 1u); + add_result_to_mask(&passed, 22u, subgroupShuffle(subgroup_invocation_id, subgroup_invocation_id) == subgroup_invocation_id); + add_result_to_mask(&passed, 23u, subgroupShuffle(subgroup_invocation_id, subgroup_size - 1u - subgroup_invocation_id) == subgroup_size - 1u - subgroup_invocation_id); + add_result_to_mask(&passed, 24u, subgroup_invocation_id == subgroup_size - 1u || subgroupShuffleDown(subgroup_invocation_id, 1u) == subgroup_invocation_id + 1u); + add_result_to_mask(&passed, 25u, subgroup_invocation_id == 0u || subgroupShuffleUp(subgroup_invocation_id, 1u) == subgroup_invocation_id - 1u); + add_result_to_mask(&passed, 26u, subgroupShuffleXor(subgroup_invocation_id, subgroup_size - 1u) == (subgroup_invocation_id ^ (subgroup_size - 1u))); + + // Mac/Apple will fail this test. + var passed_27 = false; + if subgroup_invocation_id % 2u == 0u { + passed_27 |= subgroupAdd(1u) == (subgroup_size / 2u); + } else { + passed_27 |= subgroupAdd(1u) == (subgroup_size / 2u); + } + add_result_to_mask(&passed, 27u, passed_27); + + // Mac/Apple will fail this test. 
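+ // Test 28 exercises subgroupBroadcastFirst under divergent switch control flow; the Metal/MoltenVK failures expected for it are listed in the expect_fail cases in mod.rs.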
+ var passed_28 = false; + switch subgroup_invocation_id % 3u { + case 0u: { + passed_28 = subgroupBroadcastFirst(subgroup_invocation_id) == 0u; + } + case 1u: { + passed_28 = subgroupBroadcastFirst(subgroup_invocation_id) == 1u; + } + case 2u: { + passed_28 = subgroupBroadcastFirst(subgroup_invocation_id) == 2u; + } + default { } + } + add_result_to_mask(&passed, 28u, passed_28); + + // Mac/Apple will sometimes fail this test. MacOS 14.3 passes it, so the bug in the metal compiler seems to be fixed. + expected = 0u; + for (var i = subgroup_size; i >= 0u; i -= 1u) { + expected = subgroupAdd(1u); + if i == subgroup_invocation_id { + break; + } + } + add_result_to_mask(&passed, 29u, expected == (subgroup_invocation_id + 1u)); + + if global_id.x == 0u { + workgroup_buffer = subgroup_size; + } + workgroupBarrier(); + add_result_to_mask(&passed, 30u, workgroup_buffer == subgroup_size); + + // Keep this test last, verify we are still convergent after running other tests + add_result_to_mask(&passed, 31u, subgroupAdd(1u) == subgroup_size); + + // Increment TEST_COUNT in subgroup_operations/mod.rs if adding more tests + + storage_buffer[global_id.x] = passed; +} diff --git a/tests/tests/vertex_indices/mod.rs b/tests/tests/vertex_indices/mod.rs index 77e08489bf..cad7e731d1 100644 --- a/tests/tests/vertex_indices/mod.rs +++ b/tests/tests/vertex_indices/mod.rs @@ -272,7 +272,6 @@ async fn vertex_index_common(ctx: TestingContext) { push_constant_ranges: &[], }); - let constants = &Default::default(); let mut pipeline_desc = wgpu::RenderPipelineDescriptor { label: None, layout: Some(&ppl), @@ -280,7 +279,7 @@ async fn vertex_index_common(ctx: TestingContext) { buffers: &[], module: &shader, entry_point: "vs_main_builtin", - constants, + compilation_options: Default::default(), }, primitive: wgpu::PrimitiveState::default(), depth_stencil: None, @@ -288,7 +287,7 @@ async fn vertex_index_common(ctx: TestingContext) { fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_main", - constants, + compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: wgpu::TextureFormat::Rgba8Unorm, blend: None, diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml index ef5f56d067..7f099da5ca 100644 --- a/wgpu-core/Cargo.toml +++ b/wgpu-core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wgpu-core" -version = "0.19.3" +version = "0.20.0" authors = ["gfx-rs developers"] edition = "2021" description = "WebGPU core logic on wgpu-hal" @@ -100,7 +100,6 @@ arrayvec = "0.7" bit-vec = "0.6" bitflags = "2" bytemuck = { version = "1.14", optional = true } -codespan-reporting = "0.11" document-features.workspace = true indexmap = "2" log = "0.4" @@ -117,17 +116,17 @@ thiserror = "1" [dependencies.naga] path = "../naga" -version = "0.19.2" +version = "0.20.0" [dependencies.wgt] package = "wgpu-types" path = "../wgpu-types" -version = "0.19.2" +version = "0.20.0" [dependencies.hal] package = "wgpu-hal" path = "../wgpu-hal" -version = "0.19.3" +version = "0.20.0" default_features = false [target.'cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))'.dependencies] diff --git a/wgpu-core/src/any_surface.rs b/wgpu-core/src/any_surface.rs deleted file mode 100644 index 94edfc4433..0000000000 --- a/wgpu-core/src/any_surface.rs +++ /dev/null @@ -1,95 +0,0 @@ -use wgt::Backend; - -/// The `AnySurface` type: a `Arc` of a `A::Surface` for any backend `A`. 
-use crate::hal_api::HalApi; - -use std::fmt; -use std::mem::ManuallyDrop; -use std::ptr::NonNull; - -struct AnySurfaceVtable { - // We opportunistically store the backend here, since we know it will be used - // with backend selection and it can be stored in static memory. - backend: Backend, - // Drop glue which knows how to drop the stored data. - drop: unsafe fn(*mut ()), -} - -/// An `A::Surface`, for any backend `A`. -/// -/// Any `AnySurface` is just like an `A::Surface`, except that the `A` type -/// parameter is erased. To access the `Surface`, you must downcast to a -/// particular backend with the [`downcast_ref`] or [`take`] methods. -pub struct AnySurface { - data: NonNull<()>, - vtable: &'static AnySurfaceVtable, -} - -impl AnySurface { - /// Construct an `AnySurface` that owns an `A::Surface`. - pub fn new<A: HalApi>(surface: A::Surface) -> AnySurface { - unsafe fn drop_glue<A: HalApi>(ptr: *mut ()) { - unsafe { - _ = Box::from_raw(ptr.cast::<A::Surface>()); - } - } - - let data = NonNull::from(Box::leak(Box::new(surface))); - - AnySurface { - data: data.cast(), - vtable: &AnySurfaceVtable { - backend: A::VARIANT, - drop: drop_glue::<A>, - }, - } - } - - /// Get the backend this surface was created through. - pub fn backend(&self) -> Backend { - self.vtable.backend - } - - /// If `self` refers to an `A::Surface`, returns a reference to it. - pub fn downcast_ref<A: HalApi>(&self) -> Option<&A::Surface> { - if A::VARIANT != self.vtable.backend { - return None; - } - - // SAFETY: We just checked the instance above implicitly by the backend - // that it was statically constructed through. - Some(unsafe { &*self.data.as_ptr().cast::<A::Surface>() }) - } - - /// If `self` is an `Arc<A::Surface>`, returns that. - pub fn take<A: HalApi>(self) -> Option<A::Surface> { - if A::VARIANT != self.vtable.backend { - return None; - } - - // Disable drop glue, since we're returning the owned surface. The - // caller will be responsible for dropping it. - let this = ManuallyDrop::new(self); - - // SAFETY: We just checked the instance above implicitly by the backend - // that it was statically constructed through. - Some(unsafe { *Box::from_raw(this.data.as_ptr().cast::<A::Surface>()) }) - } -} - -impl Drop for AnySurface { - fn drop(&mut self) { - unsafe { (self.vtable.drop)(self.data.as_ptr()) } - } -} - -impl fmt::Debug for AnySurface { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "AnySurface<{}>", self.vtable.backend) - } -} - -#[cfg(send_sync)] -unsafe impl Send for AnySurface {} -#[cfg(send_sync)] -unsafe impl Sync for AnySurface {} diff --git a/wgpu-core/src/command/allocator.rs b/wgpu-core/src/command/allocator.rs new file mode 100644 index 0000000000..e17fd08d76 --- /dev/null +++ b/wgpu-core/src/command/allocator.rs @@ -0,0 +1,67 @@ +use crate::hal_api::HalApi; +use crate::resource_log; +use hal::Device as _; + +use crate::lock::{rank, Mutex}; + +/// A pool of free [`wgpu_hal::CommandEncoder`]s, owned by a `Device`. +/// +/// Each encoder in this list is in the "closed" state. +/// +/// Since a raw [`CommandEncoder`][ce] is itself a pool for allocating +/// raw [`CommandBuffer`][cb]s, this is a pool of pools. +/// +/// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder +/// [ce]: hal::CommandEncoder +/// [cb]: hal::Api::CommandBuffer +pub(crate) struct CommandAllocator<A: HalApi> { + free_encoders: Mutex<Vec<A::CommandEncoder>>, +} + +impl<A: HalApi> CommandAllocator<A> { + pub(crate) fn new() -> Self { + Self { + free_encoders: Mutex::new(rank::COMMAND_ALLOCATOR_FREE_ENCODERS, Vec::new()), + } + } + + /// Return a fresh [`wgpu_hal::CommandEncoder`] in the "closed" state.
+ /// + /// If we have free encoders in the pool, take one of those. Otherwise, + /// create a new one on `device`. + /// + /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder + pub(crate) fn acquire_encoder( + &self, + device: &A::Device, + queue: &A::Queue, + ) -> Result<A::CommandEncoder, DeviceError> { + let mut free_encoders = self.free_encoders.lock(); + match free_encoders.pop() { + Some(encoder) => Ok(encoder), + None => unsafe { + let hal_desc = hal::CommandEncoderDescriptor { label: None, queue }; + device.create_command_encoder(&hal_desc) + }, + } + } + + /// Add `encoder` back to the free pool. + pub(crate) fn release_encoder(&self, encoder: A::CommandEncoder) { + let mut free_encoders = self.free_encoders.lock(); + free_encoders.push(encoder); + } + + /// Free the pool of command encoders. + /// + /// This is only called when the `Device` is dropped. + pub(crate) fn dispose(&self, device: &A::Device) { + let mut free_encoders = self.free_encoders.lock(); + resource_log!("CommandAllocator::dispose encoders {}", free_encoders.len()); + for cmd_encoder in free_encoders.drain(..) { + unsafe { + device.destroy_command_encoder(cmd_encoder); + } + } + } +} diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index 47beda8ec6..d9d821c533 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -73,7 +73,7 @@ index format changes. [Gdcrbe]: crate::global::Global::device_create_render_bundle_encoder [Grbef]: crate::global::Global::render_bundle_encoder_finish -[wrpeb]: crate::command::render_ffi::wgpu_render_pass_execute_bundles +[wrpeb]: crate::command::render::render_commands::wgpu_render_pass_execute_bundles !*/ #![allow(clippy::reversed_empty_ranges)] @@ -113,7 +113,7 @@ use hal::CommandEncoder as _; use super::ArcRenderCommand; -/// https://gpuweb.github.io/gpuweb/#dom-gpurendercommandsmixin-draw +/// <https://gpuweb.github.io/gpuweb/#dom-gpurendercommandsmixin-draw> fn validate_draw<A: HalApi>( vertex: &[Option<VertexState<A>>], step: &[VertexStep], @@ -1548,15 +1548,14 @@ pub mod bundle_ffi { offsets: *const DynamicOffset, offset_length: usize, ) { - let redundant = unsafe { - bundle.current_bind_groups.set_and_check_redundant( - bind_group_id, - index, - &mut bundle.base.dynamic_offsets, - offsets, - offset_length, - ) - }; + let offsets = unsafe { slice::from_raw_parts(offsets, offset_length) }; + + let redundant = bundle.current_bind_groups.set_and_check_redundant( + bind_group_id, + index, + &mut bundle.base.dynamic_offsets, + offsets, + ); if redundant { return; diff --git a/wgpu-core/src/command/clear.rs b/wgpu-core/src/command/clear.rs index 72c923f82e..faff177928 100644 --- a/wgpu-core/src/command/clear.rs +++ b/wgpu-core/src/command/clear.rs @@ -104,6 +104,11 @@ impl Global { let dst_buffer = buffer_guard .get(dst) .map_err(|_| ClearError::InvalidBuffer(dst))?; + + if dst_buffer.device.as_info().id() != cmd_buf.device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + cmd_buf_data .trackers .buffers @@ -200,6 +205,10 @@ impl Global { .get(dst) .map_err(|_| ClearError::InvalidTexture(dst))?; + if dst_texture.device.as_info().id() != cmd_buf.device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + // Check if subresource aspects are valid.
let clear_aspects = hal::FormatAspects::new(dst_texture.desc.format, subresource_range.aspect); diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index 67cec2d006..046d0df9ff 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -1,3 +1,4 @@ +use crate::command::compute_command::{ArcComputeCommand, ComputeCommand}; use crate::device::DeviceError; use crate::resource::Resource; use crate::snatch::SnatchGuard; @@ -20,7 +21,6 @@ use crate::{ hal_label, id, id::DeviceId, init_tracker::MemoryInitKind, - pipeline, resource::{self}, storage::Storage, track::{Tracker, UsageConflict, UsageScope}, @@ -39,59 +39,6 @@ use thiserror::Error; use std::sync::Arc; use std::{fmt, mem, str}; -#[doc(hidden)] -#[derive(Clone, Copy, Debug)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub enum ComputeCommand { - SetBindGroup { - index: u32, - num_dynamic_offsets: usize, - bind_group_id: id::BindGroupId, - }, - SetPipeline(id::ComputePipelineId), - - /// Set a range of push constants to values stored in [`BasePass::push_constant_data`]. - SetPushConstant { - /// The byte offset within the push constant storage to write to. This - /// must be a multiple of four. - offset: u32, - - /// The number of bytes to write. This must be a multiple of four. - size_bytes: u32, - - /// Index in [`BasePass::push_constant_data`] of the start of the data - /// to be written. - /// - /// Note: this is not a byte offset like `offset`. Rather, it is the - /// index of the first `u32` element in `push_constant_data` to read. - values_offset: u32, - }, - - Dispatch([u32; 3]), - DispatchIndirect { - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - }, - PushDebugGroup { - color: u32, - len: usize, - }, - PopDebugGroup, - InsertDebugMarker { - color: u32, - len: usize, - }, - WriteTimestamp { - query_set_id: id::QuerySetId, - query_index: u32, - }, - BeginPipelineStatisticsQuery { - query_set_id: id::QuerySetId, - query_index: u32, - }, - EndPipelineStatisticsQuery, -} - #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] pub struct ComputePass { base: BasePass<ComputeCommand>, @@ -185,7 +132,7 @@ pub enum ComputePassErrorInner { #[error(transparent)] Encoder(#[from] CommandEncoderError), #[error("Bind group at index {0:?} is invalid")] - InvalidBindGroup(usize), + InvalidBindGroup(u32), #[error("Device {0:?} is invalid")] InvalidDevice(DeviceId), #[error("Bind group index {index} is greater than the device's requested `max_bind_group` limit {max}")] @@ -250,7 +197,7 @@ impl PrettyError for ComputePassErrorInner { pub struct ComputePassError { pub scope: PassErrorScope, #[source] - inner: ComputePassErrorInner, + pub(super) inner: ComputePassErrorInner, } impl PrettyError for ComputePassError { fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { @@ -347,7 +294,8 @@ impl Global { encoder_id: id::CommandEncoderId, pass: &ComputePass, ) -> Result<(), ComputePassError> { - self.command_encoder_run_compute_pass_impl::<A>( + // TODO: This should go directly to `command_encoder_run_compute_pass_impl` by means of storing `ArcComputeCommand` internally.
+ self.command_encoder_run_compute_pass_with_unresolved_commands::<A>( encoder_id, pass.base.as_ref(), pass.timestamp_writes.as_ref(), ) } #[doc(hidden)] - pub fn command_encoder_run_compute_pass_impl<A: HalApi>( + pub fn command_encoder_run_compute_pass_with_unresolved_commands<A: HalApi>( &self, encoder_id: id::CommandEncoderId, base: BasePassRef<ComputeCommand>, timestamp_writes: Option<&ComputePassTimestampWrites>, + ) -> Result<(), ComputePassError> { + let resolved_commands = + ComputeCommand::resolve_compute_command_ids(A::hub(self), base.commands)?; + + self.command_encoder_run_compute_pass_impl::<A>( + encoder_id, + BasePassRef { + label: base.label, + commands: &resolved_commands, + dynamic_offsets: base.dynamic_offsets, + string_data: base.string_data, + push_constant_data: base.push_constant_data, + }, + timestamp_writes, + ) + } + + fn command_encoder_run_compute_pass_impl<A: HalApi>( + &self, + encoder_id: id::CommandEncoderId, + base: BasePassRef<ArcComputeCommand<A>>, + timestamp_writes: Option<&ComputePassTimestampWrites>, ) -> Result<(), ComputePassError> { profiling::scope!("CommandEncoder::run_compute_pass"); let pass_scope = PassErrorScope::Pass(encoder_id); @@ -382,7 +352,13 @@ impl Global { #[cfg(feature = "trace")] if let Some(ref mut list) = cmd_buf_data.commands { list.push(crate::device::trace::Command::RunComputePass { - base: BasePass::from_ref(base), + base: BasePass { + label: base.label.map(str::to_string), + commands: base.commands.iter().map(Into::into).collect(), + dynamic_offsets: base.dynamic_offsets.to_vec(), + string_data: base.string_data.to_vec(), + push_constant_data: base.push_constant_data.to_vec(), + }, timestamp_writes: timestamp_writes.cloned(), }); } @@ -402,7 +378,6 @@ impl Global { let raw = encoder.open().map_pass_err(pass_scope)?; let bind_group_guard = hub.bind_groups.read(); - let pipeline_guard = hub.compute_pipelines.read(); let query_set_guard = hub.query_sets.read(); let buffer_guard = hub.buffers.read(); let tlas_guard = hub.tlas_s.read(); @@ -484,19 +459,21 @@ impl Global { // be inserted before texture reads. let mut pending_discard_init_fixups = SurfacesInDiscardState::new(); + // TODO: We should be draining the commands here, avoiding extra copies in the process. + // (A command encoder can't be executed twice!)
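+ // Each ArcComputeCommand below already owns Arcs to the resources it touches, so the pass can validate and record against the resource directly instead of looking ids up in the hub per command.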
for command in base.commands { - match *command { - ComputeCommand::SetBindGroup { + match command { + ArcComputeCommand::SetBindGroup { index, num_dynamic_offsets, - bind_group_id, + bind_group, } => { - let scope = PassErrorScope::SetBindGroup(bind_group_id); + let scope = PassErrorScope::SetBindGroup(bind_group.as_info().id()); let max_bind_groups = cmd_buf.limits.max_bind_groups; - if index >= max_bind_groups { + if index >= &max_bind_groups { return Err(ComputePassErrorInner::BindGroupIndexOutOfRange { - index, + index: *index, max: max_bind_groups, }) .map_pass_err(scope); @@ -509,13 +486,9 @@ impl Global { ); dynamic_offset_count += num_dynamic_offsets; - let bind_group = tracker - .bind_groups - .add_single(&*bind_group_guard, bind_group_id) - .ok_or(ComputePassErrorInner::InvalidBindGroup(index as usize)) - .map_pass_err(scope)?; + let bind_group = tracker.bind_groups.insert_single(bind_group.clone()); bind_group - .validate_dynamic_bindings(index, &temp_offsets, &cmd_buf.limits) + .validate_dynamic_bindings(*index, &temp_offsets, &cmd_buf.limits) .map_pass_err(scope)?; buffer_memory_init_actions.extend( @@ -551,14 +524,14 @@ impl Global { let entries = state .binder - .assign_group(index as usize, bind_group, &temp_offsets); + .assign_group(*index as usize, bind_group, &temp_offsets); if !entries.is_empty() && pipeline_layout.is_some() { let pipeline_layout = pipeline_layout.as_ref().unwrap().raw(); for (i, e) in entries.iter().enumerate() { if let Some(group) = e.group.as_ref() { let raw_bg = group .raw(&snatch_guard) - .ok_or(ComputePassErrorInner::InvalidBindGroup(i)) + .ok_or(ComputePassErrorInner::InvalidBindGroup(i as u32)) .map_pass_err(scope)?; unsafe { raw.set_bind_group( @@ -572,16 +545,13 @@ impl Global { } } } - ComputeCommand::SetPipeline(pipeline_id) => { + ArcComputeCommand::SetPipeline(pipeline) => { + let pipeline_id = pipeline.as_info().id(); let scope = PassErrorScope::SetPipelineCompute(pipeline_id); state.pipeline = Some(pipeline_id); - let pipeline: &pipeline::ComputePipeline<A> = tracker - .compute_pipelines - .add_single(&*pipeline_guard, pipeline_id) - .ok_or(ComputePassErrorInner::InvalidPipeline(pipeline_id)) - .map_pass_err(scope)?; + tracker.compute_pipelines.insert_single(pipeline.clone()); unsafe { raw.set_compute_pipeline(pipeline.raw()); @@ -605,7 +575,7 @@ impl Global { if let Some(group) = e.group.as_ref() { let raw_bg = group .raw(&snatch_guard) - .ok_or(ComputePassErrorInner::InvalidBindGroup(i)) + .ok_or(ComputePassErrorInner::InvalidBindGroup(i as u32)) .map_pass_err(scope)?; unsafe { raw.set_bind_group( @@ -641,7 +611,7 @@ impl Global { } } } - ComputeCommand::SetPushConstant { + ArcComputeCommand::SetPushConstant { offset, size_bytes, values_offset, @@ -652,7 +622,7 @@ impl Global { let values_end_offset = (values_offset + size_bytes / wgt::PUSH_CONSTANT_ALIGNMENT) as usize; let data_slice = - &base.push_constant_data[(values_offset as usize)..values_end_offset]; + &base.push_constant_data[(*values_offset as usize)..values_end_offset]; let pipeline_layout = state .binder @@ -667,7 +637,7 @@ impl Global { pipeline_layout .validate_push_constant_ranges( wgt::ShaderStages::COMPUTE, - offset, + *offset, end_offset_bytes, ) .map_pass_err(scope)?; @@ -676,12 +646,12 @@ impl Global { raw.set_push_constants( pipeline_layout.raw(), wgt::ShaderStages::COMPUTE, - offset, + *offset, data_slice, ); } } - ComputeCommand::Dispatch(groups) => { + ArcComputeCommand::Dispatch(groups) => { let scope = PassErrorScope::Dispatch { indirect: false, pipeline:
state.pipeline, @@ -706,7 +676,7 @@ impl Global { { return Err(ComputePassErrorInner::Dispatch( DispatchError::InvalidGroupSize { - current: groups, + current: *groups, limit: groups_size_limit, }, )) @@ -714,10 +684,11 @@ impl Global { } unsafe { - raw.dispatch(groups); + raw.dispatch(*groups); } } - ComputeCommand::DispatchIndirect { buffer_id, offset } => { + ArcComputeCommand::DispatchIndirect { buffer, offset } => { + let buffer_id = buffer.as_info().id(); let scope = PassErrorScope::Dispatch { indirect: true, pipeline: state.pipeline, @@ -729,29 +700,25 @@ impl Global { .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION) .map_pass_err(scope)?; - let indirect_buffer = state + state .scope .buffers - .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::INDIRECT) + .insert_merge_single(buffer.clone(), hal::BufferUses::INDIRECT) + .map_pass_err(scope)?; + check_buffer_usage(buffer_id, buffer.usage, wgt::BufferUsages::INDIRECT) .map_pass_err(scope)?; - check_buffer_usage( - buffer_id, - indirect_buffer.usage, - wgt::BufferUsages::INDIRECT, - ) - .map_pass_err(scope)?; let end_offset = offset + mem::size_of::<wgt::DispatchIndirectArgs>() as u64; - if end_offset > indirect_buffer.size { + if end_offset > buffer.size { return Err(ComputePassErrorInner::IndirectBufferOverrun { - offset, + offset: *offset, end_offset, - buffer_size: indirect_buffer.size, + buffer_size: buffer.size, }) .map_pass_err(scope); } - let buf_raw = indirect_buffer + let buf_raw = buffer .raw .get(&snatch_guard) .ok_or(ComputePassErrorInner::InvalidIndirectBuffer(buffer_id)) @@ -760,9 +727,9 @@ impl Global { let stride = 3 * 4; // 3 integers, x/y/z group size buffer_memory_init_actions.extend( - indirect_buffer.initialization_status.read().create_action( - indirect_buffer, - offset..(offset + stride), + buffer.initialization_status.read().create_action( + buffer, + *offset..(*offset + stride), MemoryInitKind::NeedsInitializedMemory, ), ); @@ -772,15 +739,15 @@ impl Global { raw, &mut intermediate_trackers, &*bind_group_guard, - Some(indirect_buffer.as_info().tracker_index()), + Some(buffer.as_info().tracker_index()), &snatch_guard, ) .map_pass_err(scope)?; unsafe { - raw.dispatch_indirect(buf_raw, offset); + raw.dispatch_indirect(buf_raw, *offset); } } - ComputeCommand::PushDebugGroup { color: _, len } => { + ArcComputeCommand::PushDebugGroup { color: _, len } => { state.debug_scope_depth += 1; if !discard_hal_labels { let label = @@ -792,7 +759,7 @@ impl Global { } string_offset += len; } - ComputeCommand::PopDebugGroup => { + ArcComputeCommand::PopDebugGroup => { let scope = PassErrorScope::PopDebugGroup; if state.debug_scope_depth == 0 { @@ -806,7 +773,7 @@ impl Global { } } } - ComputeCommand::InsertDebugMarker { color: _, len } => { + ArcComputeCommand::InsertDebugMarker { color: _, len } => { if !discard_hal_labels { let label = str::from_utf8(&base.string_data[string_offset..string_offset + len]) @@ -815,49 +782,43 @@ impl Global { } string_offset += len; } - ComputeCommand::WriteTimestamp { - query_set_id, + ArcComputeCommand::WriteTimestamp { + query_set, query_index, } => { + let query_set_id = query_set.as_info().id(); let scope = PassErrorScope::WriteTimestamp; device .require_features(wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES) .map_pass_err(scope)?; - let query_set: &resource::QuerySet<A> = tracker - .query_sets - .add_single(&*query_set_guard, query_set_id) - .ok_or(ComputePassErrorInner::InvalidQuerySet(query_set_id)) - .map_pass_err(scope)?; + let query_set = tracker.query_sets.insert_single(query_set.clone());
query_set - .validate_and_write_timestamp(raw, query_set_id, query_index, None) + .validate_and_write_timestamp(raw, query_set_id, *query_index, None) .map_pass_err(scope)?; } - ComputeCommand::BeginPipelineStatisticsQuery { - query_set_id, + ArcComputeCommand::BeginPipelineStatisticsQuery { + query_set, query_index, } => { + let query_set_id = query_set.as_info().id(); let scope = PassErrorScope::BeginPipelineStatisticsQuery; - let query_set: &resource::QuerySet<A> = tracker - .query_sets - .add_single(&*query_set_guard, query_set_id) - .ok_or(ComputePassErrorInner::InvalidQuerySet(query_set_id)) - .map_pass_err(scope)?; + let query_set = tracker.query_sets.insert_single(query_set.clone()); query_set .validate_and_begin_pipeline_statistics_query( raw, query_set_id, - query_index, + *query_index, None, &mut active_query, ) .map_pass_err(scope)?; } - ComputeCommand::EndPipelineStatisticsQuery => { + ArcComputeCommand::EndPipelineStatisticsQuery => { let scope = PassErrorScope::EndPipelineStatisticsQuery; end_pipeline_statistics_query(raw, &*query_set_guard, &mut active_query) @@ -901,33 +862,24 @@ impl Global { } } -pub mod compute_ffi { +pub mod compute_commands { use super::{ComputeCommand, ComputePass}; - use crate::{id, RawString}; - use std::{convert::TryInto, ffi, slice}; + use crate::id; + use std::convert::TryInto; use wgt::{BufferAddress, DynamicOffset}; - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given pointer is - /// valid for `offset_length` elements. - #[no_mangle] - pub unsafe extern "C" fn wgpu_compute_pass_set_bind_group( + pub fn wgpu_compute_pass_set_bind_group( pass: &mut ComputePass, index: u32, bind_group_id: id::BindGroupId, - offsets: *const DynamicOffset, - offset_length: usize, + offsets: &[DynamicOffset], ) { - let redundant = unsafe { - pass.current_bind_groups.set_and_check_redundant( - bind_group_id, - index, - &mut pass.base.dynamic_offsets, - offsets, - offset_length, - ) - }; + let redundant = pass.current_bind_groups.set_and_check_redundant( + bind_group_id, + index, + &mut pass.base.dynamic_offsets, + offsets, + ); if redundant { return; @@ -935,13 +887,12 @@ pub mod compute_ffi { pass.base.commands.push(ComputeCommand::SetBindGroup { index, - num_dynamic_offsets: offset_length, + num_dynamic_offsets: offsets.len(), bind_group_id, }); } - #[no_mangle] - pub extern "C" fn wgpu_compute_pass_set_pipeline( + pub fn wgpu_compute_pass_set_pipeline( pass: &mut ComputePass, pipeline_id: id::ComputePipelineId, ) { @@ -954,47 +905,34 @@ pub mod compute_ffi { .push(ComputeCommand::SetPipeline(pipeline_id)); } - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given pointer is - /// valid for `size_bytes` bytes. - #[no_mangle] - pub unsafe extern "C" fn wgpu_compute_pass_set_push_constant( - pass: &mut ComputePass, - offset: u32, - size_bytes: u32, - data: *const u8, - ) { + pub fn wgpu_compute_pass_set_push_constant(pass: &mut ComputePass, offset: u32, data: &[u8]) { assert_eq!( offset & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), 0, "Push constant offset must be aligned to 4 bytes." ); assert_eq!( - size_bytes & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), + data.len() as u32 & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), 0, "Push constant size must be aligned to 4 bytes." ); - let data_slice = unsafe { slice::from_raw_parts(data, size_bytes as usize) }; let value_offset = pass.base.push_constant_data.len().try_into().expect( "Ran out of push constant space.
Don't set 4gb of push constants per ComputePass.", ); pass.base.push_constant_data.extend( - data_slice - .chunks_exact(wgt::PUSH_CONSTANT_ALIGNMENT as usize) + data.chunks_exact(wgt::PUSH_CONSTANT_ALIGNMENT as usize) .map(|arr| u32::from_ne_bytes([arr[0], arr[1], arr[2], arr[3]])), ); pass.base.commands.push(ComputeCommand::SetPushConstant { offset, - size_bytes, + size_bytes: data.len() as u32, values_offset: value_offset, }); } - #[no_mangle] - pub extern "C" fn wgpu_compute_pass_dispatch_workgroups( + pub fn wgpu_compute_pass_dispatch_workgroups( pass: &mut ComputePass, groups_x: u32, groups_y: u32, @@ -1005,8 +943,7 @@ pub mod compute_ffi { .push(ComputeCommand::Dispatch([groups_x, groups_y, groups_z])); } - #[no_mangle] - pub extern "C" fn wgpu_compute_pass_dispatch_workgroups_indirect( + pub fn wgpu_compute_pass_dispatch_workgroups_indirect( pass: &mut ComputePass, buffer_id: id::BufferId, offset: BufferAddress, @@ -1016,17 +953,8 @@ pub mod compute_ffi { .push(ComputeCommand::DispatchIndirect { buffer_id, offset }); } - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given `label` - /// is a valid null-terminated string. - #[no_mangle] - pub unsafe extern "C" fn wgpu_compute_pass_push_debug_group( - pass: &mut ComputePass, - label: RawString, - color: u32, - ) { - let bytes = unsafe { ffi::CStr::from_ptr(label) }.to_bytes(); + pub fn wgpu_compute_pass_push_debug_group(pass: &mut ComputePass, label: &str, color: u32) { + let bytes = label.as_bytes(); pass.base.string_data.extend_from_slice(bytes); pass.base.commands.push(ComputeCommand::PushDebugGroup { @@ -1035,22 +963,12 @@ pub mod compute_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_compute_pass_pop_debug_group(pass: &mut ComputePass) { + pub fn wgpu_compute_pass_pop_debug_group(pass: &mut ComputePass) { pass.base.commands.push(ComputeCommand::PopDebugGroup); } - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given `label` - /// is a valid null-terminated string. 
- #[no_mangle] - pub unsafe extern "C" fn wgpu_compute_pass_insert_debug_marker( - pass: &mut ComputePass, - label: RawString, - color: u32, - ) { - let bytes = unsafe { ffi::CStr::from_ptr(label) }.to_bytes(); + pub fn wgpu_compute_pass_insert_debug_marker(pass: &mut ComputePass, label: &str, color: u32) { + let bytes = label.as_bytes(); pass.base.string_data.extend_from_slice(bytes); pass.base.commands.push(ComputeCommand::InsertDebugMarker { @@ -1059,8 +977,7 @@ pub mod compute_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_compute_pass_write_timestamp( + pub fn wgpu_compute_pass_write_timestamp( pass: &mut ComputePass, query_set_id: id::QuerySetId, query_index: u32, @@ -1071,8 +988,7 @@ pub mod compute_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_compute_pass_begin_pipeline_statistics_query( + pub fn wgpu_compute_pass_begin_pipeline_statistics_query( pass: &mut ComputePass, query_set_id: id::QuerySetId, query_index: u32, @@ -1085,8 +1001,7 @@ pub mod compute_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_compute_pass_end_pipeline_statistics_query(pass: &mut ComputePass) { + pub fn wgpu_compute_pass_end_pipeline_statistics_query(pass: &mut ComputePass) { pass.base .commands .push(ComputeCommand::EndPipelineStatisticsQuery); diff --git a/wgpu-core/src/command/compute_command.rs b/wgpu-core/src/command/compute_command.rs new file mode 100644 index 0000000000..49fdbbec24 --- /dev/null +++ b/wgpu-core/src/command/compute_command.rs @@ -0,0 +1,322 @@ +use std::sync::Arc; + +use crate::{ + binding_model::BindGroup, + hal_api::HalApi, + id, + pipeline::ComputePipeline, + resource::{Buffer, QuerySet}, +}; + +use super::{ComputePassError, ComputePassErrorInner, PassErrorScope}; + +#[derive(Clone, Copy, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum ComputeCommand { + SetBindGroup { + index: u32, + num_dynamic_offsets: usize, + bind_group_id: id::BindGroupId, + }, + + SetPipeline(id::ComputePipelineId), + + /// Set a range of push constants to values stored in `push_constant_data`. + SetPushConstant { + /// The byte offset within the push constant storage to write to. This + /// must be a multiple of four. + offset: u32, + + /// The number of bytes to write. This must be a multiple of four. + size_bytes: u32, + + /// Index in `push_constant_data` of the start of the data + /// to be written. + /// + /// Note: this is not a byte offset like `offset`. Rather, it is the + /// index of the first `u32` element in `push_constant_data` to read. + values_offset: u32, + }, + + Dispatch([u32; 3]), + + DispatchIndirect { + buffer_id: id::BufferId, + offset: wgt::BufferAddress, + }, + + PushDebugGroup { + color: u32, + len: usize, + }, + + PopDebugGroup, + + InsertDebugMarker { + color: u32, + len: usize, + }, + + WriteTimestamp { + query_set_id: id::QuerySetId, + query_index: u32, + }, + + BeginPipelineStatisticsQuery { + query_set_id: id::QuerySetId, + query_index: u32, + }, + + EndPipelineStatisticsQuery, +} + +impl ComputeCommand { + /// Resolves all ids in a list of commands into the corresponding resource Arc. 
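+ /// + /// On failure, returns a `ComputePassError` whose scope identifies the offending command.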
+ /// + // TODO: Once resolving is done on-the-fly during recording, this function should only be needed with the replay feature: + // #[cfg(feature = "replay")] + pub fn resolve_compute_command_ids<A: HalApi>( + hub: &crate::hub::Hub<A>, + commands: &[ComputeCommand], + ) -> Result<Vec<ArcComputeCommand<A>>, ComputePassError> { + let buffers_guard = hub.buffers.read(); + let bind_group_guard = hub.bind_groups.read(); + let query_set_guard = hub.query_sets.read(); + let pipelines_guard = hub.compute_pipelines.read(); + + let resolved_commands: Vec<ArcComputeCommand<A>> = commands + .iter() + .map(|c| -> Result<ArcComputeCommand<A>, ComputePassError> { + Ok(match *c { + ComputeCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group_id, + } => ArcComputeCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group: bind_group_guard.get_owned(bind_group_id).map_err(|_| { + ComputePassError { + scope: PassErrorScope::SetBindGroup(bind_group_id), + inner: ComputePassErrorInner::InvalidBindGroup(index), + } + })?, + }, + + ComputeCommand::SetPipeline(pipeline_id) => ArcComputeCommand::SetPipeline( + pipelines_guard + .get_owned(pipeline_id) + .map_err(|_| ComputePassError { + scope: PassErrorScope::SetPipelineCompute(pipeline_id), + inner: ComputePassErrorInner::InvalidPipeline(pipeline_id), + })?, + ), + + ComputeCommand::SetPushConstant { + offset, + size_bytes, + values_offset, + } => ArcComputeCommand::SetPushConstant { + offset, + size_bytes, + values_offset, + }, + + ComputeCommand::Dispatch(dim) => ArcComputeCommand::Dispatch(dim), + + ComputeCommand::DispatchIndirect { buffer_id, offset } => { + ArcComputeCommand::DispatchIndirect { + buffer: buffers_guard.get_owned(buffer_id).map_err(|_| { + ComputePassError { + scope: PassErrorScope::Dispatch { + indirect: true, + pipeline: None, // TODO: not used right now, but once we do the resolve during recording we can use this again. + }, + inner: ComputePassErrorInner::InvalidBuffer(buffer_id), + } + })?, + offset, + } + } + + ComputeCommand::PushDebugGroup { color, len } => { + ArcComputeCommand::PushDebugGroup { color, len } + } + + ComputeCommand::PopDebugGroup => ArcComputeCommand::PopDebugGroup, + + ComputeCommand::InsertDebugMarker { color, len } => { + ArcComputeCommand::InsertDebugMarker { color, len } + } + + ComputeCommand::WriteTimestamp { + query_set_id, + query_index, + } => ArcComputeCommand::WriteTimestamp { + query_set: query_set_guard.get_owned(query_set_id).map_err(|_| { + ComputePassError { + scope: PassErrorScope::WriteTimestamp, + inner: ComputePassErrorInner::InvalidQuerySet(query_set_id), + } + })?, + query_index, + }, + + ComputeCommand::BeginPipelineStatisticsQuery { + query_set_id, + query_index, + } => ArcComputeCommand::BeginPipelineStatisticsQuery { + query_set: query_set_guard.get_owned(query_set_id).map_err(|_| { + ComputePassError { + scope: PassErrorScope::BeginPipelineStatisticsQuery, + inner: ComputePassErrorInner::InvalidQuerySet(query_set_id), + } + })?, + query_index, + }, + + ComputeCommand::EndPipelineStatisticsQuery => { + ArcComputeCommand::EndPipelineStatisticsQuery + } + }) + }) + .collect::<Result<Vec<ArcComputeCommand<A>>, ComputePassError>>()?; + Ok(resolved_commands) + } +} + +/// Equivalent to `ComputeCommand`, but with the Ids resolved into resource Arcs. +#[derive(Clone, Debug)] +pub enum ArcComputeCommand<A: HalApi> { + SetBindGroup { + index: u32, + num_dynamic_offsets: usize, + bind_group: Arc<BindGroup<A>>, + }, + + SetPipeline(Arc<ComputePipeline<A>>), + + /// Set a range of push constants to values stored in `push_constant_data`. + SetPushConstant { + /// The byte offset within the push constant storage to write to.
This + /// must be a multiple of four. + offset: u32, + + /// The number of bytes to write. This must be a multiple of four. + size_bytes: u32, + + /// Index in `push_constant_data` of the start of the data + /// to be written. + /// + /// Note: this is not a byte offset like `offset`. Rather, it is the + /// index of the first `u32` element in `push_constant_data` to read. + values_offset: u32, + }, + + Dispatch([u32; 3]), + + DispatchIndirect { + buffer: Arc<Buffer<A>>, + offset: wgt::BufferAddress, + }, + + PushDebugGroup { + color: u32, + len: usize, + }, + + PopDebugGroup, + + InsertDebugMarker { + color: u32, + len: usize, + }, + + WriteTimestamp { + query_set: Arc<QuerySet<A>>, + query_index: u32, + }, + + BeginPipelineStatisticsQuery { + query_set: Arc<QuerySet<A>>, + query_index: u32, + }, + + EndPipelineStatisticsQuery, +} + +#[cfg(feature = "trace")] +impl<A: HalApi> From<&ArcComputeCommand<A>> for ComputeCommand { + fn from(value: &ArcComputeCommand<A>) -> Self { + use crate::resource::Resource as _; + + match value { + ArcComputeCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group, + } => ComputeCommand::SetBindGroup { + index: *index, + num_dynamic_offsets: *num_dynamic_offsets, + bind_group_id: bind_group.as_info().id(), + }, + + ArcComputeCommand::SetPipeline(pipeline) => { + ComputeCommand::SetPipeline(pipeline.as_info().id()) + } + + ArcComputeCommand::SetPushConstant { + offset, + size_bytes, + values_offset, + } => ComputeCommand::SetPushConstant { + offset: *offset, + size_bytes: *size_bytes, + values_offset: *values_offset, + }, + + ArcComputeCommand::Dispatch(dim) => ComputeCommand::Dispatch(*dim), + + ArcComputeCommand::DispatchIndirect { buffer, offset } => { + ComputeCommand::DispatchIndirect { + buffer_id: buffer.as_info().id(), + offset: *offset, + } + } + + ArcComputeCommand::PushDebugGroup { color, len } => ComputeCommand::PushDebugGroup { + color: *color, + len: *len, + }, + + ArcComputeCommand::PopDebugGroup => ComputeCommand::PopDebugGroup, + + ArcComputeCommand::InsertDebugMarker { color, len } => { + ComputeCommand::InsertDebugMarker { + color: *color, + len: *len, + } + } + + ArcComputeCommand::WriteTimestamp { + query_set, + query_index, + } => ComputeCommand::WriteTimestamp { + query_set_id: query_set.as_info().id(), + query_index: *query_index, + }, + + ArcComputeCommand::BeginPipelineStatisticsQuery { + query_set, + query_index, + } => ComputeCommand::BeginPipelineStatisticsQuery { + query_set_id: query_set.as_info().id(), + query_index: *query_index, + }, + + ArcComputeCommand::EndPipelineStatisticsQuery => { + ComputeCommand::EndPipelineStatisticsQuery + } + } + } +} diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index 6e6c1abacf..2a6298d91d 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -1,7 +1,9 @@ +mod allocator; mod bind; mod bundle; mod clear; mod compute; +mod compute_command; mod draw; mod memory_init; mod query; @@ -9,13 +11,14 @@ mod ray_tracing; mod render; mod transfer; -use std::slice; use std::sync::Arc; pub(crate) use self::clear::clear_texture; pub use self::{ - bundle::*, clear::ClearError, compute::*, draw::*, query::*, render::*, transfer::*, + bundle::*, clear::ClearError, compute::*, compute_command::ComputeCommand, draw::*, query::*, + render::*, transfer::*, }; +pub(crate) use allocator::CommandAllocator; use self::memory_init::CommandBufferTextureMemoryActions; @@ -23,6 +26,7 @@ use crate::device::{Device, DeviceError}; use crate::error::{ErrorFormatter, PrettyError}; use crate::hub::Hub; use
crate::id::CommandBufferId; +use crate::lock::{rank, Mutex}; use crate::snatch::SnatchGuard; use crate::init_tracker::BufferInitTrackerAction; @@ -32,7 +36,6 @@ use crate::track::{Tracker, UsageScope}; use crate::{api_log, global::Global, hal_api::HalApi, id, resource_log, Label}; use hal::CommandEncoder as _; -use parking_lot::Mutex; use thiserror::Error; #[cfg(feature = "trace")] @@ -40,23 +43,122 @@ use crate::device::trace::Command as TraceCommand; const PUSH_CONSTANT_CLEAR_ARRAY: &[u32] = &[0_u32; 64]; +/// The current state of a [`CommandBuffer`]. #[derive(Debug)] pub(crate) enum CommandEncoderStatus { + /// Ready to record commands. An encoder's initial state. + /// + /// Command building methods like [`command_encoder_clear_buffer`] and + /// [`command_encoder_run_compute_pass`] require the encoder to be in this + /// state. + /// + /// [`command_encoder_clear_buffer`]: Global::command_encoder_clear_buffer + /// [`command_encoder_run_compute_pass`]: Global::command_encoder_run_compute_pass Recording, + + /// Command recording is complete, and the buffer is ready for submission. + /// + /// [`Global::command_encoder_finish`] transitions a + /// `CommandBuffer` from the `Recording` state into this state. + /// + /// [`Global::queue_submit`] drops command buffers unless they are + /// in this state. Finished, + + /// An error occurred while recording a compute or render pass. + /// + /// When a `CommandEncoder` is left in this state, we have also + /// returned an error result from the function that encountered + /// the problem. Future attempts to use the encoder (that is, + /// calls to [`CommandBuffer::get_encoder`]) will also return + /// errors. + /// + /// Calling [`Global::command_encoder_finish`] in this state + /// discards the command buffer under construction. Error, } +/// A raw [`CommandEncoder`][rce], and the raw [`CommandBuffer`][rcb]s built from it. +/// +/// Each wgpu-core [`CommandBuffer`] owns an instance of this type, which is +/// where the commands are actually stored. +/// +/// This holds a `Vec` of raw [`CommandBuffer`][rcb]s, not just one. We are not +/// always able to record commands in the order in which they must ultimately be +/// submitted to the queue, but raw command buffers don't permit inserting new +/// commands into the middle of a recorded stream. However, hal queue submission +/// accepts a series of command buffers at once, so we can simply break the +/// stream up into multiple buffers, and then reorder the buffers. See +/// [`CommandEncoder::close_and_swap`] for a specific example of this. +/// +/// Note that a [`CommandEncoderId`] actually refers to a [`CommandBuffer`]. +/// Methods that take a command encoder id actually look up the command buffer, +/// and then use its encoder. +/// +/// [rce]: hal::Api::CommandEncoder +/// [rcb]: hal::Api::CommandBuffer +/// [`CommandEncoderId`]: crate::id::CommandEncoderId pub(crate) struct CommandEncoder<A: HalApi> { + /// The underlying `wgpu_hal` [`CommandEncoder`]. + /// + /// Successfully executed command buffers' encoders are saved in a + /// [`CommandAllocator`] for recycling. + /// + /// [`CommandEncoder`]: hal::Api::CommandEncoder + /// [`CommandAllocator`]: crate::command::CommandAllocator raw: A::CommandEncoder, + + /// All the raw command buffers for our owning [`CommandBuffer`], in + /// submission order. + /// + /// These command buffers were all constructed with `raw`.
The + /// [`wgpu_hal::CommandEncoder`] trait forbids these from outliving `raw`, + /// and requires that we provide all of these when we call + /// [`raw.reset_all()`][CE::ra], so the encoder and its buffers travel + /// together. + /// + /// [CE::ra]: hal::CommandEncoder::reset_all + /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder list: Vec, + + /// True if `raw` is in the "recording" state. + /// + /// See the documentation for [`wgpu_hal::CommandEncoder`] for + /// details on the states `raw` can be in. + /// + /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder is_open: bool, + label: Option, } //TODO: handle errors better impl CommandEncoder { - /// Closes the live encoder + /// Finish the current command buffer, if any, and place it + /// at the second-to-last position in our list. + /// + /// If we have opened this command encoder, finish its current + /// command buffer, and insert it just before the last element in + /// [`self.list`][l]. If this command buffer is closed, do nothing. + /// + /// On return, the underlying hal encoder is closed. + /// + /// What is this for? + /// + /// The `wgpu_hal` contract requires that each render or compute pass's + /// commands be preceded by calls to [`transition_buffers`] and + /// [`transition_textures`], to put the resources the pass operates on in + /// the appropriate state. Unfortunately, we don't know which transitions + /// are needed until we're done recording the pass itself. Rather than + /// iterating over the pass twice, we note the necessary transitions as we + /// record its commands, finish the raw command buffer for the actual pass, + /// record a new raw command buffer for the transitions, and jam that buffer + /// in just before the pass's. This is the function that jams in the + /// transitions' command buffer. + /// + /// [l]: CommandEncoder::list + /// [`transition_buffers`]: hal::CommandEncoder::transition_buffers + /// [`transition_textures`]: hal::CommandEncoder::transition_textures fn close_and_swap(&mut self) -> Result<(), DeviceError> { if self.is_open { self.is_open = false; @@ -67,6 +169,16 @@ impl CommandEncoder { Ok(()) } + /// Finish the current command buffer, if any, and add it to the + /// end of [`self.list`][l]. + /// + /// If we have opened this command encoder, finish its current + /// command buffer, and push it onto the end of [`self.list`][l]. + /// If this command buffer is closed, do nothing. + /// + /// On return, the underlying hal encoder is closed. + /// + /// [l]: CommandEncoder::list fn close(&mut self) -> Result<(), DeviceError> { if self.is_open { self.is_open = false; @@ -77,6 +189,9 @@ impl CommandEncoder { Ok(()) } + /// Discard the command buffer under construction, if any. + /// + /// The underlying hal encoder is closed, if it was recording. pub(crate) fn discard(&mut self) { if self.is_open { self.is_open = false; @@ -84,6 +199,9 @@ impl CommandEncoder { } } + /// Begin recording a new command buffer, if we haven't already. + /// + /// The underlying hal encoder is put in the "recording" state. pub(crate) fn open(&mut self) -> Result<&mut A::CommandEncoder, DeviceError> { if !self.is_open { self.is_open = true; @@ -94,6 +212,10 @@ impl CommandEncoder { Ok(&mut self.raw) } + /// Begin recording a new command buffer for a render pass, with + /// its own label. + /// + /// The underlying hal encoder is put in the "recording" state. fn open_pass(&mut self, label: Option<&str>) -> Result<(), DeviceError> { self.is_open = true; unsafe { self.raw.begin_encoding(label)? 
}; @@ -115,12 +237,29 @@ pub(crate) struct BakedCommands { pub(crate) struct DestroyedBufferError(pub id::BufferId); pub(crate) struct DestroyedTextureError(pub id::TextureId); +/// The mutable state of a [`CommandBuffer`]. pub struct CommandBufferMutable { + /// The [`wgpu_hal::Api::CommandBuffer`]s we've built so far, and the encoder + /// they belong to. + /// + /// [`wgpu_hal::Api::CommandBuffer`]: hal::Api::CommandBuffer pub(crate) encoder: CommandEncoder, + + /// The current state of this command buffer's encoder. status: CommandEncoderStatus, + + /// All the resources that the commands recorded so far have referred to. pub(crate) trackers: Tracker, + + /// The regions of buffers and textures these commands will read and write. + /// + /// This is used to determine which portions of which + /// buffers/textures we actually need to initialize. If we're + /// definitely going to write to something before we read from it, + /// we don't need to clear its contents. buffer_memory_init_actions: Vec>, texture_memory_actions: CommandBufferTextureMemoryActions, + pub(crate) pending_query_resets: QueryResetMap, blas_actions: Vec, tlas_actions: Vec, @@ -139,11 +278,36 @@ impl CommandBufferMutable { } } +/// A buffer of commands to be submitted to the GPU for execution. +/// +/// Whereas the WebGPU API uses two separate types for command buffers and +/// encoders, this type is a fusion of the two: +/// +/// - During command recording, this holds a [`CommandEncoder`] accepting this +/// buffer's commands. In this state, the [`CommandBuffer`] type behaves like +/// a WebGPU `GPUCommandEncoder`. +/// +/// - Once command recording is finished by calling +/// [`Global::command_encoder_finish`], no further recording is allowed. The +/// internal [`CommandEncoder`] is retained solely as a storage pool for the +/// raw command buffers. In this state, the value behaves like a WebGPU +/// `GPUCommandBuffer`. +/// +/// - Once a command buffer is submitted to the queue, it is removed from the id +/// registry, and its contents are taken to construct a [`BakedCommands`], +/// whose contents eventually become the property of the submission queue. pub struct CommandBuffer { pub(crate) device: Arc>, limits: wgt::Limits, support_clear_texture: bool, pub(crate) info: ResourceInfo>, + + /// The mutable state of this command buffer. + /// + /// This `Option` is populated when the command buffer is first created. + /// When this is submitted, dropped, or destroyed, its contents are + /// extracted into a [`BakedCommands`] by + /// [`CommandBuffer::extract_baked_commands`]. 
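The state machine these new comments lay out — recording is allowed only in the `Recording` state, `finish` seals the buffer for submission, and the `Error` state poisons every later use, with `finish` discarding the work instead — condenses into a standalone sketch. Everything below is illustrative only; `Status` and `ToyCommandBuffer` are invented names, not wgpu-core types:

```rust
#[derive(Debug)]
enum Status {
    Recording,
    Finished,
    Error,
}

struct ToyCommandBuffer {
    status: Status,
    commands: Vec<String>, // stands in for the recorded hal commands
}

impl ToyCommandBuffer {
    fn new() -> Self {
        Self {
            status: Status::Recording,
            commands: Vec::new(),
        }
    }

    /// Like `CommandBuffer::get_encoder`: recording is only allowed while
    /// the encoder is in the `Recording` state.
    fn record(&mut self, cmd: &str) -> Result<(), String> {
        match self.status {
            Status::Recording => {
                self.commands.push(cmd.to_string());
                Ok(())
            }
            // An earlier failure poisons all later recording attempts.
            _ => Err(format!("encoder is {:?}, not Recording", self.status)),
        }
    }

    /// Like `command_encoder_finish`: seal the buffer for submission, or
    /// discard everything recorded so far if a pass failed earlier.
    fn finish(&mut self) -> Result<usize, String> {
        match self.status {
            Status::Recording => {
                self.status = Status::Finished;
                Ok(self.commands.len())
            }
            Status::Error => {
                self.commands.clear(); // drop the buffer under construction
                Err("a recorded pass returned an error".to_string())
            }
            Status::Finished => Err("already finished".to_string()),
        }
    }
}

fn main() {
    let mut buf = ToyCommandBuffer::new();
    buf.record("clear_buffer").unwrap();
    assert_eq!(buf.finish().unwrap(), 1);
    // Further recording is rejected, mirroring queue_submit's requirement
    // that only finished buffers are submitted.
    assert!(buf.record("dispatch").is_err());
}
```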
     pub(crate) data: Mutex<Option<CommandBufferMutable<A>>>,
 }
@@ -183,27 +347,30 @@ impl<A: HalApi> CommandBuffer<A> {
                 None,
             ),
             //Todo come back
-            data: Mutex::new(Some(CommandBufferMutable {
-                encoder: CommandEncoder {
-                    raw: encoder,
-                    is_open: false,
-                    list: Vec::new(),
-                    label,
-                },
-                status: CommandEncoderStatus::Recording,
-                trackers: Tracker::new(),
-                buffer_memory_init_actions: Default::default(),
-                texture_memory_actions: Default::default(),
-                pending_query_resets: QueryResetMap::new(),
-                blas_actions: Default::default(),
-                tlas_actions: Default::default(),
-                #[cfg(feature = "trace")]
-                commands: if enable_tracing {
-                    Some(Vec::new())
-                } else {
-                    None
-                },
-            })),
+            data: Mutex::new(
+                rank::COMMAND_BUFFER_DATA,
+                Some(CommandBufferMutable {
+                    encoder: CommandEncoder {
+                        raw: encoder,
+                        is_open: false,
+                        list: Vec::new(),
+                        label,
+                    },
+                    status: CommandEncoderStatus::Recording,
+                    trackers: Tracker::new(),
+                    buffer_memory_init_actions: Default::default(),
+                    texture_memory_actions: Default::default(),
+                    pending_query_resets: QueryResetMap::new(),
+                    blas_actions: Default::default(),
+                    tlas_actions: Default::default(),
+                    #[cfg(feature = "trace")]
+                    commands: if enable_tracing {
+                        Some(Vec::new())
+                    } else {
+                        None
+                    },
+                }),
+            ),
         }
     }
@@ -257,6 +424,12 @@ impl<A: HalApi> CommandBuffer<A> {
 }
 
 impl<A: HalApi> CommandBuffer<A> {
+    /// Return the [`CommandBuffer`] for `id`, for recording new commands.
+    ///
+    /// In `wgpu_core`, the [`CommandBuffer`] type serves both as encoder and
+    /// buffer, which is why this function takes an [`id::CommandEncoderId`]
+    /// but returns a [`CommandBuffer`]. The returned command buffer must be
+    /// in the "recording" state. Otherwise, an error is returned.
     fn get_encoder(
         hub: &Hub<A>,
         id: id::CommandEncoderId,
@@ -608,16 +781,15 @@ impl BindGroupStateChange {
         }
     }
 
-    unsafe fn set_and_check_redundant(
+    fn set_and_check_redundant(
        &mut self,
        bind_group_id: id::BindGroupId,
        index: u32,
        dynamic_offsets: &mut Vec<wgt::DynamicOffset>,
-        offsets: *const wgt::DynamicOffset,
-        offset_length: usize,
+        offsets: &[wgt::DynamicOffset],
    ) -> bool {
        // For now never deduplicate bind groups with dynamic offsets.
- if offset_length == 0 { + if offsets.is_empty() { // If this get returns None, that means we're well over the limit, // so let the call through to get a proper error if let Some(current_bind_group) = self.last_states.get_mut(index as usize) { @@ -633,8 +805,7 @@ impl BindGroupStateChange { if let Some(current_bind_group) = self.last_states.get_mut(index as usize) { current_bind_group.reset(); } - dynamic_offsets - .extend_from_slice(unsafe { slice::from_raw_parts(offsets, offset_length) }); + dynamic_offsets.extend_from_slice(offsets); } false } diff --git a/wgpu-core/src/command/query.rs b/wgpu-core/src/command/query.rs index 89cba6fbf3..fd3360cc00 100644 --- a/wgpu-core/src/command/query.rs +++ b/wgpu-core/src/command/query.rs @@ -9,7 +9,7 @@ use crate::{ hal_api::HalApi, id::{self, Id}, init_tracker::MemoryInitKind, - resource::QuerySet, + resource::{QuerySet, Resource}, storage::Storage, Epoch, FastHashMap, Index, }; @@ -429,11 +429,20 @@ impl Global { .add_single(&*query_set_guard, query_set_id) .ok_or(QueryError::InvalidQuerySet(query_set_id))?; + if query_set.device.as_info().id() != cmd_buf.device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + let (dst_buffer, dst_pending) = { let buffer_guard = hub.buffers.read(); let dst_buffer = buffer_guard .get(destination) .map_err(|_| QueryError::InvalidBuffer(destination))?; + + if dst_buffer.device.as_info().id() != cmd_buf.device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + tracker .buffers .set_single(dst_buffer, hal::BufferUses::COPY_DST) diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index d128f38cf6..93fb7b5622 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -2476,36 +2476,27 @@ impl Global { } } -pub mod render_ffi { +pub mod render_commands { use super::{ super::{Rect, RenderCommand}, RenderPass, }; - use crate::{id, RawString}; - use std::{convert::TryInto, ffi, num::NonZeroU32, slice}; + use crate::id; + use std::{convert::TryInto, num::NonZeroU32}; use wgt::{BufferAddress, BufferSize, Color, DynamicOffset, IndexFormat}; - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given pointer is - /// valid for `offset_length` elements. 
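The theme of this hunk, and of the rest of `render_ffi` below as it becomes the safe `render_commands` module, is replacing each raw pointer-plus-length parameter and its `# Safety` contract with a slice. A minimal sketch of the pattern, with invented names rather than the actual wgpu-core signatures:

```rust
// Before: the callee must trust the caller-supplied pointer and length.
/// # Safety
///
/// `offsets` must be valid for reads of `offset_length` elements.
unsafe fn set_offsets_ffi(dst: &mut Vec<u32>, offsets: *const u32, offset_length: usize) {
    // SAFETY: upheld by the caller, per the contract above.
    let slice = std::slice::from_raw_parts(offsets, offset_length);
    dst.extend_from_slice(slice);
}

// After: the borrow checker enforces validity, so no contract is needed.
fn set_offsets(dst: &mut Vec<u32>, offsets: &[u32]) {
    dst.extend_from_slice(offsets);
}

fn main() {
    let data = [1_u32, 2, 3];
    let (mut a, mut b) = (Vec::new(), Vec::new());
    // Every call site of the FFI form carries a proof obligation...
    unsafe { set_offsets_ffi(&mut a, data.as_ptr(), data.len()) };
    // ...while the safe form cannot be misused from safe code.
    set_offsets(&mut b, &data);
    assert_eq!(a, b);
}
```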
- #[no_mangle] - pub unsafe extern "C" fn wgpu_render_pass_set_bind_group( + pub fn wgpu_render_pass_set_bind_group( pass: &mut RenderPass, index: u32, bind_group_id: id::BindGroupId, - offsets: *const DynamicOffset, - offset_length: usize, + offsets: &[DynamicOffset], ) { - let redundant = unsafe { - pass.current_bind_groups.set_and_check_redundant( - bind_group_id, - index, - &mut pass.base.dynamic_offsets, - offsets, - offset_length, - ) - }; + let redundant = pass.current_bind_groups.set_and_check_redundant( + bind_group_id, + index, + &mut pass.base.dynamic_offsets, + offsets, + ); if redundant { return; @@ -2513,16 +2504,12 @@ pub mod render_ffi { pass.base.commands.push(RenderCommand::SetBindGroup { index, - num_dynamic_offsets: offset_length, + num_dynamic_offsets: offsets.len(), bind_group_id, }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_set_pipeline( - pass: &mut RenderPass, - pipeline_id: id::RenderPipelineId, - ) { + pub fn wgpu_render_pass_set_pipeline(pass: &mut RenderPass, pipeline_id: id::RenderPipelineId) { if pass.current_pipeline.set_and_check_redundant(pipeline_id) { return; } @@ -2532,8 +2519,7 @@ pub mod render_ffi { .push(RenderCommand::SetPipeline(pipeline_id)); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_set_vertex_buffer( + pub fn wgpu_render_pass_set_vertex_buffer( pass: &mut RenderPass, slot: u32, buffer_id: id::BufferId, @@ -2548,8 +2534,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_set_index_buffer( + pub fn wgpu_render_pass_set_index_buffer( pass: &mut RenderPass, buffer: id::BufferId, index_format: IndexFormat, @@ -2559,22 +2544,19 @@ pub mod render_ffi { pass.set_index_buffer(buffer, index_format, offset, size); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_set_blend_constant(pass: &mut RenderPass, color: &Color) { + pub fn wgpu_render_pass_set_blend_constant(pass: &mut RenderPass, color: &Color) { pass.base .commands .push(RenderCommand::SetBlendConstant(*color)); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_set_stencil_reference(pass: &mut RenderPass, value: u32) { + pub fn wgpu_render_pass_set_stencil_reference(pass: &mut RenderPass, value: u32) { pass.base .commands .push(RenderCommand::SetStencilReference(value)); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_set_viewport( + pub fn wgpu_render_pass_set_viewport( pass: &mut RenderPass, x: f32, y: f32, @@ -2590,8 +2572,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_set_scissor_rect( + pub fn wgpu_render_pass_set_scissor_rect( pass: &mut RenderPass, x: u32, y: u32, @@ -2603,17 +2584,11 @@ pub mod render_ffi { .push(RenderCommand::SetScissor(Rect { x, y, w, h })); } - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given pointer is - /// valid for `size_bytes` bytes. - #[no_mangle] - pub unsafe extern "C" fn wgpu_render_pass_set_push_constants( + pub fn wgpu_render_pass_set_push_constants( pass: &mut RenderPass, stages: wgt::ShaderStages, offset: u32, - size_bytes: u32, - data: *const u8, + data: &[u8], ) { assert_eq!( offset & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), @@ -2621,31 +2596,28 @@ pub mod render_ffi { "Push constant offset must be aligned to 4 bytes." ); assert_eq!( - size_bytes & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), + data.len() as u32 & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), 0, "Push constant size must be aligned to 4 bytes." 
); - let data_slice = unsafe { slice::from_raw_parts(data, size_bytes as usize) }; let value_offset = pass.base.push_constant_data.len().try_into().expect( "Ran out of push constant space. Don't set 4gb of push constants per RenderPass.", ); pass.base.push_constant_data.extend( - data_slice - .chunks_exact(wgt::PUSH_CONSTANT_ALIGNMENT as usize) + data.chunks_exact(wgt::PUSH_CONSTANT_ALIGNMENT as usize) .map(|arr| u32::from_ne_bytes([arr[0], arr[1], arr[2], arr[3]])), ); pass.base.commands.push(RenderCommand::SetPushConstant { stages, offset, - size_bytes, + size_bytes: data.len() as u32, values_offset: Some(value_offset), }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_draw( + pub fn wgpu_render_pass_draw( pass: &mut RenderPass, vertex_count: u32, instance_count: u32, @@ -2660,8 +2632,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_draw_indexed( + pub fn wgpu_render_pass_draw_indexed( pass: &mut RenderPass, index_count: u32, instance_count: u32, @@ -2678,8 +2649,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_draw_indirect( + pub fn wgpu_render_pass_draw_indirect( pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, @@ -2692,8 +2662,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_draw_indexed_indirect( + pub fn wgpu_render_pass_draw_indexed_indirect( pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, @@ -2706,8 +2675,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_multi_draw_indirect( + pub fn wgpu_render_pass_multi_draw_indirect( pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, @@ -2721,8 +2689,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_multi_draw_indexed_indirect( + pub fn wgpu_render_pass_multi_draw_indexed_indirect( pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, @@ -2736,8 +2703,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_multi_draw_indirect_count( + pub fn wgpu_render_pass_multi_draw_indirect_count( pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, @@ -2757,8 +2723,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_multi_draw_indexed_indirect_count( + pub fn wgpu_render_pass_multi_draw_indexed_indirect_count( pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, @@ -2778,17 +2743,8 @@ pub mod render_ffi { }); } - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given `label` - /// is a valid null-terminated string. - #[no_mangle] - pub unsafe extern "C" fn wgpu_render_pass_push_debug_group( - pass: &mut RenderPass, - label: RawString, - color: u32, - ) { - let bytes = unsafe { ffi::CStr::from_ptr(label) }.to_bytes(); + pub fn wgpu_render_pass_push_debug_group(pass: &mut RenderPass, label: &str, color: u32) { + let bytes = label.as_bytes(); pass.base.string_data.extend_from_slice(bytes); pass.base.commands.push(RenderCommand::PushDebugGroup { @@ -2797,22 +2753,12 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_pop_debug_group(pass: &mut RenderPass) { + pub fn wgpu_render_pass_pop_debug_group(pass: &mut RenderPass) { pass.base.commands.push(RenderCommand::PopDebugGroup); } - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given `label` - /// is a valid null-terminated string. 
- #[no_mangle] - pub unsafe extern "C" fn wgpu_render_pass_insert_debug_marker( - pass: &mut RenderPass, - label: RawString, - color: u32, - ) { - let bytes = unsafe { ffi::CStr::from_ptr(label) }.to_bytes(); + pub fn wgpu_render_pass_insert_debug_marker(pass: &mut RenderPass, label: &str, color: u32) { + let bytes = label.as_bytes(); pass.base.string_data.extend_from_slice(bytes); pass.base.commands.push(RenderCommand::InsertDebugMarker { @@ -2821,8 +2767,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_write_timestamp( + pub fn wgpu_render_pass_write_timestamp( pass: &mut RenderPass, query_set_id: id::QuerySetId, query_index: u32, @@ -2833,23 +2778,17 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_begin_occlusion_query( - pass: &mut RenderPass, - query_index: u32, - ) { + pub fn wgpu_render_pass_begin_occlusion_query(pass: &mut RenderPass, query_index: u32) { pass.base .commands .push(RenderCommand::BeginOcclusionQuery { query_index }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_end_occlusion_query(pass: &mut RenderPass) { + pub fn wgpu_render_pass_end_occlusion_query(pass: &mut RenderPass) { pass.base.commands.push(RenderCommand::EndOcclusionQuery); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_begin_pipeline_statistics_query( + pub fn wgpu_render_pass_begin_pipeline_statistics_query( pass: &mut RenderPass, query_set_id: id::QuerySetId, query_index: u32, @@ -2862,26 +2801,17 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_end_pipeline_statistics_query(pass: &mut RenderPass) { + pub fn wgpu_render_pass_end_pipeline_statistics_query(pass: &mut RenderPass) { pass.base .commands .push(RenderCommand::EndPipelineStatisticsQuery); } - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given pointer is - /// valid for `render_bundle_ids_length` elements. 
- #[no_mangle] - pub unsafe extern "C" fn wgpu_render_pass_execute_bundles( + pub fn wgpu_render_pass_execute_bundles( pass: &mut RenderPass, - render_bundle_ids: *const id::RenderBundleId, - render_bundle_ids_length: usize, + render_bundle_ids: &[id::RenderBundleId], ) { - for &bundle_id in - unsafe { slice::from_raw_parts(render_bundle_ids, render_bundle_ids_length) } - { + for &bundle_id in render_bundle_ids { pass.base .commands .push(RenderCommand::ExecuteBundle(bundle_id)); diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index 8e98a4c9b9..84bc88e723 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -607,6 +607,11 @@ impl Global { let src_buffer = buffer_guard .get(source) .map_err(|_| TransferError::InvalidBuffer(source))?; + + if src_buffer.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + cmd_buf_data .trackers .buffers @@ -628,6 +633,11 @@ impl Global { let dst_buffer = buffer_guard .get(destination) .map_err(|_| TransferError::InvalidBuffer(destination))?; + + if dst_buffer.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + cmd_buf_data .trackers .buffers @@ -777,6 +787,10 @@ impl Global { .get(destination.texture) .map_err(|_| TransferError::InvalidTexture(destination.texture))?; + if dst_texture.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + let (hal_copy_size, array_layer_count) = validate_texture_copy_range( destination, &dst_texture.desc, @@ -807,6 +821,11 @@ impl Global { let src_buffer = buffer_guard .get(source.buffer) .map_err(|_| TransferError::InvalidBuffer(source.buffer))?; + + if src_buffer.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + tracker .buffers .set_single(src_buffer, hal::BufferUses::COPY_SRC) @@ -938,6 +957,10 @@ impl Global { .get(source.texture) .map_err(|_| TransferError::InvalidTexture(source.texture))?; + if src_texture.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + let (hal_copy_size, array_layer_count) = validate_texture_copy_range(source, &src_texture.desc, CopySide::Source, copy_size)?; @@ -989,6 +1012,11 @@ impl Global { let dst_buffer = buffer_guard .get(destination.buffer) .map_err(|_| TransferError::InvalidBuffer(destination.buffer))?; + + if dst_buffer.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + tracker .buffers .set_single(dst_buffer, hal::BufferUses::COPY_DST) @@ -1117,6 +1145,13 @@ impl Global { .get(destination.texture) .map_err(|_| TransferError::InvalidTexture(source.texture))?; + if src_texture.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + if dst_texture.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + // src and dst texture format must be copy-compatible // https://gpuweb.github.io/gpuweb/#copy-compatible if src_texture.desc.format.remove_srgb_suffix() diff --git a/wgpu-core/src/device/any_device.rs b/wgpu-core/src/device/any_device.rs index 693155a753..9e459c1a94 100644 --- a/wgpu-core/src/device/any_device.rs +++ b/wgpu-core/src/device/any_device.rs @@ -34,7 +34,7 @@ impl AnyDevice { unsafe fn drop_glue(ptr: *mut ()) { // Drop the arc this instance is holding. 
unsafe { - _ = Arc::from_raw(ptr.cast::()); + _ = Arc::from_raw(ptr.cast::()); } } diff --git a/wgpu-core/src/device/bgl.rs b/wgpu-core/src/device/bgl.rs index d606f049a3..911ac8a435 100644 --- a/wgpu-core/src/device/bgl.rs +++ b/wgpu-core/src/device/bgl.rs @@ -58,7 +58,7 @@ impl EntryMap { assert!(self.sorted); } - /// Create a new [`BindGroupLayoutEntryMap`] from a slice of [`wgt::BindGroupLayoutEntry`]s. + /// Create a new [`EntryMap`] from a slice of [`wgt::BindGroupLayoutEntry`]s. /// /// Errors if there are duplicate bindings or if any binding index is greater than /// the device's limits. diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index 9c54dfc193..9f78878cc7 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -11,6 +11,7 @@ use crate::{ id::{self, AdapterId, DeviceId, QueueId, SurfaceId}, init_tracker::TextureInitTracker, instance::{self, Adapter, Surface}, + lock::{rank, RwLock}, pipeline, present, resource::{self, BufferAccessResult}, resource::{BufferAccessError, BufferMapOperation, CreateBufferError, Resource}, @@ -20,7 +21,6 @@ use crate::{ use arrayvec::ArrayVec; use hal::Device as _; -use parking_lot::RwLock; use wgt::{BufferAddress, TextureFormat}; @@ -643,8 +643,10 @@ impl Global { texture.hal_usage |= hal::TextureUses::COPY_DST; } - texture.initialization_status = - RwLock::new(TextureInitTracker::new(desc.mip_level_count, 0)); + texture.initialization_status = RwLock::new( + rank::TEXTURE_INITIALIZATION_STATUS, + TextureInitTracker::new(desc.mip_level_count, 0), + ); let (id, resource) = fid.assign(Arc::new(texture)); api_log!("Device::create_texture({desc:?}) -> {id:?}"); @@ -1351,9 +1353,6 @@ impl Global { }; let encoder = match device .command_allocator - .lock() - .as_mut() - .unwrap() .acquire_encoder(device.raw(), queue.raw.as_ref().unwrap()) { Ok(raw) => raw, @@ -1972,7 +1971,7 @@ impl Global { }; let caps = unsafe { - let suf = A::get_surface(surface); + let suf = A::surface_as_hal(surface); let adapter = &device.adapter; match adapter.raw.adapter.surface_capabilities(suf.unwrap()) { Some(caps) => caps, @@ -2058,7 +2057,7 @@ impl Global { // https://github.com/gfx-rs/wgpu/issues/4105 match unsafe { - A::get_surface(surface) + A::surface_as_hal(surface) .unwrap() .configure(device.raw(), &hal_config) } { diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index 187e6099b9..24f41e7ed9 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -7,6 +7,7 @@ use crate::{ }, hal_api::HalApi, id, + lock::Mutex, pipeline::{ComputePipeline, RenderPipeline}, resource::{ self, Buffer, DestroyedBuffer, DestroyedTexture, QuerySet, Resource, Sampler, @@ -24,7 +25,6 @@ use std::sync::Arc; use thiserror::Error; /// A struct that keeps lists of resources that are no longer needed by the user. -#[derive(Default)] pub(crate) struct ResourceMaps { pub buffers: FastHashMap>>, pub staging_buffers: FastHashMap>>, @@ -141,7 +141,37 @@ impl ResourceMaps { } } -/// Resources used by a queue submission, and work to be done once it completes. +/// A command submitted to the GPU for execution. +/// +/// ## Keeping resources alive while the GPU is using them +/// +/// [`wgpu_hal`] requires that, when a command is submitted to a queue, all the +/// resources it uses must remain alive until it has finished executing. +/// +/// The natural way to satisfy this would be for `ActiveSubmission` to hold +/// strong references to all the resources used by its commands. 
However, that +/// would entail dropping those strong references every time a queue submission +/// finishes, adjusting the reference counts of all the resources it used. This +/// is usually needless work: it's rare for the active submission queue to be +/// the final reference to an object. Usually the user is still holding on to +/// it. +/// +/// To avoid this, an `ActiveSubmission` does not initially hold any strong +/// references to its commands' resources. Instead, each resource tracks the +/// most recent submission index at which it has been used in +/// [`ResourceInfo::submission_index`]. When the user drops a resource, if the +/// submission in which it was last used is still present in the device's queue, +/// we add the resource to [`ActiveSubmission::last_resources`]. Finally, when +/// this `ActiveSubmission` is dequeued and dropped in +/// [`LifetimeTracker::triage_submissions`], we drop `last_resources` along with +/// it. Thus, unless a resource is dropped by the user, it doesn't need to be +/// touched at all when processing completed work. +/// +/// However, it's not clear that this is effective. See [#5560]. +/// +/// [`wgpu_hal`]: hal +/// [`ResourceInfo::submission_index`]: crate::resource::ResourceInfo +/// [#5560]: https://github.com/gfx-rs/wgpu/issues/5560 struct ActiveSubmission { /// The index of the submission we track. /// @@ -163,6 +193,18 @@ struct ActiveSubmission { /// Buffers to be mapped once this submission has completed. mapped: Vec>>, + /// Command buffers used by this submission, and the encoder that owns them. + /// + /// [`wgpu_hal::Queue::submit`] requires the submitted command buffers to + /// remain alive until the submission has completed execution. Command + /// encoders double as allocation pools for command buffers, so holding them + /// here and cleaning them up in [`LifetimeTracker::triage_submissions`] + /// satisfies that requirement. + /// + /// Once this submission has completed, the command buffers are reset and + /// the command encoder is recycled. + /// + /// [`wgpu_hal::Queue::submit`]: hal::Queue::submit encoders: Vec>, /// List of queue "on_submitted_work_done" closures to be called once this @@ -353,28 +395,25 @@ impl LifetimeTracker { /// /// Assume that all submissions up through `last_done` have completed. /// - /// - Buffers used by those submissions are now ready to map, if - /// requested. Add any buffers in the submission's [`mapped`] list to - /// [`self.ready_to_map`], where [`LifetimeTracker::handle_mapping`] will find - /// them. + /// - Buffers used by those submissions are now ready to map, if requested. + /// Add any buffers in the submission's [`mapped`] list to + /// [`self.ready_to_map`], where [`LifetimeTracker::handle_mapping`] + /// will find them. /// /// - Resources whose final use was in those submissions are now ready to - /// free. Add any resources in the submission's [`last_resources`] table - /// to [`self.free_resources`], where [`LifetimeTracker::cleanup`] will find - /// them. + /// free. Dropping the submission's [`last_resources`] table does so. /// /// Return a list of [`SubmittedWorkDoneClosure`]s to run. 
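A toy model of the deferred-release scheme this comment describes may help; `Resource`, `Submission`, and `Tracker` below are invented stand-ins, not the wgpu-core types. A strong reference is parked on a submission only when the user drops a resource that submission still needs, and triage then releases everything in bulk:

```rust
use std::sync::Arc;

type SubmissionIndex = u64;

struct Resource {
    last_used: SubmissionIndex, // like ResourceInfo::submission_index
}

struct Submission {
    index: SubmissionIndex,
    last_resources: Vec<Arc<Resource>>, // strong refs parked by user drops
}

struct Tracker {
    active: Vec<Submission>, // submissions still executing, oldest first
}

impl Tracker {
    /// Called when the user drops a handle: keep the resource alive only if
    /// the submission that last used it is still in flight.
    fn user_dropped(&mut self, res: Arc<Resource>) {
        if let Some(sub) = self.active.iter_mut().find(|s| s.index == res.last_used) {
            sub.last_resources.push(res);
        }
        // Otherwise the GPU is done with it and the Arc drops right here.
    }

    /// Like `triage_submissions`: everything up to `last_done` has completed,
    /// so dropping each finished submission drops its parked resources too.
    fn triage(&mut self, last_done: SubmissionIndex) {
        self.active.retain(|s| s.index > last_done);
    }
}

fn main() {
    let res = Arc::new(Resource { last_used: 7 });
    let mut tracker = Tracker {
        active: vec![Submission {
            index: 7,
            last_resources: Vec::new(),
        }],
    };
    tracker.user_dropped(Arc::clone(&res));
    drop(res); // the parked reference keeps the resource alive...
    assert_eq!(Arc::strong_count(&tracker.active[0].last_resources[0]), 1);
    tracker.triage(7); // ...until the submission is retired.
    assert!(tracker.active.is_empty());
}
```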
     ///
     /// [`mapped`]: ActiveSubmission::mapped
     /// [`self.ready_to_map`]: LifetimeTracker::ready_to_map
     /// [`last_resources`]: ActiveSubmission::last_resources
-    /// [`self.free_resources`]: LifetimeTracker::free_resources
     /// [`SubmittedWorkDoneClosure`]: crate::device::queue::SubmittedWorkDoneClosure
     #[must_use]
     pub fn triage_submissions(
         &mut self,
         last_done: SubmissionIndex,
-        command_allocator: &mut super::CommandAllocator<A>,
+        command_allocator: &crate::command::CommandAllocator<A>,
     ) -> SmallVec<[SubmittedWorkDoneClosure; 1]> {
         profiling::scope!("triage_submissions");
@@ -751,13 +790,10 @@ impl<A: HalApi> LifetimeTracker<A> {
     /// Identify resources to free, according to `trackers` and `self.suspected_resources`.
     ///
-    /// Given `trackers`, the [`Tracker`] belonging to same [`Device`] as
-    /// `self`, and `hub`, the [`Hub`] to which that `Device` belongs:
-    ///
-    /// Remove from `trackers` each resource mentioned in
-    /// [`self.suspected_resources`]. If `trackers` held the final reference to
-    /// that resource, add it to the appropriate free list, to be destroyed by
-    /// the hal:
+    /// Remove from `trackers`, the [`Tracker`] belonging to the same [`Device`]
+    /// as `self`, each resource mentioned in [`self.suspected_resources`]. If
+    /// `trackers` held the final reference to that resource, add it to the
+    /// appropriate free list, to be destroyed by the hal:
     ///
     /// - Add resources used by queue submissions still in flight to the
     ///   [`last_resources`] table of the last such submission's entry in
@@ -859,29 +895,33 @@ impl<A: HalApi> LifetimeTracker<A> {
                     *buffer.map_state.lock() = resource::BufferMapState::Idle;
                     log::trace!("Buffer ready to map {tracker_index:?} is not tracked anymore");
                 } else {
-                    let mapping = match std::mem::replace(
+                    // This _cannot_ be inlined into the match. If it is, the lock will be held
+                    // open through the whole match, resulting in a deadlock when we try to re-lock
+                    // the buffer back to active.
+                    let mapping = std::mem::replace(
                         &mut *buffer.map_state.lock(),
                         resource::BufferMapState::Idle,
-                    ) {
+                    );
+                    let pending_mapping = match mapping {
                         resource::BufferMapState::Waiting(pending_mapping) => pending_mapping,
                         // Mapping cancelled
                         resource::BufferMapState::Idle => continue,
                         // Mapping queued at least twice by map -> unmap -> map
                         // and was already successfully mapped below
-                        active @ resource::BufferMapState::Active { ..
} => { + *buffer.map_state.lock() = mapping; continue; } _ => panic!("No pending mapping."), }; - let status = if mapping.range.start != mapping.range.end { + let status = if pending_mapping.range.start != pending_mapping.range.end { log::debug!("Buffer {tracker_index:?} map state -> Active"); - let host = mapping.op.host; - let size = mapping.range.end - mapping.range.start; + let host = pending_mapping.op.host; + let size = pending_mapping.range.end - pending_mapping.range.start; match super::map_buffer( raw, &buffer, - mapping.range.start, + pending_mapping.range.start, size, host, snatch_guard, @@ -889,7 +929,8 @@ impl LifetimeTracker { Ok(ptr) => { *buffer.map_state.lock() = resource::BufferMapState::Active { ptr, - range: mapping.range.start..mapping.range.start + size, + range: pending_mapping.range.start + ..pending_mapping.range.start + size, host, }; Ok(()) @@ -902,12 +943,12 @@ impl LifetimeTracker { } else { *buffer.map_state.lock() = resource::BufferMapState::Active { ptr: std::ptr::NonNull::dangling(), - range: mapping.range, - host: mapping.op.host, + range: pending_mapping.range, + host: pending_mapping.op.host, }; Ok(()) }; - pending_callbacks.push((mapping.op, status)); + pending_callbacks.push((pending_mapping.op, status)); } } pending_callbacks diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 944e10bf12..3120537544 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -4,7 +4,6 @@ use crate::{ hub::Hub, id::{BindGroupLayoutId, PipelineLayoutId}, resource::{Buffer, BufferAccessError, BufferAccessResult, BufferMapOperation}, - resource_log, snatch::SnatchGuard, Label, DOWNLEVEL_ERROR_MESSAGE, }; @@ -378,42 +377,6 @@ fn map_buffer( Ok(mapping.ptr) } -pub(crate) struct CommandAllocator { - free_encoders: Vec, -} - -impl CommandAllocator { - fn acquire_encoder( - &mut self, - device: &A::Device, - queue: &A::Queue, - ) -> Result { - match self.free_encoders.pop() { - Some(encoder) => Ok(encoder), - None => unsafe { - let hal_desc = hal::CommandEncoderDescriptor { label: None, queue }; - device.create_command_encoder(&hal_desc) - }, - } - } - - fn release_encoder(&mut self, encoder: A::CommandEncoder) { - self.free_encoders.push(encoder); - } - - fn dispose(self, device: &A::Device) { - resource_log!( - "CommandAllocator::dispose encoders {}", - self.free_encoders.len() - ); - for cmd_encoder in self.free_encoders { - unsafe { - device.destroy_command_encoder(cmd_encoder); - } - } - } -} - #[derive(Clone, Debug, Error)] #[error("Device is invalid")] pub struct InvalidDevice; diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 8a9c52fa4f..a43414af1a 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -4,7 +4,7 @@ use crate::{ api_log, command::{ extract_texture_selector, validate_linear_texture_data, validate_texture_copy_range, - ClearError, CommandBuffer, CopySide, ImageCopyTexture, TransferError, + ClearError, CommandAllocator, CommandBuffer, CopySide, ImageCopyTexture, TransferError, }, conv, device::{life::ResourceMaps, DeviceError, WaitIdleError}, @@ -14,6 +14,7 @@ use crate::{ hal_label, id::{self, DeviceId, QueueId}, init_tracker::{has_copy_partial_init_tracker_coverage, TextureInitRange}, + lock::{rank, Mutex}, resource::{ Buffer, BufferAccessError, BufferMapState, DestroyedBuffer, DestroyedTexture, Resource, ResourceInfo, ResourceType, StagingBuffer, Texture, TextureInner, @@ -22,7 +23,6 @@ use crate::{ }; use hal::{CommandEncoder as _, Device as _, Queue as 
_}; -use parking_lot::Mutex; use smallvec::SmallVec; use crate::resource::{Blas, Tlas}; @@ -155,13 +155,21 @@ pub enum TempResource { Blas(Arc>), } -/// A queue execution for a particular command encoder. +/// A series of raw [`CommandBuffer`]s that have been submitted to a +/// queue, and the [`wgpu_hal::CommandEncoder`] that built them. +/// +/// [`CommandBuffer`]: hal::Api::CommandBuffer +/// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder pub(crate) struct EncoderInFlight { raw: A::CommandEncoder, cmd_buffers: Vec, } impl EncoderInFlight { + /// Free all of our command buffers. + /// + /// Return the command encoder, fully reset and ready to be + /// reused. pub(crate) unsafe fn land(mut self) -> A::CommandEncoder { unsafe { self.raw.reset_all(self.cmd_buffers.into_iter()) }; self.raw @@ -195,6 +203,8 @@ pub(crate) struct PendingWrites { /// True if `command_encoder` is in the "recording" state, as /// described in the docs for the [`wgpu_hal::CommandEncoder`] /// trait. + /// + /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder pub is_recording: bool, pub temp_resources: Vec>, @@ -256,7 +266,7 @@ impl PendingWrites { #[must_use] fn post_submit( &mut self, - command_allocator: &mut super::CommandAllocator, + command_allocator: &CommandAllocator, device: &A::Device, queue: &A::Queue, ) -> Option> { @@ -310,7 +320,7 @@ fn prepare_staging_buffer( let mapping = unsafe { device.raw().map_buffer(&buffer, 0..size) }?; let staging_buffer = StagingBuffer { - raw: Mutex::new(Some(buffer)), + raw: Mutex::new(rank::STAGING_BUFFER_RAW, Some(buffer)), device: device.clone(), size, info: ResourceInfo::new( @@ -1558,7 +1568,7 @@ impl Global { profiling::scope!("cleanup"); if let Some(pending_execution) = pending_writes.post_submit( - device.command_allocator.lock().as_mut().unwrap(), + &device.command_allocator, device.raw(), queue.raw.as_ref().unwrap(), ) { diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index c735da6282..58947ef196 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -7,18 +7,20 @@ use crate::{ bgl, life::{LifetimeTracker, WaitIdleError}, queue::PendingWrites, - AttachmentData, CommandAllocator, DeviceLostInvocation, MissingDownlevelFlags, - MissingFeatures, RenderPassContext, CLEANUP_WAIT_MS, + AttachmentData, DeviceLostInvocation, MissingDownlevelFlags, MissingFeatures, + RenderPassContext, CLEANUP_WAIT_MS, }, hal_api::HalApi, hal_label, hub::Hub, + id, init_tracker::{ BufferInitTracker, BufferInitTrackerAction, MemoryInitKind, TextureInitRange, TextureInitTracker, TextureInitTrackerAction, }, instance::Adapter, - pipeline, + lock::{rank, Mutex, MutexGuard, RwLock}, + pipeline::{self}, pool::ResourcePool, registry::Registry, resource::{ @@ -41,7 +43,6 @@ use crate::{ use arrayvec::ArrayVec; use hal::{CommandEncoder as _, Device as _}; use once_cell::sync::OnceCell; -use parking_lot::{Mutex, MutexGuard, RwLock}; use smallvec::SmallVec; use thiserror::Error; @@ -97,7 +98,7 @@ pub struct Device { pub(crate) zero_buffer: Option, pub(crate) info: ResourceInfo>, - pub(crate) command_allocator: Mutex>>, + pub(crate) command_allocator: command::CommandAllocator, //Note: The submission index here corresponds to the last submission that is done. 
pub(crate) active_submission_index: AtomicU64, //SubmissionIndex, // NOTE: if both are needed, the `snatchable_lock` must be consistently acquired before the @@ -138,10 +139,10 @@ pub struct Device { pub(crate) instance_flags: wgt::InstanceFlags, pub(crate) pending_writes: Mutex>>, pub(crate) deferred_destroy: Mutex>>, - pub(crate) last_acceleration_structure_build_command_index: AtomicU64, #[cfg(feature = "trace")] pub(crate) trace: Mutex>, pub(crate) usage_scopes: UsageScopePool, + pub(crate) last_acceleration_structure_build_command_index: AtomicU64, } pub(crate) enum DeferredDestroy { @@ -166,7 +167,7 @@ impl Drop for Device { let raw = self.raw.take().unwrap(); let pending_writes = self.pending_writes.lock().take().unwrap(); pending_writes.dispose(&raw); - self.command_allocator.lock().take().unwrap().dispose(&raw); + self.command_allocator.dispose(&raw); unsafe { raw.destroy_buffer(self.zero_buffer.take().unwrap()); raw.destroy_fence(self.fence.write().take().unwrap()); @@ -224,10 +225,8 @@ impl Device { let fence = unsafe { raw_device.create_fence() }.map_err(|_| CreateDeviceError::OutOfMemory)?; - let mut com_alloc = CommandAllocator { - free_encoders: Vec::new(), - }; - let pending_encoder = com_alloc + let command_allocator = command::CommandAllocator::new(); + let pending_encoder = command_allocator .acquire_encoder(&raw_device, raw_queue) .map_err(|_| CreateDeviceError::OutOfMemory)?; let mut pending_writes = queue::PendingWrites::::new(pending_encoder); @@ -272,39 +271,45 @@ impl Device { queue_to_drop: OnceCell::new(), zero_buffer: Some(zero_buffer), info: ResourceInfo::new("", None), - command_allocator: Mutex::new(Some(com_alloc)), + command_allocator, active_submission_index: AtomicU64::new(0), - fence: RwLock::new(Some(fence)), - snatchable_lock: unsafe { SnatchLock::new() }, + fence: RwLock::new(rank::DEVICE_FENCE, Some(fence)), + snatchable_lock: unsafe { SnatchLock::new(rank::DEVICE_SNATCHABLE_LOCK) }, valid: AtomicBool::new(true), - trackers: Mutex::new(Tracker::new()), + trackers: Mutex::new(rank::DEVICE_TRACKERS, Tracker::new()), tracker_indices: TrackerIndexAllocators::new(), - life_tracker: Mutex::new(life::LifetimeTracker::new()), - temp_suspected: Mutex::new(Some(life::ResourceMaps::new())), + life_tracker: Mutex::new(rank::DEVICE_LIFE_TRACKER, life::LifetimeTracker::new()), + temp_suspected: Mutex::new( + rank::DEVICE_TEMP_SUSPECTED, + Some(life::ResourceMaps::new()), + ), bgl_pool: ResourcePool::new(), #[cfg(feature = "trace")] - trace: Mutex::new(trace_path.and_then(|path| match trace::Trace::new(path) { - Ok(mut trace) => { - trace.add(trace::Action::Init { - desc: desc.clone(), - backend: A::VARIANT, - }); - Some(trace) - } - Err(e) => { - log::error!("Unable to start a trace in '{path:?}': {e}"); - None - } - })), + trace: Mutex::new( + rank::DEVICE_TRACE, + trace_path.and_then(|path| match trace::Trace::new(path) { + Ok(mut trace) => { + trace.add(trace::Action::Init { + desc: desc.clone(), + backend: A::VARIANT, + }); + Some(trace) + } + Err(e) => { + log::error!("Unable to start a trace in '{path:?}': {e}"); + None + } + }), + ), alignments, limits: desc.required_limits.clone(), features: desc.required_features, downlevel, instance_flags, - pending_writes: Mutex::new(Some(pending_writes)), - deferred_destroy: Mutex::new(Vec::new()), + pending_writes: Mutex::new(rank::DEVICE_PENDING_WRITES, Some(pending_writes)), + deferred_destroy: Mutex::new(rank::DEVICE_DEFERRED_DESTROY, Vec::new()), + usage_scopes: Mutex::new(rank::DEVICE_USAGE_SCOPES, 
Default::default()), last_acceleration_structure_build_command_index: AtomicU64::new(0), - usage_scopes: Default::default(), }) } @@ -427,10 +432,8 @@ impl Device { }; let mut life_tracker = self.lock_life(); - let submission_closures = life_tracker.triage_submissions( - last_done_index, - self.command_allocator.lock().as_mut().unwrap(), - ); + let submission_closures = + life_tracker.triage_submissions(last_done_index, &self.command_allocator); { // Normally, `temp_suspected` exists only to save heap @@ -669,14 +672,17 @@ impl Device { device: self.clone(), usage: desc.usage, size: desc.size, - initialization_status: RwLock::new(BufferInitTracker::new(aligned_size)), - sync_mapped_writes: Mutex::new(None), - map_state: Mutex::new(resource::BufferMapState::Idle), + initialization_status: RwLock::new( + rank::BUFFER_INITIALIZATION_STATUS, + BufferInitTracker::new(aligned_size), + ), + sync_mapped_writes: Mutex::new(rank::BUFFER_SYNC_MAPPED_WRITES, None), + map_state: Mutex::new(rank::BUFFER_MAP_STATE, resource::BufferMapState::Idle), info: ResourceInfo::new( desc.label.borrow_or_default(), Some(self.tracker_indices.buffers.clone()), ), - bind_groups: Mutex::new(Vec::new()), + bind_groups: Mutex::new(rank::BUFFER_BIND_GROUPS, Vec::new()), }) } @@ -696,10 +702,10 @@ impl Device { desc: desc.map_label(|_| ()), hal_usage, format_features, - initialization_status: RwLock::new(TextureInitTracker::new( - desc.mip_level_count, - desc.array_layer_count(), - )), + initialization_status: RwLock::new( + rank::TEXTURE_INITIALIZATION_STATUS, + TextureInitTracker::new(desc.mip_level_count, desc.array_layer_count()), + ), full_range: TextureSelector { mips: 0..desc.mip_level_count, layers: 0..desc.array_layer_count(), @@ -708,9 +714,9 @@ impl Device { desc.label.borrow_or_default(), Some(self.tracker_indices.textures.clone()), ), - clear_mode: RwLock::new(clear_mode), - views: Mutex::new(Vec::new()), - bind_groups: Mutex::new(Vec::new()), + clear_mode: RwLock::new(rank::TEXTURE_CLEAR_MODE, clear_mode), + views: Mutex::new(rank::TEXTURE_VIEWS, Vec::new()), + bind_groups: Mutex::new(rank::TEXTURE_BIND_GROUPS, Vec::new()), } } @@ -726,14 +732,17 @@ impl Device { device: self.clone(), usage: desc.usage, size: desc.size, - initialization_status: RwLock::new(BufferInitTracker::new(0)), - sync_mapped_writes: Mutex::new(None), - map_state: Mutex::new(resource::BufferMapState::Idle), + initialization_status: RwLock::new( + rank::BUFFER_INITIALIZATION_STATUS, + BufferInitTracker::new(0), + ), + sync_mapped_writes: Mutex::new(rank::BUFFER_SYNC_MAPPED_WRITES, None), + map_state: Mutex::new(rank::BUFFER_MAP_STATE, resource::BufferMapState::Idle), info: ResourceInfo::new( desc.label.borrow_or_default(), Some(self.tracker_indices.buffers.clone()), ), - bind_groups: Mutex::new(Vec::new()), + bind_groups: Mutex::new(rank::BUFFER_BIND_GROUPS, Vec::new()), } } @@ -1437,7 +1446,7 @@ impl Device { pipeline::ShaderModuleSource::Wgsl(code) => { profiling::scope!("naga::front::wgsl::parse_str"); let module = naga::front::wgsl::parse_str(&code).map_err(|inner| { - pipeline::CreateShaderModuleError::Parsing(pipeline::ShaderError { + pipeline::CreateShaderModuleError::Parsing(naga::error::ShaderError { source: code.to_string(), label: desc.label.as_ref().map(|l| l.to_string()), inner: Box::new(inner), @@ -1450,7 +1459,7 @@ impl Device { let parser = naga::front::spv::Frontend::new(spv.iter().cloned(), &options); profiling::scope!("naga::front::spv::Frontend"); let module = parser.parse().map_err(|inner| { - 
pipeline::CreateShaderModuleError::ParsingSpirV(pipeline::ShaderError { + pipeline::CreateShaderModuleError::ParsingSpirV(naga::error::ShaderError { source: String::new(), label: desc.label.as_ref().map(|l| l.to_string()), inner: Box::new(inner), @@ -1463,7 +1472,7 @@ impl Device { let mut parser = naga::front::glsl::Frontend::default(); profiling::scope!("naga::front::glsl::Frontend.parse"); let module = parser.parse(&options, &code).map_err(|inner| { - pipeline::CreateShaderModuleError::ParsingGlsl(pipeline::ShaderError { + pipeline::CreateShaderModuleError::ParsingGlsl(naga::error::ShaderError { source: code.to_string(), label: desc.label.as_ref().map(|l| l.to_string()), inner: Box::new(inner), @@ -1487,9 +1496,78 @@ impl Device { }; } - use naga::valid::Capabilities as Caps; profiling::scope!("naga::validate"); + let debug_source = + if self.instance_flags.contains(wgt::InstanceFlags::DEBUG) && !source.is_empty() { + Some(hal::DebugSource { + file_name: Cow::Owned( + desc.label + .as_ref() + .map_or("shader".to_string(), |l| l.to_string()), + ), + source_code: Cow::Owned(source.clone()), + }) + } else { + None + }; + + let info = self + .create_validator(naga::valid::ValidationFlags::all()) + .validate(&module) + .map_err(|inner| { + pipeline::CreateShaderModuleError::Validation(naga::error::ShaderError { + source, + label: desc.label.as_ref().map(|l| l.to_string()), + inner: Box::new(inner), + }) + })?; + + let interface = + validation::Interface::new(&module, &info, self.limits.clone(), self.features); + let hal_shader = hal::ShaderInput::Naga(hal::NagaShader { + module, + info, + debug_source, + }); + let hal_desc = hal::ShaderModuleDescriptor { + label: desc.label.to_hal(self.instance_flags), + runtime_checks: desc.shader_bound_checks.runtime_checks(), + }; + let raw = match unsafe { + self.raw + .as_ref() + .unwrap() + .create_shader_module(&hal_desc, hal_shader) + } { + Ok(raw) => raw, + Err(error) => { + return Err(match error { + hal::ShaderError::Device(error) => { + pipeline::CreateShaderModuleError::Device(error.into()) + } + hal::ShaderError::Compilation(ref msg) => { + log::error!("Shader error: {}", msg); + pipeline::CreateShaderModuleError::Generation + } + }) + } + }; + + Ok(pipeline::ShaderModule { + raw: Some(raw), + device: self.clone(), + interface: Some(interface), + info: ResourceInfo::new(desc.label.borrow_or_default(), None), + label: desc.label.borrow_or_default().to_string(), + }) + } + /// Create a validator with the given validation flags. 
+ pub fn create_validator( + self: &Arc, + flags: naga::valid::ValidationFlags, + ) -> naga::valid::Validator { + use naga::valid::Capabilities as Caps; let mut caps = Caps::empty(); caps.set( Caps::PUSH_CONSTANT, @@ -1561,69 +1639,36 @@ impl Device { .flags .contains(wgt::DownlevelFlags::CUBE_ARRAY_TEXTURES), ); + caps.set( + Caps::SUBGROUP, + self.features + .intersects(wgt::Features::SUBGROUP | wgt::Features::SUBGROUP_VERTEX), + ); + caps.set( + Caps::SUBGROUP_BARRIER, + self.features.intersects(wgt::Features::SUBGROUP_BARRIER), + ); - let debug_source = - if self.instance_flags.contains(wgt::InstanceFlags::DEBUG) && !source.is_empty() { - Some(hal::DebugSource { - file_name: Cow::Owned( - desc.label - .as_ref() - .map_or("shader".to_string(), |l| l.to_string()), - ), - source_code: Cow::Owned(source.clone()), - }) - } else { - None - }; - - let info = naga::valid::Validator::new(naga::valid::ValidationFlags::all(), caps) - .validate(&module) - .map_err(|inner| { - pipeline::CreateShaderModuleError::Validation(pipeline::ShaderError { - source, - label: desc.label.as_ref().map(|l| l.to_string()), - inner: Box::new(inner), - }) - })?; + let mut subgroup_stages = naga::valid::ShaderStages::empty(); + subgroup_stages.set( + naga::valid::ShaderStages::COMPUTE | naga::valid::ShaderStages::FRAGMENT, + self.features.contains(wgt::Features::SUBGROUP), + ); + subgroup_stages.set( + naga::valid::ShaderStages::VERTEX, + self.features.contains(wgt::Features::SUBGROUP_VERTEX), + ); - let interface = - validation::Interface::new(&module, &info, self.limits.clone(), self.features); - let hal_shader = hal::ShaderInput::Naga(hal::NagaShader { - module, - info, - debug_source, - }); - let hal_desc = hal::ShaderModuleDescriptor { - label: desc.label.to_hal(self.instance_flags), - runtime_checks: desc.shader_bound_checks.runtime_checks(), - }; - let raw = match unsafe { - self.raw - .as_ref() - .unwrap() - .create_shader_module(&hal_desc, hal_shader) - } { - Ok(raw) => raw, - Err(error) => { - return Err(match error { - hal::ShaderError::Device(error) => { - pipeline::CreateShaderModuleError::Device(error.into()) - } - hal::ShaderError::Compilation(ref msg) => { - log::error!("Shader error: {}", msg); - pipeline::CreateShaderModuleError::Generation - } - }) - } + let subgroup_operations = if caps.contains(Caps::SUBGROUP) { + use naga::valid::SubgroupOperationSet as S; + S::BASIC | S::VOTE | S::ARITHMETIC | S::BALLOT | S::SHUFFLE | S::SHUFFLE_RELATIVE + } else { + naga::valid::SubgroupOperationSet::empty() }; - - Ok(pipeline::ShaderModule { - raw: Some(raw), - device: self.clone(), - interface: Some(interface), - info: ResourceInfo::new(desc.label.borrow_or_default(), None), - label: desc.label.borrow_or_default().to_string(), - }) + let mut validator = naga::valid::Validator::new(flags, caps); + validator.subgroup_stages(subgroup_stages); + validator.subgroup_operations(subgroup_operations); + validator } #[allow(unused_unsafe)] @@ -1933,6 +1978,7 @@ impl Device { used: &mut BindGroupStates, storage: &'a Storage>, limits: &wgt::Limits, + device_id: id::Id, snatch_guard: &'a SnatchGuard<'a>, ) -> Result, binding_model::CreateBindGroupError> { use crate::binding_model::CreateBindGroupError as Error; @@ -1951,6 +1997,7 @@ impl Device { }) } }; + let (pub_usage, internal_use, range_limit) = match binding_ty { wgt::BufferBindingType::Uniform => ( wgt::BufferUsages::UNIFORM, @@ -1983,6 +2030,10 @@ impl Device { .add_single(storage, bb.buffer_id, internal_use) .ok_or(Error::InvalidBuffer(bb.buffer_id))?; + if 
buffer.device.as_info().id() != device_id { + return Err(DeviceError::WrongDevice.into()); + } + check_buffer_usage(bb.buffer_id, buffer.usage, pub_usage)?; let raw_buffer = buffer .raw @@ -2061,13 +2112,53 @@ impl Device { }) } - pub(crate) fn create_texture_binding( - view: &TextureView, - internal_use: hal::TextureUses, - pub_usage: wgt::TextureUsages, + fn create_sampler_binding<'a>( + used: &BindGroupStates, + storage: &'a Storage>, + id: id::Id, + device_id: id::Id, + ) -> Result<&'a Sampler, binding_model::CreateBindGroupError> { + use crate::binding_model::CreateBindGroupError as Error; + + let sampler = used + .samplers + .add_single(storage, id) + .ok_or(Error::InvalidSampler(id))?; + + if sampler.device.as_info().id() != device_id { + return Err(DeviceError::WrongDevice.into()); + } + + Ok(sampler) + } + + pub(crate) fn create_texture_binding<'a>( + self: &Arc, + binding: u32, + decl: &wgt::BindGroupLayoutEntry, + storage: &'a Storage>, + id: id::Id, used: &mut BindGroupStates, used_texture_ranges: &mut Vec>, - ) -> Result<(), binding_model::CreateBindGroupError> { + snatch_guard: &'a SnatchGuard<'a>, + ) -> Result, binding_model::CreateBindGroupError> { + use crate::binding_model::CreateBindGroupError as Error; + + let view = used + .views + .add_single(storage, id) + .ok_or(Error::InvalidTextureView(id))?; + + if view.device.as_info().id() != self.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + + let (pub_usage, internal_use) = self.texture_use_parameters( + binding, + decl, + view, + "SampledTexture, ReadonlyStorageTexture or WriteonlyStorageTexture", + )?; let texture = &view.parent; let texture_id = texture.as_info().id(); // Careful here: the texture may no longer have its own ref count, @@ -2097,7 +2188,12 @@ impl Device { kind: MemoryInitKind::NeedsInitializedMemory, }); - Ok(()) + Ok(hal::TextureBinding { + view: view + .raw(snatch_guard) + .ok_or(Error::InvalidTextureView(id))?, + usage: internal_use, + }) } // This function expects the provided bind group layout to be resolved @@ -2161,6 +2257,7 @@ impl Device { &mut used, &*buffer_guard, &self.limits, + self.as_info().id(), &snatch_guard, )?; @@ -2184,105 +2281,86 @@ impl Device { &mut used, &*buffer_guard, &self.limits, + self.as_info().id(), &snatch_guard, )?; hal_buffers.push(bb); } (res_index, num_bindings) } - Br::Sampler(id) => { - match decl.ty { - wgt::BindingType::Sampler(ty) => { - let sampler = used - .samplers - .add_single(&*sampler_guard, id) - .ok_or(Error::InvalidSampler(id))?; - - if sampler.device.as_info().id() != self.as_info().id() { - return Err(DeviceError::WrongDevice.into()); - } - - // Allowed sampler values for filtering and comparison - let (allowed_filtering, allowed_comparison) = match ty { - wgt::SamplerBindingType::Filtering => (None, false), - wgt::SamplerBindingType::NonFiltering => (Some(false), false), - wgt::SamplerBindingType::Comparison => (None, true), - }; - - if let Some(allowed_filtering) = allowed_filtering { - if allowed_filtering != sampler.filtering { - return Err(Error::WrongSamplerFiltering { - binding, - layout_flt: allowed_filtering, - sampler_flt: sampler.filtering, - }); - } - } + Br::Sampler(id) => match decl.ty { + wgt::BindingType::Sampler(ty) => { + let sampler = Self::create_sampler_binding( + &used, + &sampler_guard, + id, + self.as_info().id(), + )?; - if allowed_comparison != sampler.comparison { - return Err(Error::WrongSamplerComparison { + let (allowed_filtering, allowed_comparison) = match ty { + wgt::SamplerBindingType::Filtering 
=> (None, false), + wgt::SamplerBindingType::NonFiltering => (Some(false), false), + wgt::SamplerBindingType::Comparison => (None, true), + }; + if let Some(allowed_filtering) = allowed_filtering { + if allowed_filtering != sampler.filtering { + return Err(Error::WrongSamplerFiltering { binding, - layout_cmp: allowed_comparison, - sampler_cmp: sampler.comparison, + layout_flt: allowed_filtering, + sampler_flt: sampler.filtering, }); } - - let res_index = hal_samplers.len(); - hal_samplers.push(sampler.raw()); - (res_index, 1) } - _ => { - return Err(Error::WrongBindingType { + if allowed_comparison != sampler.comparison { + return Err(Error::WrongSamplerComparison { binding, - actual: decl.ty, - expected: "Sampler", - }) + layout_cmp: allowed_comparison, + sampler_cmp: sampler.comparison, + }); } + + let res_index = hal_samplers.len(); + hal_samplers.push(sampler.raw()); + (res_index, 1) } - } + _ => { + return Err(Error::WrongBindingType { + binding, + actual: decl.ty, + expected: "Sampler", + }) + } + }, Br::SamplerArray(ref bindings_array) => { let num_bindings = bindings_array.len(); Self::check_array_binding(self.features, decl.count, num_bindings)?; let res_index = hal_samplers.len(); for &id in bindings_array.iter() { - let sampler = used - .samplers - .add_single(&*sampler_guard, id) - .ok_or(Error::InvalidSampler(id))?; - if sampler.device.as_info().id() != self.as_info().id() { - return Err(DeviceError::WrongDevice.into()); - } + let sampler = Self::create_sampler_binding( + &used, + &sampler_guard, + id, + self.as_info().id(), + )?; + hal_samplers.push(sampler.raw()); } (res_index, num_bindings) } Br::TextureView(id) => { - let view = used - .views - .add_single(&*texture_view_guard, id) - .ok_or(Error::InvalidTextureView(id))?; - let (pub_usage, internal_use) = self.texture_use_parameters( + let tb = self.create_texture_binding( binding, decl, - view, - "SampledTexture, ReadonlyStorageTexture or WriteonlyStorageTexture", - )?; - Self::create_texture_binding( - view, - internal_use, - pub_usage, + &texture_view_guard, + id, &mut used, &mut used_texture_ranges, + &snatch_guard, )?; let res_index = hal_textures.len(); - hal_textures.push(hal::TextureBinding { - view: view - .raw(&snatch_guard) - .ok_or(Error::InvalidTextureView(id))?, - usage: internal_use, - }); + hal_textures.push(tb); (res_index, 1) } Br::TextureViewArray(ref bindings_array) => { @@ -2291,26 +2369,17 @@ impl Device { let res_index = hal_textures.len(); for &id in bindings_array.iter() { - let view = used - .views - .add_single(&*texture_view_guard, id) - .ok_or(Error::InvalidTextureView(id))?; - let (pub_usage, internal_use) = - self.texture_use_parameters(binding, decl, view, - "SampledTextureArray, ReadonlyStorageTextureArray or WriteonlyStorageTextureArray")?; - Self::create_texture_binding( - view, - internal_use, - pub_usage, + let tb = self.create_texture_binding( + binding, + decl, + &texture_view_guard, + id, &mut used, &mut used_texture_ranges, + &snatch_guard, )?; - hal_textures.push(hal::TextureBinding { - view: view - .raw(&snatch_guard) - .ok_or(Error::InvalidTextureView(id))?, - usage: internal_use, - }); + + hal_textures.push(tb); } (res_index, num_bindings) @@ -2799,6 +2868,7 @@ impl Device { module: shader_module.raw(), entry_point: final_entry_point_name.as_ref(), constants: desc.stage.constants.as_ref(), + zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory, }, }; @@ -3214,6 +3284,7 @@ impl Device { module: vertex_shader_module.raw(), entry_point: 
&vertex_entry_point_name, constants: stage_desc.constants.as_ref(), + zero_initialize_workgroup_memory: stage_desc.zero_initialize_workgroup_memory, } }; @@ -3274,6 +3345,9 @@ impl Device { module: shader_module.raw(), entry_point: &fragment_entry_point_name, constants: fragment_state.stage.constants.as_ref(), + zero_initialize_workgroup_memory: fragment_state + .stage + .zero_initialize_workgroup_memory, }) } None => None, @@ -3519,10 +3593,9 @@ impl Device { .map_err(DeviceError::from)? }; drop(guard); - let closures = self.lock_life().triage_submissions( - submission_index, - self.command_allocator.lock().as_mut().unwrap(), - ); + let closures = self + .lock_life() + .triage_submissions(submission_index, &self.command_allocator); assert!( closures.is_empty(), "wait_for_submit is not expected to work with closures" @@ -3650,10 +3723,7 @@ impl Device { log::error!("failed to wait for the device: {error}"); } let mut life_tracker = self.lock_life(); - let _ = life_tracker.triage_submissions( - current_index, - self.command_allocator.lock().as_mut().unwrap(), - ); + let _ = life_tracker.triage_submissions(current_index, &self.command_allocator); if let Some(device_lost_closure) = life_tracker.device_lost_closure.take() { // It's important to not hold the lock while calling the closure. drop(life_tracker); diff --git a/wgpu-core/src/hal_api.rs b/wgpu-core/src/hal_api.rs index 179024baed..f1a40b1cff 100644 --- a/wgpu-core/src/hal_api.rs +++ b/wgpu-core/src/hal_api.rs @@ -11,7 +11,7 @@ pub trait HalApi: hal::Api + 'static + WasmNotSendSync { fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance; fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance>; fn hub(global: &Global) -> &Hub; - fn get_surface(surface: &Surface) -> Option<&Self::Surface>; + fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface>; } impl HalApi for hal::api::Empty { @@ -25,7 +25,7 @@ impl HalApi for hal::api::Empty { fn hub(_: &Global) -> &Hub { unimplemented!("called empty api") } - fn get_surface(_: &Surface) -> Option<&Self::Surface> { + fn surface_as_hal(_: &Surface) -> Option<&Self::Surface> { unimplemented!("called empty api") } } @@ -46,8 +46,8 @@ impl HalApi for hal::api::Vulkan { fn hub(global: &Global) -> &Hub { &global.hubs.vulkan } - fn get_surface(surface: &Surface) -> Option<&Self::Surface> { - surface.raw.downcast_ref::() + fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> { + surface.vulkan.as_ref() } } @@ -67,8 +67,8 @@ impl HalApi for hal::api::Metal { fn hub(global: &Global) -> &Hub { &global.hubs.metal } - fn get_surface(surface: &Surface) -> Option<&Self::Surface> { - surface.raw.downcast_ref::() + fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> { + surface.metal.as_ref() } } @@ -88,8 +88,8 @@ impl HalApi for hal::api::Dx12 { fn hub(global: &Global) -> &Hub { &global.hubs.dx12 } - fn get_surface(surface: &Surface) -> Option<&Self::Surface> { - surface.raw.downcast_ref::() + fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> { + surface.dx12.as_ref() } } @@ -110,7 +110,7 @@ impl HalApi for hal::api::Gles { fn hub(global: &Global) -> &Hub { &global.hubs.gl } - fn get_surface(surface: &Surface) -> Option<&Self::Surface> { - surface.raw.downcast_ref::() + fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> { + surface.gl.as_ref() } } diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs index 794420cd15..4abba1c495 100644 --- a/wgpu-core/src/hub.rs +++ b/wgpu-core/src/hub.rs @@ -245,7 +245,7 @@ impl Hub { 
if let Element::Occupied(ref surface, _epoch) = *element { if let Some(ref mut present) = surface.presentation.lock().take() { if let Some(device) = present.device.downcast_ref::() { - let suf = A::get_surface(surface); + let suf = A::surface_as_hal(surface); unsafe { suf.unwrap().unconfigure(device.raw()); //TODO: we could destroy the surface here diff --git a/wgpu-core/src/identity.rs b/wgpu-core/src/identity.rs index d76d29341a..c89731f7af 100644 --- a/wgpu-core/src/identity.rs +++ b/wgpu-core/src/identity.rs @@ -1,8 +1,8 @@ -use parking_lot::Mutex; use wgt::Backend; use crate::{ id::{Id, Marker}, + lock::{rank, Mutex}, Epoch, Index, }; use std::{fmt::Debug, marker::PhantomData}; @@ -16,31 +16,26 @@ enum IdSource { /// A simple structure to allocate [`Id`] identifiers. /// -/// Calling [`alloc`] returns a fresh, never-before-seen id. Calling [`free`] +/// Calling [`alloc`] returns a fresh, never-before-seen id. Calling [`release`] /// marks an id as dead; it will never be returned again by `alloc`. /// -/// Use `IdentityManager::default` to construct new instances. +/// `IdentityValues` returns `Id`s whose index values are suitable for use as +/// indices into a `Vec` that holds those ids' referents: /// -/// `IdentityManager` returns `Id`s whose index values are suitable for use as -/// indices into a `Storage` that holds those ids' referents: +/// - Every live id has a distinct index value. Every live id's index +/// selects a distinct element in the vector. /// -/// - Every live id has a distinct index value. Each live id's index selects a -/// distinct element in the vector. -/// -/// - `IdentityManager` prefers low index numbers. If you size your vector to +/// - `IdentityValues` prefers low index numbers. If you size your vector to /// accommodate the indices produced here, the vector's length will reflect /// the highwater mark of actual occupancy. /// -/// - `IdentityManager` reuses the index values of freed ids before returning +/// - `IdentityValues` reuses the index values of freed ids before returning /// ids with new index values. Freed vector entries get reused. /// -/// See the module-level documentation for an overview of how this -/// fits together. 
-/// /// [`Id`]: crate::id::Id /// [`Backend`]: wgt::Backend; -/// [`alloc`]: IdentityManager::alloc -/// [`free`]: IdentityManager::free +/// [`alloc`]: IdentityValues::alloc +/// [`release`]: IdentityValues::release #[derive(Debug)] pub(super) struct IdentityValues { free: Vec<(Index, Epoch)>, @@ -122,12 +117,15 @@ impl IdentityManager { impl IdentityManager { pub fn new() -> Self { Self { - values: Mutex::new(IdentityValues { - free: Vec::new(), - next_index: 0, - count: 0, - id_source: IdSource::None, - }), + values: Mutex::new( + rank::IDENTITY_MANAGER_VALUES, + IdentityValues { + free: Vec::new(), + next_index: 0, + count: 0, + id_source: IdSource::None, + }, + ), _phantom: PhantomData, } } diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs index 20e67d5f71..f0a3890c1e 100644 --- a/wgpu-core/src/instance.rs +++ b/wgpu-core/src/instance.rs @@ -1,19 +1,19 @@ +use std::collections::HashMap; use std::sync::Arc; use crate::{ - any_surface::AnySurface, api_log, device::{queue::Queue, resource::Device, DeviceDescriptor}, global::Global, hal_api::HalApi, id::markers, id::{AdapterId, DeviceId, Id, Marker, QueueId, SurfaceId}, + lock::{rank, Mutex}, present::Presentation, resource::{Resource, ResourceInfo, ResourceType}, resource_log, LabelHelpers, DOWNLEVEL_WARNING_MESSAGE, }; -use parking_lot::Mutex; use wgt::{Backend, Backends, PowerPreference}; use hal::{Adapter as _, Instance as _, OpenDevice}; @@ -21,6 +21,7 @@ use thiserror::Error; pub type RequestAdapterOptions = wgt::RequestAdapterOptions; type HalInstance = ::Instance; +type HalSurface = ::Surface; #[derive(Clone, Debug, Error)] #[error("Limit '{name}' value {requested} is better than allowed {allowed}")] @@ -113,31 +114,36 @@ impl Instance { } pub(crate) fn destroy_surface(&self, surface: Surface) { - fn destroy(instance: &Option, surface: AnySurface) { - unsafe { - if let Some(suf) = surface.take::() { - instance.as_ref().unwrap().destroy_surface(suf); + fn destroy(instance: &Option, mut surface: Option>) { + if let Some(surface) = surface.take() { + unsafe { + instance.as_ref().unwrap().destroy_surface(surface); } } } - match surface.raw.backend() { - #[cfg(vulkan)] - Backend::Vulkan => destroy::(&self.vulkan, surface.raw), - #[cfg(metal)] - Backend::Metal => destroy::(&self.metal, surface.raw), - #[cfg(dx12)] - Backend::Dx12 => destroy::(&self.dx12, surface.raw), - #[cfg(gles)] - Backend::Gl => destroy::(&self.gl, surface.raw), - _ => unreachable!(), - } + #[cfg(vulkan)] + destroy::(&self.vulkan, surface.vulkan); + #[cfg(metal)] + destroy::(&self.metal, surface.metal); + #[cfg(dx12)] + destroy::(&self.dx12, surface.dx12); + #[cfg(gles)] + destroy::(&self.gl, surface.gl); } } pub struct Surface { pub(crate) presentation: Mutex>, pub(crate) info: ResourceInfo, - pub(crate) raw: AnySurface, + + #[cfg(vulkan)] + pub vulkan: Option>, + #[cfg(metal)] + pub metal: Option>, + #[cfg(dx12)] + pub dx12: Option>, + #[cfg(gles)] + pub gl: Option>, } impl Resource for Surface { @@ -163,7 +169,7 @@ impl Surface { &self, adapter: &Adapter, ) -> Result { - let suf = A::get_surface(self).ok_or(GetSurfaceSupportError::Unsupported)?; + let suf = A::surface_as_hal(self).ok_or(GetSurfaceSupportError::Unsupported)?; profiling::scope!("surface_capabilities"); let caps = unsafe { adapter @@ -203,7 +209,7 @@ impl Adapter { } pub fn is_surface_supported(&self, surface: &Surface) -> bool { - let suf = A::get_surface(surface); + let suf = A::surface_as_hal(surface); // If get_surface returns None, then the API does not advertise support for 
the surface. // @@ -461,13 +467,25 @@ pub enum RequestAdapterError { #[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum CreateSurfaceError { - #[error("No backend is available")] - NoSupportedBackend, - #[error(transparent)] - InstanceError(#[from] hal::InstanceError), + #[error("The backend {0} was not enabled on the instance.")] + BackendNotEnabled(Backend), + #[error("Failed to create surface for any enabled backend: {0:?}")] + FailedToCreateSurfaceForAnyBackend(HashMap), } impl Global { + /// Creates a new surface targeting the given display/window handles. + /// + /// Internally attempts to create hal surfaces for all enabled backends. + /// + /// Fails only if creation for surfaces for all enabled backends fails in which case + /// the error for each enabled backend is listed. + /// Vice versa, if creation for any backend succeeds, success is returned. + /// Surface creation errors are logged to the debug log in any case. + /// + /// id_in: + /// - If `Some`, the id to assign to the surface. A new one will be generated otherwise. + /// /// # Safety /// /// - `display_handle` must be a valid object to create a surface upon. @@ -483,51 +501,86 @@ impl Global { profiling::scope!("Instance::create_surface"); fn init( + errors: &mut HashMap, + any_created: &mut bool, + backend: Backend, inst: &Option, display_handle: raw_window_handle::RawDisplayHandle, window_handle: raw_window_handle::RawWindowHandle, - ) -> Option> { - inst.as_ref().map(|inst| unsafe { - match inst.create_surface(display_handle, window_handle) { - Ok(raw) => Ok(AnySurface::new::(raw)), - Err(e) => Err(e), + ) -> Option> { + inst.as_ref().and_then(|inst| { + match unsafe { inst.create_surface(display_handle, window_handle) } { + Ok(raw) => { + *any_created = true; + Some(raw) + } + Err(err) => { + log::debug!( + "Instance::create_surface: failed to create surface for {:?}: {:?}", + backend, + err + ); + errors.insert(backend, err); + None + } } }) } - let mut hal_surface: Option> = None; - - #[cfg(vulkan)] - if hal_surface.is_none() { - hal_surface = - init::(&self.instance.vulkan, display_handle, window_handle); - } - #[cfg(metal)] - if hal_surface.is_none() { - hal_surface = - init::(&self.instance.metal, display_handle, window_handle); - } - #[cfg(dx12)] - if hal_surface.is_none() { - hal_surface = - init::(&self.instance.dx12, display_handle, window_handle); - } - #[cfg(gles)] - if hal_surface.is_none() { - hal_surface = init::(&self.instance.gl, display_handle, window_handle); - } - - let hal_surface = hal_surface.ok_or(CreateSurfaceError::NoSupportedBackend)??; + let mut errors = HashMap::default(); + let mut any_created = false; let surface = Surface { - presentation: Mutex::new(None), + presentation: Mutex::new(rank::SURFACE_PRESENTATION, None), info: ResourceInfo::new("", None), - raw: hal_surface, + + #[cfg(vulkan)] + vulkan: init::( + &mut errors, + &mut any_created, + Backend::Vulkan, + &self.instance.vulkan, + display_handle, + window_handle, + ), + #[cfg(metal)] + metal: init::( + &mut errors, + &mut any_created, + Backend::Metal, + &self.instance.metal, + display_handle, + window_handle, + ), + #[cfg(dx12)] + dx12: init::( + &mut errors, + &mut any_created, + Backend::Dx12, + &self.instance.dx12, + display_handle, + window_handle, + ), + #[cfg(gles)] + gl: init::( + &mut errors, + &mut any_created, + Backend::Gl, + &self.instance.gl, + display_handle, + window_handle, + ), }; - #[allow(clippy::arc_with_non_send_sync)] - let (id, _) = self.surfaces.prepare(id_in).assign(Arc::new(surface)); - Ok(id) 
+ if any_created { + #[allow(clippy::arc_with_non_send_sync)] + let (id, _) = self.surfaces.prepare(id_in).assign(Arc::new(surface)); + Ok(id) + } else { + Err(CreateSurfaceError::FailedToCreateSurfaceForAnyBackend( + errors, + )) + } } /// # Safety @@ -538,58 +591,72 @@ impl Global { &self, layer: *mut std::ffi::c_void, id_in: Option, - ) -> SurfaceId { + ) -> Result { profiling::scope!("Instance::create_surface_metal"); let surface = Surface { - presentation: Mutex::new(None), + presentation: Mutex::new(rank::SURFACE_PRESENTATION, None), info: ResourceInfo::new("", None), - raw: { - let hal_surface = self - .instance - .metal - .as_ref() - .map(|inst| { - // we don't want to link to metal-rs for this - #[allow(clippy::transmute_ptr_to_ref)] - inst.create_surface_from_layer(unsafe { std::mem::transmute(layer) }) - }) - .unwrap(); - AnySurface::new::(hal_surface) - }, + metal: Some(self.instance.metal.as_ref().map_or( + Err(CreateSurfaceError::BackendNotEnabled(Backend::Metal)), + |inst| { + // we don't want to link to metal-rs for this + #[allow(clippy::transmute_ptr_to_ref)] + Ok(inst.create_surface_from_layer(unsafe { std::mem::transmute(layer) })) + }, + )?), + #[cfg(dx12)] + dx12: None, + #[cfg(vulkan)] + vulkan: None, + #[cfg(gles)] + gl: None, }; let (id, _) = self.surfaces.prepare(id_in).assign(Arc::new(surface)); - id + Ok(id) } #[cfg(dx12)] - /// # Safety - /// - /// The visual must be valid and able to be used to make a swapchain with. - pub unsafe fn instance_create_surface_from_visual( + fn instance_create_surface_dx12( &self, - visual: *mut std::ffi::c_void, id_in: Option, - ) -> SurfaceId { - profiling::scope!("Instance::instance_create_surface_from_visual"); - + create_surface_func: impl FnOnce(&HalInstance) -> HalSurface, + ) -> Result { let surface = Surface { - presentation: Mutex::new(None), + presentation: Mutex::new(rank::SURFACE_PRESENTATION, None), info: ResourceInfo::new("", None), - raw: { - let hal_surface = self - .instance + dx12: Some(create_surface_func( + self.instance .dx12 .as_ref() - .map(|inst| unsafe { inst.create_surface_from_visual(visual as _) }) - .unwrap(); - AnySurface::new::(hal_surface) - }, + .ok_or(CreateSurfaceError::BackendNotEnabled(Backend::Dx12))?, + )), + #[cfg(metal)] + metal: None, + #[cfg(vulkan)] + vulkan: None, + #[cfg(gles)] + gl: None, }; let (id, _) = self.surfaces.prepare(id_in).assign(Arc::new(surface)); - id + Ok(id) + } + + #[cfg(dx12)] + /// # Safety + /// + /// The visual must be valid and able to be used to make a swapchain with. 
+ pub unsafe fn instance_create_surface_from_visual( + &self, + visual: *mut std::ffi::c_void, + id_in: Option, + ) -> Result { + profiling::scope!("Instance::instance_create_surface_from_visual"); + self.instance_create_surface_dx12(id_in, |inst| unsafe { + inst.create_surface_from_visual(visual as _) + }) } #[cfg(dx12)] @@ -600,25 +667,11 @@ impl Global { &self, surface_handle: *mut std::ffi::c_void, id_in: Option, - ) -> SurfaceId { + ) -> Result { profiling::scope!("Instance::instance_create_surface_from_surface_handle"); - - let surface = Surface { - presentation: Mutex::new(None), - info: ResourceInfo::new("", None), - raw: { - let hal_surface = self - .instance - .dx12 - .as_ref() - .map(|inst| unsafe { inst.create_surface_from_surface_handle(surface_handle) }) - .unwrap(); - AnySurface::new::(hal_surface) - }, - }; - - let (id, _) = self.surfaces.prepare(id_in).assign(Arc::new(surface)); - id + self.instance_create_surface_dx12(id_in, |inst| unsafe { + inst.create_surface_from_surface_handle(surface_handle) + }) } #[cfg(dx12)] @@ -629,27 +682,11 @@ impl Global { &self, swap_chain_panel: *mut std::ffi::c_void, id_in: Option, - ) -> SurfaceId { + ) -> Result { profiling::scope!("Instance::instance_create_surface_from_swap_chain_panel"); - - let surface = Surface { - presentation: Mutex::new(None), - info: ResourceInfo::new("", None), - raw: { - let hal_surface = self - .instance - .dx12 - .as_ref() - .map(|inst| unsafe { - inst.create_surface_from_swap_chain_panel(swap_chain_panel as _) - }) - .unwrap(); - AnySurface::new::(hal_surface) - }, - }; - - let (id, _) = self.surfaces.prepare(id_in).assign(Arc::new(surface)); - id + self.instance_create_surface_dx12(id_in, |inst| unsafe { + inst.create_surface_from_swap_chain_panel(swap_chain_panel as _) + }) } pub fn surface_drop(&self, id: SurfaceId) { @@ -657,11 +694,15 @@ impl Global { api_log!("Surface::drop {id:?}"); - fn unconfigure(global: &Global, surface: &AnySurface, present: &Presentation) { - let hub = HalApi::hub(global); - if let Some(hal_surface) = surface.downcast_ref::() { + fn unconfigure( + global: &Global, + surface: &Option>, + present: &Presentation, + ) { + if let Some(surface) = surface { + let hub = HalApi::hub(global); if let Some(device) = present.device.downcast_ref::() { - hub.surface_unconfigure(device, hal_surface); + hub.surface_unconfigure(device, surface); } } } @@ -669,15 +710,16 @@ impl Global { let surface = self.surfaces.unregister(id); let surface = Arc::into_inner(surface.unwrap()) .expect("Surface cannot be destroyed because is still in use"); + if let Some(present) = surface.presentation.lock().take() { #[cfg(vulkan)] - unconfigure::(self, &surface.raw, &present); + unconfigure::(self, &surface.vulkan, &present); #[cfg(metal)] - unconfigure::(self, &surface.raw, &present); + unconfigure::(self, &surface.metal, &present); #[cfg(dx12)] - unconfigure::(self, &surface.raw, &present); + unconfigure::(self, &surface.dx12, &present); #[cfg(gles)] - unconfigure::(self, &surface.raw, &present); + unconfigure::(self, &surface.gl, &present); } self.instance.destroy_surface(surface); } @@ -785,7 +827,7 @@ impl Global { adapters.retain(|exposed| exposed.info.device_type == wgt::DeviceType::Cpu); } if let Some(surface) = compatible_surface { - let surface = &A::get_surface(surface); + let surface = &A::surface_as_hal(surface); adapters.retain(|exposed| unsafe { // If the surface does not exist for this backend, // then the surface is not supported. 
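The new `CreateSurfaceError::FailedToCreateSurfaceForAnyBackend` variant carries the per-backend `hal::InstanceError` map. A minimal caller-side sketch of handling it (not part of this diff; `global`, `display_handle`, `window_handle`, and `use_surface` are placeholders the embedder would supply, and the wrapper name `instance_create_surface` is assumed from wgpu-core's `Global` API):

```rust
// Hypothetical embedder code against wgpu-core's Global API.
match unsafe { global.instance_create_surface(display_handle, window_handle, None) } {
    Ok(surface_id) => {
        // At least one enabled backend produced a hal surface; failures on the
        // other backends were already logged at debug level.
        use_surface(surface_id);
    }
    Err(wgpu_core::instance::CreateSurfaceError::FailedToCreateSurfaceForAnyBackend(errors)) => {
        // Every enabled backend failed; report each backend's hal error.
        for (backend, error) in &errors {
            eprintln!("surface creation failed on {backend:?}: {error}");
        }
    }
    // `CreateSurfaceError` is `#[non_exhaustive]`, so a catch-all arm is required.
    Err(other) => eprintln!("surface creation failed: {other}"),
}
```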
diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs index cf24f589fa..f62878797b 100644 --- a/wgpu-core/src/lib.rs +++ b/wgpu-core/src/lib.rs @@ -39,6 +39,8 @@ unused_braces, // It gets in the way a lot and does not prevent bugs in practice. clippy::pattern_type_mismatch, + // `wgpu-core` isn't entirely user-facing, so it's useful to document internal items. + rustdoc::private_intra_doc_links )] #![warn( trivial_casts, @@ -48,7 +50,6 @@ unused_qualifications )] -pub mod any_surface; pub mod binding_model; pub mod command; mod conv; @@ -62,6 +63,7 @@ pub mod id; pub mod identity; mod init_tracker; pub mod instance; +mod lock; pub mod pipeline; mod pool; pub mod present; diff --git a/wgpu-core/src/lock/mod.rs b/wgpu-core/src/lock/mod.rs new file mode 100644 index 0000000000..a6593a062d --- /dev/null +++ b/wgpu-core/src/lock/mod.rs @@ -0,0 +1,41 @@ +//! Instrumented lock types. +//! +//! This module defines a set of instrumented wrappers for the lock +//! types used in `wgpu-core` ([`Mutex`], [`RwLock`], and +//! [`SnatchLock`]) that help us understand and validate `wgpu-core` +//! synchronization. +//! +//! - The [`ranked`] module defines lock types that perform run-time +//! checks to ensure that each thread acquires locks only in a +//! specific order, to prevent deadlocks. +//! +//! - The [`vanilla`] module defines lock types that are +//! uninstrumented, no-overhead wrappers around the standard lock +//! types. +//! +//! (We plan to add more wrappers in the future.) +//! +//! If the `wgpu_validate_locks` config is set (for example, with +//! `RUSTFLAGS='--cfg wgpu_validate_locks'`), `wgpu-core` uses the +//! [`ranked`] module's locks. We hope to make this the default for +//! debug builds soon. +//! +//! Otherwise, `wgpu-core` uses the [`vanilla`] module's locks. +//! +//! [`Mutex`]: parking_lot::Mutex +//! [`RwLock`]: parking_lot::RwLock +//! [`SnatchLock`]: crate::snatch::SnatchLock + +pub mod rank; + +#[cfg_attr(not(wgpu_validate_locks), allow(dead_code))] +mod ranked; + +#[cfg_attr(wgpu_validate_locks, allow(dead_code))] +mod vanilla; + +#[cfg(wgpu_validate_locks)] +pub use ranked::{Mutex, MutexGuard, RwLock, RwLockReadGuard, RwLockWriteGuard}; + +#[cfg(not(wgpu_validate_locks))] +pub use vanilla::{Mutex, MutexGuard, RwLock, RwLockReadGuard, RwLockWriteGuard}; diff --git a/wgpu-core/src/lock/rank.rs b/wgpu-core/src/lock/rank.rs new file mode 100644 index 0000000000..4387b8d138 --- /dev/null +++ b/wgpu-core/src/lock/rank.rs @@ -0,0 +1,170 @@ +//! Ranks for `wgpu-core` locks, restricting acquisition order. +//! +//! See [`LockRank`]. + +/// The rank of a lock. +/// +/// Each [`Mutex`], [`RwLock`], and [`SnatchLock`] in `wgpu-core` has been +/// assigned a *rank*: a node in the DAG defined at the bottom of +/// `wgpu-core/src/lock/rank.rs`. The rank of the most recently +/// acquired lock you are still holding determines which locks you may +/// attempt to acquire next. +/// +/// When you create a lock in `wgpu-core`, you must specify its rank +/// by passing in a [`LockRank`] value. This module declares a +/// pre-defined set of ranks to cover everything in `wgpu-core`, named +/// after the type in which they occur, and the name of the type's +/// field that is a lock. For example, [`CommandBuffer::data`] is a +/// `Mutex`, and its rank here is the constant +/// [`COMMAND_BUFFER_DATA`]. 
+/// +/// [`Mutex`]: parking_lot::Mutex +/// [`RwLock`]: parking_lot::RwLock +/// [`SnatchLock`]: crate::snatch::SnatchLock +/// [`CommandBuffer::data`]: crate::command::CommandBuffer::data +#[derive(Debug, Copy, Clone)] +pub struct LockRank { + /// The bit representing this lock. + /// + /// There should only be a single bit set in this value. + pub(super) bit: LockRankSet, + + /// A bitmask of permitted successor ranks. + /// + /// If `rank` is the rank of the most recently acquired lock we + /// are still holding, then `rank.followers` is the mask of + /// locks we are allowed to acquire next. + /// + /// The `define_lock_ranks!` macro ensures that there are no + /// cycles in the graph of lock ranks and their followers. + pub(super) followers: LockRankSet, +} + +/// Define a set of lock ranks, and each rank's permitted successors. +macro_rules! define_lock_ranks { + { + $( + $( #[ $attr:meta ] )* + rank $name:ident $member:literal followed by { $( $follower:ident ),* $(,)? } + )* + } => { + // An enum that assigns a unique number to each rank. + #[allow(non_camel_case_types, clippy::upper_case_acronyms)] + enum LockRankNumber { $( $name, )* } + + bitflags::bitflags! { + #[derive(Debug, Copy, Clone, Eq, PartialEq)] + /// A bitflags type representing a set of lock ranks. + pub struct LockRankSet: u64 { + $( + const $name = 1 << (LockRankNumber:: $name as u64); + )* + } + } + + impl LockRankSet { + pub fn name(self) -> &'static str { + match self { + $( + LockRankSet:: $name => $member, + )* + _ => "", + } + } + } + + $( + // If there is any cycle in the ranking, the initializers + // for `followers` will be cyclic, and rustc will give us + // an error message explaining the cycle. + $( #[ $attr ] )* + pub const $name: LockRank = LockRank { + bit: LockRankSet:: $name, + followers: LockRankSet::empty() $( .union($follower.bit) )*, + }; + )* + } +} + +define_lock_ranks! { + rank DEVICE_TEMP_SUSPECTED "Device::temp_suspected" followed by { + SHARED_TRACKER_INDEX_ALLOCATOR_INNER, + COMMAND_BUFFER_DATA, + DEVICE_TRACKERS, + } + rank COMMAND_BUFFER_DATA "CommandBuffer::data" followed by { + DEVICE_SNATCHABLE_LOCK, + DEVICE_USAGE_SCOPES, + SHARED_TRACKER_INDEX_ALLOCATOR_INNER, + BUFFER_BIND_GROUP_STATE_BUFFERS, + TEXTURE_BIND_GROUP_STATE_TEXTURES, + BUFFER_MAP_STATE, + STATELESS_BIND_GROUP_STATE_RESOURCES, + } + rank DEVICE_SNATCHABLE_LOCK "Device::snatchable_lock" followed by { + SHARED_TRACKER_INDEX_ALLOCATOR_INNER, + DEVICE_TRACE, + BUFFER_MAP_STATE, + BUFFER_BIND_GROUP_STATE_BUFFERS, + TEXTURE_BIND_GROUP_STATE_TEXTURES, + STATELESS_BIND_GROUP_STATE_RESOURCES, + // Uncomment this to see an interesting cycle. + // COMMAND_BUFFER_DATA, + } + rank BUFFER_MAP_STATE "Buffer::map_state" followed by { + DEVICE_PENDING_WRITES, + SHARED_TRACKER_INDEX_ALLOCATOR_INNER, + DEVICE_TRACE, + } + rank DEVICE_PENDING_WRITES "Device::pending_writes" followed by { + COMMAND_ALLOCATOR_FREE_ENCODERS, + SHARED_TRACKER_INDEX_ALLOCATOR_INNER, + DEVICE_LIFE_TRACKER, + } + rank DEVICE_LIFE_TRACKER "Device::life_tracker" followed by { + COMMAND_ALLOCATOR_FREE_ENCODERS, + // Uncomment this to see an interesting cycle. 
+        // DEVICE_TEMP_SUSPECTED,
+        DEVICE_TRACE,
+    }
+    rank COMMAND_ALLOCATOR_FREE_ENCODERS "CommandAllocator::free_encoders" followed by {
+        SHARED_TRACKER_INDEX_ALLOCATOR_INNER,
+    }
+
+    rank BUFFER_BIND_GROUPS "Buffer::bind_groups" followed by { }
+    rank BUFFER_BIND_GROUP_STATE_BUFFERS "BufferBindGroupState::buffers" followed by { }
+    rank BUFFER_INITIALIZATION_STATUS "Buffer::initialization_status" followed by { }
+    rank BUFFER_SYNC_MAPPED_WRITES "Buffer::sync_mapped_writes" followed by { }
+    rank DEVICE_DEFERRED_DESTROY "Device::deferred_destroy" followed by { }
+    rank DEVICE_FENCE "Device::fence" followed by { }
+    #[allow(dead_code)]
+    rank DEVICE_TRACE "Device::trace" followed by { }
+    rank DEVICE_TRACKERS "Device::trackers" followed by { }
+    rank DEVICE_USAGE_SCOPES "Device::usage_scopes" followed by { }
+    rank IDENTITY_MANAGER_VALUES "IdentityManager::values" followed by { }
+    rank REGISTRY_STORAGE "Registry::storage" followed by { }
+    rank RENDER_BUNDLE_SCOPE_BUFFERS "RenderBundleScope::buffers" followed by { }
+    rank RENDER_BUNDLE_SCOPE_TEXTURES "RenderBundleScope::textures" followed by { }
+    rank RENDER_BUNDLE_SCOPE_BIND_GROUPS "RenderBundleScope::bind_groups" followed by { }
+    rank RENDER_BUNDLE_SCOPE_RENDER_PIPELINES "RenderBundleScope::render_pipelines" followed by { }
+    rank RENDER_BUNDLE_SCOPE_QUERY_SETS "RenderBundleScope::query_sets" followed by { }
+    rank RESOURCE_POOL_INNER "ResourcePool::inner" followed by { }
+    rank SHARED_TRACKER_INDEX_ALLOCATOR_INNER "SharedTrackerIndexAllocator::inner" followed by { }
+    rank STAGING_BUFFER_RAW "StagingBuffer::raw" followed by { }
+    rank STATELESS_BIND_GROUP_STATE_RESOURCES "StatelessBindGroupState::resources" followed by { }
+    rank SURFACE_PRESENTATION "Surface::presentation" followed by { }
+    rank TEXTURE_BIND_GROUPS "Texture::bind_groups" followed by { }
+    rank TEXTURE_BIND_GROUP_STATE_TEXTURES "TextureBindGroupState::textures" followed by { }
+    rank TEXTURE_INITIALIZATION_STATUS "Texture::initialization_status" followed by { }
+    rank TEXTURE_CLEAR_MODE "Texture::clear_mode" followed by { }
+    rank TEXTURE_VIEWS "Texture::views" followed by { }
+
+    #[cfg(test)]
+    rank PAWN "pawn" followed by { ROOK, BISHOP }
+    #[cfg(test)]
+    rank ROOK "rook" followed by { KNIGHT }
+    #[cfg(test)]
+    rank KNIGHT "knight" followed by { }
+    #[cfg(test)]
+    rank BISHOP "bishop" followed by { }
+}
diff --git a/wgpu-core/src/lock/ranked.rs b/wgpu-core/src/lock/ranked.rs
new file mode 100644
index 0000000000..ecf37c1d77
--- /dev/null
+++ b/wgpu-core/src/lock/ranked.rs
@@ -0,0 +1,386 @@
+//! Lock types that enforce well-ranked lock acquisition order.
+//!
+//! This module's [`Mutex`] and [`RwLock`] types are instrumented to check that
+//! `wgpu-core` acquires locks according to their rank, to prevent deadlocks. To
+//! use it, put `--cfg wgpu_validate_locks` in `RUSTFLAGS`.
+//!
+//! The [`LockRank`] constants in the [`lock::rank`] module describe edges in a
+//! directed graph of lock acquisitions: each lock's rank says, if this is the most
+//! recently acquired lock that you are still holding, then these are the locks you
+//! are allowed to acquire next.
+//!
+//! As long as this graph doesn't have cycles, any number of threads can acquire
+//! locks along paths through the graph without deadlock:
+//!
+//! - Assume that if a thread is holding a lock, then it will either release it,
+//!   or block trying to acquire another one. No thread just sits on its locks
+//!   forever for unrelated reasons. If it did, then that would be a source of
+//!   deadlock "outside the system" that we can't do anything about.
+//!
+//! - This module asserts that threads acquire and release locks in a stack-like
+//!   order: a lock is dropped only when it is the *most recently acquired* lock
+//!   *still held* - call this the "youngest" lock. This stack-like ordering
+//!   isn't a Rust requirement; Rust lets you drop guards in any order you like.
+//!   This is a restriction we impose.
+//!
+//! - Consider the directed graph whose nodes are locks, and whose edges go from
+//!   each lock to its permitted followers, the locks in its [`LockRank::followers`]
+//!   set. The definition of the [`lock::rank`] module's [`LockRank`] constants
+//!   ensures that this graph has no cycles, including trivial cycles from a node to
+//!   itself.
+//!
+//! - This module then asserts that each thread attempts to acquire a lock only if
+//!   it is among its youngest lock's permitted followers. Thus, as a thread
+//!   acquires locks, it must be traversing a path through the graph along its
+//!   edges.
+//!
+//! - Because there are no cycles in the graph, whenever one thread is blocked
+//!   waiting to acquire a lock, that lock must be held by a different thread: if
+//!   you were allowed to acquire a lock you already hold, that would be a cycle in
+//!   the graph.
+//!
+//! - Furthermore, because the graph has no cycles, as we work our way from each
+//!   thread to the thread it is blocked waiting for, we must eventually reach an
+//!   end point: there must be some thread that is able to acquire its next lock, or
+//!   that is about to release a lock.
+//!
+//! Thus, the system as a whole is always able to make progress: it is free of
+//! deadlocks.
+//!
+//! Note that this validation only monitors each thread's behavior in isolation:
+//! there's only thread-local state, nothing communicated between threads. So we
+//! don't detect deadlocks, per se, only the potential to cause deadlocks. This
+//! means that the validation is conservative, but more reproducible, since it's not
+//! dependent on any particular interleaving of execution.
+//!
+//! [`lock::rank`]: crate::lock::rank
+
+use super::rank::LockRank;
+use std::{cell::Cell, panic::Location};
+
+/// A `Mutex` instrumented for deadlock prevention.
+///
+/// This is just a wrapper around a [`parking_lot::Mutex`], along with
+/// its rank in the `wgpu_core` lock ordering.
+///
+/// For details, see [the module documentation][mod].
+///
+/// [mod]: crate::lock::ranked
+pub struct Mutex<T> {
+    inner: parking_lot::Mutex<T>,
+    rank: LockRank,
+}
+
+/// A guard produced by locking [`Mutex`].
+///
+/// This is just a wrapper around a [`parking_lot::MutexGuard`], along
+/// with the state needed to track lock acquisition.
+///
+/// For details, see [the module documentation][mod].
+///
+/// [mod]: crate::lock::ranked
+pub struct MutexGuard<'a, T> {
+    inner: parking_lot::MutexGuard<'a, T>,
+    saved: LockState,
+}
+
+thread_local! {
+    static LOCK_STATE: Cell<LockState> = const { Cell::new(LockState::INITIAL) };
+}
+
+/// Per-thread state for the deadlock checker.
+#[derive(Debug, Copy, Clone)]
+struct LockState {
+    /// The last lock we acquired, and where.
+    last_acquired: Option<(LockRank, &'static Location<'static>)>,
+
+    /// The number of locks currently held.
+    ///
+    /// This is used to enforce stack-like lock acquisition and release.
+    depth: u32,
+}
+
+impl LockState {
+    const INITIAL: LockState = LockState {
+        last_acquired: None,
+        depth: 0,
+    };
+}
+
+/// Check and record the acquisition of a lock with `new_rank`.
+///
+/// Check that acquiring a lock with `new_rank` is permitted at this point, and
+/// update the per-thread state accordingly.
+///
+/// Return the `LockState` that must be restored when the lock is released.
+fn acquire(new_rank: LockRank, location: &'static Location<'static>) -> LockState {
+    let state = LOCK_STATE.get();
+    // Initially, it's fine to acquire any lock. So we only
+    // need to check when `last_acquired` is `Some`.
+    if let Some((ref last_rank, ref last_location)) = state.last_acquired {
+        assert!(
+            last_rank.followers.contains(new_rank.bit),
+            "Attempt to acquire nested mutexes in wrong order:\n\
+             last locked {:<35} at {}\n\
+             now locking {:<35} at {}\n\
+             Locking {} after locking {} is not permitted.",
+            last_rank.bit.name(),
+            last_location,
+            new_rank.bit.name(),
+            location,
+            new_rank.bit.name(),
+            last_rank.bit.name(),
+        );
+    }
+    LOCK_STATE.set(LockState {
+        last_acquired: Some((new_rank, location)),
+        depth: state.depth + 1,
+    });
+    state
+}
+
+/// Record the release of a lock whose saved state was `saved`.
+///
+/// Check that locks are being released in stacking order, and update the
+/// per-thread state accordingly.
+fn release(saved: LockState) {
+    let prior = LOCK_STATE.replace(saved);
+
+    // Although Rust allows mutex guards to be dropped in any
+    // order, this analysis requires that locks be acquired and
+    // released in stack order: the next lock to be released must be
+    // the most recently acquired lock still held.
+    assert_eq!(
+        prior.depth,
+        saved.depth + 1,
+        "Lock not released in stacking order"
+    );
+}
+
+impl<T> Mutex<T> {
+    pub fn new(rank: LockRank, value: T) -> Mutex<T> {
+        Mutex {
+            inner: parking_lot::Mutex::new(value),
+            rank,
+        }
+    }
+
+    #[track_caller]
+    pub fn lock(&self) -> MutexGuard<T> {
+        let saved = acquire(self.rank, Location::caller());
+        MutexGuard {
+            inner: self.inner.lock(),
+            saved,
+        }
+    }
+}
+
+impl<'a, T> Drop for MutexGuard<'a, T> {
+    fn drop(&mut self) {
+        release(self.saved);
+    }
+}
+
+impl<'a, T> std::ops::Deref for MutexGuard<'a, T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        self.inner.deref()
+    }
+}
+
+impl<'a, T> std::ops::DerefMut for MutexGuard<'a, T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.inner.deref_mut()
+    }
+}
+
+impl<T: std::fmt::Debug> std::fmt::Debug for Mutex<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.inner.fmt(f)
+    }
+}
+
+/// An `RwLock` instrumented for deadlock prevention.
+///
+/// This is just a wrapper around a [`parking_lot::RwLock`], along with
+/// its rank in the `wgpu_core` lock ordering.
+///
+/// For details, see [the module documentation][mod].
+///
+/// [mod]: crate::lock::ranked
+pub struct RwLock<T> {
+    inner: parking_lot::RwLock<T>,
+    rank: LockRank,
+}
+
+/// A read guard produced by locking [`RwLock`] for reading.
+///
+/// This is just a wrapper around a [`parking_lot::RwLockReadGuard`], along with
+/// the state needed to track lock acquisition.
+///
+/// For details, see [the module documentation][mod].
+///
+/// [mod]: crate::lock::ranked
+pub struct RwLockReadGuard<'a, T> {
+    inner: parking_lot::RwLockReadGuard<'a, T>,
+    saved: LockState,
+}
+
+/// A write guard produced by locking [`RwLock`] for writing.
+///
+/// This is just a wrapper around a [`parking_lot::RwLockWriteGuard`], along
+/// with the state needed to track lock acquisition.
+///
+/// For details, see [the module documentation][mod].
+/// +/// [mod]: crate::lock::ranked +pub struct RwLockWriteGuard<'a, T> { + inner: parking_lot::RwLockWriteGuard<'a, T>, + saved: LockState, +} + +impl RwLock { + pub fn new(rank: LockRank, value: T) -> RwLock { + RwLock { + inner: parking_lot::RwLock::new(value), + rank, + } + } + + #[track_caller] + pub fn read(&self) -> RwLockReadGuard { + let saved = acquire(self.rank, Location::caller()); + RwLockReadGuard { + inner: self.inner.read(), + saved, + } + } + + #[track_caller] + pub fn write(&self) -> RwLockWriteGuard { + let saved = acquire(self.rank, Location::caller()); + RwLockWriteGuard { + inner: self.inner.write(), + saved, + } + } +} + +impl std::fmt::Debug for RwLock { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.inner.fmt(f) + } +} + +impl<'a, T> Drop for RwLockReadGuard<'a, T> { + fn drop(&mut self) { + release(self.saved); + } +} + +impl<'a, T> Drop for RwLockWriteGuard<'a, T> { + fn drop(&mut self) { + release(self.saved); + } +} + +impl<'a, T> std::ops::Deref for RwLockReadGuard<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + self.inner.deref() + } +} + +impl<'a, T> std::ops::Deref for RwLockWriteGuard<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + self.inner.deref() + } +} + +impl<'a, T> std::ops::DerefMut for RwLockWriteGuard<'a, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.inner.deref_mut() + } +} + +/// Locks can be acquired in the order indicated by their ranks. +#[test] +fn permitted() { + use super::rank; + + let lock1 = Mutex::new(rank::PAWN, ()); + let lock2 = Mutex::new(rank::ROOK, ()); + + let _guard1 = lock1.lock(); + let _guard2 = lock2.lock(); +} + +/// Locks can only be acquired in the order indicated by their ranks. +#[test] +#[should_panic(expected = "Locking pawn after locking rook")] +fn forbidden_unrelated() { + use super::rank; + + let lock1 = Mutex::new(rank::ROOK, ()); + let lock2 = Mutex::new(rank::PAWN, ()); + + let _guard1 = lock1.lock(); + let _guard2 = lock2.lock(); +} + +/// Lock acquisitions can't skip ranks. +/// +/// These two locks *could* be acquired in this order, but only if other locks +/// are acquired in between them. Skipping ranks isn't allowed. +#[test] +#[should_panic(expected = "Locking knight after locking pawn")] +fn forbidden_skip() { + use super::rank; + + let lock1 = Mutex::new(rank::PAWN, ()); + let lock2 = Mutex::new(rank::KNIGHT, ()); + + let _guard1 = lock1.lock(); + let _guard2 = lock2.lock(); +} + +/// Locks can be acquired and released in a stack-like order. +#[test] +fn stack_like() { + use super::rank; + + let lock1 = Mutex::new(rank::PAWN, ()); + let lock2 = Mutex::new(rank::ROOK, ()); + let lock3 = Mutex::new(rank::BISHOP, ()); + + let guard1 = lock1.lock(); + let guard2 = lock2.lock(); + drop(guard2); + + let guard3 = lock3.lock(); + drop(guard3); + drop(guard1); +} + +/// Locks can only be acquired and released in a stack-like order. +#[test] +#[should_panic(expected = "Lock not released in stacking order")] +fn non_stack_like() { + use super::rank; + + let lock1 = Mutex::new(rank::PAWN, ()); + let lock2 = Mutex::new(rank::ROOK, ()); + + let guard1 = lock1.lock(); + let guard2 = lock2.lock(); + + // Avoid a double panic from dropping this while unwinding due to the panic + // we're testing for. 
+ std::mem::forget(guard2); + + drop(guard1); +} diff --git a/wgpu-core/src/lock/vanilla.rs b/wgpu-core/src/lock/vanilla.rs new file mode 100644 index 0000000000..4fc419f12e --- /dev/null +++ b/wgpu-core/src/lock/vanilla.rs @@ -0,0 +1,115 @@ +//! Plain, uninstrumented wrappers around [`parking_lot`] lock types. +//! +//! These definitions are used when no particular lock instrumentation +//! Cargo feature is selected. + +/// A plain wrapper around [`parking_lot::Mutex`]. +/// +/// This is just like [`parking_lot::Mutex`], except that our [`new`] +/// method takes a rank, indicating where the new mutex should sit in +/// `wgpu-core`'s lock ordering. The rank is ignored. +/// +/// See the [`lock`] module documentation for other wrappers. +/// +/// [`new`]: Mutex::new +/// [`lock`]: crate::lock +pub struct Mutex(parking_lot::Mutex); + +/// A guard produced by locking [`Mutex`]. +/// +/// This is just a wrapper around a [`parking_lot::MutexGuard`]. +pub struct MutexGuard<'a, T>(parking_lot::MutexGuard<'a, T>); + +impl Mutex { + pub fn new(_rank: super::rank::LockRank, value: T) -> Mutex { + Mutex(parking_lot::Mutex::new(value)) + } + + pub fn lock(&self) -> MutexGuard { + MutexGuard(self.0.lock()) + } +} + +impl<'a, T> std::ops::Deref for MutexGuard<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + self.0.deref() + } +} + +impl<'a, T> std::ops::DerefMut for MutexGuard<'a, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.0.deref_mut() + } +} + +impl std::fmt::Debug for Mutex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +/// A plain wrapper around [`parking_lot::RwLock`]. +/// +/// This is just like [`parking_lot::RwLock`], except that our [`new`] +/// method takes a rank, indicating where the new mutex should sit in +/// `wgpu-core`'s lock ordering. The rank is ignored. +/// +/// See the [`lock`] module documentation for other wrappers. +/// +/// [`new`]: RwLock::new +/// [`lock`]: crate::lock +pub struct RwLock(parking_lot::RwLock); + +/// A read guard produced by locking [`RwLock`] as a reader. +/// +/// This is just a wrapper around a [`parking_lot::RwLockReadGuard`]. +pub struct RwLockReadGuard<'a, T>(parking_lot::RwLockReadGuard<'a, T>); + +/// A write guard produced by locking [`RwLock`] as a writer. +/// +/// This is just a wrapper around a [`parking_lot::RwLockWriteGuard`]. 
+pub struct RwLockWriteGuard<'a, T>(parking_lot::RwLockWriteGuard<'a, T>); + +impl RwLock { + pub fn new(_rank: super::rank::LockRank, value: T) -> RwLock { + RwLock(parking_lot::RwLock::new(value)) + } + + pub fn read(&self) -> RwLockReadGuard { + RwLockReadGuard(self.0.read()) + } + + pub fn write(&self) -> RwLockWriteGuard { + RwLockWriteGuard(self.0.write()) + } +} + +impl std::fmt::Debug for RwLock { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl<'a, T> std::ops::Deref for RwLockReadGuard<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + self.0.deref() + } +} + +impl<'a, T> std::ops::Deref for RwLockWriteGuard<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + self.0.deref() + } +} + +impl<'a, T> std::ops::DerefMut for RwLockWriteGuard<'a, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.0.deref_mut() + } +} diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs index b1689bd691..d70b118d7e 100644 --- a/wgpu-core/src/pipeline.rs +++ b/wgpu-core/src/pipeline.rs @@ -10,7 +10,8 @@ use crate::{ resource_log, validation, Label, }; use arrayvec::ArrayVec; -use std::{borrow::Cow, error::Error, fmt, marker::PhantomData, num::NonZeroU32, sync::Arc}; +use naga::error::ShaderError; +use std::{borrow::Cow, marker::PhantomData, num::NonZeroU32, sync::Arc}; use thiserror::Error; /// Information about buffer bindings, which @@ -107,79 +108,8 @@ impl ShaderModule { } } -#[derive(Clone, Debug)] -pub struct ShaderError { - pub source: String, - pub label: Option, - pub inner: Box, -} -#[cfg(feature = "wgsl")] -impl fmt::Display for ShaderError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let label = self.label.as_deref().unwrap_or_default(); - let string = self.inner.emit_to_string(&self.source); - write!(f, "\nShader '{label}' parsing {string}") - } -} -#[cfg(feature = "glsl")] -impl fmt::Display for ShaderError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let label = self.label.as_deref().unwrap_or_default(); - let string = self.inner.emit_to_string(&self.source); - write!(f, "\nShader '{label}' parsing {string}") - } -} -#[cfg(feature = "spirv")] -impl fmt::Display for ShaderError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let label = self.label.as_deref().unwrap_or_default(); - let string = self.inner.emit_to_string(&self.source); - write!(f, "\nShader '{label}' parsing {string}") - } -} -impl fmt::Display for ShaderError> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use codespan_reporting::{ - diagnostic::{Diagnostic, Label}, - files::SimpleFile, - term, - }; - - let label = self.label.as_deref().unwrap_or_default(); - let files = SimpleFile::new(label, &self.source); - let config = term::Config::default(); - let mut writer = term::termcolor::NoColor::new(Vec::new()); - - let diagnostic = Diagnostic::error().with_labels( - self.inner - .spans() - .map(|&(span, ref desc)| { - Label::primary((), span.to_range().unwrap()).with_message(desc.to_owned()) - }) - .collect(), - ); - - term::emit(&mut writer, &config, &files, &diagnostic).expect("cannot write error"); - - write!( - f, - "\nShader validation {}", - String::from_utf8_lossy(&writer.into_inner()) - ) - } -} -impl Error for ShaderError -where - ShaderError: fmt::Display, - E: Error + 'static, -{ - fn source(&self) -> Option<&(dyn Error + 'static)> { - Some(&self.inner) - } -} - //Note: `Clone` would require `WithSpan: Clone`. 
-#[derive(Debug, Error)] +#[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum CreateShaderModuleError { #[cfg(feature = "wgsl")] @@ -187,7 +117,7 @@ pub enum CreateShaderModuleError { Parsing(#[from] ShaderError), #[cfg(feature = "glsl")] #[error(transparent)] - ParsingGlsl(#[from] ShaderError), + ParsingGlsl(#[from] ShaderError), #[cfg(feature = "spirv")] #[error(transparent)] ParsingSpirV(#[from] ShaderError), @@ -209,17 +139,6 @@ pub enum CreateShaderModuleError { }, } -impl CreateShaderModuleError { - pub fn location(&self, source: &str) -> Option { - match *self { - #[cfg(feature = "wgsl")] - CreateShaderModuleError::Parsing(ref err) => err.inner.location(source), - CreateShaderModuleError::Validation(ref err) => err.inner.location(source), - _ => None, - } - } -} - /// Describes a programmable pipeline stage. #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -241,6 +160,11 @@ pub struct ProgrammableStageDescriptor<'a> { /// /// The value may represent any of WGSL's concrete scalar types. pub constants: Cow<'a, naga::back::PipelineConstants>, + /// Whether workgroup scoped memory will be initialized with zero values for this stage. + /// + /// This is required by the WebGPU spec, but may have overhead which can be avoided + /// for cross-platform applications + pub zero_initialize_workgroup_memory: bool, } /// Number of implicit bind groups derived at pipeline creation. diff --git a/wgpu-core/src/pool.rs b/wgpu-core/src/pool.rs index 47de6d5feb..7d17f3a7a3 100644 --- a/wgpu-core/src/pool.rs +++ b/wgpu-core/src/pool.rs @@ -5,8 +5,8 @@ use std::{ }; use once_cell::sync::OnceCell; -use parking_lot::Mutex; +use crate::lock::{rank, Mutex}; use crate::{PreHashedKey, PreHashedMap}; type SlotInner = Weak; @@ -22,13 +22,15 @@ pub struct ResourcePool { impl ResourcePool { pub fn new() -> Self { Self { - inner: Mutex::new(HashMap::default()), + inner: Mutex::new(rank::RESOURCE_POOL_INNER, HashMap::default()), } } - /// Get a resource from the pool with the given entry map, or create a new one if it doesn't exist using the given constructor. + /// Get a resource from the pool with the given entry map, or create a new + /// one if it doesn't exist using the given constructor. /// - /// Behaves such that only one resource will be created for each unique entry map at any one time. + /// Behaves such that only one resource will be created for each unique + /// entry map at any one time. pub fn get_or_init(&self, key: K, constructor: F) -> Result, E> where F: FnOnce(K) -> Result, E>, @@ -96,6 +98,8 @@ impl ResourcePool { /// Remove the given entry map from the pool. /// /// Must *only* be called in the Drop impl of [`BindGroupLayout`]. 
+ /// + /// [`BindGroupLayout`]: crate::binding_model::BindGroupLayout pub fn remove(&self, key: &K) { let hashed_key = PreHashedKey::from_key(key); diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs index 2f274cd554..053f7fdb24 100644 --- a/wgpu-core/src/present.rs +++ b/wgpu-core/src/present.rs @@ -21,13 +21,13 @@ use crate::{ hal_api::HalApi, hal_label, id, init_tracker::TextureInitTracker, + lock::{rank, Mutex, RwLock}, resource::{self, ResourceInfo}, snatch::Snatchable, track, }; use hal::{Queue as _, Surface as _}; -use parking_lot::{Mutex, RwLock}; use thiserror::Error; use wgt::SurfaceStatus as Status; @@ -157,7 +157,7 @@ impl Global { #[cfg(not(feature = "trace"))] let _ = device; - let suf = A::get_surface(surface.as_ref()); + let suf = A::surface_as_hal(surface.as_ref()); let (texture_id, status) = match unsafe { suf.unwrap() .acquire_texture(Some(std::time::Duration::from_millis( @@ -215,7 +215,10 @@ impl Global { desc: texture_desc, hal_usage, format_features, - initialization_status: RwLock::new(TextureInitTracker::new(1, 1)), + initialization_status: RwLock::new( + rank::TEXTURE_INITIALIZATION_STATUS, + TextureInitTracker::new(1, 1), + ), full_range: track::TextureSelector { layers: 0..1, mips: 0..1, @@ -224,11 +227,14 @@ impl Global { "", Some(device.tracker_indices.textures.clone()), ), - clear_mode: RwLock::new(resource::TextureClearMode::Surface { - clear_view: Some(clear_view), - }), - views: Mutex::new(Vec::new()), - bind_groups: Mutex::new(Vec::new()), + clear_mode: RwLock::new( + rank::TEXTURE_CLEAR_MODE, + resource::TextureClearMode::Surface { + clear_view: Some(clear_view), + }, + ), + views: Mutex::new(rank::TEXTURE_VIEWS, Vec::new()), + bind_groups: Mutex::new(rank::TEXTURE_BIND_GROUPS, Vec::new()), }; let (id, resource) = fid.assign(Arc::new(texture)); @@ -324,7 +330,7 @@ impl Global { .textures .remove(texture.info.tracker_index()); let mut exclusive_snatch_guard = device.snatchable_lock.write(); - let suf = A::get_surface(&surface); + let suf = A::surface_as_hal(&surface); let mut inner = texture.inner_mut(&mut exclusive_snatch_guard); let inner = inner.as_mut().unwrap(); @@ -418,7 +424,7 @@ impl Global { .lock() .textures .remove(texture.info.tracker_index()); - let suf = A::get_surface(&surface); + let suf = A::surface_as_hal(&surface); let exclusive_snatch_guard = device.snatchable_lock.write(); match texture.inner.snatch(exclusive_snatch_guard).unwrap() { resource::TextureInner::Surface { mut raw, parent_id } => { diff --git a/wgpu-core/src/registry.rs b/wgpu-core/src/registry.rs index f78abcaa6a..f0f5674dae 100644 --- a/wgpu-core/src/registry.rs +++ b/wgpu-core/src/registry.rs @@ -1,11 +1,11 @@ use std::sync::Arc; -use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard}; use wgt::Backend; use crate::{ id::Id, identity::IdentityManager, + lock::{rank, RwLock, RwLockReadGuard, RwLockWriteGuard}, resource::Resource, storage::{Element, InvalidId, Storage}, }; @@ -38,6 +38,7 @@ impl RegistryReport { /// #[derive(Debug)] pub(crate) struct Registry { + // Must only contain an id which has either never been used or has been released from `storage` identity: Arc>, storage: RwLock>, backend: Backend, @@ -47,7 +48,7 @@ impl Registry { pub(crate) fn new(backend: Backend) -> Self { Self { identity: Arc::new(IdentityManager::new()), - storage: RwLock::new(Storage::new()), + storage: RwLock::new(rank::REGISTRY_STORAGE, Storage::new()), backend, } } @@ -98,9 +99,6 @@ impl FutureId<'_, T> { /// Assign an existing resource to a new ID. 
/// /// Registers it with the registry. - /// - /// This _will_ leak the ID, and it will not be recycled again. - /// See https://github.com/gfx-rs/wgpu/issues/4912. pub fn assign_existing(self, value: &Arc) -> Id { let mut data = self.data.write(); debug_assert!(!data.contains(self.id)); @@ -165,8 +163,11 @@ impl Registry { storage.insert_error(id, label); } pub(crate) fn unregister(&self, id: Id) -> Option> { - self.identity.free(id); let value = self.storage.write().remove(id); + // This needs to happen *after* removing it from the storage, to maintain the + // invariant that `self.identity` only contains ids which are actually available + // See https://github.com/gfx-rs/wgpu/issues/5372 + self.identity.free(id); //Returning None is legal if it's an error ID value } @@ -209,3 +210,53 @@ impl Registry { report } } + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use crate::{ + id::Marker, + resource::{Resource, ResourceInfo, ResourceType}, + }; + + use super::Registry; + struct TestData { + info: ResourceInfo, + } + struct TestDataId; + impl Marker for TestDataId {} + + impl Resource for TestData { + type Marker = TestDataId; + + const TYPE: ResourceType = "Test data"; + + fn as_info(&self) -> &ResourceInfo { + &self.info + } + + fn as_info_mut(&mut self) -> &mut ResourceInfo { + &mut self.info + } + } + + #[test] + fn simultaneous_registration() { + let registry = Registry::without_backend(); + std::thread::scope(|s| { + for _ in 0..5 { + s.spawn(|| { + for _ in 0..1000 { + let value = Arc::new(TestData { + info: ResourceInfo::new("Test data", None), + }); + let new_id = registry.prepare(None); + let (id, _) = new_id.assign(value); + registry.unregister(id); + } + }); + } + }) + } +} diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 11109e27f9..a6b945f417 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -13,6 +13,7 @@ use crate::{ TextureViewId, }, init_tracker::{BufferInitTracker, TextureInitTracker}, + lock::{Mutex, RwLock}, resource, resource_log, snatch::{ExclusiveSnatchGuard, SnatchGuard, Snatchable}, track::{SharedTrackerIndexAllocator, TextureSelector, TrackerIndex}, @@ -21,7 +22,6 @@ use crate::{ }; use hal::CommandEncoder; -use parking_lot::{Mutex, RwLock}; use smallvec::SmallVec; use thiserror::Error; use wgt::WasmNotSendSync; @@ -1026,7 +1026,9 @@ impl Global { profiling::scope!("Surface::as_hal"); let surface = self.surfaces.get(id).ok(); - let hal_surface = surface.as_ref().and_then(|surface| A::get_surface(surface)); + let hal_surface = surface + .as_ref() + .and_then(|surface| A::surface_as_hal(surface)); hal_surface_callback(hal_surface) } diff --git a/wgpu-core/src/snatch.rs b/wgpu-core/src/snatch.rs index d5cd1a3d37..08a1eba11d 100644 --- a/wgpu-core/src/snatch.rs +++ b/wgpu-core/src/snatch.rs @@ -1,6 +1,6 @@ #![allow(unused)] -use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard}; +use crate::lock::{RwLock, RwLockReadGuard, RwLockWriteGuard}; use std::{ backtrace::Backtrace, cell::{Cell, RefCell, UnsafeCell}, @@ -8,6 +8,8 @@ use std::{ thread, }; +use crate::lock::rank; + /// A guard that provides read access to snatchable data. pub struct SnatchGuard<'a>(RwLockReadGuard<'a, ()>); /// A guard that allows snatching the snatchable data. 
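The `LockTrace` guard added below generalizes the old read-only recursion check to both read and write acquisitions of the snatch lock. The underlying pattern, stripped of the wgpu specifics, is a thread-local cell recording where the lock was last acquired; here is a standalone sketch (names are illustrative, not wgpu-core's):

```rust
use std::cell::Cell;
use std::panic::Location;

thread_local! {
    // Where this thread last acquired the (conceptual) lock, if it still holds it.
    static HELD_AT: Cell<Option<&'static Location<'static>>> = const { Cell::new(None) };
}

#[track_caller]
fn enter() {
    let caller = Location::caller();
    // A second acquisition on the same thread would self-deadlock, so panic
    // with both the previous and the current acquisition sites.
    if let Some(prev) = HELD_AT.replace(Some(caller)) {
        panic!("recursive acquisition at {caller}; previously acquired at {prev}");
    }
}

fn exit() {
    HELD_AT.take();
}

fn main() {
    enter();
    exit(); // balanced acquire/release: fine
    enter();
    enter(); // second acquisition on this thread: panics with both locations
}
```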
@@ -64,8 +66,58 @@ impl<T> std::fmt::Debug for Snatchable<T> {
 
 unsafe impl<T> Sync for Snatchable<T> {}
 
+struct LockTrace {
+    purpose: &'static str,
+    caller: &'static Location<'static>,
+    backtrace: Backtrace,
+}
+
+impl std::fmt::Display for LockTrace {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "a {} lock at {}\n{}",
+            self.purpose, self.caller, self.backtrace
+        )
+    }
+}
+
+#[cfg(debug_assertions)]
+impl LockTrace {
+    #[track_caller]
+    fn enter(purpose: &'static str) {
+        let new = LockTrace {
+            purpose,
+            caller: Location::caller(),
+            backtrace: Backtrace::capture(),
+        };
+
+        if let Some(prev) = SNATCH_LOCK_TRACE.take() {
+            let current = thread::current();
+            let name = current.name().unwrap_or("<unnamed>");
+            panic!(
+                "thread '{name}' attempted to acquire a snatch lock recursively.\n\
+                 - Currently trying to acquire {new}\n\
+                 - Previously acquired {prev}",
+            );
+        } else {
+            SNATCH_LOCK_TRACE.set(Some(new));
+        }
+    }
+
+    fn exit() {
+        SNATCH_LOCK_TRACE.take();
+    }
+}
+
+#[cfg(not(debug_assertions))]
+impl LockTrace {
+    fn enter(_purpose: &'static str) {}
+    fn exit() {}
+}
+
 thread_local! {
-    static READ_LOCK_LOCATION: Cell<Option<(&'static Location<'static>, Backtrace)>> = const { Cell::new(None) };
+    static SNATCH_LOCK_TRACE: Cell<Option<LockTrace>> = const { Cell::new(None) };
 }
 
 /// A Device-global lock for all snatchable data.
@@ -78,31 +130,16 @@ impl SnatchLock {
     /// right SnatchLock (the one associated with the same device). This method is unsafe
     /// to force users to think twice about creating a SnatchLock. The only place this
     /// method should be called is when creating the device.
-    pub unsafe fn new() -> Self {
+    pub unsafe fn new(rank: rank::LockRank) -> Self {
         SnatchLock {
-            lock: RwLock::new(()),
+            lock: RwLock::new(rank, ()),
         }
     }
 
     /// Request read access to snatchable resources.
     #[track_caller]
     pub fn read(&self) -> SnatchGuard {
-        if cfg!(debug_assertions) {
-            let caller = Location::caller();
-            let backtrace = Backtrace::capture();
-            if let Some((prev, bt)) = READ_LOCK_LOCATION.take() {
-                let current = thread::current();
-                let name = current.name().unwrap_or("<unnamed>");
-                panic!(
-                    "thread '{name}' attempted to acquire a snatch read lock recursively.\n
-                    - {prev}\n{bt}\n
-                    - {caller}\n{backtrace}"
-                );
-            } else {
-                READ_LOCK_LOCATION.set(Some((caller, backtrace)));
-            }
-        }
-
+        LockTrace::enter("read");
         SnatchGuard(self.lock.read())
     }
 
@@ -111,14 +148,21 @@ impl SnatchLock {
     /// This should only be called when a resource needs to be snatched. This has
     /// a high risk of causing lock contention if called concurrently with other
     /// wgpu work.
+ #[track_caller] pub fn write(&self) -> ExclusiveSnatchGuard { + LockTrace::enter("write"); ExclusiveSnatchGuard(self.lock.write()) } } impl Drop for SnatchGuard<'_> { fn drop(&mut self) { - #[cfg(debug_assertions)] - READ_LOCK_LOCATION.take(); + LockTrace::exit(); + } +} + +impl Drop for ExclusiveSnatchGuard<'_> { + fn drop(&mut self) { + LockTrace::exit(); } } diff --git a/wgpu-core/src/track/buffer.rs b/wgpu-core/src/track/buffer.rs index 6cf1fdda6f..9a52a53253 100644 --- a/wgpu-core/src/track/buffer.rs +++ b/wgpu-core/src/track/buffer.rs @@ -11,6 +11,7 @@ use super::{PendingTransition, ResourceTracker, TrackerIndex}; use crate::{ hal_api::HalApi, id::BufferId, + lock::{rank, Mutex}, resource::{Buffer, Resource}, snatch::SnatchGuard, storage::Storage, @@ -20,7 +21,6 @@ use crate::{ }, }; use hal::{BufferBarrier, BufferUses}; -use parking_lot::Mutex; use wgt::{strict_assert, strict_assert_eq}; impl ResourceUses for BufferUses { @@ -51,7 +51,7 @@ pub(crate) struct BufferBindGroupState { impl BufferBindGroupState { pub fn new() -> Self { Self { - buffers: Mutex::new(Vec::new()), + buffers: Mutex::new(rank::BUFFER_BIND_GROUP_STATE_BUFFERS, Vec::new()), _phantom: PhantomData, } @@ -245,6 +245,22 @@ impl BufferUsageScope { .get(id) .map_err(|_| UsageConflict::BufferInvalid { id })?; + self.insert_merge_single(buffer.clone(), new_state) + .map(|_| buffer) + } + + /// Merge a single state into the UsageScope, using an already resolved buffer. + /// + /// If the resulting state is invalid, returns a usage + /// conflict with the details of the invalid state. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. + pub fn insert_merge_single( + &mut self, + buffer: Arc>, + new_state: BufferUses, + ) -> Result<(), UsageConflict> { let index = buffer.info.tracker_index().as_usize(); self.allow_index(index); @@ -260,12 +276,12 @@ impl BufferUsageScope { index, BufferStateProvider::Direct { state: new_state }, ResourceMetadataProvider::Direct { - resource: Cow::Owned(buffer.clone()), + resource: Cow::Owned(buffer), }, )?; } - Ok(buffer) + Ok(()) } } diff --git a/wgpu-core/src/track/metadata.rs b/wgpu-core/src/track/metadata.rs index 3e71e0e084..d6e8d6f906 100644 --- a/wgpu-core/src/track/metadata.rs +++ b/wgpu-core/src/track/metadata.rs @@ -87,16 +87,18 @@ impl ResourceMetadata { /// Add the resource with the given index, epoch, and reference count to the /// set. /// + /// Returns a reference to the newly inserted resource. + /// (This allows avoiding a clone/reference count increase in many cases.) + /// /// # Safety /// /// The given `index` must be in bounds for this `ResourceMetadata`'s /// existing tables. See `tracker_assert_in_bounds`. #[inline(always)] - pub(super) unsafe fn insert(&mut self, index: usize, resource: Arc) { + pub(super) unsafe fn insert(&mut self, index: usize, resource: Arc) -> &Arc { self.owned.set(index, true); - unsafe { - *self.resources.get_unchecked_mut(index) = Some(resource); - } + let resource_dst = unsafe { self.resources.get_unchecked_mut(index) }; + resource_dst.insert(resource) } /// Get the resource with the given index. 
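Worth spelling out, since the new return value above is easy to miss: `ResourceMetadata::insert` now stores the `Arc` via `Option::insert`, which hands back a reference to the freshly stored value, so callers can keep using the resource without an extra clone and refcount bump. A minimal, self-contained sketch of that standard-library pattern (illustration only, not wgpu-core code):

```rust
use std::sync::Arc;

// `Option::insert` stores the value and returns a reference to it,
// which coerces to `&Arc<T>`; the caller borrows instead of cloning.
fn store_and_borrow<T>(slot: &mut Option<Arc<T>>, value: Arc<T>) -> &Arc<T> {
    slot.insert(value)
}

fn main() {
    let mut slot = None;
    let stored = store_and_borrow(&mut slot, Arc::new(String::from("resource")));
    // Still exactly one strong reference: the slot owns it, we only borrow it.
    assert_eq!(Arc::strong_count(stored), 1);
}
```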
diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs
index f5b37f3756..7df13d039b 100644
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -102,10 +102,14 @@ mod stateless;
 mod texture;
 
 use crate::{
-    binding_model, command, conv, hal_api::HalApi, id, pipeline, resource, snatch::SnatchGuard,
+    binding_model, command, conv,
+    hal_api::HalApi,
+    id,
+    lock::{rank, Mutex, RwLock},
+    pipeline, resource,
+    snatch::SnatchGuard,
 };
 
-use parking_lot::{Mutex, RwLock};
 use std::{fmt, ops, sync::Arc};
 use thiserror::Error;
@@ -136,7 +140,8 @@ impl TrackerIndex {
 /// of a certain type. This index is separate from the resource ID for various reasons:
 /// - There can be multiple resource IDs pointing to the same resource.
 /// - IDs of dead handles can be recycled while resources are internally held alive (and tracked).
-/// - The plan is to remove IDs in the long run (https://github.com/gfx-rs/wgpu/issues/5121).
+/// - The plan is to remove IDs in the long run
+///   ([#5121](https://github.com/gfx-rs/wgpu/issues/5121)).
 /// In order to produce these tracker indices, there is a shared TrackerIndexAllocator
 /// per resource type. Indices have the same lifetime as the internal resource they
 /// are associated with (alloc happens when creating the resource and free is called when
@@ -190,7 +195,10 @@ pub(crate) struct SharedTrackerIndexAllocator {
 impl SharedTrackerIndexAllocator {
     pub fn new() -> Self {
         SharedTrackerIndexAllocator {
-            inner: Mutex::new(TrackerIndexAllocator::new()),
+            inner: Mutex::new(
+                rank::SHARED_TRACKER_INDEX_ALLOCATOR_INNER,
+                TrackerIndexAllocator::new(),
+            ),
         }
     }
 
@@ -487,11 +495,26 @@ impl<A: HalApi> RenderBundleScope<A> {
     /// Create the render bundle scope and pull the maximum IDs from the hubs.
     pub fn new() -> Self {
         Self {
-            buffers: RwLock::new(BufferUsageScope::default()),
-            textures: RwLock::new(TextureUsageScope::default()),
-            bind_groups: RwLock::new(StatelessTracker::new()),
-            render_pipelines: RwLock::new(StatelessTracker::new()),
-            query_sets: RwLock::new(StatelessTracker::new()),
+            buffers: RwLock::new(
+                rank::RENDER_BUNDLE_SCOPE_BUFFERS,
+                BufferUsageScope::default(),
+            ),
+            textures: RwLock::new(
+                rank::RENDER_BUNDLE_SCOPE_TEXTURES,
+                TextureUsageScope::default(),
+            ),
+            bind_groups: RwLock::new(
+                rank::RENDER_BUNDLE_SCOPE_BIND_GROUPS,
+                StatelessTracker::new(),
+            ),
+            render_pipelines: RwLock::new(
+                rank::RENDER_BUNDLE_SCOPE_RENDER_PIPELINES,
+                StatelessTracker::new(),
+            ),
+            query_sets: RwLock::new(
+                rank::RENDER_BUNDLE_SCOPE_QUERY_SETS,
+                StatelessTracker::new(),
+            ),
         }
     }
 
@@ -650,8 +673,8 @@ impl<A: HalApi> Tracker<A> {
     ///
     /// If a transition is needed to get the resources into the needed
     /// state, those transitions are stored within the tracker. A
-    /// subsequent call to [`BufferTracker::drain`] or
-    /// [`TextureTracker::drain`] is needed to get those transitions.
+    /// subsequent call to [`BufferTracker::drain_transitions`] or
+    /// [`TextureTracker::drain_transitions`] is needed to get those transitions.
/// /// This is a really funky method used by Compute Passes to generate /// barriers after a call to dispatch without needing to iterate diff --git a/wgpu-core/src/track/stateless.rs b/wgpu-core/src/track/stateless.rs index 00225f2305..25ffc027ee 100644 --- a/wgpu-core/src/track/stateless.rs +++ b/wgpu-core/src/track/stateless.rs @@ -6,9 +6,14 @@ use std::sync::Arc; -use parking_lot::Mutex; - -use crate::{id::Id, resource::Resource, resource_log, storage::Storage, track::ResourceMetadata}; +use crate::{ + id::Id, + lock::{rank, Mutex}, + resource::Resource, + resource_log, + storage::Storage, + track::ResourceMetadata, +}; use super::{ResourceTracker, TrackerIndex}; @@ -24,7 +29,7 @@ pub(crate) struct StatelessBindGroupSate { impl StatelessBindGroupSate { pub fn new() -> Self { Self { - resources: Mutex::new(Vec::new()), + resources: Mutex::new(rank::STATELESS_BIND_GROUP_STATE_RESOURCES, Vec::new()), } } @@ -153,16 +158,17 @@ impl StatelessTracker { /// /// If the ID is higher than the length of internal vectors, /// the vectors will be extended. A call to set_size is not needed. - pub fn insert_single(&mut self, resource: Arc) { + /// + /// Returns a reference to the newly inserted resource. + /// (This allows avoiding a clone/reference count increase in many cases.) + pub fn insert_single(&mut self, resource: Arc) -> &Arc { let index = resource.as_info().tracker_index().as_usize(); self.allow_index(index); self.tracker_assert_in_bounds(index); - unsafe { - self.metadata.insert(index, resource); - } + unsafe { self.metadata.insert(index, resource) } } /// Adds the given resource to the tracker. diff --git a/wgpu-core/src/track/texture.rs b/wgpu-core/src/track/texture.rs index 3cf95ff38a..51ed72a18d 100644 --- a/wgpu-core/src/track/texture.rs +++ b/wgpu-core/src/track/texture.rs @@ -24,6 +24,7 @@ use super::{ }; use crate::{ hal_api::HalApi, + lock::{rank, Mutex}, resource::{Resource, Texture, TextureInner}, snatch::SnatchGuard, track::{ @@ -36,7 +37,6 @@ use hal::TextureUses; use arrayvec::ArrayVec; use naga::FastHashMap; -use parking_lot::Mutex; use wgt::{strict_assert, strict_assert_eq}; use std::{borrow::Cow, iter, marker::PhantomData, ops::Range, sync::Arc, vec::Drain}; @@ -164,7 +164,7 @@ pub(crate) struct TextureBindGroupState { impl TextureBindGroupState { pub fn new() -> Self { Self { - textures: Mutex::new(Vec::new()), + textures: Mutex::new(rank::TEXTURE_BIND_GROUP_STATE_TEXTURES, Vec::new()), } } diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index ab21c6dfe3..dafcb3a1ab 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wgpu-hal" -version = "0.19.3" +version = "0.20.0" authors = ["gfx-rs developers"] edition = "2021" description = "WebGPU hardware abstraction layer" @@ -110,13 +110,13 @@ glow = { version = "0.13.1", optional = true } [dependencies.wgt] package = "wgpu-types" path = "../wgpu-types" -version = "0.19.2" +version = "0.20.0" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] # backend: Vulkan ash = { version = "0.37.3", optional = true } gpu-alloc = { version = "0.6", optional = true } -gpu-descriptor = { version = "0.2", optional = true } +gpu-descriptor = { version = "0.3", optional = true } smallvec = { version = "1", optional = true, features = ["union"] } khronos-egl = { version = "6", features = ["dynamic"], optional = true } @@ -147,7 +147,7 @@ winapi = { version = "0.3", features = [ "winuser", "dcomp", ] } -d3d12 = { path = "../d3d12/", version = "0.19.0", optional = true, features = [ +d3d12 = { 
path = "../d3d12/", version = "0.20.0", optional = true, features = [ "libloading", ] } @@ -155,7 +155,7 @@ d3d12 = { path = "../d3d12/", version = "0.19.0", optional = true, features = [ # backend: Metal block = { version = "0.1", optional = true } -metal = { version = "0.27.0", git = "https://github.com/gfx-rs/metal-rs", rev = "ff8fd3d6dc7792852f8a015458d7e6d42d7fb352" } +metal = { version = "0.28.0" } objc = "0.2.5" core-graphics-types = "0.1" @@ -178,7 +178,7 @@ ndk-sys = { version = "0.5.0", optional = true } [dependencies.naga] path = "../naga" -version = "0.19.2" +version = "0.20.0" [build-dependencies] cfg_aliases.workspace = true @@ -186,13 +186,13 @@ cfg_aliases.workspace = true # DEV dependencies [dev-dependencies.naga] path = "../naga" -version = "0.19.2" +version = "0.20.0" features = ["wgsl-in"] [dev-dependencies] cfg-if = "1" env_logger = "0.11" -glam = "0.25.0" # for ray-traced-triangle example +glam = "0.27.0" # for ray-traced-triangle example winit = { version = "0.29.14", features = [ "android-native-activity", ] } # for "halmark" example diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index 29dfd49d28..aef6919c8f 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -253,12 +253,14 @@ impl Example { module: &shader, entry_point: "vs_main", constants: &constants, + zero_initialize_workgroup_memory: true, }, vertex_buffers: &[], fragment_stage: Some(hal::ProgrammableStage { module: &shader, entry_point: "fs_main", constants: &constants, + zero_initialize_workgroup_memory: true, }), primitive: wgt::PrimitiveState { topology: wgt::PrimitiveTopology::TriangleStrip, @@ -843,6 +845,7 @@ fn main() { } } ex.render(); + window.request_redraw(); } _ => { example.as_mut().unwrap().update(event); diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 2ed2d64627..3985cd60af 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -372,6 +372,7 @@ impl Example { module: &shader_module, entry_point: "main", constants: &Default::default(), + zero_initialize_workgroup_memory: true, }, }) } diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 2b7040720e..faf25cc852 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -115,18 +115,6 @@ impl super::Adapter { ) }); - let mut shader_model_support: d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL = - d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL { - HighestShaderModel: d3d12_ty::D3D_SHADER_MODEL_6_0, - }; - assert_eq!(0, unsafe { - device.CheckFeatureSupport( - d3d12_ty::D3D12_FEATURE_SHADER_MODEL, - &mut shader_model_support as *mut _ as *mut _, - mem::size_of::() as _, - ) - }); - let mut workarounds = super::Workarounds::default(); let info = wgt::AdapterInfo { @@ -321,7 +309,7 @@ impl super::Adapter { wgt::Features::TEXTURE_BINDING_ARRAY | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING | wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, - shader_model_support.HighestShaderModel >= d3d12_ty::D3D_SHADER_MODEL_5_1, + shader_model >= naga::back::hlsl::ShaderModel::V5_1, ); let bgra8unorm_storage_supported = { @@ -343,21 +331,28 @@ impl super::Adapter { bgra8unorm_storage_supported, ); - // we must be using DXC because uint64_t was added with Shader Model 6 - // and FXC only supports up to 5.1 - let int64_shader_ops_supported = dxc_container.is_some() && { - let 
mut features1: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1 =
-                unsafe { mem::zeroed() };
-            let hr = unsafe {
-                device.CheckFeatureSupport(
-                    d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS1,
-                    &mut features1 as *mut _ as *mut _,
-                    mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1>() as _,
-                )
-            };
-            hr == 0 && features1.Int64ShaderOps != 0
+        let mut features1: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1 = unsafe { mem::zeroed() };
+        let hr = unsafe {
+            device.CheckFeatureSupport(
+                d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS1,
+                &mut features1 as *mut _ as *mut _,
+                mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1>() as _,
+            )
         };
-        features.set(wgt::Features::SHADER_INT64, int64_shader_ops_supported);
+
+        features.set(
+            wgt::Features::SHADER_INT64,
+            shader_model >= naga::back::hlsl::ShaderModel::V6_0
+                && hr == 0
+                && features1.Int64ShaderOps != 0,
+        );
+
+        features.set(
+            wgt::Features::SUBGROUP,
+            shader_model >= naga::back::hlsl::ShaderModel::V6_0
+                && hr == 0
+                && features1.WaveOps != 0,
+        );
 
         // float32-filterable should always be available on d3d12
         features.set(wgt::Features::FLOAT32_FILTERABLE, true);
@@ -425,6 +420,8 @@ impl super::Adapter {
                     .min(crate::MAX_VERTEX_BUFFERS as u32),
                 max_vertex_attributes: d3d12_ty::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT,
                 max_vertex_buffer_array_stride: d3d12_ty::D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES,
+                min_subgroup_size: 4, // Not using `features1.WaveLaneCountMin` as it is unreliable
+                max_subgroup_size: 128,
                 // The push constants are part of the root signature which
                 // has a limit of 64 DWORDS (256 bytes), but other resources
                 // also share the root signature:
diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs
index 2b6c1d959e..b09ea76080 100644
--- a/wgpu-hal/src/dx12/conv.rs
+++ b/wgpu-hal/src/dx12/conv.rs
@@ -224,7 +224,7 @@ pub fn map_polygon_mode(mode: wgt::PolygonMode) -> d3d12_ty::D3D12_FILL_MODE {
 }
 
 /// D3D12 doesn't support passing factors ending in `_COLOR` for alpha blending
-/// (see https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ns-d3d12-d3d12_render_target_blend_desc).
+/// (see <https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ns-d3d12-d3d12_render_target_blend_desc>).
 /// Therefore this function takes an additional `is_alpha` argument
 /// which if set will return an equivalent `_ALPHA` factor.
fn map_blend_factor(factor: wgt::BlendFactor, is_alpha: bool) -> d3d12_ty::D3D12_BLEND {
diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index f4539817d3..82075294ee 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -226,9 +226,20 @@ impl super::Device {
             )
             .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {e:?}")))?;
 
+        let needs_temp_options = stage.zero_initialize_workgroup_memory
+            != layout.naga_options.zero_initialize_workgroup_memory;
+        let mut temp_options;
+        let naga_options = if needs_temp_options {
+            temp_options = layout.naga_options.clone();
+            temp_options.zero_initialize_workgroup_memory = stage.zero_initialize_workgroup_memory;
+            &temp_options
+        } else {
+            &layout.naga_options
+        };
+
         //TODO: reuse the writer
         let mut source = String::new();
-        let mut writer = hlsl::Writer::new(&mut source, &layout.naga_options);
+        let mut writer = hlsl::Writer::new(&mut source, naga_options);
         let reflection_info = {
             profiling::scope!("naga::back::hlsl::write");
             writer
@@ -239,7 +250,7 @@ impl super::Device {
         let full_stage = format!(
             "{}_{}\0",
             naga_stage.to_hlsl_str(),
-            layout.naga_options.shader_model.to_str()
+            naga_options.shader_model.to_str()
         );
 
         let ep_index = module
diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index 735732ef29..9f021bc241 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -440,7 +440,7 @@ impl Texture {
         }
     }
 
-    /// see https://learn.microsoft.com/en-us/windows/win32/direct3d12/subresources#plane-slice
+    /// see <https://learn.microsoft.com/en-us/windows/win32/direct3d12/subresources#plane-slice>
     fn calc_subresource(&self, mip_level: u32, array_layer: u32, plane: u32) -> u32 {
         mip_level + (array_layer + plane * self.array_layer_count()) * self.mip_level_count
     }
diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs
index b9d044337c..052c77006b 100644
--- a/wgpu-hal/src/gles/adapter.rs
+++ b/wgpu-hal/src/gles/adapter.rs
@@ -104,7 +104,7 @@ impl super::Adapter {
         }
     }
 
-    fn make_info(vendor_orig: String, renderer_orig: String) -> wgt::AdapterInfo {
+    fn make_info(vendor_orig: String, renderer_orig: String, version: String) -> wgt::AdapterInfo {
         let vendor = vendor_orig.to_lowercase();
         let renderer = renderer_orig.to_lowercase();
 
@@ -179,13 +179,33 @@ impl super::Adapter {
             0
         };
 
+        let driver;
+        let driver_info;
+        if version.starts_with("WebGL ") || version.starts_with("OpenGL ") {
+            let es_sig = " ES";
+            match version.find(es_sig) {
+                Some(pos) => {
+                    driver = version[..pos + es_sig.len()].to_owned();
+                    driver_info = version[pos + es_sig.len() + 1..].to_owned();
+                }
+                None => {
+                    let pos = version.find(' ').unwrap();
+                    driver = version[..pos].to_owned();
+                    driver_info = version[pos + 1..].to_owned();
+                }
+            }
+        } else {
+            driver = "OpenGL".to_owned();
+            driver_info = version;
+        }
+
         wgt::AdapterInfo {
             name: renderer_orig,
             vendor: vendor_id,
             device: 0,
             device_type: inferred_device_type,
-            driver: String::new(),
-            driver_info: String::new(),
+            driver,
+            driver_info,
             backend: wgt::Backend::Gl,
         }
     }
@@ -507,8 +527,7 @@ impl super::Adapter {
         let has_etc = if cfg!(any(webgl, Emscripten)) {
             extensions.contains("WEBGL_compressed_texture_etc")
         } else {
-            // This is a required part of GLES3, but not part of Desktop GL at all.
- es_ver.is_some() + es_ver.is_some() || extensions.contains("GL_ARB_ES3_compatibility") }; features.set(wgt::Features::TEXTURE_COMPRESSION_ETC2, has_etc); @@ -728,6 +747,8 @@ impl super::Adapter { } else { !0 }, + min_subgroup_size: 0, + max_subgroup_size: 0, max_push_constant_size: super::MAX_PUSH_CONSTANTS as u32 * 4, min_uniform_buffer_offset_alignment, min_storage_buffer_offset_alignment, @@ -825,7 +846,7 @@ impl super::Adapter { max_msaa_samples: max_samples, }), }, - info: Self::make_info(vendor, renderer), + info: Self::make_info(vendor, renderer, version), features, capabilities: crate::Capabilities { limits, diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index 921941735c..a1e2736aa6 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -255,11 +255,23 @@ impl super::Device { }; let mut output = String::new(); + let needs_temp_options = stage.zero_initialize_workgroup_memory + != context.layout.naga_options.zero_initialize_workgroup_memory; + let mut temp_options; + let naga_options = if needs_temp_options { + // We use a conditional here, as cloning the naga_options could be expensive + // That is, we want to avoid doing that unless we cannot avoid it + temp_options = context.layout.naga_options.clone(); + temp_options.zero_initialize_workgroup_memory = stage.zero_initialize_workgroup_memory; + &temp_options + } else { + &context.layout.naga_options + }; let mut writer = glsl::Writer::new( &mut output, &module, &info, - &context.layout.naga_options, + naga_options, &pipeline_options, policies, ) @@ -305,6 +317,7 @@ impl super::Device { naga_stage: naga_stage.to_owned(), shader_id: stage.module.id, entry_point: stage.entry_point.to_owned(), + zero_initialize_workgroup_memory: stage.zero_initialize_workgroup_memory, }); } let mut guard = self diff --git a/wgpu-hal/src/gles/egl.rs b/wgpu-hal/src/gles/egl.rs index b166f4f102..7494dcad76 100644 --- a/wgpu-hal/src/gles/egl.rs +++ b/wgpu-hal/src/gles/egl.rs @@ -526,7 +526,24 @@ impl Inner { } let (config, supports_native_window) = choose_config(&egl, display, srgb_kind)?; - egl.bind_api(khronos_egl::OPENGL_ES_API).unwrap(); + + let supports_opengl = if version >= (1, 4) { + let client_apis = egl + .query_string(Some(display), khronos_egl::CLIENT_APIS) + .unwrap() + .to_string_lossy(); + client_apis + .split(' ') + .any(|client_api| client_api == "OpenGL") + } else { + false + }; + egl.bind_api(if supports_opengl { + khronos_egl::OPENGL_API + } else { + khronos_egl::OPENGL_ES_API + }) + .unwrap(); let needs_robustness = true; let mut khr_context_flags = 0; @@ -977,6 +994,7 @@ impl crate::Instance for Instance { srgb_kind: inner.srgb_kind, }) } + unsafe fn destroy_surface(&self, _surface: Surface) {} unsafe fn enumerate_adapters(&self) -> Vec> { @@ -993,6 +1011,12 @@ impl crate::Instance for Instance { }) }; + // In contrast to OpenGL ES, OpenGL requires explicitly enabling sRGB conversions, + // as otherwise the user has to do the sRGB conversion. 
+ if !matches!(inner.srgb_kind, SrgbFrameBufferKind::None) { + unsafe { gl.enable(glow::FRAMEBUFFER_SRGB) }; + } + if self.flags.contains(wgt::InstanceFlags::DEBUG) && gl.supports_debug() { log::debug!("Max label length: {}", unsafe { gl.get_parameter_i32(glow::MAX_LABEL_LENGTH) diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index 6f41f7c000..0fcb09be46 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -602,6 +602,7 @@ struct ProgramStage { naga_stage: naga::ShaderStage, shader_id: ShaderId, entry_point: String, + zero_initialize_workgroup_memory: bool, } #[derive(PartialEq, Eq, Hash)] diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs index 29dfb79d04..7c728d3978 100644 --- a/wgpu-hal/src/gles/queue.rs +++ b/wgpu-hal/src/gles/queue.rs @@ -213,12 +213,27 @@ impl super::Queue { instance_count, ref first_instance_location, } => { - match base_vertex { - 0 => { - unsafe { - gl.uniform_1_u32(first_instance_location.as_ref(), first_instance) - }; + let supports_full_instancing = self + .shared + .private_caps + .contains(PrivateCapabilities::FULLY_FEATURED_INSTANCING); + if supports_full_instancing { + unsafe { + gl.draw_elements_instanced_base_vertex_base_instance( + topology, + index_count as i32, + index_type, + index_offset as i32, + instance_count as i32, + base_vertex, + first_instance, + ) + } + } else { + unsafe { gl.uniform_1_u32(first_instance_location.as_ref(), first_instance) }; + + if base_vertex == 0 { unsafe { // Don't use `gl.draw_elements`/`gl.draw_elements_base_vertex` for `instance_count == 1`. // Angle has a bug where it doesn't consider the instance divisor when `DYNAMIC_DRAW` is used in `gl.draw_elements`/`gl.draw_elements_base_vertex`. @@ -231,41 +246,17 @@ impl super::Queue { instance_count as i32, ) } - } - _ => { - let supports_full_instancing = self - .shared - .private_caps - .contains(PrivateCapabilities::FULLY_FEATURED_INSTANCING); - - if supports_full_instancing { - unsafe { - gl.draw_elements_instanced_base_vertex_base_instance( - topology, - index_count as i32, - index_type, - index_offset as i32, - instance_count as i32, - base_vertex, - first_instance, - ) - } - } else { - unsafe { - gl.uniform_1_u32(first_instance_location.as_ref(), first_instance) - }; - - // If we've gotten here, wgpu-core has already validated that this function exists via the DownlevelFlags::BASE_VERTEX feature. - unsafe { - gl.draw_elements_instanced_base_vertex( - topology, - index_count as _, - index_type, - index_offset as i32, - instance_count as i32, - base_vertex, - ) - } + } else { + // If we've gotten here, wgpu-core has already validated that this function exists via the DownlevelFlags::BASE_VERTEX feature. + unsafe { + gl.draw_elements_instanced_base_vertex( + topology, + index_count as _, + index_type, + index_offset as i32, + instance_count as i32, + base_vertex, + ) } } } diff --git a/wgpu-hal/src/gles/wgl.rs b/wgpu-hal/src/gles/wgl.rs index 2564892969..aae70478b4 100644 --- a/wgpu-hal/src/gles/wgl.rs +++ b/wgpu-hal/src/gles/wgl.rs @@ -507,6 +507,8 @@ impl crate::Instance for Instance { .supported_extensions() .contains("GL_ARB_framebuffer_sRGB"); + // In contrast to OpenGL ES, OpenGL requires explicitly enabling sRGB conversions, + // as otherwise the user has to do the sRGB conversion. 
if srgb_capable { unsafe { gl.enable(glow::FRAMEBUFFER_SRGB) }; } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index ddcb0634fe..d300ca30cc 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -3,14 +3,14 @@ * This crate defines a set of traits abstracting over modern graphics APIs, * with implementations ("backends") for Vulkan, Metal, Direct3D, and GL. * - * `wgpu_hal` is a spiritual successor to + * `wgpu-hal` is a spiritual successor to * [gfx-hal](https://github.com/gfx-rs/gfx), but with reduced scope, and * oriented towards WebGPU implementation goals. It has no overhead for * validation or tracking, and the API translation overhead is kept to the bare * minimum by the design of WebGPU. This API can be used for resource-demanding * applications and engines. * - * The `wgpu_hal` crate's main design choices: + * The `wgpu-hal` crate's main design choices: * * - Our traits are meant to be *portable*: proper use * should get equivalent results regardless of the backend. @@ -19,7 +19,7 @@ * validation, if any, and incorrect use will often cause undefined behavior. * This allows us to minimize the overhead we impose over the underlying * graphics system. If you need safety, the [`wgpu-core`] crate provides a - * safe API for driving `wgpu_hal`, implementing all necessary validation, + * safe API for driving `wgpu-hal`, implementing all necessary validation, * resource state tracking, and so on. (Note that `wgpu-core` is designed for * use via FFI; the [`wgpu`] crate provides more idiomatic Rust bindings for * `wgpu-core`.) Or, you can do your own validation. @@ -27,7 +27,7 @@ * - In the same vein, returned errors *only cover cases the user can't * anticipate*, like running out of memory or losing the device. Any errors * that the user could reasonably anticipate are their responsibility to - * avoid. For example, `wgpu_hal` returns no error for mapping a buffer that's + * avoid. For example, `wgpu-hal` returns no error for mapping a buffer that's * not mappable: as the buffer creator, the user should already know if they * can map it. * @@ -43,7 +43,7 @@ * - We map buffer contents *persistently*. This means that the buffer * can remain mapped on the CPU while the GPU reads or writes to it. * You must explicitly indicate when data might need to be - * transferred between CPU and GPU, if `wgpu_hal` indicates that the + * transferred between CPU and GPU, if `wgpu-hal` indicates that the * mapping is not coherent (that is, automatically synchronized * between the two devices). * @@ -62,7 +62,7 @@ * function documentation. For this reason, we recommend that iterators don't * do any mutating work. * - * Unfortunately, `wgpu_hal`'s safety requirements are not fully documented. + * Unfortunately, `wgpu-hal`'s safety requirements are not fully documented. * Ideally, all trait methods would have doc comments setting out the * requirements users must meet to ensure correct and portable behavior. 
If you * are aware of a specific requirement that a backend imposes that is not @@ -76,7 +76,7 @@ * * ## Primary backends * - * The `wgpu_hal` crate has full-featured backends implemented on the following + * The `wgpu-hal` crate has full-featured backends implemented on the following * platform graphics APIs: * * - Vulkan, available on Linux, Android, and Windows, using the [`ash`] crate's @@ -93,7 +93,7 @@ * * ## Secondary backends * - * The `wgpu_hal` crate has a partial implementation based on the following + * The `wgpu-hal` crate has a partial implementation based on the following * platform graphics API: * * - The GL backend is available anywhere OpenGL, OpenGL ES, or WebGL are @@ -110,6 +110,92 @@ * * [tdc]: wgt::DownlevelCapabilities * + * ## Traits + * + * The `wgpu-hal` crate defines a handful of traits that together + * represent a cross-platform abstraction for modern GPU APIs. + * + * - The [`Api`] trait represents a `wgpu-hal` backend. It has no methods of its + * own, only a collection of associated types. + * + * - [`Api::Instance`] implements the [`Instance`] trait. [`Instance::init`] + * creates an instance value, which you can use to enumerate the adapters + * available on the system. For example, [`vulkan::Api::Instance::init`][Ii] + * returns an instance that can enumerate the Vulkan physical devices on your + * system. + * + * - [`Api::Adapter`] implements the [`Adapter`] trait, representing a + * particular device from a particular backend. For example, a Vulkan instance + * might have a Lavapipe software adapter and a GPU-based adapter. + * + * - [`Api::Device`] implements the [`Device`] trait, representing an active + * link to a device. You get a device value by calling [`Adapter::open`], and + * then use it to create buffers, textures, shader modules, and so on. + * + * - [`Api::Queue`] implements the [`Queue`] trait, which you use to submit + * command buffers to a given device. + * + * - [`Api::CommandEncoder`] implements the [`CommandEncoder`] trait, which you + * use to build buffers of commands to submit to a queue. This has all the + * methods for drawing and running compute shaders, which is presumably what + * you're here for. + * + * - [`Api::Surface`] implements the [`Surface`] trait, which represents a + * swapchain for presenting images on the screen, via interaction with the + * system's window manager. + * + * The [`Api`] trait has various other associated types like [`Api::Buffer`] and + * [`Api::Texture`] that represent resources the rest of the interface can + * operate on, but these generally do not have their own traits. + * + * [Ii]: Instance::init + * + * ## Validation is the calling code's responsibility, not `wgpu-hal`'s + * + * As much as possible, `wgpu-hal` traits place the burden of validation, + * resource tracking, and state tracking on the caller, not on the trait + * implementations themselves. Anything which can reasonably be handled in + * backend-independent code should be. A `wgpu_hal` backend's sole obligation is + * to provide portable behavior, and report conditions that the calling code + * can't reasonably anticipate, like device loss or running out of memory. + * + * The `wgpu` crate collection is intended for use in security-sensitive + * applications, like web browsers, where the API is available to untrusted + * code. This means that `wgpu-core`'s validation is not simply a service to + * developers, to be provided opportunistically when the performance costs are + * acceptable and the necessary data is ready at hand. 
Rather, `wgpu-core`'s + * validation must be exhaustive, to ensure that even malicious content cannot + * provoke and exploit undefined behavior in the platform's graphics API. + * + * Because graphics APIs' requirements are complex, the only practical way for + * `wgpu` to provide exhaustive validation is to comprehensively track the + * lifetime and state of all the resources in the system. Implementing this + * separately for each backend is infeasible; effort would be better spent + * making the cross-platform validation in `wgpu-core` legible and trustworthy. + * Fortunately, the requirements are largely similar across the various + * platforms, so cross-platform validation is practical. + * + * Some backends have specific requirements that aren't practical to foist off + * on the `wgpu-hal` user. For example, properly managing macOS Objective-C or + * Microsoft COM reference counts is best handled by using appropriate pointer + * types within the backend. + * + * A desire for "defense in depth" may suggest performing additional validation + * in `wgpu-hal` when the opportunity arises, but this must be done with + * caution. Even experienced contributors infer the expectations their changes + * must meet by considering not just requirements made explicit in types, tests, + * assertions, and comments, but also those implicit in the surrounding code. + * When one sees validation or state-tracking code in `wgpu-hal`, it is tempting + * to conclude, "Oh, `wgpu-hal` checks for this, so `wgpu-core` needn't worry + * about it - that would be redundant!" The responsibility for exhaustive + * validation always rests with `wgpu-core`, regardless of what may or may not + * be checked in `wgpu-hal`. + * + * To this end, any "defense in depth" validation that does appear in `wgpu-hal` + * for requirements that `wgpu-core` should have enforced should report failure + * via the `unreachable!` macro, because problems detected at this stage always + * indicate a bug in `wgpu-core`. + * * ## Debugging * * Most of the information on the wiki [Debugging wgpu Applications][wiki-debug] @@ -303,6 +389,15 @@ pub trait Api: Clone + fmt::Debug + Sized { type Queue: Queue; type CommandEncoder: CommandEncoder; + + /// This API's command buffer type. + /// + /// The only thing you can do with `CommandBuffer`s is build them + /// with a [`CommandEncoder`] and then pass them to + /// [`Queue::submit`] for execution, or destroy them by passing + /// them to [`CommandEncoder::reset_all`]. + /// + /// [`CommandEncoder`]: Api::CommandEncoder type CommandBuffer: WasmNotSendSync + fmt::Debug; type Buffer: fmt::Debug + WasmNotSendSync + 'static; @@ -311,6 +406,24 @@ pub trait Api: Clone + fmt::Debug + Sized { type TextureView: fmt::Debug + WasmNotSendSync; type Sampler: fmt::Debug + WasmNotSendSync; type QuerySet: fmt::Debug + WasmNotSendSync; + + /// A value you can block on to wait for something to finish. + /// + /// A `Fence` holds a monotonically increasing [`FenceValue`]. You can call + /// [`Device::wait`] to block until a fence reaches or passes a value you + /// choose. [`Queue::submit`] can take a `Fence` and a [`FenceValue`] to + /// store in it when the submitted work is complete. + /// + /// Attempting to set a fence to a value less than its current value has no + /// effect. + /// + /// Waiting on a fence returns as soon as the fence reaches *or passes* the + /// requested value. 
This implies that, in order to reliably determine when + /// an operation has completed, operations must finish in order of + /// increasing fence values: if a higher-valued operation were to finish + /// before a lower-valued operation, then waiting for the fence to reach the + /// lower value could return before the lower-valued operation has actually + /// finished. type Fence: fmt::Debug + WasmNotSendSync; type BindGroupLayout: fmt::Debug + WasmNotSendSync; @@ -510,7 +623,25 @@ pub trait Device: WasmNotSendSync { &self, fence: &::Fence, ) -> Result; - /// Calling wait with a lower value than the current fence value will immediately return. + + /// Wait for `fence` to reach `value`. + /// + /// Operations like [`Queue::submit`] can accept a [`Fence`] and a + /// [`FenceValue`] to store in it, so you can use this `wait` function + /// to wait for a given queue submission to finish execution. + /// + /// The `value` argument must be a value that some actual operation you have + /// already presented to the device is going to store in `fence`. You cannot + /// wait for values yet to be submitted. (This restriction accommodates + /// implementations like the `vulkan` backend's [`FencePool`] that must + /// allocate a distinct synchronization object for each fence value one is + /// able to wait for.) + /// + /// Calling `wait` with a lower [`FenceValue`] than `fence`'s current value + /// returns immediately. + /// + /// [`Fence`]: Api::Fence + /// [`FencePool`]: vulkan/enum.Fence.html#variant.FencePool unsafe fn wait( &self, fence: &::Fence, @@ -542,14 +673,48 @@ pub trait Device: WasmNotSendSync { pub trait Queue: WasmNotSendSync { type A: Api; - /// Submits the command buffers for execution on GPU. + /// Submit `command_buffers` for execution on GPU. + /// + /// If `signal_fence` is `Some(fence, value)`, update `fence` to `value` + /// when the operation is complete. See [`Fence`] for details. + /// + /// If two calls to `submit` on a single `Queue` occur in a particular order + /// (that is, they happen on the same thread, or on two threads that have + /// synchronized to establish an ordering), then the first submission's + /// commands all complete execution before any of the second submission's + /// commands begin. All results produced by one submission are visible to + /// the next. + /// + /// Within a submission, command buffers execute in the order in which they + /// appear in `command_buffers`. All results produced by one buffer are + /// visible to the next. + /// + /// If two calls to `submit` on a single `Queue` from different threads are + /// not synchronized to occur in a particular order, they must pass distinct + /// [`Fence`]s. As explained in the [`Fence`] documentation, waiting for + /// operations to complete is only trustworthy when operations finish in + /// order of increasing fence value, but submissions from different threads + /// cannot determine how to order the fence values if the submissions + /// themselves are unordered. If each thread uses a separate [`Fence`], this + /// problem does not arise. /// /// Valid usage: - /// - all of the command buffers were created from command pools - /// that are associated with this queue. - /// - all of the command buffers had `CommandBuffer::finish()` called. - /// - all surface textures that the command buffers write to must be - /// passed to the surface_textures argument. + /// + /// - All of the [`CommandBuffer`][cb]s were created from + /// [`CommandEncoder`][ce]s that are associated with this queue. 
+    ///
+    /// - All of those [`CommandBuffer`][cb]s must remain alive until
+    ///   the submitted commands have finished execution. (Since
+    ///   command buffers must not outlive their encoders, this
+    ///   implies that the encoders must remain alive as well.)
+    ///
+    /// - All of the [`SurfaceTexture`][st]s that the command buffers
+    ///   write to appear in the `surface_textures` argument.
+    ///
+    /// [`Fence`]: Api::Fence
+    /// [cb]: Api::CommandBuffer
+    /// [ce]: Api::CommandEncoder
+    /// [st]: Api::SurfaceTexture
     unsafe fn submit(
         &self,
         command_buffers: &[&<Self::A as Api>::CommandBuffer],
@@ -564,7 +729,12 @@ pub trait Queue: WasmNotSendSync {
     unsafe fn get_timestamp_period(&self) -> f32;
 }
 
-/// Encoder and allocation pool for `CommandBuffer`.
+/// Encoder and allocation pool for `CommandBuffer`s.
+///
+/// A `CommandEncoder` not only constructs `CommandBuffer`s but also
+/// acts as the allocation pool that owns the buffers' underlying
+/// storage. Thus, `CommandBuffer`s must not outlive the
+/// `CommandEncoder` that created them.
 ///
 /// The life cycle of a `CommandBuffer` is as follows:
 ///
@@ -577,14 +747,17 @@
 ///
 /// - Call methods like `copy_buffer_to_buffer`, `begin_render_pass`,
 ///   etc. on a "recording" `CommandEncoder` to add commands to the
-///   list.
+///   list. (If an error occurs, you must call `discard_encoding`; see
+///   below.)
 ///
 /// - Call `end_encoding` on a recording `CommandEncoder` to close the
 ///   encoder and construct a fresh `CommandBuffer` consisting of the
 ///   list of commands recorded up to that point.
 ///
 /// - Call `discard_encoding` on a recording `CommandEncoder` to drop
-///   the commands recorded thus far and close the encoder.
+///   the commands recorded thus far and close the encoder. This is
+///   the only safe thing to do on a `CommandEncoder` if an error has
+///   occurred while recording commands.
 ///
 /// - Call `reset_all` on a closed `CommandEncoder`, passing all the
 ///   live `CommandBuffers` built from it. All the `CommandBuffer`s
@@ -602,6 +775,10 @@
 ///   built it.
 ///
 /// - A `CommandEncoder` must not outlive its `Device`.
+///
+/// It is the user's responsibility to meet these requirements. This
+/// allows `CommandEncoder` implementations to keep their state
+/// tracking to a minimum.
 pub trait CommandEncoder: WasmNotSendSync + fmt::Debug {
     type A: Api;
 
@@ -614,13 +791,20 @@
     /// This `CommandEncoder` must be in the "closed" state.
     unsafe fn begin_encoding(&mut self, label: Label) -> Result<(), DeviceError>;
 
-    /// Discard the command list under construction, if any.
+    /// Discard the command list under construction.
+    ///
+    /// If an error has occurred while recording commands, this
+    /// is the only safe thing to do with the encoder.
     ///
     /// This puts this `CommandEncoder` in the "closed" state.
     ///
     /// # Safety
     ///
     /// This `CommandEncoder` must be in the "recording" state.
+    ///
+    /// Callers must not assume that implementations of this
+    /// function are idempotent, and thus should not call it
+    /// multiple times in a row.
     unsafe fn discard_encoding(&mut self);
 
     /// Return a fresh [`CommandBuffer`] holding the recorded commands.
@@ -1425,6 +1609,11 @@ pub struct ProgrammableStage<'a, A: Api> {
     pub entry_point: &'a str,
     /// Pipeline constants
     pub constants: &'a naga::back::PipelineConstants,
+    /// Whether workgroup scoped memory will be initialized with zero values for this stage.
+ /// + /// This is required by the WebGPU spec, but may have overhead which can be avoided + /// for cross-platform applications + pub zero_initialize_workgroup_memory: bool, } // Rust gets confused about the impl requirements for `A` @@ -1434,6 +1623,7 @@ impl Clone for ProgrammableStage<'_, A> { module: self.module, entry_point: self.entry_point, constants: self.constants, + zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory, } } } diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index b67d5c6f97..cddba472bd 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -813,6 +813,14 @@ impl super::PrivateCapabilities { None }, timestamp_query_support, + supports_simd_scoped_operations: family_check + && (device.supports_family(MTLGPUFamily::Metal3) + || device.supports_family(MTLGPUFamily::Mac2) + || device.supports_family(MTLGPUFamily::Apple7)), + // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf#page=5 + int64: family_check + && (device.supports_family(MTLGPUFamily::Apple3) + || device.supports_family(MTLGPUFamily::Metal3)), } } @@ -886,7 +894,7 @@ impl super::PrivateCapabilities { } features.set( F::SHADER_INT64, - self.msl_version >= MTLLanguageVersion::V2_3, + self.int64 && self.msl_version >= MTLLanguageVersion::V2_3, ); features.set( @@ -898,6 +906,10 @@ impl super::PrivateCapabilities { features.set(F::RG11B10UFLOAT_RENDERABLE, self.format_rg11b10_all); features.set(F::SHADER_UNUSED_VERTEX_OUTPUT, true); + if self.supports_simd_scoped_operations { + features.insert(F::SUBGROUP | F::SUBGROUP_BARRIER); + } + features } @@ -952,6 +964,8 @@ impl super::PrivateCapabilities { max_vertex_buffers: self.max_vertex_buffers, max_vertex_attributes: 31, max_vertex_buffer_array_stride: base.max_vertex_buffer_array_stride, + min_subgroup_size: 4, + max_subgroup_size: 64, max_push_constant_size: 0x1000, min_uniform_buffer_offset_alignment: self.buffer_alignment as u32, min_storage_buffer_offset_alignment: self.buffer_alignment as u32, diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 0906d21510..2c8f5a2bfb 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -112,7 +112,7 @@ impl super::Device { // TODO: support bounds checks on binding arrays binding_array: naga::proc::BoundsCheckPolicy::Unchecked, }, - zero_initialize_workgroup_memory: true, + zero_initialize_workgroup_memory: stage.zero_initialize_workgroup_memory, }; let pipeline_options = naga::back::msl::PipelineOptions { diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 6aeafb0f86..7d547cfe3c 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -269,6 +269,8 @@ struct PrivateCapabilities { supports_shader_primitive_index: bool, has_unified_memory: Option, timestamp_query_support: TimestampQuerySupport, + supports_simd_scoped_operations: bool, + int64: bool, } #[derive(Clone, Debug)] @@ -649,7 +651,7 @@ struct BufferResource { /// Buffers with the [`wgt::BufferBindingType::Storage`] binding type can /// hold WGSL runtime-sized arrays. When one does, we must pass its size to /// shader entry points to implement bounds checks and WGSL's `arrayLength` - /// function. See [`device::CompiledShader::sized_bindings`] for details. + /// function. See `device::CompiledShader::sized_bindings` for details. 
     ///
     /// [`Storage`]: wgt::BufferBindingType::Storage
     binding_size: Option<wgt::BufferSize>,
@@ -680,12 +682,12 @@ struct PipelineStageInfo {
     /// The buffer argument table index at which we pass runtime-sized arrays' buffer sizes.
     ///
-    /// See [`device::CompiledShader::sized_bindings`] for more details.
+    /// See `device::CompiledShader::sized_bindings` for more details.
     sizes_slot: Option<naga::back::msl::Slot>,
 
     /// Bindings of all WGSL `storage` globals that contain runtime-sized arrays.
     ///
-    /// See [`device::CompiledShader::sized_bindings`] for more details.
+    /// See `device::CompiledShader::sized_bindings` for more details.
     sized_bindings: Vec<naga::ResourceBinding>,
 }
 
@@ -801,7 +803,7 @@ struct CommandState {
     ///
     /// Specifically:
     ///
-    /// - The keys are ['ResourceBinding`] values (that is, the WGSL `@group`
+    /// - The keys are [`ResourceBinding`] values (that is, the WGSL `@group`
     ///   and `@binding` attributes) for `var` global variables in the
     ///   current module that contain runtime-sized arrays.
     ///
@@ -813,7 +815,7 @@ struct CommandState {
     /// of the buffers listed in [`stage_infos.S.sized_bindings`], which we must
     /// pass to the entry point.
     ///
-    /// See [`device::CompiledShader::sized_bindings`] for more details.
+    /// See `device::CompiledShader::sized_bindings` for more details.
     ///
     /// [`ResourceBinding`]: naga::ResourceBinding
     storage_buffer_length_map: rustc_hash::FxHashMap<naga::ResourceBinding, wgt::BufferSize>,
diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index 245c0f0933..f1700d7f55 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -35,6 +35,8 @@ fn indexing_features() -> wgt::Features {
 /// [`PhysicalDeviceFeatures::from_extensions_and_requested_features`]
 /// constructs a value of this type indicating which Vulkan features to
 /// enable, based on the `wgpu_types::Features` requested.
+///
+/// [`Instance::expose_adapter`]: super::Instance::expose_adapter
 #[derive(Debug, Default)]
 pub struct PhysicalDeviceFeatures {
     /// Basic Vulkan 1.0 features.
@@ -86,6 +88,9 @@ pub struct PhysicalDeviceFeatures {
     ///
     /// However, we do populate this when creating a device if
     /// [`Features::RAY_TRACING_ACCELERATION_STRUCTURE`] is requested.
+    ///
+    /// [`Instance::expose_adapter`]: super::Instance::expose_adapter
+    /// [`Features::RAY_TRACING_ACCELERATION_STRUCTURE`]: wgt::Features::RAY_TRACING_ACCELERATION_STRUCTURE
     buffer_device_address: Option<vk::PhysicalDeviceBufferDeviceAddressFeaturesKHR>,
 
     /// Features provided by `VK_KHR_ray_query`,
@@ -95,12 +100,17 @@ pub struct PhysicalDeviceFeatures {
     /// this from `vkGetPhysicalDeviceFeatures2`.
     ///
     /// However, we do populate this when creating a device if ray tracing is requested.
+    ///
+    /// [`Instance::expose_adapter`]: super::Instance::expose_adapter
     ray_query: Option<vk::PhysicalDeviceRayQueryFeaturesKHR>,
 
     /// Features provided by `VK_KHR_zero_initialize_workgroup_memory`, promoted
     /// to Vulkan 1.3.
     zero_initialize_workgroup_memory: Option<vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures>,
+
+    /// Features provided by `VK_EXT_subgroup_size_control`, promoted to Vulkan 1.3.
+    subgroup_size_control: Option<vk::PhysicalDeviceSubgroupSizeControlFeatures>,
 }
 
 // This is safe because the structs have `p_next: *mut c_void`, which we null out/never read.
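For context, the `Option<vk::PhysicalDevice…Features>` fields above all participate in Vulkan's `p_next` chaining idiom, which the hunks below rely on: each extension struct is linked into a `vk::PhysicalDeviceFeatures2` chain, and a single query fills them all in. A rough sketch of that idiom with ash 0.37-style builders (`supports_subgroup_size_control` is a hypothetical helper name; real code must also confirm Vulkan 1.1+ or the extension before trusting the result, as the surrounding diff does):

```rust
use ash::vk;

/// Hypothetical helper, sketching the p_next chain used in the diff below.
/// Assumes the instance was created with Vulkan API version >= 1.1.
unsafe fn supports_subgroup_size_control(
    instance: &ash::Instance,
    phd: vk::PhysicalDevice,
) -> bool {
    // Extension struct to be filled in by the driver.
    let mut subgroup_size_control = vk::PhysicalDeviceSubgroupSizeControlFeatures::default();
    // `push_next` links the struct into `features2.p_next`.
    let mut features2 = vk::PhysicalDeviceFeatures2::builder()
        .push_next(&mut subgroup_size_control)
        .build();
    // One call walks the whole chain and populates every linked struct.
    instance.get_physical_device_features2(phd, &mut features2);
    subgroup_size_control.subgroup_size_control == vk::TRUE
}
```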
@@ -148,6 +158,9 @@ impl PhysicalDeviceFeatures { if let Some(ref mut feature) = self.ray_query { info = info.push_next(feature); } + if let Some(ref mut feature) = self.subgroup_size_control { + info = info.push_next(feature); + } info } @@ -175,6 +188,7 @@ impl PhysicalDeviceFeatures { /// [`Features`]: wgt::Features /// [`DownlevelFlags`]: wgt::DownlevelFlags /// [`PrivateCapabilities`]: super::PrivateCapabilities + /// [`add_to_device_create_builder`]: PhysicalDeviceFeatures::add_to_device_create_builder /// [`DeviceCreateInfoBuilder`]: vk::DeviceCreateInfoBuilder /// [`Adapter::required_device_extensions`]: super::Adapter::required_device_extensions fn from_extensions_and_requested_features( @@ -434,6 +448,17 @@ impl PhysicalDeviceFeatures { } else { None }, + subgroup_size_control: if device_api_version >= vk::API_VERSION_1_3 + || enabled_extensions.contains(&vk::ExtSubgroupSizeControlFn::name()) + { + Some( + vk::PhysicalDeviceSubgroupSizeControlFeatures::builder() + .subgroup_size_control(true) + .build(), + ) + } else { + None + }, } } @@ -442,6 +467,9 @@ impl PhysicalDeviceFeatures { /// Given `self`, together with the instance and physical device it was /// built from, and a `caps` also built from those, determine which wgpu /// features and downlevel flags the device can support. + /// + /// [`Features`]: wgt::Features + /// [`DownlevelFlags`]: wgt::DownlevelFlags fn to_wgpu( &self, instance: &ash::Instance, @@ -638,6 +666,34 @@ impl PhysicalDeviceFeatures { ); } + if let Some(ref subgroup) = caps.subgroup { + if (caps.device_api_version >= vk::API_VERSION_1_3 + || caps.supports_extension(vk::ExtSubgroupSizeControlFn::name())) + && subgroup.supported_operations.contains( + vk::SubgroupFeatureFlags::BASIC + | vk::SubgroupFeatureFlags::VOTE + | vk::SubgroupFeatureFlags::ARITHMETIC + | vk::SubgroupFeatureFlags::BALLOT + | vk::SubgroupFeatureFlags::SHUFFLE + | vk::SubgroupFeatureFlags::SHUFFLE_RELATIVE, + ) + { + features.set( + F::SUBGROUP, + subgroup + .supported_stages + .contains(vk::ShaderStageFlags::COMPUTE | vk::ShaderStageFlags::FRAGMENT), + ); + features.set( + F::SUBGROUP_VERTEX, + subgroup + .supported_stages + .contains(vk::ShaderStageFlags::VERTEX), + ); + features.insert(F::SUBGROUP_BARRIER); + } + } + let supports_depth_format = |format| { supports_format( instance, @@ -773,6 +829,13 @@ pub struct PhysicalDeviceProperties { /// `VK_KHR_driver_properties` extension, promoted to Vulkan 1.2. driver: Option, + /// Additional `vk::PhysicalDevice` properties from Vulkan 1.1. + subgroup: Option, + + /// Additional `vk::PhysicalDevice` properties from the + /// `VK_EXT_subgroup_size_control` extension, promoted to Vulkan 1.3. + subgroup_size_control: Option, + /// The device API version. /// /// Which is the version of Vulkan supported for device-level functionality. 
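At the user-facing level, these properties surface as the new `min_subgroup_size`/`max_subgroup_size` limits and the `SUBGROUP` family of features. A sketch of how an application might gate subgroup use (`subgroups_usable` is a hypothetical helper; it assumes a `wgpu::Adapter` is in hand):

```rust
fn subgroups_usable(adapter: &wgpu::Adapter) -> bool {
    let limits = adapter.limits();
    // Backends that cannot report subgroup sizes (e.g. GL) expose 0/0,
    // so check the range in addition to the feature bit.
    adapter.features().contains(wgpu::Features::SUBGROUP)
        && limits.min_subgroup_size > 0
        && limits.min_subgroup_size <= limits.max_subgroup_size
}
```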
@@ -888,6 +951,11 @@ impl PhysicalDeviceProperties { if self.supports_extension(vk::ExtImageRobustnessFn::name()) { extensions.push(vk::ExtImageRobustnessFn::name()); } + + // Require `VK_EXT_subgroup_size_control` if the associated feature was requested + if requested_features.contains(wgt::Features::SUBGROUP) { + extensions.push(vk::ExtSubgroupSizeControlFn::name()); + } } // Optional `VK_KHR_swapchain_mutable_format` @@ -987,6 +1055,14 @@ impl PhysicalDeviceProperties { .min(crate::MAX_VERTEX_BUFFERS as u32), max_vertex_attributes: limits.max_vertex_input_attributes, max_vertex_buffer_array_stride: limits.max_vertex_input_binding_stride, + min_subgroup_size: self + .subgroup_size_control + .map(|subgroup_size| subgroup_size.min_subgroup_size) + .unwrap_or(0), + max_subgroup_size: self + .subgroup_size_control + .map(|subgroup_size| subgroup_size.max_subgroup_size) + .unwrap_or(0), max_push_constant_size: limits.max_push_constants_size, min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment as u32, min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment as u32, @@ -1042,6 +1118,9 @@ impl super::InstanceShared { let supports_driver_properties = capabilities.device_api_version >= vk::API_VERSION_1_2 || capabilities.supports_extension(vk::KhrDriverPropertiesFn::name()); + let supports_subgroup_size_control = capabilities.device_api_version + >= vk::API_VERSION_1_3 + || capabilities.supports_extension(vk::ExtSubgroupSizeControlFn::name()); let supports_acceleration_structure = capabilities.supports_extension(vk::KhrAccelerationStructureFn::name()); @@ -1075,6 +1154,20 @@ impl super::InstanceShared { builder = builder.push_next(next); } + if capabilities.device_api_version >= vk::API_VERSION_1_1 { + let next = capabilities + .subgroup + .insert(vk::PhysicalDeviceSubgroupProperties::default()); + builder = builder.push_next(next); + } + + if supports_subgroup_size_control { + let next = capabilities + .subgroup_size_control + .insert(vk::PhysicalDeviceSubgroupSizeControlProperties::default()); + builder = builder.push_next(next); + } + let mut properties2 = builder.build(); unsafe { get_device_properties.get_physical_device_properties2(phd, &mut properties2); @@ -1190,6 +1283,16 @@ impl super::InstanceShared { builder = builder.push_next(next); } + // `VK_EXT_subgroup_size_control` is promoted to 1.3 + if capabilities.device_api_version >= vk::API_VERSION_1_3 + || capabilities.supports_extension(vk::ExtSubgroupSizeControlFn::name()) + { + let next = features + .subgroup_size_control + .insert(vk::PhysicalDeviceSubgroupSizeControlFeatures::default()); + builder = builder.push_next(next); + } + let mut features2 = builder.build(); unsafe { get_device_properties.get_physical_device_features2(phd, &mut features2); @@ -1382,6 +1485,9 @@ impl super::Instance { }), image_format_list: phd_capabilities.device_api_version >= vk::API_VERSION_1_2 || phd_capabilities.supports_extension(vk::KhrImageFormatListFn::name()), + subgroup_size_control: phd_features + .subgroup_size_control + .map_or(false, |ext| ext.subgroup_size_control == vk::TRUE), }; let capabilities = crate::Capabilities { limits: phd_capabilities.to_wgpu_limits(), @@ -1581,6 +1687,15 @@ impl super::Adapter { capabilities.push(spv::Capability::Geometry); } + if features.intersects(wgt::Features::SUBGROUP | wgt::Features::SUBGROUP_VERTEX) { + capabilities.push(spv::Capability::GroupNonUniform); + capabilities.push(spv::Capability::GroupNonUniformVote); + 
capabilities.push(spv::Capability::GroupNonUniformArithmetic); + capabilities.push(spv::Capability::GroupNonUniformBallot); + capabilities.push(spv::Capability::GroupNonUniformShuffle); + capabilities.push(spv::Capability::GroupNonUniformShuffleRelative); + } + if features.intersects( wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, @@ -1619,7 +1734,13 @@ impl super::Adapter { capabilities.push(spv::Capability::RayQueryKHR); } spv::Options { - lang_version: (1, 0), + lang_version: if features + .intersects(wgt::Features::SUBGROUP | wgt::Features::SUBGROUP_VERTEX) + { + (1, 3) + } else { + (1, 0) + }, flags, capabilities: Some(capabilities.iter().cloned().collect()), bounds_check_policies: naga::proc::BoundsCheckPolicies { diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 43a2471954..ceb44dfbe6 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -104,6 +104,11 @@ impl crate::CommandEncoder for super::CommandEncoder { } unsafe fn discard_encoding(&mut self) { + // Safe use requires this is not called in the "closed" state, so the buffer + // shouldn't be null. Assert this to make sure we're not pushing null + // buffers to the discard pile. + assert_ne!(self.active, vk::CommandBuffer::null()); + self.discarded.push(self.active); self.active = vk::CommandBuffer::null(); } diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 52b899900f..ec392533a0 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -2,6 +2,7 @@ use super::conv; use arrayvec::ArrayVec; use ash::{extensions::khr, vk}; +use naga::back::spv::ZeroInitializeWorkgroupMemoryMode; use parking_lot::Mutex; use std::{ @@ -737,7 +738,8 @@ impl super::Device { }; let needs_temp_options = !runtime_checks || !binding_map.is_empty() - || naga_shader.debug_source.is_some(); + || naga_shader.debug_source.is_some() + || !stage.zero_initialize_workgroup_memory; let mut temp_options; let options = if needs_temp_options { temp_options = self.naga_options.clone(); @@ -760,6 +762,10 @@ impl super::Device { file_name: debug.file_name.as_ref().as_ref(), }) } + if !stage.zero_initialize_workgroup_memory { + temp_options.zero_initialize_workgroup_memory = + ZeroInitializeWorkgroupMemoryMode::None; + } &temp_options } else { @@ -782,8 +788,14 @@ impl super::Device { } }; + let mut flags = vk::PipelineShaderStageCreateFlags::empty(); + if self.shared.private_caps.subgroup_size_control { + flags |= vk::PipelineShaderStageCreateFlags::ALLOW_VARYING_SUBGROUP_SIZE + } + let entry_point = CString::new(stage.entry_point).unwrap(); let create_info = vk::PipelineShaderStageCreateInfo::builder() + .flags(flags) .stage(conv::map_shader_stage(stage_flags)) .module(vk_module) .name(&entry_point) diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index d969c887d5..d1ea82772e 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -238,6 +238,7 @@ struct PrivateCapabilities { robust_image_access2: bool, zero_initialize_workgroup_memory: bool, image_format_list: bool, + subgroup_size_control: bool, } bitflags::bitflags!( @@ -447,6 +448,7 @@ pub struct BindGroup { set: gpu_descriptor::DescriptorSet, } +/// Miscellaneous allocation recycling pool for `CommandAllocator`. 
#[derive(Default)] struct Temp { marker: Vec<u8>, @@ -476,11 +478,31 @@ impl Temp { pub struct CommandEncoder { raw: vk::CommandPool, device: Arc<DeviceShared>, + + /// The current command buffer, if `self` is in the ["recording"] + /// state. + /// + /// ["recording"]: crate::CommandEncoder + /// + /// If non-`null`, the buffer is in the Vulkan "recording" state. active: vk::CommandBuffer, + + /// What kind of pass we are currently within: compute or render. bind_point: vk::PipelineBindPoint, + + /// Allocation recycling pool for this encoder. temp: Temp, + + /// A pool of available command buffers. + /// + /// These are all in the Vulkan "initial" state. free: Vec<vk::CommandBuffer>, + + /// A pool of discarded command buffers. + /// + /// These could be in any Vulkan state except "pending". discarded: Vec<vk::CommandBuffer>, + /// If this is true, the active renderpass enabled a debug span, /// and needs to be disabled on renderpass close. rpass_debug_marker_active: bool, @@ -537,9 +559,47 @@ pub struct QuerySet { raw: vk::QueryPool, } +/// The [`Api::Fence`] type for [`vulkan::Api`]. +/// +/// This is an `enum` because there are two possible implementations of +/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of +/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but +/// require non-1.0 features. +/// +/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if +/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`] +/// otherwise. +/// +/// [`Api::Fence`]: crate::Api::Fence +/// [`vulkan::Api`]: Api +/// [`Device::create_fence`]: crate::Device::create_fence +/// [`TimelineSemaphore`]: Fence::TimelineSemaphore +/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore +/// [`FencePool`]: Fence::FencePool #[derive(Debug)] pub enum Fence { + /// A Vulkan [timeline semaphore]. + /// + /// These are simpler to use than Vulkan fences, since timeline semaphores + /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work. + /// + /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores + /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence TimelineSemaphore(vk::Semaphore), + + /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`]. + /// + /// The effective [`FenceValue`] of this variant is the greater of + /// `last_completed` and the maximum value associated with a signalled fence + /// in `active`. + /// + /// Fences are available in all versions of Vulkan, but since they only have + /// two states, "signaled" and "unsignaled", we need to use a separate fence + /// for each queue submission we might want to wait for, and remember which + /// [`FenceValue`] each one represents. + /// + /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences + /// [`FenceValue`]: crate::FenceValue FencePool { last_completed: crate::FenceValue, /// The pending fence values have to be ascending. @@ -549,21 +609,32 @@ pub enum Fence { } impl Fence { + /// Return the highest [`FenceValue`] among the signalled fences in `active`. + /// + /// As an optimization, assume that we already know that the fence has + /// reached `last_completed`, and don't bother checking fences whose values + /// are less than that: those fences remain in the `active` array only + /// because we haven't called `maintain` yet to clean them up.
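+ /// For example (hypothetical values): with `last_completed = 2` and
+ /// `active = [(1, a), (3, b), (4, c)]`, only `b` and `c` are queried;
+ /// if `b` is signalled and `c` is not, the result is `3`.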
+ /// + /// [`FenceValue`]: crate::FenceValue fn check_active( device: &ash::Device, - mut max_value: crate::FenceValue, + mut last_completed: crate::FenceValue, active: &[(crate::FenceValue, vk::Fence)], ) -> Result<crate::FenceValue, crate::DeviceError> { for &(value, raw) in active.iter() { unsafe { - if value > max_value && device.get_fence_status(raw)? { - max_value = value; + if value > last_completed && device.get_fence_status(raw)? { + last_completed = value; } } } - Ok(max_value) + Ok(last_completed) } + /// Return the highest signalled [`FenceValue`] for `self`. + /// + /// [`FenceValue`]: crate::FenceValue fn get_latest( &self, device: &ash::Device, @@ -584,6 +655,18 @@ impl Fence { } } + /// Trim the internal state of this [`Fence`]. + /// + /// This function has no externally visible effect, but you should call it + /// periodically to keep this fence's resource consumption under control. + /// + /// For fences using the [`FencePool`] implementation, this function + /// recycles fences that have been signaled. If you don't call this, + /// [`Queue::submit`] will just keep allocating a new Vulkan fence every + /// time it's called. + /// + /// [`FencePool`]: Fence::FencePool + /// [`Queue::submit`]: crate::Queue::submit fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> { match *self { Self::TimelineSemaphore(_) => {} diff --git a/wgpu-info/src/human.rs b/wgpu-info/src/human.rs index 9bb281352c..24eeec0008 100644 --- a/wgpu-info/src/human.rs +++ b/wgpu-info/src/human.rs @@ -143,6 +143,8 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize max_vertex_buffers, max_vertex_attributes, max_vertex_buffer_array_stride, + min_subgroup_size, + max_subgroup_size, max_push_constant_size, min_uniform_buffer_offset_alignment, min_storage_buffer_offset_alignment, @@ -176,6 +178,8 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize writeln!(output, "\t\t Max Vertex Buffers: {max_vertex_buffers}")?; writeln!(output, "\t\t Max Vertex Attributes: {max_vertex_attributes}")?; writeln!(output, "\t\t Max Vertex Buffer Array Stride: {max_vertex_buffer_array_stride}")?; + writeln!(output, "\t\t Min Subgroup Size: {min_subgroup_size}")?; + writeln!(output, "\t\t Max Subgroup Size: {max_subgroup_size}")?; writeln!(output, "\t\t Max Push Constant Size: {max_push_constant_size}")?; writeln!(output, "\t\t Min Uniform Buffer Offset Alignment: {min_uniform_buffer_offset_alignment}")?; writeln!(output, "\t\t Min Storage Buffer Offset Alignment: {min_storage_buffer_offset_alignment}")?; diff --git a/wgpu-macros/Cargo.toml b/wgpu-macros/Cargo.toml index b06df02cce..3c605e6554 100644 --- a/wgpu-macros/Cargo.toml +++ b/wgpu-macros/Cargo.toml @@ -15,6 +15,6 @@ publish = false proc-macro = true [dependencies] -heck = "0.4" +heck = "0.5" quote = "1" syn = { version = "2", features = ["full"] } diff --git a/wgpu-types/Cargo.toml b/wgpu-types/Cargo.toml index f8024f516e..ea18e6b335 100644 --- a/wgpu-types/Cargo.toml +++ b/wgpu-types/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wgpu-types" -version = "0.19.2" +version = "0.20.0" authors = ["gfx-rs developers"] edition = "2021" description = "WebGPU types" @@ -46,4 +46,4 @@ web-sys = { version = "0.3.69", features = [ [dev-dependencies] serde = { version = "1", features = ["serde_derive"] } -serde_json = "1.0.115" +serde_json = "1.0.116" diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 75d3947df9..8d5e367301 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -890,6 +890,30 @@
bitflags::bitflags! { /// /// This is a native only feature. const SHADER_INT64 = 1 << 55; + /// Allows compute and fragment shaders to use the subgroup operation built-ins + /// + /// Supported Platforms: + /// - Vulkan + /// - DX12 + /// - Metal + /// + /// This is a native only feature. + const SUBGROUP = 1 << 56; + /// Allows vertex shaders to use the subgroup operation built-ins + /// + /// Supported Platforms: + /// - Vulkan + /// + /// This is a native only feature. + const SUBGROUP_VERTEX = 1 << 57; + /// Allows shaders to use the subgroup barrier + /// + /// Supported Platforms: + /// - Vulkan + /// - Metal + /// + /// This is a native only feature. + const SUBGROUP_BARRIER = 1 << 58; } } @@ -1119,7 +1143,7 @@ pub struct Limits { /// pipeline output data, across all color attachments. pub max_color_attachment_bytes_per_sample: u32, /// Maximum number of bytes used for workgroup memory in a compute entry point. Defaults to - /// 16352. Higher is "better". + /// 16384. Higher is "better". pub max_compute_workgroup_storage_size: u32, /// Maximum value of the product of the `workgroup_size` dimensions for a compute entry-point. /// Defaults to 256. Higher is "better". @@ -1136,6 +1160,11 @@ pub struct Limits { /// The maximum value for each dimension of a `ComputePass::dispatch(x, y, z)` operation. /// Defaults to 65535. Higher is "better". pub max_compute_workgroups_per_dimension: u32, + + /// Minimal number of invocations in a subgroup. Higher is "better". + pub min_subgroup_size: u32, + /// Maximal number of invocations in a subgroup. Lower is "better". + pub max_subgroup_size: u32, /// Amount of storage available for push constants in bytes. Defaults to 0. Higher is "better". /// Requesting more than 0 during device creation requires [`Features::PUSH_CONSTANTS`] to be enabled. /// @@ -1146,7 +1175,6 @@ pub struct Limits { /// - OpenGL doesn't natively support push constants, and are emulated with uniforms, /// so this number is less useful but likely 256. pub max_push_constant_size: u32, - /// Maximum number of live non-sampler bindings. /// /// This limit only affects the d3d12 backend. 
Using a large number will allow the device @@ -1156,6 +1184,14 @@ pub struct Limits { impl Default for Limits { fn default() -> Self { + Self::defaults() + } +} + +impl Limits { + // Rust doesn't allow const in trait implementations, so we break this out + // to allow reusing these defaults in const contexts like `downlevel_defaults` + const fn defaults() -> Self { Self { max_texture_dimension_1d: 8192, max_texture_dimension_2d: 8192, @@ -1170,10 +1206,10 @@ impl Default for Limits { max_storage_buffers_per_shader_stage: 8, max_storage_textures_per_shader_stage: 4, max_uniform_buffers_per_shader_stage: 12, - max_uniform_buffer_binding_size: 64 << 10, - max_storage_buffer_binding_size: 128 << 20, + max_uniform_buffer_binding_size: 64 << 10, // (64 KiB) + max_storage_buffer_binding_size: 128 << 20, // (128 MiB) max_vertex_buffers: 8, - max_buffer_size: 256 << 20, + max_buffer_size: 256 << 20, // (256 MiB) max_vertex_attributes: 16, max_vertex_buffer_array_stride: 2048, min_uniform_buffer_offset_alignment: 256, @@ -1187,13 +1223,13 @@ impl Default for Limits { max_compute_workgroup_size_y: 256, max_compute_workgroup_size_z: 64, max_compute_workgroups_per_dimension: 65535, + min_subgroup_size: 0, + max_subgroup_size: 0, max_push_constant_size: 0, max_non_sampler_bindings: 1_000_000, } } -} -impl Limits { /// These default limits are guaranteed to be compatible with GLES-3.1, and D3D11 /// /// Those limits are as follows (different from default are marked with *): @@ -1218,13 +1254,15 @@ impl Limits { /// max_vertex_buffers: 8, /// max_vertex_attributes: 16, /// max_vertex_buffer_array_stride: 2048, + /// min_subgroup_size: 0, + /// max_subgroup_size: 0, /// max_push_constant_size: 0, /// min_uniform_buffer_offset_alignment: 256, /// min_storage_buffer_offset_alignment: 256, /// max_inter_stage_shader_components: 60, /// max_color_attachments: 8, /// max_color_attachment_bytes_per_sample: 32, - /// max_compute_workgroup_storage_size: 16352, + /// max_compute_workgroup_storage_size: 16352, // * /// max_compute_invocations_per_workgroup: 256, /// max_compute_workgroup_size_x: 256, /// max_compute_workgroup_size_y: 256, @@ -1239,35 +1277,11 @@ impl Limits { max_texture_dimension_1d: 2048, max_texture_dimension_2d: 2048, max_texture_dimension_3d: 256, - max_texture_array_layers: 256, - max_bind_groups: 4, - max_bindings_per_bind_group: 1000, - max_dynamic_uniform_buffers_per_pipeline_layout: 8, - max_dynamic_storage_buffers_per_pipeline_layout: 4, - max_sampled_textures_per_shader_stage: 16, - max_samplers_per_shader_stage: 16, max_storage_buffers_per_shader_stage: 4, - max_storage_textures_per_shader_stage: 4, - max_uniform_buffers_per_shader_stage: 12, - max_uniform_buffer_binding_size: 16 << 10, - max_storage_buffer_binding_size: 128 << 20, - max_vertex_buffers: 8, - max_vertex_attributes: 16, - max_vertex_buffer_array_stride: 2048, - max_push_constant_size: 0, - min_uniform_buffer_offset_alignment: 256, - min_storage_buffer_offset_alignment: 256, - max_inter_stage_shader_components: 60, - max_color_attachments: 8, - max_color_attachment_bytes_per_sample: 32, + max_uniform_buffer_binding_size: 16 << 10, // (16 KiB) + // see: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf#page=7 max_compute_workgroup_storage_size: 16352, - max_compute_invocations_per_workgroup: 256, - max_compute_workgroup_size_x: 256, - max_compute_workgroup_size_y: 256, - max_compute_workgroup_size_z: 64, - max_compute_workgroups_per_dimension: 65535, - max_buffer_size: 256 << 20, - max_non_sampler_bindings: 
1_000_000, + ..Self::defaults() } } @@ -1296,6 +1310,8 @@ impl Limits { /// max_vertex_buffers: 8, /// max_vertex_attributes: 16, /// max_vertex_buffer_array_stride: 255, // + + /// min_subgroup_size: 0, + /// max_subgroup_size: 0, /// max_push_constant_size: 0, /// min_uniform_buffer_offset_alignment: 256, /// min_storage_buffer_offset_alignment: 256, @@ -1326,6 +1342,8 @@ impl Limits { max_compute_workgroup_size_y: 0, max_compute_workgroup_size_z: 0, max_compute_workgroups_per_dimension: 0, + min_subgroup_size: 0, + max_subgroup_size: 0, // Value supported by Intel Celeron B830 on Windows (OpenGL 3.1) max_inter_stage_shader_components: 31, @@ -1418,6 +1436,10 @@ impl Limits { compare!(max_vertex_buffers, Less); compare!(max_vertex_attributes, Less); compare!(max_vertex_buffer_array_stride, Less); + if self.min_subgroup_size > 0 && self.max_subgroup_size > 0 { + compare!(min_subgroup_size, Greater); + compare!(max_subgroup_size, Less); + } compare!(max_push_constant_size, Less); compare!(min_uniform_buffer_offset_alignment, Greater); compare!(min_storage_buffer_offset_alignment, Greater); diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index b6c824e283..024b39bca3 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -21,7 +21,7 @@ use wasm_bindgen::{prelude::*, JsCast}; use crate::{ context::{downcast_ref, ObjectId, QueueWriteBuffer, Unused}, - SurfaceTargetUnsafe, UncapturedErrorHandler, + CompilationInfo, SurfaceTargetUnsafe, UncapturedErrorHandler, }; fn create_identified(value: T) -> (Identified, Sendable) { @@ -106,6 +106,88 @@ impl crate::Error { } } +#[derive(Debug)] +pub struct WebShaderModule { + module: webgpu_sys::GpuShaderModule, + compilation_info: WebShaderCompilationInfo, +} + +#[derive(Debug, Clone)] +enum WebShaderCompilationInfo { + /// WGSL shaders get their compilation info from a native WebGPU function. + /// We need the source to be able to do UTF16 to UTF8 location remapping. + Wgsl { source: String }, + /// Transformed shaders get their compilation info from the transformer. + /// Further compilation errors are reported without a span. + Transformed { + compilation_info: crate::CompilationInfo, + }, +} + +fn map_utf16_to_utf8_offset(utf16_offset: u32, text: &str) -> u32 { + let mut utf16_i = 0; + for (utf8_index, c) in text.char_indices() { + if utf16_i >= utf16_offset { + return utf8_index as u32; + } + utf16_i += c.len_utf16() as u32; + } + if utf16_i >= utf16_offset { + text.len() as u32 + } else { + log::error!( + "UTF16 offset {} is out of bounds for string {}", + utf16_offset, + text + ); + u32::MAX + } +} + +impl crate::CompilationMessage { + fn from_js( + js_message: webgpu_sys::GpuCompilationMessage, + compilation_info: &WebShaderCompilationInfo, + ) -> Self { + let message_type = match js_message.type_() { + webgpu_sys::GpuCompilationMessageType::Error => crate::CompilationMessageType::Error, + webgpu_sys::GpuCompilationMessageType::Warning => { + crate::CompilationMessageType::Warning + } + webgpu_sys::GpuCompilationMessageType::Info => crate::CompilationMessageType::Info, + _ => crate::CompilationMessageType::Error, + }; + let utf16_offset = js_message.offset() as u32; + let utf16_length = js_message.length() as u32; + let span = match compilation_info { + WebShaderCompilationInfo::Wgsl { .. 
} if utf16_offset == 0 && utf16_length == 0 => None, + WebShaderCompilationInfo::Wgsl { source } => { + let offset = map_utf16_to_utf8_offset(utf16_offset, source); + let length = map_utf16_to_utf8_offset(utf16_length, &source[offset as usize..]); + let line_number = js_message.line_num() as u32; // That's legal, because we're counting lines the same way + + let prefix = &source[..offset as usize]; + let line_start = prefix.rfind('\n').map(|pos| pos + 1).unwrap_or(0) as u32; + let line_position = offset - line_start + 1; // Counting UTF-8 byte indices + + Some(crate::SourceLocation { + offset, + length, + line_number, + line_position, + }) + } + WebShaderCompilationInfo::Transformed { .. } => None, + }; + + crate::CompilationMessage { + message: js_message.message(), + message_type, + location: span, + } + } +} + // We need to assert that any future we return is Send to match the native API. // // This is safe on wasm32 *for now*, but similarly to the unsafe Send impls for the handle type @@ -737,6 +819,8 @@ fn map_wgt_limits(limits: webgpu_sys::GpuSupportedLimits) -> wgt::Limits { max_compute_workgroup_size_z: limits.max_compute_workgroup_size_z(), max_compute_workgroups_per_dimension: limits.max_compute_workgroups_per_dimension(), // The following are not part of WebGPU + min_subgroup_size: wgt::Limits::default().min_subgroup_size, + max_subgroup_size: wgt::Limits::default().max_subgroup_size, max_push_constant_size: wgt::Limits::default().max_push_constant_size, max_non_sampler_bindings: wgt::Limits::default().max_non_sampler_bindings, } @@ -844,6 +928,41 @@ fn future_pop_error_scope(result: JsFutureResult) -> Option { } } +fn future_compilation_info( + result: JsFutureResult, + base_compilation_info: &WebShaderCompilationInfo, +) -> crate::CompilationInfo { + let base_messages = match base_compilation_info { + WebShaderCompilationInfo::Transformed { compilation_info } => { + compilation_info.messages.iter().cloned() + } + _ => [].iter().cloned(), + }; + + let messages = match result { + Ok(js_value) => { + let info = webgpu_sys::GpuCompilationInfo::from(js_value); + base_messages + .chain(info.messages().into_iter().map(|message| { + crate::CompilationMessage::from_js( + webgpu_sys::GpuCompilationMessage::from(message), + base_compilation_info, + ) + })) + .collect() + } + Err(_v) => base_messages + .chain(std::iter::once(crate::CompilationMessage { + message: "Getting compilation info failed".to_string(), + message_type: crate::CompilationMessageType::Error, + location: None, + })) + .collect(), + }; + + crate::CompilationInfo { messages } +} + /// Calls `callback(success_value)` when the promise completes successfully, calls `callback(failure_value)` /// when the promise completes unsuccessfully. 
fn register_then_closures(promise: &Promise, callback: F, success_value: T, failure_value: T) @@ -1000,8 +1119,8 @@ impl crate::context::Context for ContextWebGpu { type DeviceData = Sendable; type QueueId = Identified; type QueueData = Sendable; - type ShaderModuleId = Identified; - type ShaderModuleData = Sendable; + type ShaderModuleId = Identified; + type ShaderModuleData = Sendable; type BindGroupLayoutId = Identified; type BindGroupLayoutData = Sendable; type BindGroupId = Identified; @@ -1036,6 +1155,10 @@ impl crate::context::Context for ContextWebGpu { type RenderBundleData = Sendable; type SurfaceId = Identified<(Canvas, webgpu_sys::GpuCanvasContext)>; type SurfaceData = Sendable<(Canvas, webgpu_sys::GpuCanvasContext)>; + type BlasData = (); + type BlasId = ObjectId; + type TlasData = (); + type TlasId = ObjectId; type SurfaceOutputDetail = SurfaceOutputDetail; type SubmissionIndex = Unused; @@ -1062,10 +1185,10 @@ impl crate::context::Context for ContextWebGpu { type PopErrorScopeFuture = MakeSendFuture Option>; - type BlasData = (); - type BlasId = ObjectId; - type TlasData = (); - type TlasId = ObjectId; + type CompilationInfoFuture = MakeSendFuture< + wasm_bindgen_futures::JsFuture, + Box CompilationInfo>, + >; fn init(_instance_desc: wgt::InstanceDescriptor) -> Self { let Some(gpu) = get_browser_gpu_property() else { @@ -1423,10 +1546,10 @@ impl crate::context::Context for ContextWebGpu { desc: crate::ShaderModuleDescriptor<'_>, _shader_bound_checks: wgt::ShaderBoundChecks, ) -> (Self::ShaderModuleId, Self::ShaderModuleData) { - let mut descriptor: webgpu_sys::GpuShaderModuleDescriptor = match desc.source { + let shader_module_result = match desc.source { #[cfg(feature = "spirv")] crate::ShaderSource::SpirV(ref spv) => { - use naga::{back, front, valid}; + use naga::front; let options = naga::front::spv::Options { adjust_coordinate_space: false, @@ -1434,18 +1557,25 @@ impl crate::context::Context for ContextWebGpu { block_ctx_dump_prefix: None, }; let spv_parser = front::spv::Frontend::new(spv.iter().cloned(), &options); - let spv_module = spv_parser.parse().unwrap(); - - let mut validator = valid::Validator::new( - valid::ValidationFlags::all(), - valid::Capabilities::all(), - ); - let spv_module_info = validator.validate(&spv_module).unwrap(); - - let writer_flags = naga::back::wgsl::WriterFlags::empty(); - let wgsl_text = - back::wgsl::write_string(&spv_module, &spv_module_info, writer_flags).unwrap(); - webgpu_sys::GpuShaderModuleDescriptor::new(wgsl_text.as_str()) + spv_parser + .parse() + .map_err(|inner| { + CompilationInfo::from(naga::error::ShaderError { + source: String::new(), + label: desc.label.map(|s| s.to_string()), + inner: Box::new(inner), + }) + }) + .and_then(|spv_module| { + validate_transformed_shader_module(&spv_module, "", &desc).map(|v| { + ( + v, + WebShaderCompilationInfo::Transformed { + compilation_info: CompilationInfo { messages: vec![] }, + }, + ) + }) + }) } #[cfg(feature = "glsl")] crate::ShaderSource::Glsl { @@ -1453,7 +1583,7 @@ impl crate::context::Context for ContextWebGpu { stage, ref defines, } => { - use naga::{back, front, valid}; + use naga::front; // Parse the given shader code and store its representation. 
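// (As in the SPIR-V arm above, parse and validation failures here are
// mapped into `CompilationInfo` rather than unwrapped, so they can be
// reported via `get_compilation_info`; on error the module is replaced
// with an empty one further down.)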
let options = front::glsl::Options { @@ -1461,45 +1591,91 @@ impl crate::context::Context for ContextWebGpu { defines: defines.clone(), }; let mut parser = front::glsl::Frontend::default(); - let glsl_module = parser.parse(&options, shader).unwrap(); - - let mut validator = valid::Validator::new( - valid::ValidationFlags::all(), - valid::Capabilities::all(), - ); - let glsl_module_info = validator.validate(&glsl_module).unwrap(); - - let writer_flags = naga::back::wgsl::WriterFlags::empty(); - let wgsl_text = - back::wgsl::write_string(&glsl_module, &glsl_module_info, writer_flags) - .unwrap(); - webgpu_sys::GpuShaderModuleDescriptor::new(wgsl_text.as_str()) + parser + .parse(&options, shader) + .map_err(|inner| { + CompilationInfo::from(naga::error::ShaderError { + source: shader.to_string(), + label: desc.label.map(|s| s.to_string()), + inner: Box::new(inner), + }) + }) + .and_then(|glsl_module| { + validate_transformed_shader_module(&glsl_module, shader, &desc).map(|v| { + ( + v, + WebShaderCompilationInfo::Transformed { + compilation_info: CompilationInfo { messages: vec![] }, + }, + ) + }) + }) } #[cfg(feature = "wgsl")] - crate::ShaderSource::Wgsl(ref code) => webgpu_sys::GpuShaderModuleDescriptor::new(code), + crate::ShaderSource::Wgsl(ref code) => { + let shader_module = webgpu_sys::GpuShaderModuleDescriptor::new(code); + Ok(( + shader_module, + WebShaderCompilationInfo::Wgsl { + source: code.to_string(), + }, + )) + } #[cfg(feature = "naga-ir")] - crate::ShaderSource::Naga(module) => { - use naga::{back, valid}; - - let mut validator = valid::Validator::new( - valid::ValidationFlags::all(), - valid::Capabilities::all(), - ); - let module_info = validator.validate(&module).unwrap(); - - let writer_flags = naga::back::wgsl::WriterFlags::empty(); - let wgsl_text = - back::wgsl::write_string(&module, &module_info, writer_flags).unwrap(); - webgpu_sys::GpuShaderModuleDescriptor::new(wgsl_text.as_str()) + crate::ShaderSource::Naga(ref module) => { + validate_transformed_shader_module(module, "", &desc).map(|v| { + ( + v, + WebShaderCompilationInfo::Transformed { + compilation_info: CompilationInfo { messages: vec![] }, + }, + ) + }) } crate::ShaderSource::Dummy(_) => { panic!("found `ShaderSource::Dummy`") } }; + + #[cfg(naga)] + fn validate_transformed_shader_module( + module: &naga::Module, + source: &str, + desc: &crate::ShaderModuleDescriptor<'_>, + ) -> Result { + use naga::{back, valid}; + let mut validator = + valid::Validator::new(valid::ValidationFlags::all(), valid::Capabilities::all()); + let module_info = validator.validate(module).map_err(|err| { + CompilationInfo::from(naga::error::ShaderError { + source: source.to_string(), + label: desc.label.map(|s| s.to_string()), + inner: Box::new(err), + }) + })?; + + let writer_flags = naga::back::wgsl::WriterFlags::empty(); + let wgsl_text = back::wgsl::write_string(module, &module_info, writer_flags).unwrap(); + Ok(webgpu_sys::GpuShaderModuleDescriptor::new( + wgsl_text.as_str(), + )) + } + let (mut descriptor, compilation_info) = match shader_module_result { + Ok(v) => v, + Err(compilation_info) => ( + webgpu_sys::GpuShaderModuleDescriptor::new(""), + WebShaderCompilationInfo::Transformed { compilation_info }, + ), + }; if let Some(label) = desc.label { descriptor.label(label); } - create_identified(device_data.0.create_shader_module(&descriptor)) + let shader_module = WebShaderModule { + module: device_data.0.create_shader_module(&descriptor), + compilation_info, + }; + let (id, data) = create_identified(shader_module); + (id, 
data) } unsafe fn device_create_shader_module_spirv( @@ -1704,7 +1880,7 @@ impl crate::context::Context for ContextWebGpu { ) -> (Self::RenderPipelineId, Self::RenderPipelineData) { let module: &::ShaderModuleData = downcast_ref(desc.vertex.module.data.as_ref()); - let mut mapped_vertex_state = webgpu_sys::GpuVertexState::new(&module.0); + let mut mapped_vertex_state = webgpu_sys::GpuVertexState::new(&module.0.module); mapped_vertex_state.entry_point(desc.vertex.entry_point); let buffers = desc @@ -1779,7 +1955,8 @@ impl crate::context::Context for ContextWebGpu { .collect::(); let module: &::ShaderModuleData = downcast_ref(frag.module.data.as_ref()); - let mut mapped_fragment_desc = webgpu_sys::GpuFragmentState::new(&module.0, &targets); + let mut mapped_fragment_desc = + webgpu_sys::GpuFragmentState::new(&module.0.module, &targets); mapped_fragment_desc.entry_point(frag.entry_point); mapped_desc.fragment(&mapped_fragment_desc); } @@ -1804,7 +1981,8 @@ impl crate::context::Context for ContextWebGpu { ) -> (Self::ComputePipelineId, Self::ComputePipelineData) { let shader_module: &::ShaderModuleData = downcast_ref(desc.module.data.as_ref()); - let mut mapped_compute_stage = webgpu_sys::GpuProgrammableStage::new(&shader_module.0); + let mut mapped_compute_stage = + webgpu_sys::GpuProgrammableStage::new(&shader_module.0.module); mapped_compute_stage.entry_point(desc.entry_point); let auto_layout = wasm_bindgen::JsValue::from(webgpu_sys::GpuAutoLayoutMode::Auto); let mut mapped_desc = webgpu_sys::GpuComputePipelineDescriptor::new( @@ -2097,6 +2275,22 @@ impl crate::context::Context for ContextWebGpu { buffer_data.0.mapping.borrow_mut().mapped_buffer = None; } + fn shader_get_compilation_info( + &self, + _shader: &Self::ShaderModuleId, + shader_data: &Self::ShaderModuleData, + ) -> Self::CompilationInfoFuture { + let compilation_info_promise = shader_data.0.module.get_compilation_info(); + let map_future = Box::new({ + let compilation_info = shader_data.0.compilation_info.clone(); + move |result| future_compilation_info(result, &compilation_info) + }); + MakeSendFuture::new( + wasm_bindgen_futures::JsFuture::from(compilation_info_promise), + map_future, + ) + } + fn texture_create_view( &self, _texture: &Self::TextureId, diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index d897b5d8a0..9c79909e1e 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -1,7 +1,8 @@ use crate::{ context::{ObjectId, Unused}, AdapterInfo, BindGroupDescriptor, BindGroupLayoutDescriptor, BindingResource, BufferBinding, - BufferDescriptor, CommandEncoderDescriptor, ComputePassDescriptor, ComputePipelineDescriptor, + BufferDescriptor, CommandEncoderDescriptor, CompilationInfo, CompilationMessage, + CompilationMessageType, ComputePassDescriptor, ComputePipelineDescriptor, DownlevelCapabilities, Features, Label, Limits, LoadOp, MapMode, Operations, PipelineLayoutDescriptor, RenderBundleEncoderDescriptor, RenderPipelineDescriptor, SamplerDescriptor, ShaderModuleDescriptor, ShaderModuleDescriptorSpirV, ShaderSource, StoreOp, @@ -23,9 +24,10 @@ use std::{ sync::Arc, }; use wgc::{ - command::{bundle_ffi::*, compute_ffi::*, render_ffi::*}, + command::{bundle_ffi::*, compute_commands::*, render_commands::*}, device::DeviceLostClosure, id::{CommandEncoderId, TextureViewId}, + pipeline::CreateShaderModuleError, }; use wgt::WasmNotSendSync; @@ -441,6 +443,11 @@ pub struct Buffer { error_sink: ErrorSink, } +#[derive(Debug)] +pub struct ShaderModule { + compilation_info: 
CompilationInfo, +} + #[derive(Debug)] pub struct Texture { id: wgc::id::TextureId, @@ -493,7 +500,7 @@ impl crate::Context for ContextWgpuCore { type QueueId = wgc::id::QueueId; type QueueData = Queue; type ShaderModuleId = wgc::id::ShaderModuleId; - type ShaderModuleData = (); + type ShaderModuleData = ShaderModule; type BindGroupLayoutId = wgc::id::BindGroupLayoutId; type BindGroupLayoutData = (); type BindGroupId = wgc::id::BindGroupId; @@ -554,6 +561,7 @@ impl crate::Context for ContextWgpuCore { >; type PopErrorScopeFuture = Ready>; + type CompilationInfoFuture = Ready; fn init(instance_desc: wgt::InstanceDescriptor) -> Self { Self(wgc::global::Global::new("wgpu", instance_desc)) @@ -569,7 +577,7 @@ impl crate::Context for ContextWgpuCore { raw_window_handle, } => unsafe { self.0 - .instance_create_surface(raw_display_handle, raw_window_handle, None)? + .instance_create_surface(raw_display_handle, raw_window_handle, None) }, #[cfg(metal)] @@ -593,7 +601,7 @@ impl crate::Context for ContextWgpuCore { self.0 .instance_create_surface_from_swap_chain_panel(swap_chain_panel, None) }, - }; + }?; Ok(( id, @@ -906,16 +914,21 @@ impl crate::Context for ContextWgpuCore { let (id, error) = wgc::gfx_select!( device => self.0.device_create_shader_module(*device, &descriptor, source, None) ); - if let Some(cause) = error { - self.handle_error( - &device_data.error_sink, - cause, - LABEL, - desc.label, - "Device::create_shader_module", - ); - } - (id, ()) + let compilation_info = match error { + Some(cause) => { + self.handle_error( + &device_data.error_sink, + cause.clone(), + LABEL, + desc.label, + "Device::create_shader_module", + ); + CompilationInfo::from(cause) + } + None => CompilationInfo { messages: vec![] }, + }; + + (id, ShaderModule { compilation_info }) } unsafe fn device_create_shader_module_spirv( @@ -933,16 +946,20 @@ impl crate::Context for ContextWgpuCore { let (id, error) = wgc::gfx_select!( device => self.0.device_create_shader_module_spirv(*device, &descriptor, Borrowed(&desc.source), None) ); - if let Some(cause) = error { - self.handle_error( - &device_data.error_sink, - cause, - LABEL, - desc.label, - "Device::create_shader_module_spirv", - ); - } - (id, ()) + let compilation_info = match error { + Some(cause) => { + self.handle_error( + &device_data.error_sink, + cause.clone(), + LABEL, + desc.label, + "Device::create_shader_module_spirv", + ); + CompilationInfo::from(cause) + } + None => CompilationInfo { messages: vec![] }, + }; + (id, ShaderModule { compilation_info }) } fn device_create_bind_group_layout( @@ -1161,7 +1178,11 @@ impl crate::Context for ContextWgpuCore { stage: pipe::ProgrammableStageDescriptor { module: desc.vertex.module.id.into(), entry_point: Some(Borrowed(desc.vertex.entry_point)), - constants: Borrowed(desc.vertex.constants), + constants: Borrowed(desc.vertex.compilation_options.constants), + zero_initialize_workgroup_memory: desc + .vertex + .compilation_options + .zero_initialize_workgroup_memory, }, buffers: Borrowed(&vertex_buffers), }, @@ -1172,7 +1193,10 @@ impl crate::Context for ContextWgpuCore { stage: pipe::ProgrammableStageDescriptor { module: frag.module.id.into(), entry_point: Some(Borrowed(frag.entry_point)), - constants: Borrowed(frag.constants), + constants: Borrowed(frag.compilation_options.constants), + zero_initialize_workgroup_memory: frag + .compilation_options + .zero_initialize_workgroup_memory, }, targets: Borrowed(frag.targets), }), @@ -1221,7 +1245,10 @@ impl crate::Context for ContextWgpuCore { stage: 
pipe::ProgrammableStageDescriptor { module: desc.module.id.into(), entry_point: Some(Borrowed(desc.entry_point)), - constants: Borrowed(desc.constants), + constants: Borrowed(desc.compilation_options.constants), + zero_initialize_workgroup_memory: desc + .compilation_options + .zero_initialize_workgroup_memory, }, }; @@ -1554,6 +1581,14 @@ impl crate::Context for ContextWgpuCore { } } + fn shader_get_compilation_info( + &self, + _shader: &Self::ShaderModuleId, + shader_data: &Self::ShaderModuleData, + ) -> Self::CompilationInfoFuture { + ready(shader_data.compilation_info.clone()) + } + fn texture_create_view( &self, texture: &Self::TextureId, @@ -2331,15 +2366,7 @@ impl crate::Context for ContextWgpuCore { _bind_group_data: &Self::BindGroupData, offsets: &[wgt::DynamicOffset], ) { - unsafe { - wgpu_compute_pass_set_bind_group( - pass_data, - index, - *bind_group, - offsets.as_ptr(), - offsets.len(), - ) - } + wgpu_compute_pass_set_bind_group(pass_data, index, *bind_group, offsets); } fn compute_pass_set_push_constants( @@ -2349,14 +2376,7 @@ impl crate::Context for ContextWgpuCore { offset: u32, data: &[u8], ) { - unsafe { - wgpu_compute_pass_set_push_constant( - pass_data, - offset, - data.len().try_into().unwrap(), - data.as_ptr(), - ) - } + wgpu_compute_pass_set_push_constant(pass_data, offset, data); } fn compute_pass_insert_debug_marker( @@ -2365,10 +2385,7 @@ impl crate::Context for ContextWgpuCore { pass_data: &mut Self::ComputePassData, label: &str, ) { - unsafe { - let label = std::ffi::CString::new(label).unwrap(); - wgpu_compute_pass_insert_debug_marker(pass_data, label.as_ptr(), 0); - } + wgpu_compute_pass_insert_debug_marker(pass_data, label, 0); } fn compute_pass_push_debug_group( @@ -2377,10 +2394,7 @@ impl crate::Context for ContextWgpuCore { pass_data: &mut Self::ComputePassData, group_label: &str, ) { - unsafe { - let label = std::ffi::CString::new(group_label).unwrap(); - wgpu_compute_pass_push_debug_group(pass_data, label.as_ptr(), 0); - } + wgpu_compute_pass_push_debug_group(pass_data, group_label, 0); } fn compute_pass_pop_debug_group( @@ -2647,15 +2661,7 @@ impl crate::Context for ContextWgpuCore { _bind_group_data: &Self::BindGroupData, offsets: &[wgt::DynamicOffset], ) { - unsafe { - wgpu_render_pass_set_bind_group( - pass_data, - index, - *bind_group, - offsets.as_ptr(), - offsets.len(), - ) - } + wgpu_render_pass_set_bind_group(pass_data, index, *bind_group, offsets) } fn render_pass_set_index_buffer( @@ -2692,15 +2698,7 @@ impl crate::Context for ContextWgpuCore { offset: u32, data: &[u8], ) { - unsafe { - wgpu_render_pass_set_push_constants( - pass_data, - stages, - offset, - data.len().try_into().unwrap(), - data.as_ptr(), - ) - } + wgpu_render_pass_set_push_constants(pass_data, stages, offset, data) } fn render_pass_draw( @@ -2882,10 +2880,7 @@ impl crate::Context for ContextWgpuCore { pass_data: &mut Self::RenderPassData, label: &str, ) { - unsafe { - let label = std::ffi::CString::new(label).unwrap(); - wgpu_render_pass_insert_debug_marker(pass_data, label.as_ptr(), 0); - } + wgpu_render_pass_insert_debug_marker(pass_data, label, 0); } fn render_pass_push_debug_group( @@ -2894,10 +2889,7 @@ impl crate::Context for ContextWgpuCore { pass_data: &mut Self::RenderPassData, group_label: &str, ) { - unsafe { - let label = std::ffi::CString::new(group_label).unwrap(); - wgpu_render_pass_push_debug_group(pass_data, label.as_ptr(), 0); - } + wgpu_render_pass_push_debug_group(pass_data, group_label, 0); } fn render_pass_pop_debug_group( @@ -2962,13 +2954,7 @@ impl 
crate::Context for ContextWgpuCore { render_bundles: &mut dyn Iterator, ) { let temp_render_bundles = render_bundles.map(|(i, _)| i).collect::>(); - unsafe { - wgpu_render_pass_execute_bundles( - pass_data, - temp_render_bundles.as_ptr(), - temp_render_bundles.len(), - ) - } + wgpu_render_pass_execute_bundles(pass_data, &temp_render_bundles) } fn device_create_blas( @@ -3258,6 +3244,35 @@ fn default_error_handler(err: crate::Error) { panic!("wgpu error: {err}\n"); } +impl From for CompilationInfo { + fn from(value: CreateShaderModuleError) -> Self { + match value { + #[cfg(feature = "wgsl")] + CreateShaderModuleError::Parsing(v) => v.into(), + #[cfg(feature = "glsl")] + CreateShaderModuleError::ParsingGlsl(v) => v.into(), + #[cfg(feature = "spirv")] + CreateShaderModuleError::ParsingSpirV(v) => v.into(), + CreateShaderModuleError::Validation(v) => v.into(), + // Device errors are reported through the error sink, and are not compilation errors. + // Same goes for native shader module generation errors. + CreateShaderModuleError::Device(_) | CreateShaderModuleError::Generation => { + CompilationInfo { + messages: Vec::new(), + } + } + // Everything else is an error message without location information. + _ => CompilationInfo { + messages: vec![CompilationMessage { + message: value.to_string(), + message_type: CompilationMessageType::Error, + location: None, + }], + }, + } + } +} + #[derive(Debug)] pub struct QueueWriteBuffer { buffer_id: wgc::id::StagingBufferId, diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index ad74cd58a4..54c9a97e5b 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -9,13 +9,13 @@ use wgt::{ use crate::{ AnyWasmNotSendSync, BindGroupDescriptor, BindGroupLayoutDescriptor, Buffer, BufferAsyncError, - BufferDescriptor, CommandEncoderDescriptor, ComputePassDescriptor, ComputePipelineDescriptor, - DeviceDescriptor, Error, ErrorFilter, ImageCopyBuffer, ImageCopyTexture, Maintain, - MaintainResult, MapMode, PipelineLayoutDescriptor, QuerySetDescriptor, RenderBundleDescriptor, - RenderBundleEncoderDescriptor, RenderPassDescriptor, RenderPipelineDescriptor, - RequestAdapterOptions, RequestDeviceError, SamplerDescriptor, ShaderModuleDescriptor, - ShaderModuleDescriptorSpirV, SurfaceTargetUnsafe, Texture, TextureDescriptor, - TextureViewDescriptor, UncapturedErrorHandler, + BufferDescriptor, CommandEncoderDescriptor, CompilationInfo, ComputePassDescriptor, + ComputePipelineDescriptor, DeviceDescriptor, Error, ErrorFilter, ImageCopyBuffer, + ImageCopyTexture, Maintain, MaintainResult, MapMode, PipelineLayoutDescriptor, + QuerySetDescriptor, RenderBundleDescriptor, RenderBundleEncoderDescriptor, + RenderPassDescriptor, RenderPipelineDescriptor, RequestAdapterOptions, RequestDeviceError, + SamplerDescriptor, ShaderModuleDescriptor, ShaderModuleDescriptorSpirV, SurfaceTargetUnsafe, + Texture, TextureDescriptor, TextureViewDescriptor, UncapturedErrorHandler, }; /// Meta trait for an id tracked by a context. 
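The `compilation_options` plumbing above bottoms out in the new `PipelineCompilationOptions` struct defined later in `wgpu/src/lib.rs`. A minimal sketch of constructing it, assuming the wgpu 0.20 API; only the type and its two fields come from this change, the surrounding bindings are illustrative:

```rust
// Build per-stage options that opt out of zeroing workgroup memory.
// The WebGPU spec mandates the zeroing, so skipping it trades that
// guarantee for a little less dispatch overhead.
let options = wgpu::PipelineCompilationOptions {
    zero_initialize_workgroup_memory: false, // skip the zero-fill pass
    ..Default::default()                     // empty `constants` map
};
// `options` is then passed as the `compilation_options` field of
// `ComputePipelineDescriptor`, `VertexState`, or `FragmentState`.
```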
@@ -100,6 +100,8 @@ pub trait Context: Debug + WasmNotSendSync + Sized { + 'static; type PopErrorScopeFuture: Future> + WasmNotSend + 'static; + type CompilationInfoFuture: Future + WasmNotSend + 'static; + fn init(instance_desc: wgt::InstanceDescriptor) -> Self; unsafe fn instance_create_surface( &self, @@ -328,6 +330,11 @@ pub trait Context: Debug + WasmNotSendSync + Sized { sub_range: Range, ) -> Box; fn buffer_unmap(&self, buffer: &Self::BufferId, buffer_data: &Self::BufferData); + fn shader_get_compilation_info( + &self, + shader: &Self::ShaderModuleId, + shader_data: &Self::ShaderModuleData, + ) -> Self::CompilationInfoFuture; fn texture_create_view( &self, texture: &Self::TextureId, @@ -1160,6 +1167,11 @@ pub type DevicePopErrorFuture = Box> + Send>; #[cfg(not(send_sync))] pub type DevicePopErrorFuture = Box>>; +#[cfg(send_sync)] +pub type ShaderCompilationInfoFuture = Box + Send>; +#[cfg(not(send_sync))] +pub type ShaderCompilationInfoFuture = Box>; + #[cfg(send_sync)] pub type SubmittedWorkDoneCallback = Box; #[cfg(not(send_sync))] @@ -1382,6 +1394,11 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { sub_range: Range, ) -> Box; fn buffer_unmap(&self, buffer: &ObjectId, buffer_data: &crate::Data); + fn shader_get_compilation_info( + &self, + shader: &ObjectId, + shader_data: &crate::Data, + ) -> Pin; fn texture_create_view( &self, texture: &ObjectId, @@ -2537,6 +2554,17 @@ where Context::buffer_unmap(self, &buffer, buffer_data) } + fn shader_get_compilation_info( + &self, + shader: &ObjectId, + shader_data: &crate::Data, + ) -> Pin { + let shader = ::from(*shader); + let shader_data = downcast_ref(shader_data); + let future = Context::shader_get_compilation_info(self, &shader, shader_data); + Box::pin(future) + } + fn texture_create_view( &self, texture: &ObjectId, diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 47bb1f0aaa..2f8033ff45 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -210,12 +210,31 @@ pub struct SubmissionIndex(ObjectId, Arc); #[cfg(send_sync)] static_assertions::assert_impl_all!(SubmissionIndex: Send, Sync); -/// The main purpose of this struct is to resolve mapped ranges (convert sizes -/// to end points), and to ensure that the sub-ranges don't intersect. +/// The mapped portion of a buffer, if any, and its outstanding views. +/// +/// This ensures that views fall within the mapped range and don't overlap, and +/// also takes care of turning `Option` sizes into actual buffer +/// offsets. #[derive(Debug)] struct MapContext { + /// The overall size of the buffer. + /// + /// This is just a convenient copy of [`Buffer::size`]. total_size: BufferAddress, + + /// The range of the buffer that is mapped. + /// + /// This is `0..0` if the buffer is not mapped. This becomes non-empty when + /// the buffer is mapped at creation time, and when you call `map_async` on + /// some [`BufferSlice`] (so technically, it indicates the portion that is + /// *or has been requested to be* mapped.) + /// + /// All [`BufferView`]s and [`BufferViewMut`]s must fall within this range. initial_range: Range, + + /// The ranges covered by all outstanding [`BufferView`]s and + /// [`BufferViewMut`]s. These are non-overlapping, and are all contained + /// within `initial_range`. sub_ranges: Vec>, } @@ -228,6 +247,7 @@ impl MapContext { } } + /// Record that the buffer is no longer mapped. fn reset(&mut self) { self.initial_range = 0..0; @@ -237,12 +257,22 @@ impl MapContext { ); } + /// Record that the `size` bytes of the buffer at `offset` are now viewed. 
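+ /// (For instance, with `initial_range = 0..100`, viewing 20 bytes at
+ /// offset 10 records the sub-range `10..30`; values are hypothetical.)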
+ /// + /// Return the byte offset within the buffer of the end of the viewed range. + /// + /// # Panics + /// + /// This panics if the given range overlaps with any existing range. fn add(&mut self, offset: BufferAddress, size: Option) -> BufferAddress { let end = match size { Some(s) => offset + s.get(), None => self.initial_range.end, }; assert!(self.initial_range.start <= offset && end <= self.initial_range.end); + // This check is essential for avoiding undefined behavior: it is the + // only thing that ensures that `&mut` references to the buffer's + // contents don't alias anything else. for sub in self.sub_ranges.iter() { assert!( end <= sub.start || offset >= sub.end, @@ -253,6 +283,14 @@ impl MapContext { end } + /// Record that the `size` bytes of the buffer at `offset` are no longer viewed. + /// + /// # Panics + /// + /// This panics if the given range does not exactly match one previously + /// passed to [`add`]. + /// + /// [`add]`: MapContext::add fn remove(&mut self, offset: BufferAddress, size: Option) { let end = match size { Some(s) => offset + s.get(), @@ -274,6 +312,112 @@ impl MapContext { /// [`DeviceExt::create_buffer_init`](util::DeviceExt::create_buffer_init). /// /// Corresponds to [WebGPU `GPUBuffer`](https://gpuweb.github.io/gpuweb/#buffer-interface). +/// +/// # Mapping buffers +/// +/// If a `Buffer` is created with the appropriate [`usage`], it can be *mapped*: +/// you can make its contents accessible to the CPU as an ordinary `&[u8]` or +/// `&mut [u8]` slice of bytes. Buffers created with the +/// [`mapped_at_creation`][mac] flag set are also mapped initially. +/// +/// Depending on the hardware, the buffer could be memory shared between CPU and +/// GPU, so that the CPU has direct access to the same bytes the GPU will +/// consult; or it may be ordinary CPU memory, whose contents the system must +/// copy to/from the GPU as needed. This crate's API is designed to work the +/// same way in either case: at any given time, a buffer is either mapped and +/// available to the CPU, or unmapped and ready for use by the GPU, but never +/// both. This makes it impossible for either side to observe changes by the +/// other immediately, and any necessary transfers can be carried out when the +/// buffer transitions from one state to the other. +/// +/// There are two ways to map a buffer: +/// +/// - If [`BufferDescriptor::mapped_at_creation`] is `true`, then the entire +/// buffer is mapped when it is created. This is the easiest way to initialize +/// a new buffer. You can set `mapped_at_creation` on any kind of buffer, +/// regardless of its [`usage`] flags. +/// +/// - If the buffer's [`usage`] includes the [`MAP_READ`] or [`MAP_WRITE`] +/// flags, then you can call `buffer.slice(range).map_async(mode, callback)` +/// to map the portion of `buffer` given by `range`. This waits for the GPU to +/// finish using the buffer, and invokes `callback` as soon as the buffer is +/// safe for the CPU to access. +/// +/// Once a buffer is mapped: +/// +/// - You can call `buffer.slice(range).get_mapped_range()` to obtain a +/// [`BufferView`], which dereferences to a `&[u8]` that you can use to read +/// the buffer's contents. +/// +/// - Or, you can call `buffer.slice(range).get_mapped_range_mut()` to obtain a +/// [`BufferViewMut`], which dereferences to a `&mut [u8]` that you can use to +/// read and write the buffer's contents. +/// +/// The given `range` must fall within the mapped portion of the buffer. 
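+/// (For example, with bytes `0..32` of a buffer mapped, views of `0..16`
+/// and `16..32` can coexist, but a further view of `8..24` would overlap
+/// both; the ranges here are hypothetical.)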
If you +/// attempt to access overlapping ranges, even for shared access only, these +/// methods panic. +/// +/// For example: +/// +/// ```no_run +/// # let buffer: wgpu::Buffer = todo!(); +/// let slice = buffer.slice(10..20); +/// slice.map_async(wgpu::MapMode::Read, |result| { +/// match result { +/// Ok(()) => { +/// let view = slice.get_mapped_range(); +/// // read data from `view`, which dereferences to `&[u8]` +/// } +/// Err(e) => { +/// // handle mapping error +/// } +/// } +/// }); +/// ``` +/// +/// This example calls `Buffer::slice` to obtain a [`BufferSlice`] referring to +/// the second ten bytes of `buffer`. (To obtain access to the entire buffer, +/// you could call `buffer.slice(..)`.) The code then calls `map_async` to wait +/// for the buffer to be available, and finally calls `get_mapped_range` on the +/// slice to actually get at the bytes. +/// +/// If using `map_async` directly is awkward, you may find it more convenient to +/// use [`Queue::write_buffer`] and [`util::DownloadBuffer::read_buffer`]. +/// However, those each have their own tradeoffs; the asynchronous nature of GPU +/// execution makes it hard to avoid friction altogether. +/// +/// While a buffer is mapped, you must not submit any commands to the GPU that +/// access it. You may record command buffers that use the buffer, but you must +/// not submit such command buffers. +/// +/// When you are done using the buffer on the CPU, you must call +/// [`Buffer::unmap`] to make it available for use by the GPU again. All +/// [`BufferView`] and [`BufferViewMut`] views referring to the buffer must be +/// dropped before you unmap it; otherwise, [`Buffer::unmap`] will panic. +/// +/// ## Mapping buffers on the web +/// +/// When compiled to WebAssembly and running in a browser content process, +/// `wgpu` implements its API in terms of the browser's WebGPU implementation. +/// In this context, `wgpu` is further isolated from the GPU: +/// +/// - Depending on the browser's WebGPU implementation, mapping and unmapping +/// buffers probably entails copies between WebAssembly linear memory and the +/// graphics driver's buffers. +/// +/// - All modern web browsers isolate web content in its own sandboxed process, +/// which can only interact with the GPU via interprocess communication (IPC). +/// Although most browsers' IPC systems use shared memory for large data +/// transfers, there will still probably need to be copies into and out of the +/// shared memory buffers. +/// +/// All of these copies contribute to the cost of buffer mapping in this +/// configuration. +/// +/// [`usage`]: BufferDescriptor::usage +/// [mac]: BufferDescriptor::mapped_at_creation +/// [`MAP_READ`]: BufferUsages::MAP_READ +/// [`MAP_WRITE`]: BufferUsages::MAP_WRITE #[derive(Debug)] pub struct Buffer { context: Arc, @@ -287,14 +431,38 @@ pub struct Buffer { #[cfg(send_sync)] static_assertions::assert_impl_all!(Buffer: Send, Sync); -/// Slice into a [`Buffer`]. +/// A slice of a [`Buffer`], to be mapped, used for vertex or index data, or the like. /// -/// It can be created with [`Buffer::slice`]. To use the whole buffer, call with unbounded slice: +/// You can create a `BufferSlice` by calling [`Buffer::slice`]: /// -/// `buffer.slice(..)` +/// ```no_run +/// # let buffer: wgpu::Buffer = todo!(); +/// let slice = buffer.slice(10..20); +/// ``` /// -/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification, -/// an offset and size are specified as arguments to each call working with the [`Buffer`], instead. 
+/// This returns a slice referring to the second ten bytes of `buffer`. To get a +/// slice of the entire `Buffer`: +/// +/// ```no_run +/// # let buffer: wgpu::Buffer = todo!(); +/// let whole_buffer_slice = buffer.slice(..); +/// ``` +/// +/// A [`BufferSlice`] is nothing more than a reference to the `Buffer` and a +/// starting and ending position. To access the slice's contents on the CPU, you +/// must first [map] the buffer, and then call [`BufferSlice::get_mapped_range`] +/// or [`BufferSlice::get_mapped_range_mut`] to obtain a view of the slice's +/// contents, which dereferences to a `&[u8]` or `&mut [u8]`. +/// +/// You can also pass buffer slices to methods like +/// [`RenderPass::set_vertex_buffer`] and [`RenderPass::set_index_buffer`] to +/// indicate which data a draw call should consume. +/// +/// The `BufferSlice` type is unique to the Rust API of `wgpu`. In the WebGPU +/// specification, an offset and size are specified as arguments to each call +/// working with the [`Buffer`], instead. +/// +/// [map]: Buffer#mapping-buffers #[derive(Copy, Clone, Debug)] pub struct BufferSlice<'a> { buffer: &'a Buffer, @@ -380,9 +548,19 @@ static_assertions::assert_impl_all!(SurfaceConfiguration: Send, Sync); /// serves a similar role. pub struct Surface<'window> { context: Arc, - _surface: Option>, + + /// Optionally, keep the source of the handle used for the surface alive. + /// + /// This is useful for platforms where the surface is created from a window and the surface + /// would become invalid when the window is dropped. + _handle_source: Option>, + + /// Wgpu-core surface id. id: ObjectId, - data: Box, + + /// Additional surface data returned by [`DynContext::instance_create_surface`]. + surface_data: Box, + // Stores the latest `SurfaceConfiguration` that was set using `Surface::configure`. // It is required to set the attributes of the `SurfaceTexture` in the // `Surface::get_current_texture` method. @@ -399,15 +577,15 @@ impl<'window> fmt::Debug for Surface<'window> { f.debug_struct("Surface") .field("context", &self.context) .field( - "_surface", - &if self._surface.is_some() { + "_handle_source", + &if self._handle_source.is_some() { "Some" } else { "None" }, ) .field("id", &self.id) - .field("data", &self.data) + .field("data", &self.surface_data) .field("config", &self.config) .finish() } @@ -419,7 +597,8 @@ static_assertions::assert_impl_all!(Surface<'_>: Send, Sync); impl Drop for Surface<'_> { fn drop(&mut self) { if !thread::panicking() { - self.context.surface_drop(&self.id, self.data.as_ref()) + self.context + .surface_drop(&self.id, self.surface_data.as_ref()) } } } @@ -647,6 +826,139 @@ impl Drop for ShaderModule { } } +impl ShaderModule { + /// Get the compilation info for the shader module. + pub fn get_compilation_info(&self) -> impl Future + WasmNotSend { + self.context + .shader_get_compilation_info(&self.id, self.data.as_ref()) + } +} + +/// Compilation information for a shader module. +/// +/// Corresponds to [WebGPU `GPUCompilationInfo`](https://gpuweb.github.io/gpuweb/#gpucompilationinfo). +/// The source locations use bytes, and index a UTF-8 encoded string. +#[derive(Debug, Clone)] +pub struct CompilationInfo { + /// The messages from the shader compilation process. + pub messages: Vec, +} + +/// A single message from the shader compilation process. +/// +/// Roughly corresponds to [`GPUCompilationMessage`](https://www.w3.org/TR/webgpu/#gpucompilationmessage), +/// except that the location uses UTF-8 for all positions. 
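+/// For example (values illustrative): a message covering bytes 15..20 of a
+/// shader whose second line starts at byte 10 is reported with
+/// `line_number: 2`, `line_position: 6`, `offset: 15`, and `length: 5`.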
+#[derive(Debug, Clone)] +pub struct CompilationMessage { + /// The text of the message. + pub message: String, + /// The type of the message. + pub message_type: CompilationMessageType, + /// Where in the source code the message points at. + pub location: Option, +} + +/// The type of a compilation message. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CompilationMessageType { + /// An error message. + Error, + /// A warning message. + Warning, + /// An informational message. + Info, +} + +/// A human-readable representation for a span, tailored for text source. +/// +/// Roughly corresponds to the positional members of [`GPUCompilationMessage`][gcm] from +/// the WebGPU specification, except +/// - `offset` and `length` are in bytes (UTF-8 code units), instead of UTF-16 code units. +/// - `line_position` is in bytes (UTF-8 code units), and is usually not directly intended for humans. +/// +/// [gcm]: https://www.w3.org/TR/webgpu/#gpucompilationmessage +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct SourceLocation { + /// 1-based line number. + pub line_number: u32, + /// 1-based column in code units (in bytes) of the start of the span. + /// Remember to convert accordingly when displaying to the user. + pub line_position: u32, + /// 0-based Offset in code units (in bytes) of the start of the span. + pub offset: u32, + /// Length in code units (in bytes) of the span. + pub length: u32, +} + +#[cfg(all(feature = "wgsl", wgpu_core))] +impl From> for CompilationInfo { + fn from(value: naga::error::ShaderError) -> Self { + CompilationInfo { + messages: vec![CompilationMessage { + message: value.to_string(), + message_type: CompilationMessageType::Error, + location: value.inner.location(&value.source).map(Into::into), + }], + } + } +} +#[cfg(feature = "glsl")] +impl From> for CompilationInfo { + fn from(value: naga::error::ShaderError) -> Self { + let messages = value + .inner + .errors + .into_iter() + .map(|err| CompilationMessage { + message: err.to_string(), + message_type: CompilationMessageType::Error, + location: err.location(&value.source).map(Into::into), + }) + .collect(); + CompilationInfo { messages } + } +} + +#[cfg(feature = "spirv")] +impl From> for CompilationInfo { + fn from(value: naga::error::ShaderError) -> Self { + CompilationInfo { + messages: vec![CompilationMessage { + message: value.to_string(), + message_type: CompilationMessageType::Error, + location: None, + }], + } + } +} + +#[cfg(any(wgpu_core, naga))] +impl From>> + for CompilationInfo +{ + fn from(value: naga::error::ShaderError>) -> Self { + CompilationInfo { + messages: vec![CompilationMessage { + message: value.to_string(), + message_type: CompilationMessageType::Error, + location: value.inner.location(&value.source).map(Into::into), + }], + } + } +} + +#[cfg(any(wgpu_core, naga))] +impl From for SourceLocation { + fn from(value: naga::SourceLocation) -> Self { + SourceLocation { + length: value.length, + offset: value.offset, + line_number: value.line_number, + line_position: value.line_position, + } + } +} + /// Source of a shader module. /// /// The source will be parsed and validated. @@ -1484,14 +1796,10 @@ pub struct VertexState<'a> { /// The name of the entry point in the compiled shader. There must be a function with this name /// in the shader. pub entry_point: &'a str, - /// Specifies the values of pipeline-overridable constants in the shader module. 
- /// - /// If an `@id` attribute was specified on the declaration, - /// the key must be the pipeline constant ID as a decimal ASCII number; if not, - /// the key must be the constant's identifier name. + /// Advanced options for when this pipeline is compiled /// - /// The value may represent any of WGSL's concrete scalar types. - pub constants: &'a HashMap, + /// This implements `Default`, and for most users can be set to `Default::default()` + pub compilation_options: PipelineCompilationOptions<'a>, /// The format of any vertex buffers used with this pipeline. pub buffers: &'a [VertexBufferLayout<'a>], } @@ -1511,14 +1819,10 @@ pub struct FragmentState<'a> { /// The name of the entry point in the compiled shader. There must be a function with this name /// in the shader. pub entry_point: &'a str, - /// Specifies the values of pipeline-overridable constants in the shader module. - /// - /// If an `@id` attribute was specified on the declaration, - /// the key must be the pipeline constant ID as a decimal ASCII number; if not, - /// the key must be the constant's identifier name. + /// Advanced options for when this pipeline is compiled /// - /// The value may represent any of WGSL's concrete scalar types. - pub constants: &'a HashMap, + /// This implements `Default`, and for most users can be set to `Default::default()` + pub compilation_options: PipelineCompilationOptions<'a>, /// The color state of the render targets. pub targets: &'a [Option], } @@ -1591,6 +1895,41 @@ pub struct ComputePassDescriptor<'a> { #[cfg(send_sync)] static_assertions::assert_impl_all!(ComputePassDescriptor<'_>: Send, Sync); +#[derive(Clone, Debug)] +/// Advanced options for use when a pipeline is compiled +/// +/// This implements `Default`, and for most users can be set to `Default::default()` +pub struct PipelineCompilationOptions<'a> { + /// Specifies the values of pipeline-overridable constants in the shader module. + /// + /// If an `@id` attribute was specified on the declaration, + /// the key must be the pipeline constant ID as a decimal ASCII number; if not, + /// the key must be the constant's identifier name. + /// + /// The value may represent any of WGSL's concrete scalar types. + pub constants: &'a HashMap, + /// Whether workgroup scoped memory will be initialized with zero values for this stage. + /// + /// This is required by the WebGPU spec, but may have overhead which can be avoided + /// for cross-platform applications + pub zero_initialize_workgroup_memory: bool, +} + +impl<'a> Default for PipelineCompilationOptions<'a> { + fn default() -> Self { + // HashMap doesn't have a const constructor, due to the use of RandomState + // This does introduce some synchronisation costs, but these should be minor, + // and might be cheaper than the alternative of getting new random state + static DEFAULT_CONSTANTS: std::sync::OnceLock> = + std::sync::OnceLock::new(); + let constants = DEFAULT_CONSTANTS.get_or_init(Default::default); + Self { + constants, + zero_initialize_workgroup_memory: true, + } + } +} + /// Describes a compute pipeline. /// /// For use with [`Device::create_compute_pipeline`]. @@ -1608,14 +1947,10 @@ pub struct ComputePipelineDescriptor<'a> { /// The name of the entry point in the compiled shader. There must be a function with this name /// and no return value in the shader. pub entry_point: &'a str, - /// Specifies the values of pipeline-overridable constants in the shader module. 
-    ///
-    /// If an `@id` attribute was specified on the declaration,
-    /// the key must be the pipeline constant ID as a decimal ASCII number; if not,
-    /// the key must be the constant's identifier name.
+    /// Advanced options for when this pipeline is compiled.
     ///
-    /// The value may represent any of WGSL's concrete scalar types.
-    pub constants: &'a HashMap<String, f64>,
+    /// This implements `Default`, and for most users can be set to `Default::default()`.
+    pub compilation_options: PipelineCompilationOptions<'a>,
 }
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(ComputePipelineDescriptor<'_>: Send, Sync);
@@ -1983,6 +2318,8 @@ impl Instance {
     /// Creates a new surface targeting a given window/canvas/surface/etc..
     ///
+    /// Internally, this creates surfaces for all backends that are enabled for this instance.
+    ///
     /// See [`SurfaceTarget`] for what targets are supported.
     /// See [`Instance::create_surface_unsafe`] for surface creation with unsafe target variants.
     ///
@@ -1993,7 +2330,7 @@ impl Instance {
         target: impl Into<SurfaceTarget<'window>>,
     ) -> Result<Surface<'window>, CreateSurfaceError> {
         // Handle origin (i.e. window) to optionally take ownership of to make the surface outlast the window.
-        let handle_origin;
+        let handle_source;
 
         let target = target.into();
         let mut surface = match target {
@@ -2003,14 +2340,14 @@
                         inner: CreateSurfaceErrorKind::RawHandle(e),
                     })?,
                 );
-                handle_origin = Some(window);
+                handle_source = Some(window);
 
                 surface
             }?,
 
             #[cfg(any(webgpu, webgl))]
             SurfaceTarget::Canvas(canvas) => {
-                handle_origin = None;
+                handle_source = None;
 
                 let value: &wasm_bindgen::JsValue = &canvas;
                 let obj = std::ptr::NonNull::from(value).cast();
@@ -2029,7 +2366,7 @@ impl Instance {
 
             #[cfg(any(webgpu, webgl))]
             SurfaceTarget::OffscreenCanvas(canvas) => {
-                handle_origin = None;
+                handle_source = None;
 
                 let value: &wasm_bindgen::JsValue = &canvas;
                 let obj = std::ptr::NonNull::from(value).cast();
@@ -2048,13 +2385,15 @@
             }
         };
 
-        surface._surface = handle_origin;
+        surface._handle_source = handle_source;
 
         Ok(surface)
     }
 
     /// Creates a new surface targeting a given window/canvas/surface/etc. using an unsafe target.
     ///
+    /// Internally, this creates surfaces for all backends that are enabled for this instance.
+    ///
     /// See [`SurfaceTargetUnsafe`] for what targets are supported.
     /// See [`Instance::create_surface`] for surface creation with safe target variants.
     ///
@@ -2069,9 +2408,9 @@ impl Instance {
 
         Ok(Surface {
             context: Arc::clone(&self.context),
-            _surface: None,
+            _handle_source: None,
             id,
-            data,
+            surface_data: data,
             config: Mutex::new(None),
         })
     }
@@ -2245,7 +2584,7 @@ impl Adapter {
             &self.id,
             self.data.as_ref(),
             &surface.id,
-            surface.data.as_ref(),
+            surface.surface_data.as_ref(),
         )
     }
 
@@ -2933,6 +3272,18 @@ fn range_to_offset_size<S: RangeBounds<BufferAddress>>(
 }
 
 /// Read only view into a mapped buffer.
+///
+/// To get a `BufferView`, first [map] the buffer, and then
+/// call `buffer.slice(range).get_mapped_range()`.
+///
+/// `BufferView` dereferences to `&[u8]`, so you can use all the usual Rust
+/// slice methods to access the buffer's contents. It also implements
+/// `AsRef<[u8]>`, if that's more convenient.
+///
+/// If you try to create overlapping views of a buffer, mutable or
+/// otherwise, `get_mapped_range` will panic.
+///
+/// [map]: Buffer#mapping-buffers
 #[derive(Debug)]
 pub struct BufferView<'a> {
     slice: BufferSlice<'a>,
@@ -2941,8 +3292,20 @@
 
 /// Write only view into mapped buffer.
/// +/// To get a `BufferViewMut`, first [map] the buffer, and then +/// call `buffer.slice(range).get_mapped_range_mut()`. +/// +/// `BufferViewMut` dereferences to `&mut [u8]`, so you can use all the usual +/// Rust slice methods to access the buffer's contents. It also implements +/// `AsMut<[u8]>`, if that's more convenient. +/// /// It is possible to read the buffer using this view, but doing so is not /// recommended, as it is likely to be slow. +/// +/// If you try to create overlapping views of a buffer, mutable or +/// otherwise, `get_mapped_range_mut` will panic. +/// +/// [map]: Buffer#mapping-buffers #[derive(Debug)] pub struct BufferViewMut<'a> { slice: BufferSlice<'a>, @@ -4644,11 +5007,24 @@ impl<'a> Drop for QueueWriteBufferView<'a> { impl Queue { /// Schedule a data write into `buffer` starting at `offset`. /// - /// This method is intended to have low performance costs. - /// As such, the write is not immediately submitted, and instead enqueued - /// internally to happen at the start of the next `submit()` call. - /// /// This method fails if `data` overruns the size of `buffer` starting at `offset`. + /// + /// This does *not* submit the transfer to the GPU immediately. Calls to + /// `write_buffer` begin execution only on the next call to + /// [`Queue::submit`]. To get a set of scheduled transfers started + /// immediately, it's fine to call `submit` with no command buffers at all: + /// + /// ```no_run + /// # let queue: wgpu::Queue = todo!(); + /// queue.submit([]); + /// ``` + /// + /// However, `data` will be immediately copied into staging memory, so the + /// caller may discard it any time after this call completes. + /// + /// If possible, consider using [`Queue::write_buffer_with`] instead. That + /// method avoids an intermediate copy and is often able to transfer data + /// more efficiently than this one. pub fn write_buffer(&self, buffer: &Buffer, offset: BufferAddress, data: &[u8]) { DynContext::queue_write_buffer( &*self.context, @@ -4661,14 +5037,32 @@ impl Queue { ) } - /// Schedule a data write into `buffer` starting at `offset` via the returned - /// [`QueueWriteBufferView`]. + /// Write to a buffer via a directly mapped staging buffer. /// - /// Reading from this buffer is slow and will not yield the actual contents of the buffer. + /// Return a [`QueueWriteBufferView`] which, when dropped, schedules a copy + /// of its contents into `buffer` at `offset`. The returned view + /// dereferences to a `size`-byte long `&mut [u8]`, in which you should + /// store the data you would like written to `buffer`. /// - /// This method is intended to have low performance costs. - /// As such, the write is not immediately submitted, and instead enqueued - /// internally to happen at the start of the next `submit()` call. + /// This method may perform transfers faster than [`Queue::write_buffer`], + /// because the returned [`QueueWriteBufferView`] is actually the staging + /// buffer for the write, mapped into the caller's address space. Writing + /// your data directly into this staging buffer avoids the temporary + /// CPU-side buffer needed by `write_buffer`. + /// + /// Reading from the returned view is slow, and will not yield the current + /// contents of `buffer`. + /// + /// Note that dropping the [`QueueWriteBufferView`] does *not* submit the + /// transfer to the GPU immediately. The transfer begins only on the next + /// call to [`Queue::submit`] after the view is dropped. 
To get a set of
+    /// scheduled transfers started immediately, it's fine to call `submit` with
+    /// no command buffers at all:
+    ///
+    /// ```no_run
+    /// # let queue: wgpu::Queue = todo!();
+    /// queue.submit([]);
+    /// ```
     ///
     /// This method fails if `size` is greater than the size of `buffer` starting at `offset`.
     #[must_use]
@@ -4712,13 +5106,20 @@ impl Queue {
     /// texture (coordinate offset, mip level) that will be overwritten.
     /// * `size` is the size, in texels, of the region to be written.
     ///
-    /// This method is intended to have low performance costs.
-    /// As such, the write is not immediately submitted, and instead enqueued
-    /// internally to happen at the start of the next `submit()` call.
-    /// However, `data` will be immediately copied into staging memory; so the caller may
-    /// discard it any time after this call completes.
-    ///
     /// This method fails if `size` overruns the size of `texture`, or if `data` is too short.
+    ///
+    /// This does *not* submit the transfer to the GPU immediately. Calls to
+    /// `write_texture` begin execution only on the next call to
+    /// [`Queue::submit`]. To get a set of scheduled transfers started
+    /// immediately, it's fine to call `submit` with no command buffers at all:
+    ///
+    /// ```no_run
+    /// # let queue: wgpu::Queue = todo!();
+    /// queue.submit([]);
+    /// ```
+    ///
+    /// However, `data` will be immediately copied into staging memory, so the
+    /// caller may discard it any time after this call completes.
     pub fn write_texture(
         &self,
         texture: ImageCopyTexture<'_>,
@@ -4849,7 +5250,7 @@ impl Surface<'_> {
         DynContext::surface_get_capabilities(
             &*self.context,
             &self.id,
-            self.data.as_ref(),
+            self.surface_data.as_ref(),
             &adapter.id,
             adapter.data.as_ref(),
         )
@@ -4888,7 +5289,7 @@ impl Surface<'_> {
         DynContext::surface_configure(
             &*self.context,
             &self.id,
-            self.data.as_ref(),
+            self.surface_data.as_ref(),
             &device.id,
             device.data.as_ref(),
             config,
@@ -4907,8 +5308,11 @@ impl Surface<'_> {
     /// If a SurfaceTexture referencing this surface is alive when the swapchain is recreated,
     /// recreating the swapchain will panic.
     pub fn get_current_texture(&self) -> Result<SurfaceTexture, SurfaceError> {
-        let (texture_id, texture_data, status, detail) =
-            DynContext::surface_get_current_texture(&*self.context, &self.id, self.data.as_ref());
+        let (texture_id, texture_data, status, detail) = DynContext::surface_get_current_texture(
+            &*self.context,
+            &self.id,
+            self.surface_data.as_ref(),
+        );
 
         let suboptimal = match status {
             SurfaceStatus::Good => false,
@@ -4971,7 +5375,7 @@ impl Surface<'_> {
             .downcast_ref::<crate::backend::ContextWgpuCore>()
            .map(|ctx| unsafe {
                ctx.surface_as_hal::<A, F, R>(
-                    self.data.downcast_ref().unwrap(),
+                    self.surface_data.downcast_ref().unwrap(),
                     hal_surface_callback,
                 )
             })
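
A minimal sketch of consuming the `CompilationMessage`/`SourceLocation` types above. It assumes an async context, a `device: wgpu::Device`, and the WGSL text in `source: String` that was passed to `create_shader_module`; all bindings and labels are illustrative. Because `offset` and `length` are UTF-8 byte indices, they can slice the original source string directly, with no UTF-16 code-unit conversion:

```rust
let module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
    label: Some("example"),
    source: wgpu::ShaderSource::Wgsl(source.as_str().into()),
});
let info = module.get_compilation_info().await;
for message in &info.messages {
    if let Some(loc) = message.location {
        // `offset`/`length` index the same `source` string the module was built
        // from, so this slice is exactly the span the message points at.
        let span = &source[loc.offset as usize..(loc.offset + loc.length) as usize];
        eprintln!(
            "{:?} at line {}: {} (`{}`)",
            message.message_type, loc.line_number, message.message, span
        );
    }
}
```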
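A sketch of passing `PipelineCompilationOptions` explicitly when creating a compute pipeline, assuming a `device` and a compute shader `module` that declares an overridable `tuning` constant (the names and values are illustrative, not part of this diff):

```rust
use std::collections::HashMap;

let constants = HashMap::from([("tuning".to_owned(), 2.0_f64)]);
let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
    label: Some("example compute pipeline"),
    layout: None, // infer the pipeline layout from the shader
    module: &module,
    entry_point: "main",
    compilation_options: wgpu::PipelineCompilationOptions {
        constants: &constants,
        // Skipping zero-initialization can save work on some backends, but is
        // only sound if the shader never reads workgroup memory before writing
        // it; otherwise keep the spec-mandated default of `true`.
        zero_initialize_workgroup_memory: false,
    },
});
```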
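And a sketch of the `write_buffer_with` flow described in the `Queue` docs above, assuming a `queue` and a `buffer` created with `wgpu::BufferUsages::COPY_DST`; the payload is illustrative:

```rust
let payload = [1u8, 2, 3, 4];
let size = std::num::NonZeroU64::new(payload.len() as u64).unwrap();
if let Some(mut view) = queue.write_buffer_with(&buffer, 0, size) {
    // The view is the mapped staging buffer itself: write into it, don't read.
    view.copy_from_slice(&payload);
} // dropping the view schedules the copy into `buffer`
queue.submit([]); // an empty submit starts all scheduled transfers immediately
```

Writing into the view directly is what saves the intermediate CPU-side copy that plain `write_buffer` would make.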