diff --git a/deno_webgpu/pipeline.rs b/deno_webgpu/pipeline.rs index 0ab3c40262..34ca29c0ea 100644 --- a/deno_webgpu/pipeline.rs +++ b/deno_webgpu/pipeline.rs @@ -112,6 +112,7 @@ pub fn op_webgpu_create_compute_pipeline( entry_point: compute.entry_point.map(Cow::from), constants: Cow::Owned(compute.constants.unwrap_or_default()), zero_initialize_workgroup_memory: true, + enable_loop_ub_checking: true, }, cache: None, }; @@ -344,6 +345,7 @@ pub fn op_webgpu_create_render_pipeline( constants: Cow::Owned(fragment.constants.unwrap_or_default()), // Required to be true for WebGPU zero_initialize_workgroup_memory: true, + enable_loop_ub_checking: true, }, targets: Cow::Owned(fragment.targets), }) @@ -369,6 +371,7 @@ pub fn op_webgpu_create_render_pipeline( constants: Cow::Owned(args.vertex.constants.unwrap_or_default()), // Required to be true for WebGPU zero_initialize_workgroup_memory: true, + enable_loop_ub_checking: true, }, buffers: Cow::Owned(vertex_buffers), }, diff --git a/naga/src/back/msl/mod.rs b/naga/src/back/msl/mod.rs index fbeaa4cc8d..a070b19943 100644 --- a/naga/src/back/msl/mod.rs +++ b/naga/src/back/msl/mod.rs @@ -211,6 +211,8 @@ pub struct Options { pub bounds_check_policies: index::BoundsCheckPolicies, /// Should workgroup variables be zero initialized (by polyfilling)? pub zero_initialize_workgroup_memory: bool, + /// Specifies whether shader loops are forcibly prevented from being optimized out. + pub enable_loop_ub_checking: bool, } impl Default for Options { @@ -223,6 +225,7 @@ impl Default for Options { fake_missing_bindings: true, bounds_check_policies: index::BoundsCheckPolicies::default(), zero_initialize_workgroup_memory: true, + enable_loop_ub_checking: true, } } } diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index 19b0263b30..65d20a89a1 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -600,6 +600,9 @@ struct ExpressionContext<'a> { /// accesses. 
These may need to be cached in temporary variables. See /// `index::find_checked_indexes` for details. guarded_indices: HandleSet, + /// Whether shader loops are forcibly prevented from being optimized out. Optimizing such loops + /// away may lead to UB on Metal; the check itself may have significant overhead. + pub enable_loop_ub_checking: bool, } impl<'a> ExpressionContext<'a> { @@ -3028,8 +3031,7 @@ impl Writer { ref continuing, break_if, } => { - // We only emit the macro if the index policy is not checked. - if context.expression.policies.index != index::BoundsCheckPolicy::Unchecked { + if context.expression.enable_loop_ub_checking { self.emit_loop_reachable_macro()?; } if !continuing.is_empty() || break_if.is_some() { @@ -4868,6 +4870,7 @@ template module, mod_info, pipeline_options, + enable_loop_ub_checking: options.enable_loop_ub_checking, }, result_struct: None, }; @@ -5768,6 +5771,7 @@ template module, mod_info, pipeline_options, + enable_loop_ub_checking: options.enable_loop_ub_checking, }, result_struct: Some(&stage_out_name), }; diff --git a/naga/src/proc/index.rs b/naga/src/proc/index.rs index f6a78db94c..d0a7f73e2a 100644 --- a/naga/src/proc/index.rs +++ b/naga/src/proc/index.rs @@ -67,8 +67,6 @@ pub enum BoundsCheckPolicy { pub struct BoundsCheckPolicies { /// How should the generated code handle array, vector, or matrix indices /// that are out of range? - /// - /// On Metal, this policy also dictates how loops are checked for UB. 
#[cfg_attr(feature = "deserialize", serde(default))] pub index: BoundsCheckPolicy, diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index b6ad2354c3..d87bd1bc8d 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -1266,6 +1266,7 @@ impl Global { .vertex .stage .zero_initialize_workgroup_memory, + enable_loop_ub_checking: desc.vertex.stage.enable_loop_ub_checking, }; ResolvedVertexState { stage, @@ -1294,6 +1295,7 @@ impl Global { .vertex .stage .zero_initialize_workgroup_memory, + enable_loop_ub_checking: desc.vertex.stage.enable_loop_ub_checking, }; Some(ResolvedFragmentState { stage, @@ -1492,6 +1494,7 @@ impl Global { entry_point: desc.stage.entry_point.clone(), constants: desc.stage.constants.clone(), zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory, + enable_loop_ub_checking: desc.stage.enable_loop_ub_checking, }; let desc = ResolvedComputePipelineDescriptor { diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index afbf73bc03..ac1ddaf4d3 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -2829,6 +2829,7 @@ impl Device { entry_point: final_entry_point_name.as_ref(), constants: desc.stage.constants.as_ref(), zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory, + enable_loop_ub_checking: desc.stage.enable_loop_ub_checking, }, cache: cache.as_ref().map(|it| it.raw()), }; @@ -3250,6 +3251,7 @@ impl Device { entry_point: &vertex_entry_point_name, constants: stage_desc.constants.as_ref(), zero_initialize_workgroup_memory: stage_desc.zero_initialize_workgroup_memory, + enable_loop_ub_checking: stage_desc.enable_loop_ub_checking, } }; @@ -3306,6 +3308,7 @@ impl Device { zero_initialize_workgroup_memory: fragment_state .stage .zero_initialize_workgroup_memory, + enable_loop_ub_checking: fragment_state.stage.enable_loop_ub_checking, }) } None => None, diff --git 
a/wgpu-core/src/indirect_validation.rs b/wgpu-core/src/indirect_validation.rs index 35a95f8bbf..5976ea7f80 100644 --- a/wgpu-core/src/indirect_validation.rs +++ b/wgpu-core/src/indirect_validation.rs @@ -204,6 +204,7 @@ impl IndirectValidation { entry_point: "main", constants: &Default::default(), zero_initialize_workgroup_memory: false, + enable_loop_ub_checking: true, }, cache: None, }; diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs index 01ceabf669..5d2f3a8434 100644 --- a/wgpu-core/src/pipeline.rs +++ b/wgpu-core/src/pipeline.rs @@ -145,6 +145,9 @@ pub struct ProgrammableStageDescriptor<'a> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, + /// Whether shader loops are forcibly prevented from being optimized out. Optimizing such loops + /// away may lead to UB on Metal; the check itself may have significant overhead. + pub enable_loop_ub_checking: bool, } /// Describes a programmable pipeline stage. @@ -172,6 +175,9 @@ pub struct ResolvedProgrammableStageDescriptor<'a> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, + /// Whether shader loops are forcibly prevented from being optimized out. Optimizing such loops + /// away may lead to UB on Metal; the check itself may have significant overhead. + pub enable_loop_ub_checking: bool, } /// Number of implicit bind groups derived at pipeline creation. 
diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index 8ab7f1cb47..010c74aa8e 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -259,6 +259,7 @@ impl Example { entry_point: "vs_main", constants: &constants, zero_initialize_workgroup_memory: true, + enable_loop_ub_checking: true, }, vertex_buffers: &[], fragment_stage: Some(hal::ProgrammableStage { @@ -266,6 +267,7 @@ impl Example { entry_point: "fs_main", constants: &constants, zero_initialize_workgroup_memory: true, + enable_loop_ub_checking: true, }), primitive: wgt::PrimitiveState { topology: wgt::PrimitiveTopology::TriangleStrip, diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 4eedfe7817..de4e5a9b41 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -400,6 +400,7 @@ impl Example { entry_point: "main", constants: &Default::default(), zero_initialize_workgroup_memory: true, + enable_loop_ub_checking: true, }, cache: None, }) diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs index 5509d7cce6..336ca3de5a 100644 --- a/wgpu-hal/src/dynamic/mod.rs +++ b/wgpu-hal/src/dynamic/mod.rs @@ -146,6 +146,7 @@ impl<'a> ProgrammableStage<'a, dyn DynShaderModule> { entry_point: self.entry_point, constants: self.constants, zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory, + enable_loop_ub_checking: self.enable_loop_ub_checking, } } } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 0cddb69976..1daba70194 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -2138,6 +2138,9 @@ pub struct ProgrammableStage<'a, M: DynShaderModule + ?Sized> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, + /// Specifies whether shader loops are forcibly prevented from being 
optimized out, which may lead + /// to UB on Metal. Loop checking may have significant overhead. + pub enable_loop_ub_checking: bool, } impl Clone for ProgrammableStage<'_, M> { @@ -2147,6 +2150,7 @@ impl Clone for ProgrammableStage<'_, M> { entry_point: self.entry_point, constants: self.constants, zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory, + enable_loop_ub_checking: self.enable_loop_ub_checking, } } } diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 4cc8ef0eb0..fc67044455 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -150,6 +150,7 @@ impl super::Device { binding_array: naga::proc::BoundsCheckPolicy::Unchecked, }, zero_initialize_workgroup_memory: stage.zero_initialize_workgroup_memory, + enable_loop_ub_checking: stage.enable_loop_ub_checking, }; let pipeline_options = naga::back::msl::PipelineOptions { diff --git a/wgpu/src/api/common_pipeline.rs b/wgpu/src/api/common_pipeline.rs index 697507bca2..3187ecb5a4 100644 --- a/wgpu/src/api/common_pipeline.rs +++ b/wgpu/src/api/common_pipeline.rs @@ -20,6 +20,9 @@ pub struct PipelineCompilationOptions<'a> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, + /// Specifies whether shader loops are forcibly prevented from being optimized out, which may lead + /// to UB on Metal. Loop checking may have significant overhead. 
+ pub enable_loop_ub_checking: bool, } impl<'a> Default for PipelineCompilationOptions<'a> { @@ -33,6 +36,7 @@ impl<'a> Default for PipelineCompilationOptions<'a> { Self { constants, zero_initialize_workgroup_memory: true, + enable_loop_ub_checking: true, } } } diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index befec4bd78..174614f2bf 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -1092,6 +1092,10 @@ impl crate::Context for ContextWgpuCore { .vertex .compilation_options .zero_initialize_workgroup_memory, + enable_loop_ub_checking: desc + .vertex + .compilation_options + .enable_loop_ub_checking, }, buffers: Borrowed(&vertex_buffers), }, @@ -1106,6 +1110,7 @@ impl crate::Context for ContextWgpuCore { zero_initialize_workgroup_memory: frag .compilation_options .zero_initialize_workgroup_memory, + enable_loop_ub_checking: frag.compilation_options.enable_loop_ub_checking, }, targets: Borrowed(frag.targets), }), @@ -1150,6 +1155,7 @@ impl crate::Context for ContextWgpuCore { zero_initialize_workgroup_memory: desc .compilation_options .zero_initialize_workgroup_memory, + enable_loop_ub_checking: desc.compilation_options.enable_loop_ub_checking, }, cache: desc.cache.map(downcast_pipeline_cache).copied(), };