diff --git a/deno_webgpu/pipeline.rs b/deno_webgpu/pipeline.rs
index 0ab3c40262..34ca29c0ea 100644
--- a/deno_webgpu/pipeline.rs
+++ b/deno_webgpu/pipeline.rs
@@ -112,6 +112,7 @@ pub fn op_webgpu_create_compute_pipeline(
             entry_point: compute.entry_point.map(Cow::from),
             constants: Cow::Owned(compute.constants.unwrap_or_default()),
             zero_initialize_workgroup_memory: true,
+            enable_loop_ub_checking: true,
         },
         cache: None,
     };
@@ -344,6 +345,7 @@ pub fn op_webgpu_create_render_pipeline(
                 constants: Cow::Owned(fragment.constants.unwrap_or_default()),
                 // Required to be true for WebGPU
                 zero_initialize_workgroup_memory: true,
+                enable_loop_ub_checking: true,
             },
             targets: Cow::Owned(fragment.targets),
         })
@@ -369,6 +371,7 @@ pub fn op_webgpu_create_render_pipeline(
                 constants: Cow::Owned(args.vertex.constants.unwrap_or_default()),
                 // Required to be true for WebGPU
                 zero_initialize_workgroup_memory: true,
+                enable_loop_ub_checking: true,
             },
             buffers: Cow::Owned(vertex_buffers),
         },
diff --git a/naga/src/back/msl/mod.rs b/naga/src/back/msl/mod.rs
index fbeaa4cc8d..a070b19943 100644
--- a/naga/src/back/msl/mod.rs
+++ b/naga/src/back/msl/mod.rs
@@ -211,6 +211,8 @@ pub struct Options {
     pub bounds_check_policies: index::BoundsCheckPolicies,
     /// Should workgroup variables be zero initialized (by polyfilling)?
     pub zero_initialize_workgroup_memory: bool,
+    /// Specifies whether shader loops are forcibly prevented from being optimized out.
+    pub enable_loop_ub_checking: bool,
 }
 
 impl Default for Options {
@@ -223,6 +225,7 @@ impl Default for Options {
             fake_missing_bindings: true,
             bounds_check_policies: index::BoundsCheckPolicies::default(),
             zero_initialize_workgroup_memory: true,
+            enable_loop_ub_checking: true,
         }
     }
 }
diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs
index 19b0263b30..65d20a89a1 100644
--- a/naga/src/back/msl/writer.rs
+++ b/naga/src/back/msl/writer.rs
@@ -600,6 +600,9 @@ struct ExpressionContext<'a> {
     /// accesses. These may need to be cached in temporary variables. See
     /// `index::find_checked_indexes` for details.
     guarded_indices: HandleSet<crate::Expression>,
+    /// Whether to forcibly prevent shader loops from being optimized out; such optimization may
+    /// lead to UB on Metal. Enabling this check may have significant overhead.
+    pub enable_loop_ub_checking: bool,
 }
 
 impl<'a> ExpressionContext<'a> {
@@ -3028,8 +3031,7 @@ impl<W: Write> Writer<W> {
                     ref continuing,
                     break_if,
                 } => {
-                    // We only emit the macro if the index policy is not checked.
-                    if context.expression.policies.index != index::BoundsCheckPolicy::Unchecked {
+                    if context.expression.enable_loop_ub_checking {
                         self.emit_loop_reachable_macro()?;
                     }
                     if !continuing.is_empty() || break_if.is_some() {
@@ -4868,6 +4870,7 @@ template <typename A>
                     module,
                     mod_info,
                     pipeline_options,
+                    enable_loop_ub_checking: options.enable_loop_ub_checking,
                 },
                 result_struct: None,
             };
@@ -5768,6 +5771,7 @@ template <typename A>
                     module,
                     mod_info,
                     pipeline_options,
+                    enable_loop_ub_checking: options.enable_loop_ub_checking,
                 },
                 result_struct: Some(&stage_out_name),
             };
diff --git a/naga/src/proc/index.rs b/naga/src/proc/index.rs
index f6a78db94c..d0a7f73e2a 100644
--- a/naga/src/proc/index.rs
+++ b/naga/src/proc/index.rs
@@ -67,8 +67,6 @@ pub enum BoundsCheckPolicy {
 pub struct BoundsCheckPolicies {
     /// How should the generated code handle array, vector, or matrix indices
     /// that are out of range?
-    ///
-    /// On Metal, this policy also dictates how loops are checked for UB.
     #[cfg_attr(feature = "deserialize", serde(default))]
     pub index: BoundsCheckPolicy,
 
diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index b6ad2354c3..d87bd1bc8d 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -1266,6 +1266,7 @@ impl Global {
                         .vertex
                         .stage
                         .zero_initialize_workgroup_memory,
+                    enable_loop_ub_checking: desc.vertex.stage.enable_loop_ub_checking,
                 };
                 ResolvedVertexState {
                     stage,
@@ -1294,6 +1295,7 @@ impl Global {
                         .vertex
                         .stage
                         .zero_initialize_workgroup_memory,
+                    enable_loop_ub_checking: desc.vertex.stage.enable_loop_ub_checking,
                 };
                 Some(ResolvedFragmentState {
                     stage,
@@ -1492,6 +1494,7 @@ impl Global {
                 entry_point: desc.stage.entry_point.clone(),
                 constants: desc.stage.constants.clone(),
                 zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory,
+                enable_loop_ub_checking: desc.stage.enable_loop_ub_checking,
             };
 
             let desc = ResolvedComputePipelineDescriptor {
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index afbf73bc03..ac1ddaf4d3 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -2829,6 +2829,7 @@ impl Device {
                 entry_point: final_entry_point_name.as_ref(),
                 constants: desc.stage.constants.as_ref(),
                 zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory,
+                enable_loop_ub_checking: desc.stage.enable_loop_ub_checking,
             },
             cache: cache.as_ref().map(|it| it.raw()),
         };
@@ -3250,6 +3251,7 @@ impl Device {
                 entry_point: &vertex_entry_point_name,
                 constants: stage_desc.constants.as_ref(),
                 zero_initialize_workgroup_memory: stage_desc.zero_initialize_workgroup_memory,
+                enable_loop_ub_checking: stage_desc.enable_loop_ub_checking,
             }
         };
 
@@ -3306,6 +3308,7 @@ impl Device {
                     zero_initialize_workgroup_memory: fragment_state
                         .stage
                         .zero_initialize_workgroup_memory,
+                    enable_loop_ub_checking: fragment_state.stage.enable_loop_ub_checking,
                 })
             }
             None => None,
diff --git a/wgpu-core/src/indirect_validation.rs b/wgpu-core/src/indirect_validation.rs
index 35a95f8bbf..5976ea7f80 100644
--- a/wgpu-core/src/indirect_validation.rs
+++ b/wgpu-core/src/indirect_validation.rs
@@ -204,6 +204,7 @@ impl IndirectValidation {
                 entry_point: "main",
                 constants: &Default::default(),
                 zero_initialize_workgroup_memory: false,
+                enable_loop_ub_checking: true,
             },
             cache: None,
         };
diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs
index 01ceabf669..5d2f3a8434 100644
--- a/wgpu-core/src/pipeline.rs
+++ b/wgpu-core/src/pipeline.rs
@@ -145,6 +145,9 @@ pub struct ProgrammableStageDescriptor<'a> {
     /// This is required by the WebGPU spec, but may have overhead which can be avoided
     /// for cross-platform applications
     pub zero_initialize_workgroup_memory: bool,
+    /// Whether to forcibly prevent shader loops from being optimized out; such optimization may
+    /// lead to UB on Metal. Enabling this check may have significant overhead.
+    pub enable_loop_ub_checking: bool,
 }
 
 /// Describes a programmable pipeline stage.
@@ -172,6 +175,9 @@ pub struct ResolvedProgrammableStageDescriptor<'a> {
     /// This is required by the WebGPU spec, but may have overhead which can be avoided
     /// for cross-platform applications
     pub zero_initialize_workgroup_memory: bool,
+    /// Whether to forcibly prevent shader loops from being optimized out; such optimization may
+    /// lead to UB on Metal. Enabling this check may have significant overhead.
+    pub enable_loop_ub_checking: bool,
 }
 
 /// Number of implicit bind groups derived at pipeline creation.
diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs
index 8ab7f1cb47..010c74aa8e 100644
--- a/wgpu-hal/examples/halmark/main.rs
+++ b/wgpu-hal/examples/halmark/main.rs
@@ -259,6 +259,7 @@ impl<A: hal::Api> Example<A> {
                 entry_point: "vs_main",
                 constants: &constants,
                 zero_initialize_workgroup_memory: true,
+                enable_loop_ub_checking: true,
             },
             vertex_buffers: &[],
             fragment_stage: Some(hal::ProgrammableStage {
@@ -266,6 +267,7 @@ impl<A: hal::Api> Example<A> {
                 entry_point: "fs_main",
                 constants: &constants,
                 zero_initialize_workgroup_memory: true,
+                enable_loop_ub_checking: true,
             }),
             primitive: wgt::PrimitiveState {
                 topology: wgt::PrimitiveTopology::TriangleStrip,
diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs
index 4eedfe7817..de4e5a9b41 100644
--- a/wgpu-hal/examples/ray-traced-triangle/main.rs
+++ b/wgpu-hal/examples/ray-traced-triangle/main.rs
@@ -400,6 +400,7 @@ impl<A: hal::Api> Example<A> {
                     entry_point: "main",
                     constants: &Default::default(),
                     zero_initialize_workgroup_memory: true,
+                    enable_loop_ub_checking: true,
                 },
                 cache: None,
             })
diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
index 5509d7cce6..336ca3de5a 100644
--- a/wgpu-hal/src/dynamic/mod.rs
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -146,6 +146,7 @@ impl<'a> ProgrammableStage<'a, dyn DynShaderModule> {
             entry_point: self.entry_point,
             constants: self.constants,
             zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory,
+            enable_loop_ub_checking: self.enable_loop_ub_checking,
         }
     }
 }
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 0cddb69976..1daba70194 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -2138,6 +2138,9 @@ pub struct ProgrammableStage<'a, M: DynShaderModule + ?Sized> {
     /// This is required by the WebGPU spec, but may have overhead which can be avoided
     /// for cross-platform applications
     pub zero_initialize_workgroup_memory: bool,
+    /// Whether to forcibly prevent shader loops from being optimized out; such optimization may
+    /// lead to UB on Metal. Enabling this check may have significant overhead.
+    pub enable_loop_ub_checking: bool,
 }
 
 impl<M: DynShaderModule + ?Sized> Clone for ProgrammableStage<'_, M> {
@@ -2147,6 +2150,7 @@ impl<M: DynShaderModule + ?Sized> Clone for ProgrammableStage<'_, M> {
             entry_point: self.entry_point,
             constants: self.constants,
             zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory,
+            enable_loop_ub_checking: self.enable_loop_ub_checking,
         }
     }
 }
diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs
index 4cc8ef0eb0..fc67044455 100644
--- a/wgpu-hal/src/metal/device.rs
+++ b/wgpu-hal/src/metal/device.rs
@@ -150,6 +150,7 @@ impl super::Device {
                 binding_array: naga::proc::BoundsCheckPolicy::Unchecked,
             },
             zero_initialize_workgroup_memory: stage.zero_initialize_workgroup_memory,
+            enable_loop_ub_checking: stage.enable_loop_ub_checking,
         };
 
         let pipeline_options = naga::back::msl::PipelineOptions {
diff --git a/wgpu/src/api/common_pipeline.rs b/wgpu/src/api/common_pipeline.rs
index 697507bca2..3187ecb5a4 100644
--- a/wgpu/src/api/common_pipeline.rs
+++ b/wgpu/src/api/common_pipeline.rs
@@ -20,6 +20,9 @@ pub struct PipelineCompilationOptions<'a> {
     /// This is required by the WebGPU spec, but may have overhead which can be avoided
     /// for cross-platform applications
     pub zero_initialize_workgroup_memory: bool,
+    /// Whether to forcibly prevent shader loops from being optimized out; such optimization may
+    /// lead to UB on Metal. Enabling this check may have significant overhead.
+    pub enable_loop_ub_checking: bool,
 }
 
 impl<'a> Default for PipelineCompilationOptions<'a> {
@@ -33,6 +36,7 @@ impl<'a> Default for PipelineCompilationOptions<'a> {
         Self {
             constants,
             zero_initialize_workgroup_memory: true,
+            enable_loop_ub_checking: true,
         }
     }
 }
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index befec4bd78..174614f2bf 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -1092,6 +1092,10 @@ impl crate::Context for ContextWgpuCore {
                         .vertex
                         .compilation_options
                         .zero_initialize_workgroup_memory,
+                    enable_loop_ub_checking: desc
+                        .vertex
+                        .compilation_options
+                        .enable_loop_ub_checking,
                 },
                 buffers: Borrowed(&vertex_buffers),
             },
@@ -1106,6 +1110,7 @@ impl crate::Context for ContextWgpuCore {
                     zero_initialize_workgroup_memory: frag
                         .compilation_options
                         .zero_initialize_workgroup_memory,
+                    enable_loop_ub_checking: frag.compilation_options.enable_loop_ub_checking,
                 },
                 targets: Borrowed(frag.targets),
             }),
@@ -1150,6 +1155,7 @@ impl crate::Context for ContextWgpuCore {
                 zero_initialize_workgroup_memory: desc
                     .compilation_options
                     .zero_initialize_workgroup_memory,
+                enable_loop_ub_checking: desc.compilation_options.enable_loop_ub_checking,
             },
             cache: desc.cache.map(downcast_pipeline_cache).copied(),
         };