diff --git a/Cargo.lock b/Cargo.lock
index 5c781ca2a44..84c28f3c333 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1757,6 +1757,20 @@ dependencies = [
  "crunchy",
 ]
 
+[[package]]
+name = "half-2"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c36518ae13d88b7cbdc61401256df5d9fc27921ea66353a16660869b47af8864"
+dependencies = [
+ "arbitrary",
+ "bytemuck",
+ "cfg-if",
+ "crunchy",
+ "num-traits",
+ "serde",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.14.5"
@@ -2137,6 +2151,12 @@ dependencies = [
  "windows-targets 0.48.5",
 ]
 
+[[package]]
+name = "libm"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
+
 [[package]]
 name = "libredox"
 version = "0.1.3"
@@ -2337,12 +2357,14 @@ dependencies = [
  "codespan-reporting",
  "diff",
  "env_logger",
+ "half-2",
  "hashbrown",
  "hexf-parse",
  "hlsl-snapshots",
  "indexmap",
  "itertools 0.13.0",
  "log",
+ "num-traits",
  "petgraph 0.7.1",
  "pp-rs",
  "ron",
@@ -2354,6 +2376,7 @@ dependencies = [
  "termcolor",
  "thiserror 2.0.11",
  "unicode-ident",
+ "walkdir",
 ]
 
 [[package]]
@@ -2516,6 +2539,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
 dependencies = [
  "autocfg",
+ "libm",
 ]
 
 [[package]]
@@ -4703,6 +4727,7 @@ dependencies = [
  "env_logger",
  "futures-lite",
  "glam",
+ "half-2",
  "image",
  "itertools 0.13.0",
  "js-sys",
diff --git a/Cargo.toml b/Cargo.toml
index 926d3bd53ba..80fa8b32e8e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -96,6 +96,8 @@ flume = "0.11"
 futures-lite = "2"
 getrandom = "0.2"
 glam = "0.29"
+# TODO: Use `half` directly again after <https://github.com/starkat99/half-rs/pull/111> is released upstream.
+half = { package = "half-2", version = "2.4.1" }
 hashbrown = { version = "0.14.5", default-features = false, features = [
     "ahash",
     "inline-more",
@@ -142,6 +144,7 @@ strum = { version = "0.26.3", default-features = false, features = ["derive"] }
 trybuild = "1"
 tracy-client = "0.17"
 thiserror = { version = "2", default-features = false }
+walkdir = "2"
 winit = { version = "0.29", features = ["android-native-activity"] }
 
 # Metal dependencies
diff --git a/naga/Cargo.toml b/naga/Cargo.toml
index 4458405bbfd..a84ab9d2ba7 100644
--- a/naga/Cargo.toml
+++ b/naga/Cargo.toml
@@ -41,10 +41,17 @@ msl-out = []
 ## If you want to enable MSL output it regardless of the target platform, use `naga/msl-out`.
 msl-out-if-target-apple = []
 
-serialize = ["dep:serde", "bitflags/serde", "hashbrown/serde", "indexmap/serde"]
+serialize = [
+    "dep:serde",
+    "bitflags/serde",
+    "half/serde",
+    "hashbrown/serde",
+    "indexmap/serde",
+]
 deserialize = [
     "dep:serde",
     "bitflags/serde",
+    "half/serde",
     "hashbrown/serde",
     "indexmap/serde",
 ]
@@ -84,9 +91,11 @@ termcolor = { version = "1.4.1" }
 # https://github.com/brendanzab/codespan/commit/e99c867339a877731437e7ee6a903a3d03b5439e
 codespan-reporting = { version = "0.11.0" }
 hashbrown.workspace = true
+half = { workspace = true, features = ["arbitrary", "num-traits"] }
 rustc-hash.workspace = true
 indexmap.workspace = true
 log = "0.4"
+num-traits = "0.2"
 strum = { workspace = true, optional = true }
 spirv = { version = "0.3", optional = true }
 thiserror.workspace = true
@@ -118,3 +127,4 @@ ron = "0.8.0"
 rspirv = { version = "0.11", git = "https://github.com/gfx-rs/rspirv", rev = "b969f175d5663258b4891e44b76c1544da9661ab" }
 serde = { workspace = true, features = ["default", "derive"] }
 spirv = { version = "0.3", features = ["deserialize"] }
+walkdir.workspace = true
diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs
index 0798fac82d1..3c93084a2f4 100644
--- a/naga/src/back/glsl/mod.rs
+++ b/naga/src/back/glsl/mod.rs
@@ -2669,6 +2669,9 @@ impl<'a, W: Write> Writer<'a, W> {
                     // decimal part even it's zero which is needed for a valid glsl float constant
                     crate::Literal::F64(value) => write!(self.out, "{value:?}LF")?,
                     crate::Literal::F32(value) => write!(self.out, "{value:?}")?,
+                    crate::Literal::F16(_) => {
+                        return Err(Error::Custom("GLSL has no 16-bit float type".into()));
+                    }
                     // Unsigned integers need a `u` at the end
                     //
                     // While `core` doesn't necessarily need it, it's allowed and since `es` needs it we
diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs
index a8283388ceb..eac1cae5749 100644
--- a/naga/src/back/hlsl/writer.rs
+++ b/naga/src/back/hlsl/writer.rs
@@ -2634,6 +2634,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                 // decimal part even it's zero
                 crate::Literal::F64(value) => write!(self.out, "{value:?}L")?,
                 crate::Literal::F32(value) => write!(self.out, "{value:?}")?,
+                crate::Literal::F16(value) => write!(self.out, "{value:?}h")?,
                 crate::Literal::U32(value) => write!(self.out, "{value}u")?,
                 // HLSL has no suffix for explicit i32 literals, but not using any suffix
                 // makes the type ambiguous which prevents overload resolution from
diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs
index bc77a7f6d0c..0fe26270f82 100644
--- a/naga/src/back/msl/writer.rs
+++ b/naga/src/back/msl/writer.rs
@@ -5,6 +5,8 @@ use crate::{
     proc::{self, index, ExpressionKindTracker, NameKey, TypeResolution},
     valid, FastHashMap, FastHashSet,
 };
+use half::f16;
+use num_traits::real::Real;
 #[cfg(test)]
 use std::ptr;
 use std::{
@@ -174,9 +176,11 @@ impl Display for TypeContext<'_> {
                 write!(out, "{}::atomic_{}", NAMESPACE, scalar.to_msl_name())
             }
             crate::TypeInner::Vector { size, scalar } => put_numeric_type(out, scalar, &[size]),
-            crate::TypeInner::Matrix { columns, rows, .. } => {
-                put_numeric_type(out, crate::Scalar::F32, &[rows, columns])
-            }
+            crate::TypeInner::Matrix {
+                columns,
+                rows,
+                scalar,
+            } => put_numeric_type(out, scalar, &[rows, columns]),
             crate::TypeInner::Pointer { base, space } => {
                 let sub = Self {
                     handle: base,
@@ -413,8 +417,12 @@ impl crate::Scalar {
         match self {
             Self {
                 kind: Sk::Float,
-                width: _,
+                width: 4,
             } => "float",
+            Self {
+                kind: Sk::Float,
+                width: 2,
+            } => "half",
             Self {
                 kind: Sk::Sint,
                 width: 4,
@@ -471,7 +479,7 @@ fn should_pack_struct_member(
     match *ty_inner {
         crate::TypeInner::Vector {
             size: crate::VectorSize::Tri,
-            scalar: scalar @ crate::Scalar { width: 4, .. },
+            scalar: scalar @ crate::Scalar { width: 4 | 2, .. },
         } if is_tight => Some(scalar),
         _ => None,
     }
@@ -1446,6 +1454,21 @@ impl<W: Write> Writer<W> {
                 crate::Literal::F64(_) => {
                     return Err(Error::CapabilityNotSupported(valid::Capabilities::FLOAT64))
                 }
+                crate::Literal::F16(value) => {
+                    if value.is_infinite() {
+                        let sign = if value.is_sign_negative() { "-" } else { "" };
+                        write!(self.out, "{sign}INFINITY")?;
+                    } else if value.is_nan() {
+                        write!(self.out, "NAN")?;
+                    } else {
+                        let suffix = if value.fract() == f16::from_f32(0.0) {
+                            ".0h"
+                        } else {
+                            "h"
+                        };
+                        write!(self.out, "{value}{suffix}")?;
+                    }
+                }
                 crate::Literal::F32(value) => {
                     if value.is_infinite() {
                         let sign = if value.is_sign_negative() { "-" } else { "" };
diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs
index 6b26c1c2aac..9c85608b706 100644
--- a/naga/src/back/spv/block.rs
+++ b/naga/src/back/spv/block.rs
@@ -8,6 +8,7 @@ use super::{
     WriterFlags,
 };
 use crate::{arena::Handle, proc::index::GuardedIndex, Statement};
+use arrayvec::ArrayVec;
 use spirv::Word;
 
 fn get_dimension(type_inner: &crate::TypeInner) -> Dimension {
@@ -1476,159 +1477,7 @@ impl BlockContext<'_> {
                 expr,
                 kind,
                 convert,
-            } => {
-                use crate::ScalarKind as Sk;
-
-                let expr_id = self.cached[expr];
-                let (src_scalar, src_size, is_matrix) =
-                    match *self.fun_info[expr].ty.inner_with(&self.ir_module.types) {
-                        crate::TypeInner::Scalar(scalar) => (scalar, None, false),
-                        crate::TypeInner::Vector { scalar, size } => (scalar, Some(size), false),
-                        crate::TypeInner::Matrix { scalar, .. } => (scalar, None, true),
-                        ref other => {
-                            log::error!("As source {:?}", other);
-                            return Err(Error::Validation("Unexpected Expression::As source"));
-                        }
-                    };
-
-                enum Cast {
-                    Identity,
-                    Unary(spirv::Op),
-                    Binary(spirv::Op, Word),
-                    Ternary(spirv::Op, Word, Word),
-                }
-
-                let cast = if is_matrix {
-                    // we only support identity casts for matrices
-                    Cast::Unary(spirv::Op::CopyObject)
-                } else {
-                    match (src_scalar.kind, kind, convert) {
-                        // Filter out identity casts. Some Adreno drivers are
-                        // confused by no-op OpBitCast instructions.
-                        (src_kind, kind, convert)
-                            if src_kind == kind
-                                && convert.filter(|&width| width != src_scalar.width).is_none() =>
-                        {
-                            Cast::Identity
-                        }
-                        (Sk::Bool, Sk::Bool, _) => Cast::Unary(spirv::Op::CopyObject),
-                        (_, _, None) => Cast::Unary(spirv::Op::Bitcast),
-                        // casting to a bool - generate `OpXxxNotEqual`
-                        (_, Sk::Bool, Some(_)) => {
-                            let op = match src_scalar.kind {
-                                Sk::Sint | Sk::Uint => spirv::Op::INotEqual,
-                                Sk::Float => spirv::Op::FUnordNotEqual,
-                                Sk::Bool | Sk::AbstractInt | Sk::AbstractFloat => unreachable!(),
-                            };
-                            let zero_scalar_id =
-                                self.writer.get_constant_scalar_with(0, src_scalar)?;
-                            let zero_id = match src_size {
-                                Some(size) => {
-                                    let ty = LocalType::Numeric(NumericType::Vector {
-                                        size,
-                                        scalar: src_scalar,
-                                    })
-                                    .into();
-
-                                    self.temp_list.clear();
-                                    self.temp_list.resize(size as _, zero_scalar_id);
-
-                                    self.writer.get_constant_composite(ty, &self.temp_list)
-                                }
-                                None => zero_scalar_id,
-                            };
-
-                            Cast::Binary(op, zero_id)
-                        }
-                        // casting from a bool - generate `OpSelect`
-                        (Sk::Bool, _, Some(dst_width)) => {
-                            let dst_scalar = crate::Scalar {
-                                kind,
-                                width: dst_width,
-                            };
-                            let zero_scalar_id =
-                                self.writer.get_constant_scalar_with(0, dst_scalar)?;
-                            let one_scalar_id =
-                                self.writer.get_constant_scalar_with(1, dst_scalar)?;
-                            let (accept_id, reject_id) = match src_size {
-                                Some(size) => {
-                                    let ty = LocalType::Numeric(NumericType::Vector {
-                                        size,
-                                        scalar: dst_scalar,
-                                    })
-                                    .into();
-
-                                    self.temp_list.clear();
-                                    self.temp_list.resize(size as _, zero_scalar_id);
-
-                                    let vec0_id =
-                                        self.writer.get_constant_composite(ty, &self.temp_list);
-
-                                    self.temp_list.fill(one_scalar_id);
-
-                                    let vec1_id =
-                                        self.writer.get_constant_composite(ty, &self.temp_list);
-
-                                    (vec1_id, vec0_id)
-                                }
-                                None => (one_scalar_id, zero_scalar_id),
-                            };
-
-                            Cast::Ternary(spirv::Op::Select, accept_id, reject_id)
-                        }
-                        (Sk::Float, Sk::Uint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToU),
-                        (Sk::Float, Sk::Sint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToS),
-                        (Sk::Float, Sk::Float, Some(dst_width))
-                            if src_scalar.width != dst_width =>
-                        {
-                            Cast::Unary(spirv::Op::FConvert)
-                        }
-                        (Sk::Sint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertSToF),
-                        (Sk::Sint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
-                            Cast::Unary(spirv::Op::SConvert)
-                        }
-                        (Sk::Uint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertUToF),
-                        (Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
-                            Cast::Unary(spirv::Op::UConvert)
-                        }
-                        (Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
-                            Cast::Unary(spirv::Op::SConvert)
-                        }
-                        (Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
-                            Cast::Unary(spirv::Op::UConvert)
-                        }
-                        // We assume it's either an identity cast, or int-uint.
-                        _ => Cast::Unary(spirv::Op::Bitcast),
-                    }
-                };
-
-                let id = self.gen_id();
-                let instruction = match cast {
-                    Cast::Identity => None,
-                    Cast::Unary(op) => Some(Instruction::unary(op, result_type_id, id, expr_id)),
-                    Cast::Binary(op, operand) => Some(Instruction::binary(
-                        op,
-                        result_type_id,
-                        id,
-                        expr_id,
-                        operand,
-                    )),
-                    Cast::Ternary(op, op1, op2) => Some(Instruction::ternary(
-                        op,
-                        result_type_id,
-                        id,
-                        expr_id,
-                        op1,
-                        op2,
-                    )),
-                };
-                if let Some(instruction) = instruction {
-                    block.body.push(instruction);
-                    id
-                } else {
-                    expr_id
-                }
-            }
+            } => self.write_as_expression(expr, convert, kind, block, result_type_id)?,
             crate::Expression::ImageLoad {
                 image,
                 coordinate,
@@ -1784,6 +1633,237 @@ impl BlockContext<'_> {
         Ok(())
     }
 
+    /// Helper which focuses on generating the `As` expressions and the various conversions
+    /// that need to happen because of that.
+    fn write_as_expression(
+        &mut self,
+        expr: Handle<crate::Expression>,
+        convert: Option<u8>,
+        kind: crate::ScalarKind,
+
+        block: &mut Block,
+        result_type_id: u32,
+    ) -> Result<u32, Error> {
+        use crate::ScalarKind as Sk;
+        let expr_id = self.cached[expr];
+        let ty = self.fun_info[expr].ty.inner_with(&self.ir_module.types);
+
+        // Matrix casts needs special treatment in SPIR-V, as the cast functions
+        // can take vectors or scalars, but not matrices. In order to cast a matrix
+        // we need to cast each column of the matrix individually and construct a new
+        // matrix from the converted columns.
+        if let crate::TypeInner::Matrix {
+            columns,
+            rows,
+            scalar,
+        } = *ty
+        {
+            let Some(convert) = convert else {
+                // No conversion needs to be done, passes through.
+                return Ok(expr_id);
+            };
+
+            if convert == scalar.width {
+                // No conversion needs to be done, passes through.
+                return Ok(expr_id);
+            }
+
+            if kind != Sk::Float {
+                // Only float conversions are supported for matrices.
+                return Err(Error::Validation("Matrices must be floats"));
+            }
+
+            // Type of each extracted column
+            let column_src_ty =
+                self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector {
+                    size: rows,
+                    scalar,
+                })));
+
+            // Type of the column after conversion
+            let column_dst_ty =
+                self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector {
+                    size: rows,
+                    scalar: crate::Scalar {
+                        kind,
+                        width: convert,
+                    },
+                })));
+
+            let mut components = ArrayVec::<Word, 4>::new();
+
+            for column in 0..columns as usize {
+                let column_id = self.gen_id();
+                block.body.push(Instruction::composite_extract(
+                    column_src_ty,
+                    column_id,
+                    expr_id,
+                    &[column as u32],
+                ));
+
+                let column_conv_id = self.gen_id();
+                block.body.push(Instruction::unary(
+                    spirv::Op::FConvert,
+                    column_dst_ty,
+                    column_conv_id,
+                    column_id,
+                ));
+
+                components.push(column_conv_id);
+            }
+
+            let construct_id = self.gen_id();
+
+            block.body.push(Instruction::composite_construct(
+                result_type_id,
+                construct_id,
+                &components,
+            ));
+
+            return Ok(construct_id);
+        }
+
+        let (src_scalar, src_size) = match *ty {
+            crate::TypeInner::Scalar(scalar) => (scalar, None),
+            crate::TypeInner::Vector { scalar, size } => (scalar, Some(size)),
+            ref other => {
+                log::error!("As source {:?}", other);
+                return Err(Error::Validation("Unexpected Expression::As source"));
+            }
+        };
+
+        enum Cast {
+            Identity,
+            Unary(spirv::Op),
+            Binary(spirv::Op, Word),
+            Ternary(spirv::Op, Word, Word),
+        }
+        let cast = match (src_scalar.kind, kind, convert) {
+            // Filter out identity casts. Some Adreno drivers are
+            // confused by no-op OpBitCast instructions.
+            (src_kind, kind, convert)
+                if src_kind == kind
+                    && convert.filter(|&width| width != src_scalar.width).is_none() =>
+            {
+                Cast::Identity
+            }
+            (Sk::Bool, Sk::Bool, _) => Cast::Unary(spirv::Op::CopyObject),
+            (_, _, None) => Cast::Unary(spirv::Op::Bitcast),
+            // casting to a bool - generate `OpXxxNotEqual`
+            (_, Sk::Bool, Some(_)) => {
+                let op = match src_scalar.kind {
+                    Sk::Sint | Sk::Uint => spirv::Op::INotEqual,
+                    Sk::Float => spirv::Op::FUnordNotEqual,
+                    Sk::Bool | Sk::AbstractInt | Sk::AbstractFloat => unreachable!(),
+                };
+                let zero_scalar_id = self.writer.get_constant_scalar_with(0, src_scalar)?;
+                let zero_id = match src_size {
+                    Some(size) => {
+                        let ty = LocalType::Numeric(NumericType::Vector {
+                            size,
+                            scalar: src_scalar,
+                        })
+                        .into();
+
+                        self.temp_list.clear();
+                        self.temp_list.resize(size as _, zero_scalar_id);
+
+                        self.writer.get_constant_composite(ty, &self.temp_list)
+                    }
+                    None => zero_scalar_id,
+                };
+
+                Cast::Binary(op, zero_id)
+            }
+            // casting from a bool - generate `OpSelect`
+            (Sk::Bool, _, Some(dst_width)) => {
+                let dst_scalar = crate::Scalar {
+                    kind,
+                    width: dst_width,
+                };
+                let zero_scalar_id = self.writer.get_constant_scalar_with(0, dst_scalar)?;
+                let one_scalar_id = self.writer.get_constant_scalar_with(1, dst_scalar)?;
+                let (accept_id, reject_id) = match src_size {
+                    Some(size) => {
+                        let ty = LocalType::Numeric(NumericType::Vector {
+                            size,
+                            scalar: dst_scalar,
+                        })
+                        .into();
+
+                        self.temp_list.clear();
+                        self.temp_list.resize(size as _, zero_scalar_id);
+
+                        let vec0_id = self.writer.get_constant_composite(ty, &self.temp_list);
+
+                        self.temp_list.fill(one_scalar_id);
+
+                        let vec1_id = self.writer.get_constant_composite(ty, &self.temp_list);
+
+                        (vec1_id, vec0_id)
+                    }
+                    None => (one_scalar_id, zero_scalar_id),
+                };
+
+                Cast::Ternary(spirv::Op::Select, accept_id, reject_id)
+            }
+            (Sk::Float, Sk::Uint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToU),
+            (Sk::Float, Sk::Sint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToS),
+            (Sk::Float, Sk::Float, Some(dst_width)) if src_scalar.width != dst_width => {
+                Cast::Unary(spirv::Op::FConvert)
+            }
+            (Sk::Sint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertSToF),
+            (Sk::Sint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
+                Cast::Unary(spirv::Op::SConvert)
+            }
+            (Sk::Uint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertUToF),
+            (Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
+                Cast::Unary(spirv::Op::UConvert)
+            }
+            (Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
+                Cast::Unary(spirv::Op::SConvert)
+            }
+            (Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
+                Cast::Unary(spirv::Op::UConvert)
+            }
+            // We assume it's either an identity cast, or int-uint.
+            _ => Cast::Unary(spirv::Op::Bitcast),
+        };
+        Ok(match cast {
+            Cast::Identity => expr_id,
+            Cast::Unary(op) => {
+                let id = self.gen_id();
+                block
+                    .body
+                    .push(Instruction::unary(op, result_type_id, id, expr_id));
+                id
+            }
+            Cast::Binary(op, operand) => {
+                let id = self.gen_id();
+                block.body.push(Instruction::binary(
+                    op,
+                    result_type_id,
+                    id,
+                    expr_id,
+                    operand,
+                ));
+                id
+            }
+            Cast::Ternary(op, op1, op2) => {
+                let id = self.gen_id();
+                block.body.push(Instruction::ternary(
+                    op,
+                    result_type_id,
+                    id,
+                    expr_id,
+                    op1,
+                    op2,
+                ));
+                id
+            }
+        })
+    }
+
     /// Build an `OpAccessChain` instruction.
     ///
     /// Emit any needed bounds-checking expressions to `block`.
diff --git a/naga/src/back/spv/instructions.rs b/naga/src/back/spv/instructions.rs
index 57bfb4e9f31..bb4b4f98c1e 100644
--- a/naga/src/back/spv/instructions.rs
+++ b/naga/src/back/spv/instructions.rs
@@ -406,6 +406,10 @@ impl super::Instruction {
         instruction
     }
 
+    pub(super) fn constant_16bit(result_type_id: Word, id: Word, low: Word) -> Self {
+        Self::constant(result_type_id, id, &[low])
+    }
+
     pub(super) fn constant_32bit(result_type_id: Word, id: Word, value: Word) -> Self {
         Self::constant(result_type_id, id, &[value])
     }
diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs
index cc0c227bece..56f9985790d 100644
--- a/naga/src/back/spv/writer.rs
+++ b/naga/src/back/spv/writer.rs
@@ -1049,6 +1049,15 @@ impl Writer {
                 if bits == 64 {
                     self.capabilities_used.insert(spirv::Capability::Float64);
                 }
+                if bits == 16 {
+                    self.capabilities_used.insert(spirv::Capability::Float16);
+                    self.capabilities_used
+                        .insert(spirv::Capability::StorageBuffer16BitAccess);
+                    self.capabilities_used
+                        .insert(spirv::Capability::UniformAndStorageBuffer16BitAccess);
+                    self.capabilities_used
+                        .insert(spirv::Capability::StorageInputOutput16);
+                }
                 Instruction::type_float(id, bits)
             }
             Sk::Bool => Instruction::type_bool(id),
@@ -1117,6 +1126,19 @@ impl Writer {
                 )?;
                 self.use_extension("SPV_EXT_shader_atomic_float_add");
             }
+            // 16 bit floating-point support requires Float16 capability
+            crate::TypeInner::Matrix {
+                scalar: crate::Scalar::F16,
+                ..
+            }
+            | crate::TypeInner::Vector {
+                scalar: crate::Scalar::F16,
+                ..
+            }
+            | crate::TypeInner::Scalar(crate::Scalar::F16) => {
+                self.require_any("16 bit floating-point", &[spirv::Capability::Float16])?;
+                self.use_extension("SPV_KHR_16bit_storage");
+            }
             _ => {}
         }
         Ok(())
@@ -1410,6 +1432,10 @@ impl Writer {
                 Instruction::constant_64bit(type_id, id, bits as u32, (bits >> 32) as u32)
             }
             crate::Literal::F32(value) => Instruction::constant_32bit(type_id, id, value.to_bits()),
+            crate::Literal::F16(value) => {
+                let low = value.to_bits();
+                Instruction::constant_16bit(type_id, id, low as u32)
+            }
             crate::Literal::U32(value) => Instruction::constant_32bit(type_id, id, value),
             crate::Literal::I32(value) => Instruction::constant_32bit(type_id, id, value as u32),
             crate::Literal::U64(value) => {
diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs
index a7cd8f95c9b..cbcf1079856 100644
--- a/naga/src/back/wgsl/writer.rs
+++ b/naga/src/back/wgsl/writer.rs
@@ -1,3 +1,5 @@
+use hashbrown::HashSet;
+
 use super::Error;
 use crate::back::wgsl::polyfill::InversePolyfill;
 use crate::{
@@ -125,6 +127,9 @@ impl<W: Write> Writer<W> {
             }
         }
 
+        // Write all `enable` declarations
+        self.write_enable_declarations(module)?;
+
         // Write all structs
         for (handle, ty) in module.types.iter() {
             if let TypeInner::Struct { ref members, .. } = ty.inner {
@@ -217,6 +222,41 @@ impl<W: Write> Writer<W> {
         Ok(())
     }
 
+    /// Helper method which writes all the `enable` declarations
+    /// needed for a module.
+    fn write_enable_declarations(&mut self, module: &Module) -> BackendResult {
+        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+        enum WrittenDeclarations {
+            F16,
+        }
+
+        let mut written_declarations = HashSet::new();
+
+        // Write all the `enable` declarations
+        for (_, ty) in module.types.iter() {
+            match ty.inner {
+                TypeInner::Scalar(scalar)
+                | TypeInner::Vector { scalar, .. }
+                | TypeInner::Matrix { scalar, .. } => {
+                    if scalar == crate::Scalar::F16
+                        && !written_declarations.contains(&WrittenDeclarations::F16)
+                    {
+                        writeln!(self.out, "enable f16;")?;
+                        written_declarations.insert(WrittenDeclarations::F16);
+                    }
+                }
+                _ => {}
+            }
+        }
+
+        if !written_declarations.is_empty() {
+            // Empty line for readability
+            writeln!(self.out)?;
+        }
+
+        Ok(())
+    }
+
     /// Helper method used to write struct name
     ///
     /// # Notes
@@ -1262,6 +1302,7 @@ impl<W: Write> Writer<W> {
 
         match expressions[expr] {
             Expression::Literal(literal) => match literal {
+                crate::Literal::F16(value) => write!(self.out, "{value}h")?,
                 crate::Literal::F32(value) => write!(self.out, "{value}f")?,
                 crate::Literal::U32(value) => write!(self.out, "{value}u")?,
                 crate::Literal::I32(value) => {
@@ -2026,6 +2067,10 @@ const fn scalar_kind_str(scalar: crate::Scalar) -> &'static str {
             kind: Sk::Float,
             width: 4,
         } => "f32",
+        Scalar {
+            kind: Sk::Float,
+            width: 2,
+        } => "f16",
         Scalar {
             kind: Sk::Sint,
             width: 4,
diff --git a/naga/src/front/glsl/error.rs b/naga/src/front/glsl/error.rs
index 92962db00d7..201ad5333d2 100644
--- a/naga/src/front/glsl/error.rs
+++ b/naga/src/front/glsl/error.rs
@@ -102,9 +102,15 @@ pub enum ErrorKind {
     /// Unsupported matrix of the form matCx2
     ///
     /// Our IR expects matrices of the form matCx2 to have a stride of 8 however
-    /// matrices in the std140 layout have a stride of at least 16
-    #[error("unsupported matrix of the form matCx2 in std140 block layout")]
-    UnsupportedMatrixTypeInStd140,
+    /// matrices in the std140 layout have a stride of at least 16.
+    #[error("unsupported matrix of the form matCx2 (in this case mat{columns}x2) in std140 block layout. See https://github.com/gfx-rs/wgpu/issues/4375")]
+    UnsupportedMatrixWithTwoRowsInStd140 { columns: u8 },
+    /// Unsupported matrix of the form f16matCxR
+    ///
+    /// Our IR expects matrices of the form f16matCxR to have a stride of 4/8/8 depending on row-count,
+    /// however matrices in the std140 layout have a stride of at least 16.
+    #[error("unsupported matrix of the form f16matCxR (in this case f16mat{columns}x{rows}) in std140 block layout. See https://github.com/gfx-rs/wgpu/issues/4375")]
+    UnsupportedF16MatrixInStd140 { columns: u8, rows: u8 },
     /// A variable with the same name already exists in the current scope.
     #[error("Variable already declared: {0}")]
     VariableAlreadyDeclared(String),
diff --git a/naga/src/front/glsl/offset.rs b/naga/src/front/glsl/offset.rs
index 6e8d5ada107..d3d29ca4285 100644
--- a/naga/src/front/glsl/offset.rs
+++ b/naga/src/front/glsl/offset.rs
@@ -120,11 +120,25 @@ pub fn calculate_offset(
             }
 
             // See comment on the error kind
-            if StructLayout::Std140 == layout && rows == crate::VectorSize::Bi {
-                errors.push(Error {
-                    kind: ErrorKind::UnsupportedMatrixTypeInStd140,
-                    meta,
-                });
+            if StructLayout::Std140 == layout {
+                // Do the f16 test first, as it's more specific
+                if scalar == Scalar::F16 {
+                    errors.push(Error {
+                        kind: ErrorKind::UnsupportedF16MatrixInStd140 {
+                            columns: columns as u8,
+                            rows: rows as u8,
+                        },
+                        meta,
+                    });
+                }
+                if rows == crate::VectorSize::Bi {
+                    errors.push(Error {
+                        kind: ErrorKind::UnsupportedMatrixWithTwoRowsInStd140 {
+                            columns: columns as u8,
+                        },
+                        meta,
+                    });
+                }
             }
 
             (align, align * columns as u32)
diff --git a/naga/src/front/glsl/types.rs b/naga/src/front/glsl/types.rs
index ad5e188fd95..d4bd5a69fa4 100644
--- a/naga/src/front/glsl/types.rs
+++ b/naga/src/front/glsl/types.rs
@@ -10,6 +10,10 @@ pub fn parse_type(type_name: &str) -> Option<Type> {
             name: None,
             inner: TypeInner::Scalar(Scalar::BOOL),
         }),
+        "float16_t" => Some(Type {
+            name: None,
+            inner: TypeInner::Scalar(Scalar::F16),
+        }),
         "float" => Some(Type {
             name: None,
             inner: TypeInner::Scalar(Scalar::F32),
@@ -40,6 +44,7 @@ pub fn parse_type(type_name: &str) -> Option<Type> {
                     "i" => Scalar::I32,
                     "u" => Scalar::U32,
                     "d" => Scalar::F64,
+                    "f16" => Scalar::F16,
                     _ => return None,
                 })
             }
diff --git a/naga/src/front/spv/mod.rs b/naga/src/front/spv/mod.rs
index b8087fc8b0f..473011dbf2f 100644
--- a/naga/src/front/spv/mod.rs
+++ b/naga/src/front/spv/mod.rs
@@ -36,6 +36,7 @@ mod null;
 use convert::*;
 pub use error::Error;
 use function::*;
+use half::f16;
 
 use crate::{
     arena::{Arena, Handle, UniqueArena},
@@ -80,6 +81,7 @@ pub const SUPPORTED_EXTENSIONS: &[&str] = &[
     "SPV_KHR_vulkan_memory_model",
     "SPV_KHR_multiview",
     "SPV_EXT_shader_atomic_float_add",
+    "SPV_KHR_16bit_storage",
 ];
 pub const SUPPORTED_EXT_SETS: &[&str] = &["GLSL.std.450"];
 
@@ -5598,6 +5600,9 @@ impl<I: Iterator<Item = u32>> Frontend<I> {
             }) => {
                 let low = self.next()?;
                 match width {
+                    // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#Literal
+                    // If a numeric type’s bit width is less than 32-bits, the value appears in the low-order bits of the word.
+                    2 => crate::Literal::F16(f16::from_bits(low as u16)),
                     4 => crate::Literal::F32(f32::from_bits(low)),
                     8 => {
                         inst.expect(5)?;
diff --git a/naga/src/front/wgsl/error.rs b/naga/src/front/wgsl/error.rs
index 7bdbf12d2c7..504c0a1f904 100644
--- a/naga/src/front/wgsl/error.rs
+++ b/naga/src/front/wgsl/error.rs
@@ -146,8 +146,6 @@ pub enum NumberError {
     Invalid,
     #[error("numeric literal not representable by target type")]
     NotRepresentable,
-    #[error("unimplemented f16 type")]
-    UnimplementedF16,
 }
 
 #[derive(Copy, Clone, Debug, PartialEq)]
@@ -1008,19 +1006,18 @@ impl<'a> Error<'a> {
                 )],
             },
             Error::EnableExtensionNotEnabled { kind, span } => ParseError {
-                message: format!("`{}` enable-extension is not enabled", kind.to_ident()),
+                message: format!("the `{}` language extension is not enabled", kind.to_ident()),
                 labels: vec![(
                     span,
                     format!(
                         concat!(
-                            "the `{}` enable-extension is needed for this functionality, ",
-                            "but it is not currently enabled"
+                            "the `{0}` language extension is needed for this functionality, ",
+                            "but it is not currently enabled."
                         ),
                         kind.to_ident()
                     )
                     .into(),
                 )],
-                #[allow(irrefutable_let_patterns)]
                 notes: if let EnableExtension::Unimplemented(kind) = kind {
                     vec![format!(
                         concat!(
@@ -1032,7 +1029,12 @@ impl<'a> Error<'a> {
                         kind.tracking_issue_num()
                     )]
                 } else {
-                    vec![]
+                    vec![
+                        format!(
+                            "You can enable this extension by adding `enable {};` at the top of the shader.",
+                            kind.to_ident()
+                        ),
+                    ]
                 },
             },
             Error::LanguageExtensionNotYetImplemented { kind, span } => ParseError {
diff --git a/naga/src/front/wgsl/lower/mod.rs b/naga/src/front/wgsl/lower/mod.rs
index 7cd3ef90f81..14b969a685d 100644
--- a/naga/src/front/wgsl/lower/mod.rs
+++ b/naga/src/front/wgsl/lower/mod.rs
@@ -1886,6 +1886,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> {
         let expr: Typed<crate::Expression> = match *expr {
             ast::Expression::Literal(literal) => {
                 let literal = match literal {
+                    ast::Literal::Number(Number::F16(f)) => crate::Literal::F16(f),
                     ast::Literal::Number(Number::F32(f)) => crate::Literal::F32(f),
                     ast::Literal::Number(Number::I32(i)) => crate::Literal::I32(i),
                     ast::Literal::Number(Number::U32(u)) => crate::Literal::U32(u),
diff --git a/naga/src/front/wgsl/parse/conv.rs b/naga/src/front/wgsl/parse/conv.rs
index 00c19d877da..1e07ff39e6c 100644
--- a/naga/src/front/wgsl/parse/conv.rs
+++ b/naga/src/front/wgsl/parse/conv.rs
@@ -1,4 +1,7 @@
 use super::Error;
+use crate::front::wgsl::parse::directive::enable_extension::{
+    EnableExtensions, ImplementedEnableExtension,
+};
 use crate::front::wgsl::Scalar;
 use crate::Span;
 
@@ -112,10 +115,17 @@ pub fn map_storage_format(word: &str, span: Span) -> Result<crate::StorageFormat
     })
 }
 
-pub fn get_scalar_type(word: &str) -> Option<Scalar> {
+pub fn get_scalar_type(
+    enable_extensions: &EnableExtensions,
+    span: Span,
+    word: &str,
+) -> Result<Option<Scalar>, Error<'static>> {
     use crate::ScalarKind as Sk;
-    match word {
-        // "f16" => Some(Scalar { kind: Sk::Float, width: 2 }),
+    let scalar = match word {
+        "f16" => Some(Scalar {
+            kind: Sk::Float,
+            width: 2,
+        }),
         "f32" => Some(Scalar {
             kind: Sk::Float,
             width: 4,
@@ -145,7 +155,18 @@ pub fn get_scalar_type(word: &str) -> Option<Scalar> {
             width: crate::BOOL_WIDTH,
         }),
         _ => None,
+    };
+
+    if matches!(scalar, Some(Scalar::F16))
+        && !enable_extensions.contains(ImplementedEnableExtension::F16)
+    {
+        return Err(Error::EnableExtensionNotEnabled {
+            span,
+            kind: ImplementedEnableExtension::F16.into(),
+        });
     }
+
+    Ok(scalar)
 }
 
 pub fn map_derivative(word: &str) -> Option<(crate::DerivativeAxis, crate::DerivativeControl)> {
diff --git a/naga/src/front/wgsl/parse/directive/enable_extension.rs b/naga/src/front/wgsl/parse/directive/enable_extension.rs
index 147ec0b5e04..3efa3b52835 100644
--- a/naga/src/front/wgsl/parse/directive/enable_extension.rs
+++ b/naga/src/front/wgsl/parse/directive/enable_extension.rs
@@ -5,24 +5,29 @@ use crate::{front::wgsl::error::Error, Span};
 
 /// Tracks the status of every enable-extension known to Naga.
 #[derive(Clone, Debug, Eq, PartialEq)]
-pub struct EnableExtensions {}
+pub struct EnableExtensions {
+    /// Whether `enable f16;` was written earlier in the shader module.
+    f16: bool,
+}
 
 impl EnableExtensions {
     pub(crate) const fn empty() -> Self {
-        Self {}
+        Self { f16: false }
     }
 
     /// Add an enable-extension to the set requested by a module.
-    #[allow(unreachable_code)]
     pub(crate) fn add(&mut self, ext: ImplementedEnableExtension) {
-        let _field: &mut bool = match ext {};
-        *_field = true;
+        let field = match ext {
+            ImplementedEnableExtension::F16 => &mut self.f16,
+        };
+        *field = true;
     }
 
     /// Query whether an enable-extension tracked here has been requested.
-    #[allow(unused)]
     pub(crate) const fn contains(&self, ext: ImplementedEnableExtension) -> bool {
-        match ext {}
+        match ext {
+            ImplementedEnableExtension::F16 => self.f16,
+        }
     }
 }
 
@@ -37,7 +42,6 @@ impl Default for EnableExtensions {
 /// WGSL spec.: <https://www.w3.org/TR/WGSL/#enable-extensions-sec>
 #[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
 pub enum EnableExtension {
-    #[allow(unused)]
     Implemented(ImplementedEnableExtension),
     Unimplemented(UnimplementedEnableExtension),
 }
@@ -50,7 +54,7 @@ impl EnableExtension {
     /// Convert from a sentinel word in WGSL into its associated [`EnableExtension`], if possible.
     pub(crate) fn from_ident(word: &str, span: Span) -> Result<Self, Error<'_>> {
         Ok(match word {
-            Self::F16 => Self::Unimplemented(UnimplementedEnableExtension::F16),
+            Self::F16 => Self::Implemented(ImplementedEnableExtension::F16),
             Self::CLIP_DISTANCES => {
                 Self::Unimplemented(UnimplementedEnableExtension::ClipDistances)
             }
@@ -64,9 +68,10 @@ impl EnableExtension {
     /// Maps this [`EnableExtension`] into the sentinel word associated with it in WGSL.
     pub const fn to_ident(self) -> &'static str {
         match self {
-            Self::Implemented(kind) => match kind {},
+            Self::Implemented(kind) => match kind {
+                ImplementedEnableExtension::F16 => Self::F16,
+            },
             Self::Unimplemented(kind) => match kind {
-                UnimplementedEnableExtension::F16 => Self::F16,
                 UnimplementedEnableExtension::ClipDistances => Self::CLIP_DISTANCES,
                 UnimplementedEnableExtension::DualSourceBlending => Self::DUAL_SOURCE_BLENDING,
             },
@@ -76,17 +81,24 @@ impl EnableExtension {
 
 /// A variant of [`EnableExtension::Implemented`].
 #[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
-pub enum ImplementedEnableExtension {}
-
-/// A variant of [`EnableExtension::Unimplemented`].
-#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
-pub enum UnimplementedEnableExtension {
+pub enum ImplementedEnableExtension {
     /// Enables `f16`/`half` primitive support in all shader languages.
     ///
     /// In the WGSL standard, this corresponds to [`enable f16;`].
     ///
     /// [`enable f16;`]: https://www.w3.org/TR/WGSL/#extension-f16
     F16,
+}
+
+impl From<ImplementedEnableExtension> for EnableExtension {
+    fn from(value: ImplementedEnableExtension) -> Self {
+        Self::Implemented(value)
+    }
+}
+
+/// A variant of [`EnableExtension::Unimplemented`].
+#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
+pub enum UnimplementedEnableExtension {
     /// Enables the `clip_distances` variable in WGSL.
     ///
     /// In the WGSL standard, this corresponds to [`enable clip_distances;`].
@@ -104,7 +116,6 @@ pub enum UnimplementedEnableExtension {
 impl UnimplementedEnableExtension {
     pub(crate) const fn tracking_issue_num(self) -> u16 {
         match self {
-            Self::F16 => 4384,
             Self::ClipDistances => 6236,
             Self::DualSourceBlending => 6402,
         }
diff --git a/naga/src/front/wgsl/parse/lexer.rs b/naga/src/front/wgsl/parse/lexer.rs
index d55720972e9..8d64f52ac6a 100644
--- a/naga/src/front/wgsl/parse/lexer.rs
+++ b/naga/src/front/wgsl/parse/lexer.rs
@@ -408,14 +408,17 @@ impl<'a> Lexer<'a> {
     /// Parses a generic scalar type, for example `<f32>`.
     pub(in crate::front::wgsl) fn next_scalar_generic(&mut self) -> Result<Scalar, Error<'a>> {
         self.expect_generic_paren('<')?;
-        let pair = match self.next() {
+        let (scalar, _span) = match self.next() {
             (Token::Word(word), span) => {
-                conv::get_scalar_type(word).ok_or(Error::UnknownScalarType(span))
+                conv::get_scalar_type(&self.enable_extensions, span, word)?
+                    .map(|scalar| (scalar, span))
+                    .ok_or(Error::UnknownScalarType(span))?
             }
-            (_, span) => Err(Error::UnknownScalarType(span)),
-        }?;
+            (_, span) => return Err(Error::UnknownScalarType(span)),
+        };
+
         self.expect_generic_paren('>')?;
-        Ok(pair)
+        Ok(scalar)
     }
 
     /// Parses a generic scalar type, for example `<f32>`.
@@ -425,14 +428,18 @@ impl<'a> Lexer<'a> {
         &mut self,
     ) -> Result<(Scalar, Span), Error<'a>> {
         self.expect_generic_paren('<')?;
-        let pair = match self.next() {
-            (Token::Word(word), span) => conv::get_scalar_type(word)
-                .map(|scalar| (scalar, span))
-                .ok_or(Error::UnknownScalarType(span)),
-            (_, span) => Err(Error::UnknownScalarType(span)),
-        }?;
+
+        let (scalar, span) = match self.next() {
+            (Token::Word(word), span) => {
+                conv::get_scalar_type(&self.enable_extensions, span, word)?
+                    .map(|scalar| (scalar, span))
+                    .ok_or(Error::UnknownScalarType(span))?
+            }
+            (_, span) => return Err(Error::UnknownScalarType(span)),
+        };
+
         self.expect_generic_paren('>')?;
-        Ok(pair)
+        Ok((scalar, span))
     }
 
     pub(in crate::front::wgsl) fn next_storage_access(
@@ -493,6 +500,7 @@ fn sub_test(source: &str, expected_tokens: &[Token]) {
 
 #[test]
 fn test_numbers() {
+    use half::f16;
     // WGSL spec examples //
 
     // decimal integer
@@ -517,14 +525,16 @@ fn test_numbers() {
             Token::Number(Ok(Number::AbstractFloat(0.01))),
             Token::Number(Ok(Number::AbstractFloat(12.34))),
             Token::Number(Ok(Number::F32(0.))),
-            Token::Number(Err(NumberError::UnimplementedF16)),
+            Token::Number(Ok(Number::F16(f16::from_f32(0.)))),
             Token::Number(Ok(Number::AbstractFloat(0.001))),
             Token::Number(Ok(Number::AbstractFloat(43.75))),
             Token::Number(Ok(Number::F32(16.))),
             Token::Number(Ok(Number::AbstractFloat(0.1875))),
-            Token::Number(Err(NumberError::UnimplementedF16)),
+            // https://github.com/gfx-rs/wgpu/issues/7046
+            Token::Number(Err(NumberError::NotRepresentable)), // Should be 0.75
             Token::Number(Ok(Number::AbstractFloat(0.12109375))),
-            Token::Number(Err(NumberError::UnimplementedF16)),
+            // https://github.com/gfx-rs/wgpu/issues/7046
+            Token::Number(Err(NumberError::NotRepresentable)), // Should be 12.5
         ],
     );
 
diff --git a/naga/src/front/wgsl/parse/mod.rs b/naga/src/front/wgsl/parse/mod.rs
index 23b2984e75a..f4115014b8b 100644
--- a/naga/src/front/wgsl/parse/mod.rs
+++ b/naga/src/front/wgsl/parse/mod.rs
@@ -3,9 +3,7 @@ use crate::diagnostic_filter::{
     ShouldConflictOnFullDuplicate, StandardFilterableTriggeringRule,
 };
 use crate::front::wgsl::error::{DiagnosticAttributeNotSupportedPosition, Error, ExpectedToken};
-use crate::front::wgsl::parse::directive::enable_extension::{
-    EnableExtension, EnableExtensions, UnimplementedEnableExtension,
-};
+use crate::front::wgsl::parse::directive::enable_extension::{EnableExtension, EnableExtensions};
 use crate::front::wgsl::parse::directive::language_extension::LanguageExtension;
 use crate::front::wgsl::parse::directive::DirectiveKind;
 use crate::front::wgsl::parse::lexer::{Lexer, Token};
@@ -346,7 +344,7 @@ impl Parser {
         span: Span,
         ctx: &mut ExpressionContext<'a, '_, '_>,
     ) -> Result<Option<ast::ConstructorType<'a>>, Error<'a>> {
-        if let Some(scalar) = conv::get_scalar_type(word) {
+        if let Some(scalar) = conv::get_scalar_type(&lexer.enable_extensions, span, word)? {
             return Ok(Some(ast::ConstructorType::Scalar(scalar)));
         }
 
@@ -375,6 +373,13 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "vec2h" => {
+                return Ok(Some(ast::ConstructorType::Vector {
+                    size: crate::VectorSize::Bi,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "vec3" => ast::ConstructorType::PartialVector {
                 size: crate::VectorSize::Tri,
             },
@@ -399,6 +404,13 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "vec3h" => {
+                return Ok(Some(ast::ConstructorType::Vector {
+                    size: crate::VectorSize::Tri,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "vec4" => ast::ConstructorType::PartialVector {
                 size: crate::VectorSize::Quad,
             },
@@ -423,6 +435,13 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "vec4h" => {
+                return Ok(Some(ast::ConstructorType::Vector {
+                    size: crate::VectorSize::Quad,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat2x2" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Bi,
                 rows: crate::VectorSize::Bi,
@@ -435,6 +454,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat2x2h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Bi,
+                    rows: crate::VectorSize::Bi,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat2x3" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Bi,
                 rows: crate::VectorSize::Tri,
@@ -447,6 +474,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat2x3h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Bi,
+                    rows: crate::VectorSize::Tri,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat2x4" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Bi,
                 rows: crate::VectorSize::Quad,
@@ -459,6 +494,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat2x4h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Bi,
+                    rows: crate::VectorSize::Quad,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat3x2" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Tri,
                 rows: crate::VectorSize::Bi,
@@ -471,6 +514,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat3x2h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Tri,
+                    rows: crate::VectorSize::Bi,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat3x3" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Tri,
                 rows: crate::VectorSize::Tri,
@@ -483,6 +534,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat3x3h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Tri,
+                    rows: crate::VectorSize::Tri,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat3x4" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Tri,
                 rows: crate::VectorSize::Quad,
@@ -495,6 +554,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat3x4h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Tri,
+                    rows: crate::VectorSize::Quad,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat4x2" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Quad,
                 rows: crate::VectorSize::Bi,
@@ -507,6 +574,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat4x2h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Quad,
+                    rows: crate::VectorSize::Bi,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat4x3" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Quad,
                 rows: crate::VectorSize::Tri,
@@ -519,6 +594,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat4x3h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Quad,
+                    rows: crate::VectorSize::Tri,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat4x4" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Quad,
                 rows: crate::VectorSize::Quad,
@@ -531,6 +614,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat4x4h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Quad,
+                    rows: crate::VectorSize::Quad,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "array" => ast::ConstructorType::PartialArray,
             "atomic"
             | "binding_array"
@@ -725,15 +816,17 @@ impl Parser {
             }
             (Token::Number(res), span) => {
                 let _ = lexer.next();
-                let num = res.map_err(|err| match err {
-                    super::error::NumberError::UnimplementedF16 => {
-                        Error::EnableExtensionNotEnabled {
-                            kind: EnableExtension::Unimplemented(UnimplementedEnableExtension::F16),
+                let num = res.map_err(|err| Error::BadNumber(span, err))?;
+
+                if let Some(enable_extension) = num.required_enable_extension() {
+                    if !lexer.enable_extensions.contains(enable_extension) {
+                        return Err(Error::EnableExtensionNotEnabled {
+                            kind: enable_extension.into(),
                             span,
-                        }
+                        });
                     }
-                    err => Error::BadNumber(span, err),
-                })?;
+                }
+
                 ast::Expression::Literal(ast::Literal::Number(num))
             }
             (Token::Word("RAY_FLAG_NONE"), _) => {
@@ -1268,9 +1361,10 @@ impl Parser {
         &mut self,
         lexer: &mut Lexer<'a>,
         word: &'a str,
+        span: Span,
         ctx: &mut ExpressionContext<'a, '_, '_>,
     ) -> Result<Option<ast::Type<'a>>, Error<'a>> {
-        if let Some(scalar) = conv::get_scalar_type(word) {
+        if let Some(scalar) = conv::get_scalar_type(&lexer.enable_extensions, span, word)? {
             return Ok(Some(ast::Type::Scalar(scalar)));
         }
 
@@ -1298,6 +1392,11 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "vec2h" => ast::Type::Vector {
+                size: crate::VectorSize::Bi,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "vec3" => {
                 let (ty, ty_span) = self.singular_generic(lexer, ctx)?;
                 ast::Type::Vector {
@@ -1321,6 +1420,11 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "vec3h" => ast::Type::Vector {
+                size: crate::VectorSize::Tri,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "vec4" => {
                 let (ty, ty_span) = self.singular_generic(lexer, ctx)?;
                 ast::Type::Vector {
@@ -1344,6 +1448,11 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "vec4h" => ast::Type::Vector {
+                size: crate::VectorSize::Quad,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat2x2" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Bi)?
             }
@@ -1353,6 +1462,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat2x2h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Bi,
+                rows: crate::VectorSize::Bi,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat2x3" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Tri)?
             }
@@ -1362,6 +1477,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat2x3h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Bi,
+                rows: crate::VectorSize::Tri,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat2x4" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Quad)?
             }
@@ -1371,6 +1492,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat2x4h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Bi,
+                rows: crate::VectorSize::Quad,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat3x2" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Bi)?
             }
@@ -1380,6 +1507,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat3x2h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Tri,
+                rows: crate::VectorSize::Bi,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat3x3" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Tri)?
             }
@@ -1389,6 +1522,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat3x3h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Tri,
+                rows: crate::VectorSize::Tri,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat3x4" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Quad)?
             }
@@ -1398,6 +1537,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat3x4h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Tri,
+                rows: crate::VectorSize::Quad,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat4x2" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Bi)?
             }
@@ -1407,6 +1552,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat4x2h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Quad,
+                rows: crate::VectorSize::Bi,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat4x3" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Tri)?
             }
@@ -1416,6 +1567,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat4x3h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Quad,
+                rows: crate::VectorSize::Tri,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat4x4" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Quad)?
             }
@@ -1425,6 +1582,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat4x4h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Quad,
+                rows: crate::VectorSize::Quad,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "atomic" => {
                 let scalar = lexer.next_scalar_generic()?;
                 ast::Type::Atomic(scalar)
@@ -1681,7 +1844,7 @@ impl Parser {
 
             let (name, span) = lexer.next_ident_with_span()?;
 
-            let ty = match this.type_decl_impl(lexer, name, ctx)? {
+            let ty = match this.type_decl_impl(lexer, name, span, ctx)? {
                 Some(ty) => ty,
                 None => {
                     ctx.unresolved.insert(ast::Dependency {
diff --git a/naga/src/front/wgsl/parse/number.rs b/naga/src/front/wgsl/parse/number.rs
index 72795de6b42..87dfb0a02cb 100644
--- a/naga/src/front/wgsl/parse/number.rs
+++ b/naga/src/front/wgsl/parse/number.rs
@@ -1,5 +1,7 @@
 use crate::front::wgsl::error::NumberError;
+use crate::front::wgsl::parse::directive::enable_extension::ImplementedEnableExtension;
 use crate::front::wgsl::parse::lexer::Token;
+use half::f16;
 
 /// When using this type assume no Abstract Int/Float for now
 #[derive(Copy, Clone, Debug, PartialEq)]
@@ -16,12 +18,23 @@ pub enum Number {
     I64(i64),
     /// Concrete u64
     U64(u64),
+    /// Concrete f16
+    F16(f16),
     /// Concrete f32
     F32(f32),
     /// Concrete f64
     F64(f64),
 }
 
+impl Number {
+    pub(super) const fn required_enable_extension(&self) -> Option<ImplementedEnableExtension> {
+        match *self {
+            Number::F16(_) => Some(ImplementedEnableExtension::F16),
+            _ => None,
+        }
+    }
+}
+
 pub(in crate::front::wgsl) fn consume_number(input: &str) -> (Token<'_>, &str) {
     let (result, rest) = parse(input);
     (Token::Number(result), rest)
@@ -367,7 +380,8 @@ fn parse_hex_float(input: &str, kind: Option<FloatKind>) -> Result<Number, Numbe
             // can only be ParseHexfErrorKind::Inexact but we can't check since it's private
             _ => Err(NumberError::NotRepresentable),
         },
-        Some(FloatKind::F16) => Err(NumberError::UnimplementedF16),
+        // TODO: f16 is not supported by hexf_parse
+        Some(FloatKind::F16) => Err(NumberError::NotRepresentable),
         Some(FloatKind::F32) => match hexf_parse::parse_hexf32(input, false) {
             Ok(num) => Ok(Number::F32(num)),
             // can only be ParseHexfErrorKind::Inexact but we can't check since it's private
@@ -403,7 +417,12 @@ fn parse_dec_float(input: &str, kind: Option<FloatKind>) -> Result<Number, Numbe
                 .then_some(Number::F64(num))
                 .ok_or(NumberError::NotRepresentable)
         }
-        Some(FloatKind::F16) => Err(NumberError::UnimplementedF16),
+        Some(FloatKind::F16) => {
+            let num = input.parse::<f16>().unwrap(); // will never fail
+            num.is_finite()
+                .then_some(Number::F16(num))
+                .ok_or(NumberError::NotRepresentable)
+        }
     }
 }
 
diff --git a/naga/src/lib.rs b/naga/src/lib.rs
index 2e917d34e04..d78bdfb7afb 100644
--- a/naga/src/lib.rs
+++ b/naga/src/lib.rs
@@ -271,6 +271,7 @@ pub use crate::span::{SourceLocation, Span, SpanContext, WithSpan};
 #[cfg(feature = "arbitrary")]
 use arbitrary::Arbitrary;
 use diagnostic_filter::DiagnosticFilterNode;
+use half::f16;
 #[cfg(feature = "deserialize")]
 use serde::Deserialize;
 #[cfg(feature = "serialize")]
@@ -895,6 +896,7 @@ pub enum Literal {
     F64(f64),
     /// May not be NaN or infinity.
     F32(f32),
+    F16(f16),
     U32(u32),
     I32(i32),
     U64(u64),
diff --git a/naga/src/proc/constant_evaluator.rs b/naga/src/proc/constant_evaluator.rs
index be7c503a017..ed713bf9210 100644
--- a/naga/src/proc/constant_evaluator.rs
+++ b/naga/src/proc/constant_evaluator.rs
@@ -1,6 +1,8 @@
 use std::iter;
 
 use arrayvec::ArrayVec;
+use half::f16;
+use num_traits::{real::Real, FromPrimitive, One, ToPrimitive, Zero};
 
 use crate::{
     arena::{Arena, Handle, HandleVec, UniqueArena},
@@ -199,6 +201,7 @@ gen_component_wise_extractor! {
     literals: [
         AbstractFloat => AbstractFloat: f64,
         F32 => F32: f32,
+        F16 => F16: f16,
         AbstractInt => AbstractInt: i64,
         U32 => U32: u32,
         I32 => I32: i32,
@@ -219,6 +222,7 @@ gen_component_wise_extractor! {
     literals: [
         AbstractFloat => Abstract: f64,
         F32 => F32: f32,
+        F16 => F16: f16,
     ],
     scalar_kinds: [
         Float,
@@ -244,6 +248,7 @@ gen_component_wise_extractor! {
         AbstractFloat => AbstractFloat: f64,
         AbstractInt => AbstractInt: i64,
         F32 => F32: f32,
+        F16 => F16: f16,
         I32 => I32: i32,
     ],
     scalar_kinds: [
@@ -1103,6 +1108,7 @@ impl<'a> ConstantEvaluator<'a> {
                 component_wise_scalar(self, span, [arg], |args| match args {
                     Scalar::AbstractFloat([e]) => Ok(Scalar::AbstractFloat([e.abs()])),
                     Scalar::F32([e]) => Ok(Scalar::F32([e.abs()])),
+                    Scalar::F16([e]) => Ok(Scalar::F16([e.abs()])),
                     Scalar::AbstractInt([e]) => Ok(Scalar::AbstractInt([e.abs()])),
                     Scalar::I32([e]) => Ok(Scalar::I32([e.wrapping_abs()])),
                     Scalar::U32([e]) => Ok(Scalar::U32([e])), // TODO: just re-use the expression, ezpz
@@ -1134,9 +1140,13 @@ impl<'a> ConstantEvaluator<'a> {
                     }
                 )
             }
-            crate::MathFunction::Saturate => {
-                component_wise_float!(self, span, [arg], |e| { Ok([e.clamp(0., 1.)]) })
-            }
+            crate::MathFunction::Saturate => component_wise_float(self, span, [arg], |e| match e {
+                Float::F16([e]) => Ok(Float::F16(
+                    [e.clamp(f16::from_f32(0.0), f16::from_f32(1.0))],
+                )),
+                Float::F32([e]) => Ok(Float::F32([e.clamp(0., 1.)])),
+                Float::Abstract([e]) => Ok(Float::Abstract([e.clamp(0., 1.)])),
+            }),
 
             // trigonometry
             crate::MathFunction::Cos => {
@@ -1213,6 +1223,9 @@ impl<'a> ConstantEvaluator<'a> {
                 component_wise_float(self, span, [arg], |e| match e {
                     Float::Abstract([e]) => Ok(Float::Abstract([round_ties_even(e)])),
                     Float::F32([e]) => Ok(Float::F32([(round_ties_even(e as f64) as f32)])),
+                    Float::F16([e]) => {
+                        Ok(Float::F16([(f16::from_f64(round_ties_even(f64::from(e))))]))
+                    }
                 })
             }
             crate::MathFunction::Fract => {
@@ -1258,15 +1271,27 @@ impl<'a> ConstantEvaluator<'a> {
                 )
             }
             crate::MathFunction::Step => {
-                component_wise_float!(self, span, [arg, arg1.unwrap()], |edge, x| {
-                    Ok([if edge <= x { 1.0 } else { 0.0 }])
+                component_wise_float(self, span, [arg, arg1.unwrap()], |x| match x {
+                    Float::Abstract([edge, x]) => {
+                        Ok(Float::Abstract([if edge <= x { 1.0 } else { 0.0 }]))
+                    }
+                    Float::F32([edge, x]) => Ok(Float::F32([if edge <= x { 1.0 } else { 0.0 }])),
+                    Float::F16([edge, x]) => Ok(Float::F16([if edge <= x {
+                        f16::one()
+                    } else {
+                        f16::zero()
+                    }])),
                 })
             }
             crate::MathFunction::Sqrt => {
                 component_wise_float!(self, span, [arg], |e| { Ok([e.sqrt()]) })
             }
             crate::MathFunction::InverseSqrt => {
-                component_wise_float!(self, span, [arg], |e| { Ok([1. / e.sqrt()]) })
+                component_wise_float(self, span, [arg], |e| match e {
+                    Float::Abstract([e]) => Ok(Float::Abstract([1. / e.sqrt()])),
+                    Float::F32([e]) => Ok(Float::F32([1. / e.sqrt()])),
+                    Float::F16([e]) => Ok(Float::F16([f16::from_f32(1. / f32::from(e).sqrt())])),
+                })
             }
 
             // bits
@@ -1561,6 +1586,7 @@ impl<'a> ConstantEvaluator<'a> {
                         Literal::I32(v) => v,
                         Literal::U32(v) => v as i32,
                         Literal::F32(v) => v as i32,
+                        Literal::F16(v) => f16::to_i32(&v).unwrap(), //Only None on NaN or Inf
                         Literal::Bool(v) => v as i32,
                         Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
                             return make_error();
@@ -1572,6 +1598,7 @@ impl<'a> ConstantEvaluator<'a> {
                         Literal::I32(v) => v as u32,
                         Literal::U32(v) => v,
                         Literal::F32(v) => v as u32,
+                        Literal::F16(v) => f16::to_u32(&v).unwrap(), //Only None on NaN or Inf
                         Literal::Bool(v) => v as u32,
                         Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
                             return make_error();
@@ -1587,6 +1614,7 @@ impl<'a> ConstantEvaluator<'a> {
                         Literal::F64(v) => v as i64,
                         Literal::I64(v) => v,
                         Literal::U64(v) => v as i64,
+                        Literal::F16(v) => f16::to_i64(&v).unwrap(), //Only None on NaN or Inf
                         Literal::AbstractInt(v) => i64::try_from_abstract(v)?,
                         Literal::AbstractFloat(v) => i64::try_from_abstract(v)?,
                     }),
@@ -1598,9 +1626,22 @@ impl<'a> ConstantEvaluator<'a> {
                         Literal::F64(v) => v as u64,
                         Literal::I64(v) => v as u64,
                         Literal::U64(v) => v,
+                        Literal::F16(v) => f16::to_u64(&v).unwrap(), //Only None on NaN or Inf
                         Literal::AbstractInt(v) => u64::try_from_abstract(v)?,
                         Literal::AbstractFloat(v) => u64::try_from_abstract(v)?,
                     }),
+                    Sc::F16 => Literal::F16(match literal {
+                        Literal::F16(v) => v,
+                        Literal::F32(v) => f16::from_f32(v),
+                        Literal::F64(v) => f16::from_f64(v),
+                        Literal::Bool(v) => f16::from_u32(v as u32).unwrap(),
+                        Literal::I64(v) => f16::from_i64(v).unwrap(),
+                        Literal::U64(v) => f16::from_u64(v).unwrap(),
+                        Literal::I32(v) => f16::from_i32(v).unwrap(),
+                        Literal::U32(v) => f16::from_u32(v).unwrap(),
+                        Literal::AbstractFloat(v) => f16::try_from_abstract(v)?,
+                        Literal::AbstractInt(v) => f16::try_from_abstract(v)?,
+                    }),
                     Sc::F32 => Literal::F32(match literal {
                         Literal::I32(v) => v as f32,
                         Literal::U32(v) => v as f32,
@@ -1609,12 +1650,14 @@ impl<'a> ConstantEvaluator<'a> {
                         Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
                             return make_error();
                         }
+                        Literal::F16(v) => f16::to_f32(v),
                         Literal::AbstractInt(v) => f32::try_from_abstract(v)?,
                         Literal::AbstractFloat(v) => f32::try_from_abstract(v)?,
                     }),
                     Sc::F64 => Literal::F64(match literal {
                         Literal::I32(v) => v as f64,
                         Literal::U32(v) => v as f64,
+                        Literal::F16(v) => f16::to_f64(v),
                         Literal::F32(v) => v as f64,
                         Literal::F64(v) => v,
                         Literal::Bool(v) => v as u32 as f64,
@@ -1626,6 +1669,7 @@ impl<'a> ConstantEvaluator<'a> {
                         Literal::I32(v) => v != 0,
                         Literal::U32(v) => v != 0,
                         Literal::F32(v) => v != 0.0,
+                        Literal::F16(v) => v != f16::zero(),
                         Literal::Bool(v) => v,
                         Literal::F64(_)
                         | Literal::I64(_)
@@ -1780,6 +1824,7 @@ impl<'a> ConstantEvaluator<'a> {
                     Literal::I32(v) => Literal::I32(v.wrapping_neg()),
                     Literal::I64(v) => Literal::I64(v.wrapping_neg()),
                     Literal::F32(v) => Literal::F32(-v),
+                    Literal::F16(v) => Literal::F16(-v),
                     Literal::AbstractInt(v) => Literal::AbstractInt(v.wrapping_neg()),
                     Literal::AbstractFloat(v) => Literal::AbstractFloat(-v),
                     _ => return Err(ConstantEvaluatorError::InvalidUnaryOpArg),
@@ -1928,6 +1973,14 @@ impl<'a> ConstantEvaluator<'a> {
                                 _ => return Err(ConstantEvaluatorError::InvalidBinaryOpArgs),
                             })
                         }
+                        (Literal::F16(a), Literal::F16(b)) => Literal::F16(match op {
+                            BinaryOperator::Add => a + b,
+                            BinaryOperator::Subtract => a - b,
+                            BinaryOperator::Multiply => a * b,
+                            BinaryOperator::Divide => a / b,
+                            BinaryOperator::Modulo => a % b,
+                            _ => return Err(ConstantEvaluatorError::InvalidBinaryOpArgs),
+                        }),
                         (Literal::AbstractInt(a), Literal::AbstractInt(b)) => {
                             Literal::AbstractInt(match op {
                                 BinaryOperator::Add => a.checked_add(b).ok_or_else(|| {
@@ -2532,6 +2585,32 @@ impl TryFromAbstract<f64> for u64 {
     }
 }
 
+impl TryFromAbstract<f64> for f16 {
+    fn try_from_abstract(value: f64) -> Result<f16, ConstantEvaluatorError> {
+        let f = f16::from_f64(value);
+        if f.is_infinite() {
+            return Err(ConstantEvaluatorError::AutomaticConversionLossy {
+                value: format!("{value:?}"),
+                to_type: "f16",
+            });
+        }
+        Ok(f)
+    }
+}
+
+impl TryFromAbstract<i64> for f16 {
+    fn try_from_abstract(value: i64) -> Result<f16, ConstantEvaluatorError> {
+        let f = f16::from_i64(value);
+        if f.is_none() {
+            return Err(ConstantEvaluatorError::AutomaticConversionLossy {
+                value: format!("{value:?}"),
+                to_type: "f16",
+            });
+        }
+        Ok(f.unwrap())
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use std::vec;
diff --git a/naga/src/proc/mod.rs b/naga/src/proc/mod.rs
index a6a19f70eda..ef095cff399 100644
--- a/naga/src/proc/mod.rs
+++ b/naga/src/proc/mod.rs
@@ -79,6 +79,7 @@ impl From<super::StorageFormat> for super::Scalar {
 pub enum HashableLiteral {
     F64(u64),
     F32(u32),
+    F16(u16),
     U32(u32),
     I32(i32),
     U64(u64),
@@ -93,6 +94,7 @@ impl From<crate::Literal> for HashableLiteral {
         match l {
             crate::Literal::F64(v) => Self::F64(v.to_bits()),
             crate::Literal::F32(v) => Self::F32(v.to_bits()),
+            crate::Literal::F16(v) => Self::F16(v.to_bits()),
             crate::Literal::U32(v) => Self::U32(v),
             crate::Literal::I32(v) => Self::I32(v),
             crate::Literal::U64(v) => Self::U64(v),
@@ -131,6 +133,7 @@ impl crate::Literal {
         match *self {
             Self::F64(_) | Self::I64(_) | Self::U64(_) => 8,
             Self::F32(_) | Self::U32(_) | Self::I32(_) => 4,
+            Self::F16(_) => 2,
             Self::Bool(_) => crate::BOOL_WIDTH,
             Self::AbstractInt(_) | Self::AbstractFloat(_) => crate::ABSTRACT_WIDTH,
         }
@@ -139,6 +142,7 @@ impl crate::Literal {
         match *self {
             Self::F64(_) => crate::Scalar::F64,
             Self::F32(_) => crate::Scalar::F32,
+            Self::F16(_) => crate::Scalar::F16,
             Self::U32(_) => crate::Scalar::U32,
             Self::I32(_) => crate::Scalar::I32,
             Self::U64(_) => crate::Scalar::U64,
diff --git a/naga/src/proc/type_methods.rs b/naga/src/proc/type_methods.rs
index 351f4c53686..9b44f05db54 100644
--- a/naga/src/proc/type_methods.rs
+++ b/naga/src/proc/type_methods.rs
@@ -20,6 +20,10 @@ impl crate::ScalarKind {
 }
 
 impl crate::Scalar {
+    pub const F16: Self = Self {
+        kind: crate::ScalarKind::Float,
+        width: 2,
+    };
     pub const I32: Self = Self {
         kind: crate::ScalarKind::Sint,
         width: 4,
diff --git a/naga/src/valid/expression.rs b/naga/src/valid/expression.rs
index b0c54a3df4a..15e01b661ce 100644
--- a/naga/src/valid/expression.rs
+++ b/naga/src/valid/expression.rs
@@ -1767,7 +1767,7 @@ impl super::Validator {
     }
 
     pub fn validate_literal(&self, literal: crate::Literal) -> Result<(), LiteralError> {
-        self.check_width(literal.scalar())?;
+        let _ = self.check_width(literal.scalar())?;
         check_literal_value(literal)?;
 
         Ok(())
diff --git a/naga/src/valid/interface.rs b/naga/src/valid/interface.rs
index a6fff61d826..3ae16b232bb 100644
--- a/naga/src/valid/interface.rs
+++ b/naga/src/valid/interface.rs
@@ -1,6 +1,6 @@
 use super::{
     analyzer::{FunctionInfo, GlobalUse},
-    Capabilities, Disalignment, FunctionError, ModuleInfo,
+    Capabilities, Disalignment, FunctionError, ModuleInfo, PushConstantError,
 };
 use crate::arena::{Handle, UniqueArena};
 
@@ -39,6 +39,8 @@ pub enum GlobalVariableError {
     InitializerNotAllowed(crate::AddressSpace),
     #[error("Storage address space doesn't support write-only access")]
     StorageAddressSpaceWriteOnlyNotSupported,
+    #[error("Type is not valid for use as a push constant")]
+    InvalidPushConstantType(#[source] PushConstantError),
 }
 
 #[derive(Clone, Debug, thiserror::Error)]
@@ -569,6 +571,9 @@ impl super::Validator {
                         Capabilities::PUSH_CONSTANT,
                     ));
                 }
+                if let Err(ref err) = type_info.push_constant_compatibility {
+                    return Err(GlobalVariableError::InvalidPushConstantType(err.clone()));
+                }
                 (
                     TypeFlags::DATA
                         | TypeFlags::COPY
diff --git a/naga/src/valid/mod.rs b/naga/src/valid/mod.rs
index 8560404703a..3ffed6a8e42 100644
--- a/naga/src/valid/mod.rs
+++ b/naga/src/valid/mod.rs
@@ -28,7 +28,7 @@ pub use expression::{check_literal_value, LiteralError};
 pub use expression::{ConstExpressionError, ExpressionError};
 pub use function::{CallError, FunctionError, LocalVariableError};
 pub use interface::{EntryPointError, GlobalVariableError, VaryingError};
-pub use r#type::{Disalignment, TypeError, TypeFlags, WidthError};
+pub use r#type::{Disalignment, PushConstantError, TypeError, TypeFlags, WidthError};
 
 use self::handles::InvalidHandleError;
 
@@ -158,6 +158,8 @@ bitflags::bitflags! {
         const TEXTURE_ATOMIC = 1 << 23;
         /// Support for atomic operations on 64-bit images.
         const TEXTURE_INT64_ATOMIC = 1 << 24;
+        /// Support for 16-bit floating-point types.
+        const SHADER_FLOAT16 = 1 << 25;
     }
 }
 
diff --git a/naga/src/valid/type.rs b/naga/src/valid/type.rs
index 8c6825b842e..837e00e2399 100644
--- a/naga/src/valid/type.rs
+++ b/naga/src/valid/type.rs
@@ -167,8 +167,16 @@ pub enum WidthError {
     Abstract,
 }
 
+#[derive(Clone, Debug, thiserror::Error)]
+#[cfg_attr(test, derive(PartialEq))]
+pub enum PushConstantError {
+    #[error("The scalar type {0:?} is not supported in push constants")]
+    InvalidScalar(crate::Scalar),
+}
+
 // Only makes sense if `flags.contains(HOST_SHAREABLE)`
 type LayoutCompatibility = Result<Alignment, (Handle<crate::Type>, Disalignment)>;
+type PushConstantCompatibility = Result<(), PushConstantError>;
 
 fn check_member_layout(
     accum: &mut LayoutCompatibility,
@@ -219,6 +227,7 @@ pub(super) struct TypeInfo {
     pub flags: TypeFlags,
     pub uniform_layout: LayoutCompatibility,
     pub storage_layout: LayoutCompatibility,
+    pub push_constant_compatibility: PushConstantCompatibility,
 }
 
 impl TypeInfo {
@@ -227,6 +236,7 @@ impl TypeInfo {
             flags: TypeFlags::empty(),
             uniform_layout: Ok(Alignment::ONE),
             storage_layout: Ok(Alignment::ONE),
+            push_constant_compatibility: Ok(()),
         }
     }
 
@@ -235,6 +245,7 @@ impl TypeInfo {
             flags,
             uniform_layout: Ok(alignment),
             storage_layout: Ok(alignment),
+            push_constant_compatibility: Ok(()),
         }
     }
 }
@@ -248,11 +259,15 @@ impl super::Validator {
         }
     }
 
-    pub(super) const fn check_width(&self, scalar: crate::Scalar) -> Result<(), WidthError> {
+    pub(super) const fn check_width(
+        &self,
+        scalar: crate::Scalar,
+    ) -> Result<PushConstantCompatibility, WidthError> {
+        let mut push_constant_compatibility = Ok(());
         let good = match scalar.kind {
             crate::ScalarKind::Bool => scalar.width == crate::BOOL_WIDTH,
-            crate::ScalarKind::Float => {
-                if scalar.width == 8 {
+            crate::ScalarKind::Float => match scalar.width {
+                8 => {
                     if !self.capabilities.contains(Capabilities::FLOAT64) {
                         return Err(WidthError::MissingCapability {
                             name: "f64",
@@ -260,10 +275,21 @@ impl super::Validator {
                         });
                     }
                     true
-                } else {
-                    scalar.width == 4
                 }
-            }
+                2 => {
+                    if !self.capabilities.contains(Capabilities::SHADER_FLOAT16) {
+                        return Err(WidthError::MissingCapability {
+                            name: "f16",
+                            flag: "FLOAT16",
+                        });
+                    }
+
+                    push_constant_compatibility = Err(PushConstantError::InvalidScalar(scalar));
+
+                    true
+                }
+                _ => scalar.width == 4,
+            },
             crate::ScalarKind::Sint => {
                 if scalar.width == 8 {
                     if !self.capabilities.contains(Capabilities::SHADER_INT64) {
@@ -295,7 +321,7 @@ impl super::Validator {
             }
         };
         if good {
-            Ok(())
+            Ok(push_constant_compatibility)
         } else {
             Err(WidthError::Invalid(scalar.kind, scalar.width))
         }
@@ -315,13 +341,13 @@ impl super::Validator {
         use crate::TypeInner as Ti;
         Ok(match gctx.types[handle].inner {
             Ti::Scalar(scalar) => {
-                self.check_width(scalar)?;
+                let push_constant_compatibility = self.check_width(scalar)?;
                 let shareable = if scalar.kind.is_numeric() {
                     TypeFlags::IO_SHAREABLE | TypeFlags::HOST_SHAREABLE
                 } else {
                     TypeFlags::empty()
                 };
-                TypeInfo::new(
+                let mut type_info = TypeInfo::new(
                     TypeFlags::DATA
                         | TypeFlags::SIZED
                         | TypeFlags::COPY
@@ -330,16 +356,18 @@ impl super::Validator {
                         | TypeFlags::CREATION_RESOLVED
                         | shareable,
                     Alignment::from_width(scalar.width),
-                )
+                );
+                type_info.push_constant_compatibility = push_constant_compatibility;
+                type_info
             }
             Ti::Vector { size, scalar } => {
-                self.check_width(scalar)?;
+                let push_constant_compatibility = self.check_width(scalar)?;
                 let shareable = if scalar.kind.is_numeric() {
                     TypeFlags::IO_SHAREABLE | TypeFlags::HOST_SHAREABLE
                 } else {
                     TypeFlags::empty()
                 };
-                TypeInfo::new(
+                let mut type_info = TypeInfo::new(
                     TypeFlags::DATA
                         | TypeFlags::SIZED
                         | TypeFlags::COPY
@@ -348,7 +376,9 @@ impl super::Validator {
                         | TypeFlags::CREATION_RESOLVED
                         | shareable,
                     Alignment::from(size) * Alignment::from_width(scalar.width),
-                )
+                );
+                type_info.push_constant_compatibility = push_constant_compatibility;
+                type_info
             }
             Ti::Matrix {
                 columns: _,
@@ -358,8 +388,8 @@ impl super::Validator {
                 if scalar.kind != crate::ScalarKind::Float {
                     return Err(TypeError::MatrixElementNotFloat);
                 }
-                self.check_width(scalar)?;
-                TypeInfo::new(
+                let push_constant_compatibility = self.check_width(scalar)?;
+                let mut type_info = TypeInfo::new(
                     TypeFlags::DATA
                         | TypeFlags::SIZED
                         | TypeFlags::COPY
@@ -368,7 +398,9 @@ impl super::Validator {
                         | TypeFlags::CONSTRUCTIBLE
                         | TypeFlags::CREATION_RESOLVED,
                     Alignment::from(rows) * Alignment::from_width(scalar.width),
-                )
+                );
+                type_info.push_constant_compatibility = push_constant_compatibility;
+                type_info
             }
             Ti::Atomic(scalar) => {
                 match scalar {
@@ -463,7 +495,7 @@ impl super::Validator {
                 // However, some cases are trivial: All our implicit base types
                 // are DATA and SIZED, so we can never return
                 // `InvalidPointerBase` or `InvalidPointerToUnsized`.
-                self.check_width(scalar)?;
+                let _ = self.check_width(scalar)?;
 
                 // `Validator::validate_function` actually checks the address
                 // space of pointer arguments explicitly before checking the
@@ -549,6 +581,7 @@ impl super::Validator {
                     flags: base_info.flags & type_info_mask,
                     uniform_layout,
                     storage_layout,
+                    push_constant_compatibility: base_info.push_constant_compatibility.clone(),
                 }
             }
             Ti::Struct { ref members, span } => {
@@ -629,6 +662,10 @@ impl super::Validator {
                         base_info.storage_layout,
                         handle,
                     );
+                    if base_info.push_constant_compatibility.is_err() {
+                        ti.push_constant_compatibility =
+                            base_info.push_constant_compatibility.clone();
+                    }
 
                     // Validate rule: If a structure member itself has a structure type S,
                     // then the number of bytes between the start of that member and
diff --git a/naga/tests/example_wgsl.rs b/naga/tests/example_wgsl.rs
index 115d8769a7f..cc6a942a096 100644
--- a/naga/tests/example_wgsl.rs
+++ b/naga/tests/example_wgsl.rs
@@ -1,55 +1,51 @@
 #![cfg(feature = "wgsl-in")]
 
 use naga::{front::wgsl, valid::Validator};
-use std::{fs, path::PathBuf};
+use std::{ffi::OsStr, fs, path::Path};
 
 /// Runs through all example shaders and ensures they are valid wgsl.
 #[test]
 pub fn parse_example_wgsl() {
-    let read_dir = match PathBuf::from(env!("CARGO_MANIFEST_DIR"))
-        .join("examples")
-        .read_dir()
-    {
-        Ok(iter) => iter,
-        Err(e) => {
-            log::error!("Unable to open the examples folder: {:?}", e);
-            return;
-        }
-    };
-    for example_entry in read_dir {
-        let read_files = match example_entry {
-            Ok(dir_entry) => match dir_entry.path().read_dir() {
-                Ok(iter) => iter,
-                Err(_) => continue,
-            },
-            Err(e) => {
-                log::warn!("Skipping example: {:?}", e);
-                continue;
-            }
+    let example_path = Path::new(env!("CARGO_MANIFEST_DIR"))
+        .parent()
+        .unwrap()
+        .join("examples");
+
+    println!("Looking for examples in {}", example_path.display());
+
+    let mut example_paths = Vec::new();
+    for example_entry in walkdir::WalkDir::new(example_path) {
+        let Ok(dir_entry) = example_entry else {
+            continue;
         };
-        for file_entry in read_files {
-            let shader = match file_entry {
-                Ok(entry) => match entry.path().extension() {
-                    Some(ostr) if ostr == "wgsl" => {
-                        println!("Validating {:?}", entry.path());
-                        fs::read_to_string(entry.path()).unwrap_or_default()
-                    }
-                    _ => continue,
-                },
-                Err(e) => {
-                    log::warn!("Skipping file: {:?}", e);
-                    continue;
-                }
-            };
-
-            let module = wgsl::parse_str(&shader).unwrap();
-            //TODO: re-use the validator
-            Validator::new(
-                naga::valid::ValidationFlags::all(),
-                naga::valid::Capabilities::all(),
-            )
-            .validate(&module)
-            .unwrap();
+
+        if !dir_entry.file_type().is_file() {
+            continue;
+        }
+
+        let path = dir_entry.path();
+
+        if path.extension().map(OsStr::to_string_lossy).as_deref() == Some("wgsl") {
+            example_paths.push(path.to_path_buf());
         }
     }
+
+    assert_ne!(example_paths.len(), 0, "No examples found!");
+
+    println!("Found {} examples", example_paths.len());
+
+    for example_path in example_paths {
+        println!("\tParsing {}", example_path.display());
+
+        let shader = fs::read_to_string(&example_path).unwrap();
+
+        let module = wgsl::parse_str(&shader).unwrap();
+        //TODO: re-use the validator
+        Validator::new(
+            naga::valid::ValidationFlags::all(),
+            naga::valid::Capabilities::all(),
+        )
+        .validate(&module)
+        .unwrap();
+    }
 }
diff --git a/naga/tests/in/extra.wgsl b/naga/tests/in/extra.wgsl
index ef68f4aa80e..1a7ab91ff56 100644
--- a/naga/tests/in/extra.wgsl
+++ b/naga/tests/in/extra.wgsl
@@ -1,6 +1,6 @@
 struct PushConstants {
     index: u32,
-    double: vec2<f64>,
+    double: vec2<f32>,
 }
 var<push_constant> pc: PushConstants;
 
diff --git a/naga/tests/in/f16.param.ron b/naga/tests/in/f16.param.ron
new file mode 100644
index 00000000000..cf105af810b
--- /dev/null
+++ b/naga/tests/in/f16.param.ron
@@ -0,0 +1,22 @@
+(
+	god_mode: true,
+	spv: (
+		version: (1, 0),
+	),
+	hlsl: (
+		shader_model: V6_2,
+		binding_map: {},
+		fake_missing_bindings: true,
+		special_constants_binding: Some((space: 1, register: 0)),
+		push_constants_target: Some((space: 0, register: 0)),
+		zero_initialize_workgroup_memory: true,
+	),
+	msl: (
+		lang_version: (1, 0),
+		per_entry_point_map: {},
+		inline_samplers: [],
+		spirv_cross_compatibility: false,
+		fake_missing_bindings: true,
+		zero_initialize_workgroup_memory: true,
+	),
+)
diff --git a/naga/tests/in/f16.wgsl b/naga/tests/in/f16.wgsl
new file mode 100644
index 00000000000..dbef9aedfc9
--- /dev/null
+++ b/naga/tests/in/f16.wgsl
@@ -0,0 +1,127 @@
+enable f16;
+
+var<private> private_variable: f16 = 1h;
+const constant_variable: f16 = f16(15.2);
+
+struct UniformCompatible {
+   // Other types
+   val_u32: u32,
+   val_i32: i32,
+   val_f32: f32,
+
+   // f16
+   val_f16: f16,
+   val_f16_2: vec2<f16>,
+   val_f16_3: vec3<f16>,
+   val_f16_4: vec4<f16>,
+   final_value: f16,
+
+   val_mat2x2: mat2x2<f16>,
+   val_mat2x3: mat2x3<f16>,
+   val_mat2x4: mat2x4<f16>,
+   val_mat3x2: mat3x2<f16>,
+   val_mat3x3: mat3x3<f16>,
+   val_mat3x4: mat3x4<f16>,
+   val_mat4x2: mat4x2<f16>,
+   val_mat4x3: mat4x3<f16>,
+   val_mat4x4: mat4x4<f16>,
+}
+
+struct StorageCompatible {
+   val_f16_array_2: array<f16, 2>,
+}
+
+struct LayoutTest {
+   scalar1: f16, scalar2: f16, v3: vec3<f16>, tuck_in: f16, scalar4: f16, larger: u32
+}
+
+@group(0) @binding(0)
+var<uniform> input_uniform: UniformCompatible;
+
+@group(0) @binding(1)
+var<storage> input_storage: UniformCompatible;
+
+@group(0) @binding(2)
+var<storage> input_arrays: StorageCompatible;
+
+@group(0) @binding(3)
+var<storage, read_write> output: UniformCompatible;
+
+@group(0) @binding(4)
+var<storage, read_write> output_arrays: StorageCompatible;
+
+fn f16_function(x: f16) -> f16 {
+   var val: f16 = f16(constant_variable);
+   // A number too big for f16
+   val += 1h - 33333h;
+   // Constructing an f16 from an AbstractInt
+   val += val + f16(5.);
+   // Constructing a f16 from other types and other types from f16.
+   val += f16(input_uniform.val_f32 + f32(val));
+   // Constructing a vec3<i64> from a i64
+   val += vec3<f16>(input_uniform.val_f16).z;
+
+   // Reading/writing to a uniform/storage buffer
+   output.val_f16 = input_uniform.val_f16 + input_storage.val_f16;
+   output.val_f16_2 = input_uniform.val_f16_2 + input_storage.val_f16_2;
+   output.val_f16_3 = input_uniform.val_f16_3 + input_storage.val_f16_3;
+   output.val_f16_4 = input_uniform.val_f16_4 + input_storage.val_f16_4;
+
+   output.val_mat2x2 = input_uniform.val_mat2x2 + input_storage.val_mat2x2;
+   output.val_mat2x3 = input_uniform.val_mat2x3 + input_storage.val_mat2x3;
+   output.val_mat2x4 = input_uniform.val_mat2x4 + input_storage.val_mat2x4;
+   output.val_mat3x2 = input_uniform.val_mat3x2 + input_storage.val_mat3x2;
+   output.val_mat3x3 = input_uniform.val_mat3x3 + input_storage.val_mat3x3;
+   output.val_mat3x4 = input_uniform.val_mat3x4 + input_storage.val_mat3x4;
+   output.val_mat4x2 = input_uniform.val_mat4x2 + input_storage.val_mat4x2;
+   output.val_mat4x3 = input_uniform.val_mat4x3 + input_storage.val_mat4x3;
+   output.val_mat4x4 = input_uniform.val_mat4x4 + input_storage.val_mat4x4;
+
+   output_arrays.val_f16_array_2 = input_arrays.val_f16_array_2;
+
+   // We make sure not to use 32 in these arguments, so it's clear in the results which are builtin
+   // constants based on the size of the type, and which are arguments.
+
+   // Numeric functions
+   val += abs(val);
+   val += clamp(val, val, val);
+   val += dot(vec2(val), vec2(val));
+   val += max(val, val);
+   val += min(val, val);
+   val += sign(val);
+   
+   val += f16(1.0);
+
+   // We use the shorthand aliases here to ensure the aliases
+   // work correctly.
+
+   // Cast vectors to/from f32
+   let float_vec2 = vec2f(input_uniform.val_f16_2);
+   output.val_f16_2 = vec2h(float_vec2);
+
+   let float_vec3 = vec3f(input_uniform.val_f16_3);
+   output.val_f16_3 = vec3h(float_vec3);
+
+   let float_vec4 = vec4f(input_uniform.val_f16_4);
+   output.val_f16_4 = vec4h(float_vec4);
+
+   // Cast matrices to/from f32
+   output.val_mat2x2 = mat2x2h(mat2x2f(input_uniform.val_mat2x2));
+   output.val_mat2x3 = mat2x3h(mat2x3f(input_uniform.val_mat2x3));
+   output.val_mat2x4 = mat2x4h(mat2x4f(input_uniform.val_mat2x4));
+   output.val_mat3x2 = mat3x2h(mat3x2f(input_uniform.val_mat3x2));
+   output.val_mat3x3 = mat3x3h(mat3x3f(input_uniform.val_mat3x3));
+   output.val_mat3x4 = mat3x4h(mat3x4f(input_uniform.val_mat3x4));
+   output.val_mat4x2 = mat4x2h(mat4x2f(input_uniform.val_mat4x2));
+   output.val_mat4x3 = mat4x3h(mat4x3f(input_uniform.val_mat4x3));
+   output.val_mat4x4 = mat4x4h(mat4x4f(input_uniform.val_mat4x4));
+
+   // Make sure all the variables are used.
+   return val;
+}
+
+@compute @workgroup_size(1)
+fn main() {
+   output.final_value = f16_function(2h);
+}
+
diff --git a/naga/tests/in/glsl/f16-glsl.comp b/naga/tests/in/glsl/f16-glsl.comp
new file mode 100644
index 00000000000..af8c89cb8a3
--- /dev/null
+++ b/naga/tests/in/glsl/f16-glsl.comp
@@ -0,0 +1,57 @@
+#version 460
+
+#extension GL_AMD_gpu_shader_half_float: enable 
+
+layout(set = 0, binding = 0) uniform A {
+    float16_t a_1;
+    f16vec2 a_vec2;
+    f16vec3 a_vec3;
+    f16vec4 a_vec4;
+    // So the rules here are particularly nasty for any f16 matries in uniform buffers
+    // as the stride is always rounded up to 16, meaning that _every_ f16 matrix in a uniform
+    // buffer is over-aligned to what naga-ir wants.
+    // 
+    // This is https://github.com/gfx-rs/wgpu/issues/4375.
+
+    // f16mat2 a_mat2;
+    // f16mat2x3 a_mat2x3;
+    // f16mat2x4 a_mat2x4;
+    // f16mat3x2 a_mat3x2;
+    // f16mat3 a_mat3;
+    // f16mat3x4 a_mat3x4;
+    // f16mat4x2 a_mat4x2;
+    // f16mat4x3 a_mat4x3;
+    // f16mat4 a_mat4;
+};
+
+layout(set = 0, binding = 1) buffer B {
+    float16_t b_1;
+    f16vec2 b_vec2;
+    f16vec3 b_vec3;
+    f16vec4 b_vec4;
+    f16mat2 b_mat2;
+    f16mat2x3 b_mat2x3;
+    f16mat2x4 b_mat2x4;
+    f16mat3x2 b_mat3x2;
+    f16mat3 b_mat3;
+    f16mat3x4 b_mat3x4;
+    f16mat4x2 b_mat4x2;
+    f16mat4x3 b_mat4x3;
+    f16mat4 b_mat4;
+};
+
+void main() {
+    b_1 = a_1;
+    b_vec2 = a_vec2;
+    b_vec3 = a_vec3;
+    b_vec4 = a_vec4;
+    // b_mat2 = a_mat2;
+    // b_mat2x3 = a_mat2x3;
+    // b_mat2x4 = a_mat2x4;
+    // b_mat3x2 = a_mat3x2;
+    // b_mat3 = a_mat3;
+    // b_mat3x4 = a_mat3x4;
+    // b_mat4x2 = a_mat4x2;
+    // b_mat4x3 = a_mat4x3;
+    // b_mat4 = a_mat4;
+}
diff --git a/naga/tests/in/spv/f16-spv.comp b/naga/tests/in/spv/f16-spv.comp
new file mode 100644
index 00000000000..af8c89cb8a3
--- /dev/null
+++ b/naga/tests/in/spv/f16-spv.comp
@@ -0,0 +1,57 @@
+#version 460
+
+#extension GL_AMD_gpu_shader_half_float: enable 
+
+layout(set = 0, binding = 0) uniform A {
+    float16_t a_1;
+    f16vec2 a_vec2;
+    f16vec3 a_vec3;
+    f16vec4 a_vec4;
+    // So the rules here are particularly nasty for any f16 matries in uniform buffers
+    // as the stride is always rounded up to 16, meaning that _every_ f16 matrix in a uniform
+    // buffer is over-aligned to what naga-ir wants.
+    // 
+    // This is https://github.com/gfx-rs/wgpu/issues/4375.
+
+    // f16mat2 a_mat2;
+    // f16mat2x3 a_mat2x3;
+    // f16mat2x4 a_mat2x4;
+    // f16mat3x2 a_mat3x2;
+    // f16mat3 a_mat3;
+    // f16mat3x4 a_mat3x4;
+    // f16mat4x2 a_mat4x2;
+    // f16mat4x3 a_mat4x3;
+    // f16mat4 a_mat4;
+};
+
+layout(set = 0, binding = 1) buffer B {
+    float16_t b_1;
+    f16vec2 b_vec2;
+    f16vec3 b_vec3;
+    f16vec4 b_vec4;
+    f16mat2 b_mat2;
+    f16mat2x3 b_mat2x3;
+    f16mat2x4 b_mat2x4;
+    f16mat3x2 b_mat3x2;
+    f16mat3 b_mat3;
+    f16mat3x4 b_mat3x4;
+    f16mat4x2 b_mat4x2;
+    f16mat4x3 b_mat4x3;
+    f16mat4 b_mat4;
+};
+
+void main() {
+    b_1 = a_1;
+    b_vec2 = a_vec2;
+    b_vec3 = a_vec3;
+    b_vec4 = a_vec4;
+    // b_mat2 = a_mat2;
+    // b_mat2x3 = a_mat2x3;
+    // b_mat2x4 = a_mat2x4;
+    // b_mat3x2 = a_mat3x2;
+    // b_mat3 = a_mat3;
+    // b_mat3x4 = a_mat3x4;
+    // b_mat4x2 = a_mat4x2;
+    // b_mat4x3 = a_mat4x3;
+    // b_mat4 = a_mat4;
+}
diff --git a/naga/tests/in/spv/f16-spv.param.ron b/naga/tests/in/spv/f16-spv.param.ron
new file mode 100644
index 00000000000..c70ada99392
--- /dev/null
+++ b/naga/tests/in/spv/f16-spv.param.ron
@@ -0,0 +1,3 @@
+(
+    god_mode: true,
+)
\ No newline at end of file
diff --git a/naga/tests/in/spv/f16-spv.spv b/naga/tests/in/spv/f16-spv.spv
new file mode 100644
index 00000000000..7e7f1763ef8
Binary files /dev/null and b/naga/tests/in/spv/f16-spv.spv differ
diff --git a/naga/tests/in/spv/f16-spv.spvasm b/naga/tests/in/spv/f16-spv.spvasm
new file mode 100644
index 00000000000..806de327548
--- /dev/null
+++ b/naga/tests/in/spv/f16-spv.spvasm
@@ -0,0 +1,130 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Google Shaderc over Glslang; 11
+; Bound: 46
+; Schema: 0
+               OpCapability Shader
+               OpCapability StorageBuffer16BitAccess
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpExtension "SPV_KHR_16bit_storage"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpSource GLSL 460
+               OpSourceExtension "GL_AMD_gpu_shader_half_float"
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpName %main "main"
+               OpName %B "B"
+               OpMemberName %B 0 "b_1"
+               OpMemberName %B 1 "b_vec2"
+               OpMemberName %B 2 "b_vec3"
+               OpMemberName %B 3 "b_vec4"
+               OpMemberName %B 4 "b_mat2"
+               OpMemberName %B 5 "b_mat2x3"
+               OpMemberName %B 6 "b_mat2x4"
+               OpMemberName %B 7 "b_mat3x2"
+               OpMemberName %B 8 "b_mat3"
+               OpMemberName %B 9 "b_mat3x4"
+               OpMemberName %B 10 "b_mat4x2"
+               OpMemberName %B 11 "b_mat4x3"
+               OpMemberName %B 12 "b_mat4"
+               OpName %_ ""
+               OpName %A "A"
+               OpMemberName %A 0 "a_1"
+               OpMemberName %A 1 "a_vec2"
+               OpMemberName %A 2 "a_vec3"
+               OpMemberName %A 3 "a_vec4"
+               OpName %__0 ""
+               OpDecorate %B BufferBlock
+               OpMemberDecorate %B 0 Offset 0
+               OpMemberDecorate %B 1 Offset 4
+               OpMemberDecorate %B 2 Offset 8
+               OpMemberDecorate %B 3 Offset 16
+               OpMemberDecorate %B 4 ColMajor
+               OpMemberDecorate %B 4 MatrixStride 4
+               OpMemberDecorate %B 4 Offset 24
+               OpMemberDecorate %B 5 ColMajor
+               OpMemberDecorate %B 5 MatrixStride 8
+               OpMemberDecorate %B 5 Offset 32
+               OpMemberDecorate %B 6 ColMajor
+               OpMemberDecorate %B 6 MatrixStride 8
+               OpMemberDecorate %B 6 Offset 48
+               OpMemberDecorate %B 7 ColMajor
+               OpMemberDecorate %B 7 MatrixStride 4
+               OpMemberDecorate %B 7 Offset 64
+               OpMemberDecorate %B 8 ColMajor
+               OpMemberDecorate %B 8 MatrixStride 8
+               OpMemberDecorate %B 8 Offset 80
+               OpMemberDecorate %B 9 ColMajor
+               OpMemberDecorate %B 9 MatrixStride 8
+               OpMemberDecorate %B 9 Offset 104
+               OpMemberDecorate %B 10 ColMajor
+               OpMemberDecorate %B 10 MatrixStride 4
+               OpMemberDecorate %B 10 Offset 128
+               OpMemberDecorate %B 11 ColMajor
+               OpMemberDecorate %B 11 MatrixStride 8
+               OpMemberDecorate %B 11 Offset 144
+               OpMemberDecorate %B 12 ColMajor
+               OpMemberDecorate %B 12 MatrixStride 8
+               OpMemberDecorate %B 12 Offset 176
+               OpDecorate %_ Binding 1
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %A Block
+               OpMemberDecorate %A 0 Offset 0
+               OpMemberDecorate %A 1 Offset 4
+               OpMemberDecorate %A 2 Offset 8
+               OpMemberDecorate %A 3 Offset 16
+               OpDecorate %__0 Binding 0
+               OpDecorate %__0 DescriptorSet 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+     %v3half = OpTypeVector %half 3
+     %v4half = OpTypeVector %half 4
+ %mat2v2half = OpTypeMatrix %v2half 2
+ %mat2v3half = OpTypeMatrix %v3half 2
+ %mat2v4half = OpTypeMatrix %v4half 2
+ %mat3v2half = OpTypeMatrix %v2half 3
+ %mat3v3half = OpTypeMatrix %v3half 3
+ %mat3v4half = OpTypeMatrix %v4half 3
+ %mat4v2half = OpTypeMatrix %v2half 4
+ %mat4v3half = OpTypeMatrix %v3half 4
+ %mat4v4half = OpTypeMatrix %v4half 4
+          %B = OpTypeStruct %half %v2half %v3half %v4half %mat2v2half %mat2v3half %mat2v4half %mat3v2half %mat3v3half %mat3v4half %mat4v2half %mat4v3half %mat4v4half
+%_ptr_Uniform_B = OpTypePointer Uniform %B
+          %_ = OpVariable %_ptr_Uniform_B Uniform
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+          %A = OpTypeStruct %half %v2half %v3half %v4half
+%_ptr_Uniform_A = OpTypePointer Uniform %A
+        %__0 = OpVariable %_ptr_Uniform_A Uniform
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+      %int_3 = OpConstant %int 3
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %28 = OpAccessChain %_ptr_Uniform_half %__0 %int_0
+         %29 = OpLoad %half %28
+         %30 = OpAccessChain %_ptr_Uniform_half %_ %int_0
+               OpStore %30 %29
+         %33 = OpAccessChain %_ptr_Uniform_v2half %__0 %int_1
+         %34 = OpLoad %v2half %33
+         %35 = OpAccessChain %_ptr_Uniform_v2half %_ %int_1
+               OpStore %35 %34
+         %38 = OpAccessChain %_ptr_Uniform_v3half %__0 %int_2
+         %39 = OpLoad %v3half %38
+         %40 = OpAccessChain %_ptr_Uniform_v3half %_ %int_2
+               OpStore %40 %39
+         %43 = OpAccessChain %_ptr_Uniform_v4half %__0 %int_3
+         %44 = OpLoad %v4half %43
+         %45 = OpAccessChain %_ptr_Uniform_v4half %_ %int_3
+               OpStore %45 %44
+               OpReturn
+               OpFunctionEnd
diff --git a/naga/tests/out/hlsl/f16.hlsl b/naga/tests/out/hlsl/f16.hlsl
new file mode 100644
index 00000000000..05e23872122
--- /dev/null
+++ b/naga/tests/out/hlsl/f16.hlsl
@@ -0,0 +1,351 @@
+struct NagaConstants {
+    int first_vertex;
+    int first_instance;
+    uint other;
+};
+ConstantBuffer<NagaConstants> _NagaConstants: register(b0, space1);
+
+struct UniformCompatible {
+    uint val_u32_;
+    int val_i32_;
+    float val_f32_;
+    half val_f16_;
+    half2 val_f16_2_;
+    int _pad5_0;
+    half3 val_f16_3_;
+    half4 val_f16_4_;
+    half final_value;
+    half2 val_mat2x2__0; half2 val_mat2x2__1;
+    int _pad9_0;
+    row_major half2x3 val_mat2x3_;
+    row_major half2x4 val_mat2x4_;
+    half2 val_mat3x2__0; half2 val_mat3x2__1; half2 val_mat3x2__2;
+    int _pad12_0;
+    row_major half3x3 val_mat3x3_;
+    row_major half3x4 val_mat3x4_;
+    half2 val_mat4x2__0; half2 val_mat4x2__1; half2 val_mat4x2__2; half2 val_mat4x2__3;
+    row_major half4x3 val_mat4x3_;
+    row_major half4x4 val_mat4x4_;
+};
+
+struct StorageCompatible {
+    half val_f16_array_2_[2];
+};
+
+struct LayoutTest {
+    half scalar1_;
+    half scalar2_;
+    int _pad2_0;
+    half3 v3_;
+    half tuck_in;
+    half scalar4_;
+    uint larger;
+};
+
+static const half constant_variable = 15.203125h;
+
+static half private_variable = 1.0h;
+cbuffer input_uniform : register(b0) { UniformCompatible input_uniform; }
+ByteAddressBuffer input_storage : register(t1);
+ByteAddressBuffer input_arrays : register(t2);
+RWByteAddressBuffer output : register(u3);
+RWByteAddressBuffer output_arrays : register(u4);
+
+half2x2 GetMatval_mat2x2_OnUniformCompatible(UniformCompatible obj) {
+    return half2x2(obj.val_mat2x2__0, obj.val_mat2x2__1);
+}
+
+void SetMatval_mat2x2_OnUniformCompatible(UniformCompatible obj, half2x2 mat) {
+    obj.val_mat2x2__0 = mat[0];
+    obj.val_mat2x2__1 = mat[1];
+}
+
+void SetMatVecval_mat2x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) {
+    switch(mat_idx) {
+    case 0: { obj.val_mat2x2__0 = vec; break; }
+    case 1: { obj.val_mat2x2__1 = vec; break; }
+    }
+}
+
+void SetMatScalarval_mat2x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) {
+    switch(mat_idx) {
+    case 0: { obj.val_mat2x2__0[vec_idx] = scalar; break; }
+    case 1: { obj.val_mat2x2__1[vec_idx] = scalar; break; }
+    }
+}
+
+half3x2 GetMatval_mat3x2_OnUniformCompatible(UniformCompatible obj) {
+    return half3x2(obj.val_mat3x2__0, obj.val_mat3x2__1, obj.val_mat3x2__2);
+}
+
+void SetMatval_mat3x2_OnUniformCompatible(UniformCompatible obj, half3x2 mat) {
+    obj.val_mat3x2__0 = mat[0];
+    obj.val_mat3x2__1 = mat[1];
+    obj.val_mat3x2__2 = mat[2];
+}
+
+void SetMatVecval_mat3x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) {
+    switch(mat_idx) {
+    case 0: { obj.val_mat3x2__0 = vec; break; }
+    case 1: { obj.val_mat3x2__1 = vec; break; }
+    case 2: { obj.val_mat3x2__2 = vec; break; }
+    }
+}
+
+void SetMatScalarval_mat3x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) {
+    switch(mat_idx) {
+    case 0: { obj.val_mat3x2__0[vec_idx] = scalar; break; }
+    case 1: { obj.val_mat3x2__1[vec_idx] = scalar; break; }
+    case 2: { obj.val_mat3x2__2[vec_idx] = scalar; break; }
+    }
+}
+
+half4x2 GetMatval_mat4x2_OnUniformCompatible(UniformCompatible obj) {
+    return half4x2(obj.val_mat4x2__0, obj.val_mat4x2__1, obj.val_mat4x2__2, obj.val_mat4x2__3);
+}
+
+void SetMatval_mat4x2_OnUniformCompatible(UniformCompatible obj, half4x2 mat) {
+    obj.val_mat4x2__0 = mat[0];
+    obj.val_mat4x2__1 = mat[1];
+    obj.val_mat4x2__2 = mat[2];
+    obj.val_mat4x2__3 = mat[3];
+}
+
+void SetMatVecval_mat4x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) {
+    switch(mat_idx) {
+    case 0: { obj.val_mat4x2__0 = vec; break; }
+    case 1: { obj.val_mat4x2__1 = vec; break; }
+    case 2: { obj.val_mat4x2__2 = vec; break; }
+    case 3: { obj.val_mat4x2__3 = vec; break; }
+    }
+}
+
+void SetMatScalarval_mat4x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) {
+    switch(mat_idx) {
+    case 0: { obj.val_mat4x2__0[vec_idx] = scalar; break; }
+    case 1: { obj.val_mat4x2__1[vec_idx] = scalar; break; }
+    case 2: { obj.val_mat4x2__2[vec_idx] = scalar; break; }
+    case 3: { obj.val_mat4x2__3[vec_idx] = scalar; break; }
+    }
+}
+
+typedef half ret_Constructarray2_half_[2];
+ret_Constructarray2_half_ Constructarray2_half_(half arg0, half arg1) {
+    half ret[2] = { arg0, arg1 };
+    return ret;
+}
+
+half f16_function(half x)
+{
+    half val = 15.203125h;
+
+    half _e4 = val;
+    val = (_e4 + -33344.0h);
+    half _e6 = val;
+    half _e9 = val;
+    val = (_e9 + (_e6 + 5.0h));
+    float _e13 = input_uniform.val_f32_;
+    half _e14 = val;
+    half _e18 = val;
+    val = (_e18 + half((_e13 + float(_e14))));
+    half _e22 = input_uniform.val_f16_;
+    half _e25 = val;
+    val = (_e25 + (_e22).xxx.z);
+    half _e31 = input_uniform.val_f16_;
+    half _e34 = input_storage.Load<half>(12);
+    output.Store(12, (_e31 + _e34));
+    half2 _e40 = input_uniform.val_f16_2_;
+    half2 _e43 = input_storage.Load<half2>(16);
+    output.Store(16, (_e40 + _e43));
+    half3 _e49 = input_uniform.val_f16_3_;
+    half3 _e52 = input_storage.Load<half3>(24);
+    output.Store(24, (_e49 + _e52));
+    half4 _e58 = input_uniform.val_f16_4_;
+    half4 _e61 = input_storage.Load<half4>(32);
+    output.Store(32, (_e58 + _e61));
+    half2x2 _e67 = GetMatval_mat2x2_OnUniformCompatible(input_uniform);
+    half2x2 _e70 = half2x2(input_storage.Load<half2>(44+0), input_storage.Load<half2>(44+4));
+    {
+        half2x2 _value2 = (_e67 + _e70);
+        output.Store(44+0, _value2[0]);
+        output.Store(44+4, _value2[1]);
+    }
+    half2x3 _e76 = input_uniform.val_mat2x3_;
+    half2x3 _e79 = half2x3(input_storage.Load<half3>(56+0), input_storage.Load<half3>(56+8));
+    {
+        half2x3 _value2 = (_e76 + _e79);
+        output.Store(56+0, _value2[0]);
+        output.Store(56+8, _value2[1]);
+    }
+    half2x4 _e85 = input_uniform.val_mat2x4_;
+    half2x4 _e88 = half2x4(input_storage.Load<half4>(72+0), input_storage.Load<half4>(72+8));
+    {
+        half2x4 _value2 = (_e85 + _e88);
+        output.Store(72+0, _value2[0]);
+        output.Store(72+8, _value2[1]);
+    }
+    half3x2 _e94 = GetMatval_mat3x2_OnUniformCompatible(input_uniform);
+    half3x2 _e97 = half3x2(input_storage.Load<half2>(88+0), input_storage.Load<half2>(88+4), input_storage.Load<half2>(88+8));
+    {
+        half3x2 _value2 = (_e94 + _e97);
+        output.Store(88+0, _value2[0]);
+        output.Store(88+4, _value2[1]);
+        output.Store(88+8, _value2[2]);
+    }
+    half3x3 _e103 = input_uniform.val_mat3x3_;
+    half3x3 _e106 = half3x3(input_storage.Load<half3>(104+0), input_storage.Load<half3>(104+8), input_storage.Load<half3>(104+16));
+    {
+        half3x3 _value2 = (_e103 + _e106);
+        output.Store(104+0, _value2[0]);
+        output.Store(104+8, _value2[1]);
+        output.Store(104+16, _value2[2]);
+    }
+    half3x4 _e112 = input_uniform.val_mat3x4_;
+    half3x4 _e115 = half3x4(input_storage.Load<half4>(128+0), input_storage.Load<half4>(128+8), input_storage.Load<half4>(128+16));
+    {
+        half3x4 _value2 = (_e112 + _e115);
+        output.Store(128+0, _value2[0]);
+        output.Store(128+8, _value2[1]);
+        output.Store(128+16, _value2[2]);
+    }
+    half4x2 _e121 = GetMatval_mat4x2_OnUniformCompatible(input_uniform);
+    half4x2 _e124 = half4x2(input_storage.Load<half2>(152+0), input_storage.Load<half2>(152+4), input_storage.Load<half2>(152+8), input_storage.Load<half2>(152+12));
+    {
+        half4x2 _value2 = (_e121 + _e124);
+        output.Store(152+0, _value2[0]);
+        output.Store(152+4, _value2[1]);
+        output.Store(152+8, _value2[2]);
+        output.Store(152+12, _value2[3]);
+    }
+    half4x3 _e130 = input_uniform.val_mat4x3_;
+    half4x3 _e133 = half4x3(input_storage.Load<half3>(168+0), input_storage.Load<half3>(168+8), input_storage.Load<half3>(168+16), input_storage.Load<half3>(168+24));
+    {
+        half4x3 _value2 = (_e130 + _e133);
+        output.Store(168+0, _value2[0]);
+        output.Store(168+8, _value2[1]);
+        output.Store(168+16, _value2[2]);
+        output.Store(168+24, _value2[3]);
+    }
+    half4x4 _e139 = input_uniform.val_mat4x4_;
+    half4x4 _e142 = half4x4(input_storage.Load<half4>(200+0), input_storage.Load<half4>(200+8), input_storage.Load<half4>(200+16), input_storage.Load<half4>(200+24));
+    {
+        half4x4 _value2 = (_e139 + _e142);
+        output.Store(200+0, _value2[0]);
+        output.Store(200+8, _value2[1]);
+        output.Store(200+16, _value2[2]);
+        output.Store(200+24, _value2[3]);
+    }
+    half _e148[2] = Constructarray2_half_(input_arrays.Load<half>(0+0), input_arrays.Load<half>(0+2));
+    {
+        half _value2[2] = _e148;
+        output_arrays.Store(0+0, _value2[0]);
+        output_arrays.Store(0+2, _value2[1]);
+    }
+    half _e149 = val;
+    half _e151 = val;
+    val = (_e151 + abs(_e149));
+    half _e153 = val;
+    half _e154 = val;
+    half _e155 = val;
+    half _e157 = val;
+    val = (_e157 + clamp(_e153, _e154, _e155));
+    half _e159 = val;
+    half _e161 = val;
+    half _e164 = val;
+    val = (_e164 + dot((_e159).xx, (_e161).xx));
+    half _e166 = val;
+    half _e167 = val;
+    half _e169 = val;
+    val = (_e169 + max(_e166, _e167));
+    half _e171 = val;
+    half _e172 = val;
+    half _e174 = val;
+    val = (_e174 + min(_e171, _e172));
+    half _e176 = val;
+    half _e178 = val;
+    val = (_e178 + sign(_e176));
+    half _e181 = val;
+    val = (_e181 + 1.0h);
+    half2 _e185 = input_uniform.val_f16_2_;
+    float2 float_vec2_ = float2(_e185);
+    output.Store(16, half2(float_vec2_));
+    half3 _e192 = input_uniform.val_f16_3_;
+    float3 float_vec3_ = float3(_e192);
+    output.Store(24, half3(float_vec3_));
+    half4 _e199 = input_uniform.val_f16_4_;
+    float4 float_vec4_ = float4(_e199);
+    output.Store(32, half4(float_vec4_));
+    half2x2 _e208 = GetMatval_mat2x2_OnUniformCompatible(input_uniform);
+    {
+        half2x2 _value2 = half2x2(float2x2(_e208));
+        output.Store(44+0, _value2[0]);
+        output.Store(44+4, _value2[1]);
+    }
+    half2x3 _e215 = input_uniform.val_mat2x3_;
+    {
+        half2x3 _value2 = half2x3(float2x3(_e215));
+        output.Store(56+0, _value2[0]);
+        output.Store(56+8, _value2[1]);
+    }
+    half2x4 _e222 = input_uniform.val_mat2x4_;
+    {
+        half2x4 _value2 = half2x4(float2x4(_e222));
+        output.Store(72+0, _value2[0]);
+        output.Store(72+8, _value2[1]);
+    }
+    half3x2 _e229 = GetMatval_mat3x2_OnUniformCompatible(input_uniform);
+    {
+        half3x2 _value2 = half3x2(float3x2(_e229));
+        output.Store(88+0, _value2[0]);
+        output.Store(88+4, _value2[1]);
+        output.Store(88+8, _value2[2]);
+    }
+    half3x3 _e236 = input_uniform.val_mat3x3_;
+    {
+        half3x3 _value2 = half3x3(float3x3(_e236));
+        output.Store(104+0, _value2[0]);
+        output.Store(104+8, _value2[1]);
+        output.Store(104+16, _value2[2]);
+    }
+    half3x4 _e243 = input_uniform.val_mat3x4_;
+    {
+        half3x4 _value2 = half3x4(float3x4(_e243));
+        output.Store(128+0, _value2[0]);
+        output.Store(128+8, _value2[1]);
+        output.Store(128+16, _value2[2]);
+    }
+    half4x2 _e250 = GetMatval_mat4x2_OnUniformCompatible(input_uniform);
+    {
+        half4x2 _value2 = half4x2(float4x2(_e250));
+        output.Store(152+0, _value2[0]);
+        output.Store(152+4, _value2[1]);
+        output.Store(152+8, _value2[2]);
+        output.Store(152+12, _value2[3]);
+    }
+    half4x3 _e257 = input_uniform.val_mat4x3_;
+    {
+        half4x3 _value2 = half4x3(float4x3(_e257));
+        output.Store(168+0, _value2[0]);
+        output.Store(168+8, _value2[1]);
+        output.Store(168+16, _value2[2]);
+        output.Store(168+24, _value2[3]);
+    }
+    half4x4 _e264 = input_uniform.val_mat4x4_;
+    {
+        half4x4 _value2 = half4x4(float4x4(_e264));
+        output.Store(200+0, _value2[0]);
+        output.Store(200+8, _value2[1]);
+        output.Store(200+16, _value2[2]);
+        output.Store(200+24, _value2[3]);
+    }
+    half _e267 = val;
+    return _e267;
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    const half _e3 = f16_function(2.0h);
+    output.Store(40, _e3);
+    return;
+}
diff --git a/naga/tests/out/hlsl/f16.ron b/naga/tests/out/hlsl/f16.ron
new file mode 100644
index 00000000000..b396a4626e3
--- /dev/null
+++ b/naga/tests/out/hlsl/f16.ron
@@ -0,0 +1,12 @@
+(
+    vertex:[
+    ],
+    fragment:[
+    ],
+    compute:[
+        (
+            entry_point:"main",
+            target_profile:"cs_6_2",
+        ),
+    ],
+)
diff --git a/naga/tests/out/msl/extra.msl b/naga/tests/out/msl/extra.msl
index 8288dfad92f..4d6bb568f3a 100644
--- a/naga/tests/out/msl/extra.msl
+++ b/naga/tests/out/msl/extra.msl
@@ -6,7 +6,7 @@ using metal::uint;
 
 struct PushConstants {
     uint index;
-    char _pad1[12];
+    char _pad1[4];
     metal::float2 double_;
 };
 struct FragmentIn {
diff --git a/naga/tests/out/msl/f16.msl b/naga/tests/out/msl/f16.msl
new file mode 100644
index 00000000000..40be58e9015
--- /dev/null
+++ b/naga/tests/out/msl/f16.msl
@@ -0,0 +1,177 @@
+// language: metal1.0
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using metal::uint;
+
+struct UniformCompatible {
+    uint val_u32_;
+    int val_i32_;
+    float val_f32_;
+    half val_f16_;
+    char _pad4[2];
+    metal::half2 val_f16_2_;
+    char _pad5[4];
+    metal::half3 val_f16_3_;
+    metal::half4 val_f16_4_;
+    half final_value;
+    char _pad8[2];
+    metal::half2x2 val_mat2x2_;
+    char _pad9[4];
+    metal::half2x3 val_mat2x3_;
+    metal::half2x4 val_mat2x4_;
+    metal::half3x2 val_mat3x2_;
+    char _pad12[4];
+    metal::half3x3 val_mat3x3_;
+    metal::half3x4 val_mat3x4_;
+    metal::half4x2 val_mat4x2_;
+    metal::half4x3 val_mat4x3_;
+    metal::half4x4 val_mat4x4_;
+};
+struct type_16 {
+    half inner[2];
+};
+struct StorageCompatible {
+    type_16 val_f16_array_2_;
+};
+struct LayoutTest {
+    half scalar1_;
+    half scalar2_;
+    char _pad2[4];
+    metal::packed_half3 v3_;
+    half tuck_in;
+    half scalar4_;
+    char _pad5[2];
+    uint larger;
+};
+constant half constant_variable = 15.203125h;
+
+half f16_function(
+    half x,
+    constant UniformCompatible& input_uniform,
+    device UniformCompatible const& input_storage,
+    device StorageCompatible const& input_arrays,
+    device UniformCompatible& output,
+    device StorageCompatible& output_arrays
+) {
+    half val = 15.203125h;
+    half _e4 = val;
+    val = _e4 + -33344.0h;
+    half _e6 = val;
+    half _e9 = val;
+    val = _e9 + (_e6 + 5.0h);
+    float _e13 = input_uniform.val_f32_;
+    half _e14 = val;
+    half _e18 = val;
+    val = _e18 + static_cast<half>(_e13 + static_cast<float>(_e14));
+    half _e22 = input_uniform.val_f16_;
+    half _e25 = val;
+    val = _e25 + metal::half3(_e22).z;
+    half _e31 = input_uniform.val_f16_;
+    half _e34 = input_storage.val_f16_;
+    output.val_f16_ = _e31 + _e34;
+    metal::half2 _e40 = input_uniform.val_f16_2_;
+    metal::half2 _e43 = input_storage.val_f16_2_;
+    output.val_f16_2_ = _e40 + _e43;
+    metal::half3 _e49 = input_uniform.val_f16_3_;
+    metal::half3 _e52 = input_storage.val_f16_3_;
+    output.val_f16_3_ = _e49 + _e52;
+    metal::half4 _e58 = input_uniform.val_f16_4_;
+    metal::half4 _e61 = input_storage.val_f16_4_;
+    output.val_f16_4_ = _e58 + _e61;
+    metal::half2x2 _e67 = input_uniform.val_mat2x2_;
+    metal::half2x2 _e70 = input_storage.val_mat2x2_;
+    output.val_mat2x2_ = _e67 + _e70;
+    metal::half2x3 _e76 = input_uniform.val_mat2x3_;
+    metal::half2x3 _e79 = input_storage.val_mat2x3_;
+    output.val_mat2x3_ = _e76 + _e79;
+    metal::half2x4 _e85 = input_uniform.val_mat2x4_;
+    metal::half2x4 _e88 = input_storage.val_mat2x4_;
+    output.val_mat2x4_ = _e85 + _e88;
+    metal::half3x2 _e94 = input_uniform.val_mat3x2_;
+    metal::half3x2 _e97 = input_storage.val_mat3x2_;
+    output.val_mat3x2_ = _e94 + _e97;
+    metal::half3x3 _e103 = input_uniform.val_mat3x3_;
+    metal::half3x3 _e106 = input_storage.val_mat3x3_;
+    output.val_mat3x3_ = _e103 + _e106;
+    metal::half3x4 _e112 = input_uniform.val_mat3x4_;
+    metal::half3x4 _e115 = input_storage.val_mat3x4_;
+    output.val_mat3x4_ = _e112 + _e115;
+    metal::half4x2 _e121 = input_uniform.val_mat4x2_;
+    metal::half4x2 _e124 = input_storage.val_mat4x2_;
+    output.val_mat4x2_ = _e121 + _e124;
+    metal::half4x3 _e130 = input_uniform.val_mat4x3_;
+    metal::half4x3 _e133 = input_storage.val_mat4x3_;
+    output.val_mat4x3_ = _e130 + _e133;
+    metal::half4x4 _e139 = input_uniform.val_mat4x4_;
+    metal::half4x4 _e142 = input_storage.val_mat4x4_;
+    output.val_mat4x4_ = _e139 + _e142;
+    type_16 _e148 = input_arrays.val_f16_array_2_;
+    output_arrays.val_f16_array_2_ = _e148;
+    half _e149 = val;
+    half _e151 = val;
+    val = _e151 + metal::abs(_e149);
+    half _e153 = val;
+    half _e154 = val;
+    half _e155 = val;
+    half _e157 = val;
+    val = _e157 + metal::clamp(_e153, _e154, _e155);
+    half _e159 = val;
+    half _e161 = val;
+    half _e164 = val;
+    val = _e164 + metal::dot(metal::half2(_e159), metal::half2(_e161));
+    half _e166 = val;
+    half _e167 = val;
+    half _e169 = val;
+    val = _e169 + metal::max(_e166, _e167);
+    half _e171 = val;
+    half _e172 = val;
+    half _e174 = val;
+    val = _e174 + metal::min(_e171, _e172);
+    half _e176 = val;
+    half _e178 = val;
+    val = _e178 + metal::sign(_e176);
+    half _e181 = val;
+    val = _e181 + 1.0h;
+    metal::half2 _e185 = input_uniform.val_f16_2_;
+    metal::float2 float_vec2_ = static_cast<metal::float2>(_e185);
+    output.val_f16_2_ = static_cast<metal::half2>(float_vec2_);
+    metal::half3 _e192 = input_uniform.val_f16_3_;
+    metal::float3 float_vec3_ = static_cast<metal::float3>(_e192);
+    output.val_f16_3_ = static_cast<metal::half3>(float_vec3_);
+    metal::half4 _e199 = input_uniform.val_f16_4_;
+    metal::float4 float_vec4_ = static_cast<metal::float4>(_e199);
+    output.val_f16_4_ = static_cast<metal::half4>(float_vec4_);
+    metal::half2x2 _e208 = input_uniform.val_mat2x2_;
+    output.val_mat2x2_ = metal::half2x2(metal::float2x2(_e208));
+    metal::half2x3 _e215 = input_uniform.val_mat2x3_;
+    output.val_mat2x3_ = metal::half2x3(metal::float2x3(_e215));
+    metal::half2x4 _e222 = input_uniform.val_mat2x4_;
+    output.val_mat2x4_ = metal::half2x4(metal::float2x4(_e222));
+    metal::half3x2 _e229 = input_uniform.val_mat3x2_;
+    output.val_mat3x2_ = metal::half3x2(metal::float3x2(_e229));
+    metal::half3x3 _e236 = input_uniform.val_mat3x3_;
+    output.val_mat3x3_ = metal::half3x3(metal::float3x3(_e236));
+    metal::half3x4 _e243 = input_uniform.val_mat3x4_;
+    output.val_mat3x4_ = metal::half3x4(metal::float3x4(_e243));
+    metal::half4x2 _e250 = input_uniform.val_mat4x2_;
+    output.val_mat4x2_ = metal::half4x2(metal::float4x2(_e250));
+    metal::half4x3 _e257 = input_uniform.val_mat4x3_;
+    output.val_mat4x3_ = metal::half4x3(metal::float4x3(_e257));
+    metal::half4x4 _e264 = input_uniform.val_mat4x4_;
+    output.val_mat4x4_ = metal::half4x4(metal::float4x4(_e264));
+    half _e267 = val;
+    return _e267;
+}
+
+kernel void main_(
+  constant UniformCompatible& input_uniform [[user(fake0)]]
+, device UniformCompatible const& input_storage [[user(fake0)]]
+, device StorageCompatible const& input_arrays [[user(fake0)]]
+, device UniformCompatible& output [[user(fake0)]]
+, device StorageCompatible& output_arrays [[user(fake0)]]
+) {
+    half _e3 = f16_function(2.0h, input_uniform, input_storage, input_arrays, output, output_arrays);
+    output.final_value = _e3;
+    return;
+}
diff --git a/naga/tests/out/spv/atomicCompareExchange-int64.spvasm b/naga/tests/out/spv/atomicCompareExchange-int64.spvasm
index f174ad3b382..7fa0c3f0de4 100644
--- a/naga/tests/out/spv/atomicCompareExchange-int64.spvasm
+++ b/naga/tests/out/spv/atomicCompareExchange-int64.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.0
 ; Generator: rspirv
-; Bound: 125
+; Bound: 123
 OpCapability Shader
 OpCapability Int64Atomics
 OpCapability Int64
@@ -9,9 +9,9 @@ OpExtension "SPV_KHR_storage_buffer_storage_class"
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
 OpEntryPoint GLCompute %19 "test_atomic_compare_exchange_i64"
-OpEntryPoint GLCompute %77 "test_atomic_compare_exchange_u64"
+OpEntryPoint GLCompute %76 "test_atomic_compare_exchange_u64"
 OpExecutionMode %19 LocalSize 1 1 1
-OpExecutionMode %77 LocalSize 1 1 1
+OpExecutionMode %76 LocalSize 1 1 1
 OpDecorate %5 ArrayStride 8
 OpDecorate %8 ArrayStride 8
 OpMemberDecorate %10 0 Offset 0
@@ -57,12 +57,12 @@ OpMemberDecorate %16 0 Offset 0
 %51 = OpTypeInt 32 1
 %50 = OpConstant  %51  1
 %52 = OpConstant  %3  64
-%78 = OpTypePointer StorageBuffer %8
-%80 = OpConstant  %7  10
-%83 = OpTypePointer Function %7
-%84 = OpConstantNull  %7
-%86 = OpConstantNull  %9
-%99 = OpTypePointer StorageBuffer %7
+%77 = OpTypePointer StorageBuffer %8
+%79 = OpConstant  %7  10
+%82 = OpTypePointer Function %7
+%83 = OpConstantNull  %7
+%85 = OpConstantNull  %9
+%98 = OpTypePointer StorageBuffer %7
 %19 = OpFunction  %2  None %20
 %18 = OpLabel
 %27 = OpVariable  %28  Function %22
@@ -106,16 +106,16 @@ OpBranch %61
 %61 = OpLabel
 %63 = OpLoad  %4  %29
 %64 = OpIAdd  %4  %63 %25
-%66 = OpLoad  %3  %27
-%67 = OpLoad  %4  %29
-%69 = OpAccessChain  %47  %23 %66
-%70 = OpAtomicCompareExchange  %4  %69 %50 %52 %52 %64 %67
-%71 = OpIEqual  %9  %70 %67
-%68 = OpCompositeConstruct  %10  %70 %71
-%72 = OpCompositeExtract  %4  %68 0
-OpStore %29 %72
-%73 = OpCompositeExtract  %9  %68 1
-OpStore %32 %73
+%65 = OpLoad  %3  %27
+%66 = OpLoad  %4  %29
+%68 = OpAccessChain  %47  %23 %65
+%69 = OpAtomicCompareExchange  %4  %68 %50 %52 %52 %64 %66
+%70 = OpIEqual  %9  %69 %66
+%67 = OpCompositeConstruct  %10  %69 %70
+%71 = OpCompositeExtract  %4  %67 0
+OpStore %29 %71
+%72 = OpCompositeExtract  %9  %67 1
+OpStore %32 %72
 OpBranch %62
 %62 = OpLabel
 OpBranch %56
@@ -126,80 +126,80 @@ OpBranch %45
 %45 = OpLabel
 OpBranch %39
 %39 = OpLabel
-%74 = OpLoad  %3  %27
-%75 = OpIAdd  %3  %74 %26
-OpStore %27 %75
+%73 = OpLoad  %3  %27
+%74 = OpIAdd  %3  %73 %26
+OpStore %27 %74
 OpBranch %36
 %37 = OpLabel
 OpReturn
 OpFunctionEnd
-%77 = OpFunction  %2  None %20
-%76 = OpLabel
-%81 = OpVariable  %28  Function %22
-%82 = OpVariable  %83  Function %84
-%85 = OpVariable  %33  Function %86
-%79 = OpAccessChain  %78  %15 %22
+%76 = OpFunction  %2  None %20
+%75 = OpLabel
+%80 = OpVariable  %28  Function %22
+%81 = OpVariable  %82  Function %83
+%84 = OpVariable  %33  Function %85
+%78 = OpAccessChain  %77  %15 %22
+OpBranch %86
+%86 = OpLabel
 OpBranch %87
 %87 = OpLabel
-OpBranch %88
-%88 = OpLabel
-OpLoopMerge %89 %91 None
-OpBranch %90
-%90 = OpLabel
-%92 = OpLoad  %3  %81
-%93 = OpULessThan  %9  %92 %6
-OpSelectionMerge %94 None
-OpBranchConditional %93 %94 %95
-%95 = OpLabel
+OpLoopMerge %88 %90 None
 OpBranch %89
+%89 = OpLabel
+%91 = OpLoad  %3  %80
+%92 = OpULessThan  %9  %91 %6
+OpSelectionMerge %93 None
+OpBranchConditional %92 %93 %94
 %94 = OpLabel
-OpBranch %96
-%96 = OpLabel
-%98 = OpLoad  %3  %81
-%100 = OpAccessChain  %99  %79 %98
-%101 = OpAtomicLoad  %7  %100 %50 %52
-OpStore %82 %101
-OpStore %85 %24
-OpBranch %102
-%102 = OpLabel
-OpLoopMerge %103 %105 None
-OpBranch %104
-%104 = OpLabel
-%106 = OpLoad  %9  %85
-%107 = OpLogicalNot  %9  %106
-OpSelectionMerge %108 None
-OpBranchConditional %107 %108 %109
-%109 = OpLabel
+OpBranch %88
+%93 = OpLabel
+OpBranch %95
+%95 = OpLabel
+%97 = OpLoad  %3  %80
+%99 = OpAccessChain  %98  %78 %97
+%100 = OpAtomicLoad  %7  %99 %50 %52
+OpStore %81 %100
+OpStore %84 %24
+OpBranch %101
+%101 = OpLabel
+OpLoopMerge %102 %104 None
 OpBranch %103
+%103 = OpLabel
+%105 = OpLoad  %9  %84
+%106 = OpLogicalNot  %9  %105
+OpSelectionMerge %107 None
+OpBranchConditional %106 %107 %108
 %108 = OpLabel
+OpBranch %102
+%107 = OpLabel
+OpBranch %109
+%109 = OpLabel
+%111 = OpLoad  %7  %81
+%112 = OpIAdd  %7  %111 %79
+%113 = OpLoad  %3  %80
+%114 = OpLoad  %7  %81
+%116 = OpAccessChain  %98  %78 %113
+%117 = OpAtomicCompareExchange  %7  %116 %50 %52 %52 %112 %114
+%118 = OpIEqual  %9  %117 %114
+%115 = OpCompositeConstruct  %11  %117 %118
+%119 = OpCompositeExtract  %7  %115 0
+OpStore %81 %119
+%120 = OpCompositeExtract  %9  %115 1
+OpStore %84 %120
 OpBranch %110
 %110 = OpLabel
-%112 = OpLoad  %7  %82
-%113 = OpIAdd  %7  %112 %80
-%115 = OpLoad  %3  %81
-%116 = OpLoad  %7  %82
-%118 = OpAccessChain  %99  %79 %115
-%119 = OpAtomicCompareExchange  %7  %118 %50 %52 %52 %113 %116
-%120 = OpIEqual  %9  %119 %116
-%117 = OpCompositeConstruct  %11  %119 %120
-%121 = OpCompositeExtract  %7  %117 0
-OpStore %82 %121
-%122 = OpCompositeExtract  %9  %117 1
-OpStore %85 %122
-OpBranch %111
-%111 = OpLabel
-OpBranch %105
-%105 = OpLabel
-OpBranch %102
-%103 = OpLabel
-OpBranch %97
-%97 = OpLabel
-OpBranch %91
-%91 = OpLabel
-%123 = OpLoad  %3  %81
-%124 = OpIAdd  %3  %123 %26
-OpStore %81 %124
-OpBranch %88
-%89 = OpLabel
+OpBranch %104
+%104 = OpLabel
+OpBranch %101
+%102 = OpLabel
+OpBranch %96
+%96 = OpLabel
+OpBranch %90
+%90 = OpLabel
+%121 = OpLoad  %3  %80
+%122 = OpIAdd  %3  %121 %26
+OpStore %80 %122
+OpBranch %87
+%88 = OpLabel
 OpReturn
 OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/spv/extra.spvasm b/naga/tests/out/spv/extra.spvasm
index 9c434a8ce26..0e84427bad4 100644
--- a/naga/tests/out/spv/extra.spvasm
+++ b/naga/tests/out/spv/extra.spvasm
@@ -1,76 +1,74 @@
 ; SPIR-V
 ; Version: 1.2
 ; Generator: rspirv
-; Bound: 48
+; Bound: 47
 OpCapability Shader
-OpCapability Float64
 OpCapability Geometry
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
-OpEntryPoint Fragment %23 "main" %15 %18 %21
-OpExecutionMode %23 OriginUpperLeft
+OpEntryPoint Fragment %22 "main" %14 %17 %20
+OpExecutionMode %22 OriginUpperLeft
 OpMemberDecorate %6 0 Offset 0
-OpMemberDecorate %6 1 Offset 16
-OpMemberDecorate %9 0 Offset 0
-OpMemberDecorate %9 1 Offset 16
-OpDecorate %11 Block
-OpMemberDecorate %11 0 Offset 0
-OpDecorate %15 Location 0
-OpDecorate %18 BuiltIn PrimitiveId
-OpDecorate %18 Flat
-OpDecorate %21 Location 0
+OpMemberDecorate %6 1 Offset 8
+OpMemberDecorate %8 0 Offset 0
+OpMemberDecorate %8 1 Offset 16
+OpDecorate %10 Block
+OpMemberDecorate %10 0 Offset 0
+OpDecorate %14 Location 0
+OpDecorate %17 BuiltIn PrimitiveId
+OpDecorate %17 Flat
+OpDecorate %20 Location 0
 %2 = OpTypeVoid
 %3 = OpTypeInt 32 0
-%5 = OpTypeFloat 64
+%5 = OpTypeFloat 32
 %4 = OpTypeVector %5 2
 %6 = OpTypeStruct %3 %4
-%8 = OpTypeFloat 32
-%7 = OpTypeVector %8 4
-%9 = OpTypeStruct %7 %3
-%11 = OpTypeStruct %6
-%12 = OpTypePointer PushConstant %11
-%10 = OpVariable  %12  PushConstant
-%16 = OpTypePointer Input %7
-%15 = OpVariable  %16  Input
-%19 = OpTypePointer Input %3
-%18 = OpVariable  %19  Input
-%22 = OpTypePointer Output %7
-%21 = OpVariable  %22  Output
-%24 = OpTypeFunction %2
-%25 = OpTypePointer PushConstant %6
-%26 = OpConstant  %3  0
-%28 = OpConstant  %8  1.0
-%29 = OpTypeVector %8 3
-%30 = OpConstantComposite  %29  %28 %28 %28
-%33 = OpTypePointer PushConstant %3
-%36 = OpTypeBool
-%23 = OpFunction  %2  None %24
-%13 = OpLabel
-%17 = OpLoad  %7  %15
-%20 = OpLoad  %3  %18
-%14 = OpCompositeConstruct  %9  %17 %20
-%27 = OpAccessChain  %25  %10 %26
-OpBranch %31
-%31 = OpLabel
-%32 = OpCompositeExtract  %3  %14 1
-%34 = OpAccessChain  %33  %27 %26
-%35 = OpLoad  %3  %34
-%37 = OpIEqual  %36  %32 %35
-OpSelectionMerge %38 None
-OpBranchConditional %37 %39 %40
-%39 = OpLabel
-%41 = OpCompositeExtract  %7  %14 0
-OpStore %21 %41
+%7 = OpTypeVector %5 4
+%8 = OpTypeStruct %7 %3
+%10 = OpTypeStruct %6
+%11 = OpTypePointer PushConstant %10
+%9 = OpVariable  %11  PushConstant
+%15 = OpTypePointer Input %7
+%14 = OpVariable  %15  Input
+%18 = OpTypePointer Input %3
+%17 = OpVariable  %18  Input
+%21 = OpTypePointer Output %7
+%20 = OpVariable  %21  Output
+%23 = OpTypeFunction %2
+%24 = OpTypePointer PushConstant %6
+%25 = OpConstant  %3  0
+%27 = OpConstant  %5  1.0
+%28 = OpTypeVector %5 3
+%29 = OpConstantComposite  %28  %27 %27 %27
+%32 = OpTypePointer PushConstant %3
+%35 = OpTypeBool
+%22 = OpFunction  %2  None %23
+%12 = OpLabel
+%16 = OpLoad  %7  %14
+%19 = OpLoad  %3  %17
+%13 = OpCompositeConstruct  %8  %16 %19
+%26 = OpAccessChain  %24  %9 %25
+OpBranch %30
+%30 = OpLabel
+%31 = OpCompositeExtract  %3  %13 1
+%33 = OpAccessChain  %32  %26 %25
+%34 = OpLoad  %3  %33
+%36 = OpIEqual  %35  %31 %34
+OpSelectionMerge %37 None
+OpBranchConditional %36 %38 %39
+%38 = OpLabel
+%40 = OpCompositeExtract  %7  %13 0
+OpStore %20 %40
 OpReturn
-%40 = OpLabel
-%42 = OpCompositeExtract  %7  %14 0
-%43 = OpVectorShuffle  %29  %42 %42 0 1 2
-%44 = OpFSub  %29  %30 %43
-%45 = OpCompositeExtract  %7  %14 0
-%46 = OpCompositeExtract  %8  %45 3
-%47 = OpCompositeConstruct  %7  %44 %46
-OpStore %21 %47
+%39 = OpLabel
+%41 = OpCompositeExtract  %7  %13 0
+%42 = OpVectorShuffle  %28  %41 %41 0 1 2
+%43 = OpFSub  %28  %29 %42
+%44 = OpCompositeExtract  %7  %13 0
+%45 = OpCompositeExtract  %5  %44 3
+%46 = OpCompositeConstruct  %7  %43 %45
+OpStore %20 %46
 OpReturn
-%38 = OpLabel
+%37 = OpLabel
 OpReturn
 OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/spv/f16.spvasm b/naga/tests/out/spv/f16.spvasm
new file mode 100644
index 00000000000..3d8fcea7a75
--- /dev/null
+++ b/naga/tests/out/spv/f16.spvasm
@@ -0,0 +1,633 @@
+; SPIR-V
+; Version: 1.0
+; Generator: rspirv
+; Bound: 511
+OpCapability Shader
+OpCapability Float16
+OpCapability StorageBuffer16BitAccess
+OpCapability UniformAndStorageBuffer16BitAccess
+OpCapability StorageInputOutput16
+OpExtension "SPV_KHR_storage_buffer_storage_class"
+OpExtension "SPV_KHR_16bit_storage"
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint GLCompute %499 "main"
+OpExecutionMode %499 LocalSize 1 1 1
+OpMemberDecorate %19 0 Offset 0
+OpMemberDecorate %19 1 Offset 4
+OpMemberDecorate %19 2 Offset 8
+OpMemberDecorate %19 3 Offset 12
+OpMemberDecorate %19 4 Offset 16
+OpMemberDecorate %19 5 Offset 24
+OpMemberDecorate %19 6 Offset 32
+OpMemberDecorate %19 7 Offset 40
+OpMemberDecorate %19 8 Offset 44
+OpMemberDecorate %19 8 ColMajor
+OpMemberDecorate %19 8 MatrixStride 4
+OpMemberDecorate %19 9 Offset 56
+OpMemberDecorate %19 9 ColMajor
+OpMemberDecorate %19 9 MatrixStride 8
+OpMemberDecorate %19 10 Offset 72
+OpMemberDecorate %19 10 ColMajor
+OpMemberDecorate %19 10 MatrixStride 8
+OpMemberDecorate %19 11 Offset 88
+OpMemberDecorate %19 11 ColMajor
+OpMemberDecorate %19 11 MatrixStride 4
+OpMemberDecorate %19 12 Offset 104
+OpMemberDecorate %19 12 ColMajor
+OpMemberDecorate %19 12 MatrixStride 8
+OpMemberDecorate %19 13 Offset 128
+OpMemberDecorate %19 13 ColMajor
+OpMemberDecorate %19 13 MatrixStride 8
+OpMemberDecorate %19 14 Offset 152
+OpMemberDecorate %19 14 ColMajor
+OpMemberDecorate %19 14 MatrixStride 4
+OpMemberDecorate %19 15 Offset 168
+OpMemberDecorate %19 15 ColMajor
+OpMemberDecorate %19 15 MatrixStride 8
+OpMemberDecorate %19 16 Offset 200
+OpMemberDecorate %19 16 ColMajor
+OpMemberDecorate %19 16 MatrixStride 8
+OpDecorate %20 ArrayStride 2
+OpMemberDecorate %22 0 Offset 0
+OpMemberDecorate %23 0 Offset 0
+OpMemberDecorate %23 1 Offset 2
+OpMemberDecorate %23 2 Offset 8
+OpMemberDecorate %23 3 Offset 14
+OpMemberDecorate %23 4 Offset 16
+OpMemberDecorate %23 5 Offset 20
+OpDecorate %28 DescriptorSet 0
+OpDecorate %28 Binding 0
+OpDecorate %29 Block
+OpMemberDecorate %29 0 Offset 0
+OpDecorate %31 NonWritable
+OpDecorate %31 DescriptorSet 0
+OpDecorate %31 Binding 1
+OpDecorate %32 Block
+OpMemberDecorate %32 0 Offset 0
+OpDecorate %34 NonWritable
+OpDecorate %34 DescriptorSet 0
+OpDecorate %34 Binding 2
+OpDecorate %35 Block
+OpMemberDecorate %35 0 Offset 0
+OpDecorate %37 DescriptorSet 0
+OpDecorate %37 Binding 3
+OpDecorate %38 Block
+OpMemberDecorate %38 0 Offset 0
+OpDecorate %40 DescriptorSet 0
+OpDecorate %40 Binding 4
+OpDecorate %41 Block
+OpMemberDecorate %41 0 Offset 0
+%2 = OpTypeVoid
+%3 = OpTypeFloat 16
+%4 = OpTypeInt 32 0
+%5 = OpTypeInt 32 1
+%6 = OpTypeFloat 32
+%7 = OpTypeVector %3 2
+%8 = OpTypeVector %3 3
+%9 = OpTypeVector %3 4
+%10 = OpTypeMatrix %7 2
+%11 = OpTypeMatrix %8 2
+%12 = OpTypeMatrix %9 2
+%13 = OpTypeMatrix %7 3
+%14 = OpTypeMatrix %8 3
+%15 = OpTypeMatrix %9 3
+%16 = OpTypeMatrix %7 4
+%17 = OpTypeMatrix %8 4
+%18 = OpTypeMatrix %9 4
+%19 = OpTypeStruct %4 %5 %6 %3 %7 %8 %9 %3 %10 %11 %12 %13 %14 %15 %16 %17 %18
+%21 = OpConstant  %4  2
+%20 = OpTypeArray %3 %21
+%22 = OpTypeStruct %20
+%23 = OpTypeStruct %3 %3 %8 %3 %3 %4
+%24 = OpConstant  %3  2.1524e-41
+%25 = OpConstant  %3  2.7121e-41
+%27 = OpTypePointer Private %3
+%26 = OpVariable  %27  Private %24
+%29 = OpTypeStruct %19
+%30 = OpTypePointer Uniform %29
+%28 = OpVariable  %30  Uniform
+%32 = OpTypeStruct %19
+%33 = OpTypePointer StorageBuffer %32
+%31 = OpVariable  %33  StorageBuffer
+%35 = OpTypeStruct %22
+%36 = OpTypePointer StorageBuffer %35
+%34 = OpVariable  %36  StorageBuffer
+%38 = OpTypeStruct %19
+%39 = OpTypePointer StorageBuffer %38
+%37 = OpVariable  %39  StorageBuffer
+%41 = OpTypeStruct %22
+%42 = OpTypePointer StorageBuffer %41
+%40 = OpVariable  %42  StorageBuffer
+%46 = OpTypeFunction %3 %3
+%47 = OpTypePointer Uniform %19
+%48 = OpConstant  %4  0
+%50 = OpTypePointer StorageBuffer %19
+%52 = OpTypePointer StorageBuffer %22
+%56 = OpConstant  %3  8.8991e-41
+%57 = OpConstant  %3  2.4753e-41
+%59 = OpTypePointer Function %3
+%67 = OpTypePointer Uniform %6
+%76 = OpTypePointer Uniform %3
+%77 = OpConstant  %4  3
+%84 = OpTypePointer StorageBuffer %3
+%91 = OpTypePointer StorageBuffer %7
+%92 = OpTypePointer Uniform %7
+%93 = OpConstant  %4  4
+%100 = OpTypePointer StorageBuffer %8
+%101 = OpTypePointer Uniform %8
+%102 = OpConstant  %4  5
+%109 = OpTypePointer StorageBuffer %9
+%110 = OpTypePointer Uniform %9
+%111 = OpConstant  %4  6
+%118 = OpTypePointer StorageBuffer %10
+%119 = OpTypePointer Uniform %10
+%120 = OpConstant  %4  8
+%133 = OpTypePointer StorageBuffer %11
+%134 = OpTypePointer Uniform %11
+%135 = OpConstant  %4  9
+%148 = OpTypePointer StorageBuffer %12
+%149 = OpTypePointer Uniform %12
+%150 = OpConstant  %4  10
+%163 = OpTypePointer StorageBuffer %13
+%164 = OpTypePointer Uniform %13
+%165 = OpConstant  %4  11
+%181 = OpTypePointer StorageBuffer %14
+%182 = OpTypePointer Uniform %14
+%183 = OpConstant  %4  12
+%199 = OpTypePointer StorageBuffer %15
+%200 = OpTypePointer Uniform %15
+%201 = OpConstant  %4  13
+%217 = OpTypePointer StorageBuffer %16
+%218 = OpTypePointer Uniform %16
+%219 = OpConstant  %4  14
+%238 = OpTypePointer StorageBuffer %17
+%239 = OpTypePointer Uniform %17
+%240 = OpConstant  %4  15
+%259 = OpTypePointer StorageBuffer %18
+%260 = OpTypePointer Uniform %18
+%261 = OpConstant  %4  16
+%280 = OpTypePointer StorageBuffer %20
+%319 = OpTypeVector %6 2
+%325 = OpTypeVector %6 3
+%331 = OpTypeVector %6 4
+%337 = OpTypeMatrix %319 2
+%351 = OpTypeMatrix %325 2
+%365 = OpTypeMatrix %331 2
+%379 = OpTypeMatrix %319 3
+%397 = OpTypeMatrix %325 3
+%415 = OpTypeMatrix %331 3
+%433 = OpTypeMatrix %319 4
+%455 = OpTypeMatrix %325 4
+%477 = OpTypeMatrix %331 4
+%500 = OpTypeFunction %2
+%506 = OpConstant  %3  2.2959e-41
+%509 = OpConstant  %4  7
+%45 = OpFunction  %3  None %46
+%44 = OpFunctionParameter  %3
+%43 = OpLabel
+%58 = OpVariable  %59  Function %25
+%49 = OpAccessChain  %47  %28 %48
+%51 = OpAccessChain  %50  %31 %48
+%53 = OpAccessChain  %52  %34 %48
+%54 = OpAccessChain  %50  %37 %48
+%55 = OpAccessChain  %52  %40 %48
+OpBranch %60
+%60 = OpLabel
+%61 = OpLoad  %3  %58
+%62 = OpFAdd  %3  %61 %56
+OpStore %58 %62
+%63 = OpLoad  %3  %58
+%64 = OpFAdd  %3  %63 %57
+%65 = OpLoad  %3  %58
+%66 = OpFAdd  %3  %65 %64
+OpStore %58 %66
+%68 = OpAccessChain  %67  %49 %21
+%69 = OpLoad  %6  %68
+%70 = OpLoad  %3  %58
+%71 = OpFConvert  %6  %70
+%72 = OpFAdd  %6  %69 %71
+%73 = OpFConvert  %3  %72
+%74 = OpLoad  %3  %58
+%75 = OpFAdd  %3  %74 %73
+OpStore %58 %75
+%78 = OpAccessChain  %76  %49 %77
+%79 = OpLoad  %3  %78
+%80 = OpCompositeConstruct  %8  %79 %79 %79
+%81 = OpCompositeExtract  %3  %80 2
+%82 = OpLoad  %3  %58
+%83 = OpFAdd  %3  %82 %81
+OpStore %58 %83
+%85 = OpAccessChain  %76  %49 %77
+%86 = OpLoad  %3  %85
+%87 = OpAccessChain  %84  %51 %77
+%88 = OpLoad  %3  %87
+%89 = OpFAdd  %3  %86 %88
+%90 = OpAccessChain  %84  %54 %77
+OpStore %90 %89
+%94 = OpAccessChain  %92  %49 %93
+%95 = OpLoad  %7  %94
+%96 = OpAccessChain  %91  %51 %93
+%97 = OpLoad  %7  %96
+%98 = OpFAdd  %7  %95 %97
+%99 = OpAccessChain  %91  %54 %93
+OpStore %99 %98
+%103 = OpAccessChain  %101  %49 %102
+%104 = OpLoad  %8  %103
+%105 = OpAccessChain  %100  %51 %102
+%106 = OpLoad  %8  %105
+%107 = OpFAdd  %8  %104 %106
+%108 = OpAccessChain  %100  %54 %102
+OpStore %108 %107
+%112 = OpAccessChain  %110  %49 %111
+%113 = OpLoad  %9  %112
+%114 = OpAccessChain  %109  %51 %111
+%115 = OpLoad  %9  %114
+%116 = OpFAdd  %9  %113 %115
+%117 = OpAccessChain  %109  %54 %111
+OpStore %117 %116
+%121 = OpAccessChain  %119  %49 %120
+%122 = OpLoad  %10  %121
+%123 = OpAccessChain  %118  %51 %120
+%124 = OpLoad  %10  %123
+%126 = OpCompositeExtract  %7  %122 0
+%127 = OpCompositeExtract  %7  %124 0
+%128 = OpFAdd  %7  %126 %127
+%129 = OpCompositeExtract  %7  %122 1
+%130 = OpCompositeExtract  %7  %124 1
+%131 = OpFAdd  %7  %129 %130
+%125 = OpCompositeConstruct  %10  %128 %131
+%132 = OpAccessChain  %118  %54 %120
+OpStore %132 %125
+%136 = OpAccessChain  %134  %49 %135
+%137 = OpLoad  %11  %136
+%138 = OpAccessChain  %133  %51 %135
+%139 = OpLoad  %11  %138
+%141 = OpCompositeExtract  %8  %137 0
+%142 = OpCompositeExtract  %8  %139 0
+%143 = OpFAdd  %8  %141 %142
+%144 = OpCompositeExtract  %8  %137 1
+%145 = OpCompositeExtract  %8  %139 1
+%146 = OpFAdd  %8  %144 %145
+%140 = OpCompositeConstruct  %11  %143 %146
+%147 = OpAccessChain  %133  %54 %135
+OpStore %147 %140
+%151 = OpAccessChain  %149  %49 %150
+%152 = OpLoad  %12  %151
+%153 = OpAccessChain  %148  %51 %150
+%154 = OpLoad  %12  %153
+%156 = OpCompositeExtract  %9  %152 0
+%157 = OpCompositeExtract  %9  %154 0
+%158 = OpFAdd  %9  %156 %157
+%159 = OpCompositeExtract  %9  %152 1
+%160 = OpCompositeExtract  %9  %154 1
+%161 = OpFAdd  %9  %159 %160
+%155 = OpCompositeConstruct  %12  %158 %161
+%162 = OpAccessChain  %148  %54 %150
+OpStore %162 %155
+%166 = OpAccessChain  %164  %49 %165
+%167 = OpLoad  %13  %166
+%168 = OpAccessChain  %163  %51 %165
+%169 = OpLoad  %13  %168
+%171 = OpCompositeExtract  %7  %167 0
+%172 = OpCompositeExtract  %7  %169 0
+%173 = OpFAdd  %7  %171 %172
+%174 = OpCompositeExtract  %7  %167 1
+%175 = OpCompositeExtract  %7  %169 1
+%176 = OpFAdd  %7  %174 %175
+%177 = OpCompositeExtract  %7  %167 2
+%178 = OpCompositeExtract  %7  %169 2
+%179 = OpFAdd  %7  %177 %178
+%170 = OpCompositeConstruct  %13  %173 %176 %179
+%180 = OpAccessChain  %163  %54 %165
+OpStore %180 %170
+%184 = OpAccessChain  %182  %49 %183
+%185 = OpLoad  %14  %184
+%186 = OpAccessChain  %181  %51 %183
+%187 = OpLoad  %14  %186
+%189 = OpCompositeExtract  %8  %185 0
+%190 = OpCompositeExtract  %8  %187 0
+%191 = OpFAdd  %8  %189 %190
+%192 = OpCompositeExtract  %8  %185 1
+%193 = OpCompositeExtract  %8  %187 1
+%194 = OpFAdd  %8  %192 %193
+%195 = OpCompositeExtract  %8  %185 2
+%196 = OpCompositeExtract  %8  %187 2
+%197 = OpFAdd  %8  %195 %196
+%188 = OpCompositeConstruct  %14  %191 %194 %197
+%198 = OpAccessChain  %181  %54 %183
+OpStore %198 %188
+%202 = OpAccessChain  %200  %49 %201
+%203 = OpLoad  %15  %202
+%204 = OpAccessChain  %199  %51 %201
+%205 = OpLoad  %15  %204
+%207 = OpCompositeExtract  %9  %203 0
+%208 = OpCompositeExtract  %9  %205 0
+%209 = OpFAdd  %9  %207 %208
+%210 = OpCompositeExtract  %9  %203 1
+%211 = OpCompositeExtract  %9  %205 1
+%212 = OpFAdd  %9  %210 %211
+%213 = OpCompositeExtract  %9  %203 2
+%214 = OpCompositeExtract  %9  %205 2
+%215 = OpFAdd  %9  %213 %214
+%206 = OpCompositeConstruct  %15  %209 %212 %215
+%216 = OpAccessChain  %199  %54 %201
+OpStore %216 %206
+%220 = OpAccessChain  %218  %49 %219
+%221 = OpLoad  %16  %220
+%222 = OpAccessChain  %217  %51 %219
+%223 = OpLoad  %16  %222
+%225 = OpCompositeExtract  %7  %221 0
+%226 = OpCompositeExtract  %7  %223 0
+%227 = OpFAdd  %7  %225 %226
+%228 = OpCompositeExtract  %7  %221 1
+%229 = OpCompositeExtract  %7  %223 1
+%230 = OpFAdd  %7  %228 %229
+%231 = OpCompositeExtract  %7  %221 2
+%232 = OpCompositeExtract  %7  %223 2
+%233 = OpFAdd  %7  %231 %232
+%234 = OpCompositeExtract  %7  %221 3
+%235 = OpCompositeExtract  %7  %223 3
+%236 = OpFAdd  %7  %234 %235
+%224 = OpCompositeConstruct  %16  %227 %230 %233 %236
+%237 = OpAccessChain  %217  %54 %219
+OpStore %237 %224
+%241 = OpAccessChain  %239  %49 %240
+%242 = OpLoad  %17  %241
+%243 = OpAccessChain  %238  %51 %240
+%244 = OpLoad  %17  %243
+%246 = OpCompositeExtract  %8  %242 0
+%247 = OpCompositeExtract  %8  %244 0
+%248 = OpFAdd  %8  %246 %247
+%249 = OpCompositeExtract  %8  %242 1
+%250 = OpCompositeExtract  %8  %244 1
+%251 = OpFAdd  %8  %249 %250
+%252 = OpCompositeExtract  %8  %242 2
+%253 = OpCompositeExtract  %8  %244 2
+%254 = OpFAdd  %8  %252 %253
+%255 = OpCompositeExtract  %8  %242 3
+%256 = OpCompositeExtract  %8  %244 3
+%257 = OpFAdd  %8  %255 %256
+%245 = OpCompositeConstruct  %17  %248 %251 %254 %257
+%258 = OpAccessChain  %238  %54 %240
+OpStore %258 %245
+%262 = OpAccessChain  %260  %49 %261
+%263 = OpLoad  %18  %262
+%264 = OpAccessChain  %259  %51 %261
+%265 = OpLoad  %18  %264
+%267 = OpCompositeExtract  %9  %263 0
+%268 = OpCompositeExtract  %9  %265 0
+%269 = OpFAdd  %9  %267 %268
+%270 = OpCompositeExtract  %9  %263 1
+%271 = OpCompositeExtract  %9  %265 1
+%272 = OpFAdd  %9  %270 %271
+%273 = OpCompositeExtract  %9  %263 2
+%274 = OpCompositeExtract  %9  %265 2
+%275 = OpFAdd  %9  %273 %274
+%276 = OpCompositeExtract  %9  %263 3
+%277 = OpCompositeExtract  %9  %265 3
+%278 = OpFAdd  %9  %276 %277
+%266 = OpCompositeConstruct  %18  %269 %272 %275 %278
+%279 = OpAccessChain  %259  %54 %261
+OpStore %279 %266
+%281 = OpAccessChain  %280  %53 %48
+%282 = OpLoad  %20  %281
+%283 = OpAccessChain  %280  %55 %48
+OpStore %283 %282
+%284 = OpLoad  %3  %58
+%285 = OpExtInst  %3  %1 FAbs %284
+%286 = OpLoad  %3  %58
+%287 = OpFAdd  %3  %286 %285
+OpStore %58 %287
+%288 = OpLoad  %3  %58
+%289 = OpLoad  %3  %58
+%290 = OpLoad  %3  %58
+%291 = OpExtInst  %3  %1 FClamp %288 %289 %290
+%292 = OpLoad  %3  %58
+%293 = OpFAdd  %3  %292 %291
+OpStore %58 %293
+%294 = OpLoad  %3  %58
+%295 = OpCompositeConstruct  %7  %294 %294
+%296 = OpLoad  %3  %58
+%297 = OpCompositeConstruct  %7  %296 %296
+%298 = OpDot  %3  %295 %297
+%299 = OpLoad  %3  %58
+%300 = OpFAdd  %3  %299 %298
+OpStore %58 %300
+%301 = OpLoad  %3  %58
+%302 = OpLoad  %3  %58
+%303 = OpExtInst  %3  %1 FMax %301 %302
+%304 = OpLoad  %3  %58
+%305 = OpFAdd  %3  %304 %303
+OpStore %58 %305
+%306 = OpLoad  %3  %58
+%307 = OpLoad  %3  %58
+%308 = OpExtInst  %3  %1 FMin %306 %307
+%309 = OpLoad  %3  %58
+%310 = OpFAdd  %3  %309 %308
+OpStore %58 %310
+%311 = OpLoad  %3  %58
+%312 = OpExtInst  %3  %1 FSign %311
+%313 = OpLoad  %3  %58
+%314 = OpFAdd  %3  %313 %312
+OpStore %58 %314
+%315 = OpLoad  %3  %58
+%316 = OpFAdd  %3  %315 %24
+OpStore %58 %316
+%317 = OpAccessChain  %92  %49 %93
+%318 = OpLoad  %7  %317
+%320 = OpFConvert  %319  %318
+%321 = OpFConvert  %7  %320
+%322 = OpAccessChain  %91  %54 %93
+OpStore %322 %321
+%323 = OpAccessChain  %101  %49 %102
+%324 = OpLoad  %8  %323
+%326 = OpFConvert  %325  %324
+%327 = OpFConvert  %8  %326
+%328 = OpAccessChain  %100  %54 %102
+OpStore %328 %327
+%329 = OpAccessChain  %110  %49 %111
+%330 = OpLoad  %9  %329
+%332 = OpFConvert  %331  %330
+%333 = OpFConvert  %9  %332
+%334 = OpAccessChain  %109  %54 %111
+OpStore %334 %333
+%335 = OpAccessChain  %119  %49 %120
+%336 = OpLoad  %10  %335
+%338 = OpCompositeExtract  %7  %336 0
+%339 = OpFConvert  %319  %338
+%340 = OpCompositeExtract  %7  %336 1
+%341 = OpFConvert  %319  %340
+%342 = OpCompositeConstruct  %337  %339 %341
+%343 = OpCompositeExtract  %319  %342 0
+%344 = OpFConvert  %7  %343
+%345 = OpCompositeExtract  %319  %342 1
+%346 = OpFConvert  %7  %345
+%347 = OpCompositeConstruct  %10  %344 %346
+%348 = OpAccessChain  %118  %54 %120
+OpStore %348 %347
+%349 = OpAccessChain  %134  %49 %135
+%350 = OpLoad  %11  %349
+%352 = OpCompositeExtract  %8  %350 0
+%353 = OpFConvert  %325  %352
+%354 = OpCompositeExtract  %8  %350 1
+%355 = OpFConvert  %325  %354
+%356 = OpCompositeConstruct  %351  %353 %355
+%357 = OpCompositeExtract  %325  %356 0
+%358 = OpFConvert  %8  %357
+%359 = OpCompositeExtract  %325  %356 1
+%360 = OpFConvert  %8  %359
+%361 = OpCompositeConstruct  %11  %358 %360
+%362 = OpAccessChain  %133  %54 %135
+OpStore %362 %361
+%363 = OpAccessChain  %149  %49 %150
+%364 = OpLoad  %12  %363
+%366 = OpCompositeExtract  %9  %364 0
+%367 = OpFConvert  %331  %366
+%368 = OpCompositeExtract  %9  %364 1
+%369 = OpFConvert  %331  %368
+%370 = OpCompositeConstruct  %365  %367 %369
+%371 = OpCompositeExtract  %331  %370 0
+%372 = OpFConvert  %9  %371
+%373 = OpCompositeExtract  %331  %370 1
+%374 = OpFConvert  %9  %373
+%375 = OpCompositeConstruct  %12  %372 %374
+%376 = OpAccessChain  %148  %54 %150
+OpStore %376 %375
+%377 = OpAccessChain  %164  %49 %165
+%378 = OpLoad  %13  %377
+%380 = OpCompositeExtract  %7  %378 0
+%381 = OpFConvert  %319  %380
+%382 = OpCompositeExtract  %7  %378 1
+%383 = OpFConvert  %319  %382
+%384 = OpCompositeExtract  %7  %378 2
+%385 = OpFConvert  %319  %384
+%386 = OpCompositeConstruct  %379  %381 %383 %385
+%387 = OpCompositeExtract  %319  %386 0
+%388 = OpFConvert  %7  %387
+%389 = OpCompositeExtract  %319  %386 1
+%390 = OpFConvert  %7  %389
+%391 = OpCompositeExtract  %319  %386 2
+%392 = OpFConvert  %7  %391
+%393 = OpCompositeConstruct  %13  %388 %390 %392
+%394 = OpAccessChain  %163  %54 %165
+OpStore %394 %393
+%395 = OpAccessChain  %182  %49 %183
+%396 = OpLoad  %14  %395
+%398 = OpCompositeExtract  %8  %396 0
+%399 = OpFConvert  %325  %398
+%400 = OpCompositeExtract  %8  %396 1
+%401 = OpFConvert  %325  %400
+%402 = OpCompositeExtract  %8  %396 2
+%403 = OpFConvert  %325  %402
+%404 = OpCompositeConstruct  %397  %399 %401 %403
+%405 = OpCompositeExtract  %325  %404 0
+%406 = OpFConvert  %8  %405
+%407 = OpCompositeExtract  %325  %404 1
+%408 = OpFConvert  %8  %407
+%409 = OpCompositeExtract  %325  %404 2
+%410 = OpFConvert  %8  %409
+%411 = OpCompositeConstruct  %14  %406 %408 %410
+%412 = OpAccessChain  %181  %54 %183
+OpStore %412 %411
+%413 = OpAccessChain  %200  %49 %201
+%414 = OpLoad  %15  %413
+%416 = OpCompositeExtract  %9  %414 0
+%417 = OpFConvert  %331  %416
+%418 = OpCompositeExtract  %9  %414 1
+%419 = OpFConvert  %331  %418
+%420 = OpCompositeExtract  %9  %414 2
+%421 = OpFConvert  %331  %420
+%422 = OpCompositeConstruct  %415  %417 %419 %421
+%423 = OpCompositeExtract  %331  %422 0
+%424 = OpFConvert  %9  %423
+%425 = OpCompositeExtract  %331  %422 1
+%426 = OpFConvert  %9  %425
+%427 = OpCompositeExtract  %331  %422 2
+%428 = OpFConvert  %9  %427
+%429 = OpCompositeConstruct  %15  %424 %426 %428
+%430 = OpAccessChain  %199  %54 %201
+OpStore %430 %429
+%431 = OpAccessChain  %218  %49 %219
+%432 = OpLoad  %16  %431
+%434 = OpCompositeExtract  %7  %432 0
+%435 = OpFConvert  %319  %434
+%436 = OpCompositeExtract  %7  %432 1
+%437 = OpFConvert  %319  %436
+%438 = OpCompositeExtract  %7  %432 2
+%439 = OpFConvert  %319  %438
+%440 = OpCompositeExtract  %7  %432 3
+%441 = OpFConvert  %319  %440
+%442 = OpCompositeConstruct  %433  %435 %437 %439 %441
+%443 = OpCompositeExtract  %319  %442 0
+%444 = OpFConvert  %7  %443
+%445 = OpCompositeExtract  %319  %442 1
+%446 = OpFConvert  %7  %445
+%447 = OpCompositeExtract  %319  %442 2
+%448 = OpFConvert  %7  %447
+%449 = OpCompositeExtract  %319  %442 3
+%450 = OpFConvert  %7  %449
+%451 = OpCompositeConstruct  %16  %444 %446 %448 %450
+%452 = OpAccessChain  %217  %54 %219
+OpStore %452 %451
+%453 = OpAccessChain  %239  %49 %240
+%454 = OpLoad  %17  %453
+%456 = OpCompositeExtract  %8  %454 0
+%457 = OpFConvert  %325  %456
+%458 = OpCompositeExtract  %8  %454 1
+%459 = OpFConvert  %325  %458
+%460 = OpCompositeExtract  %8  %454 2
+%461 = OpFConvert  %325  %460
+%462 = OpCompositeExtract  %8  %454 3
+%463 = OpFConvert  %325  %462
+%464 = OpCompositeConstruct  %455  %457 %459 %461 %463
+%465 = OpCompositeExtract  %325  %464 0
+%466 = OpFConvert  %8  %465
+%467 = OpCompositeExtract  %325  %464 1
+%468 = OpFConvert  %8  %467
+%469 = OpCompositeExtract  %325  %464 2
+%470 = OpFConvert  %8  %469
+%471 = OpCompositeExtract  %325  %464 3
+%472 = OpFConvert  %8  %471
+%473 = OpCompositeConstruct  %17  %466 %468 %470 %472
+%474 = OpAccessChain  %238  %54 %240
+OpStore %474 %473
+%475 = OpAccessChain  %260  %49 %261
+%476 = OpLoad  %18  %475
+%478 = OpCompositeExtract  %9  %476 0
+%479 = OpFConvert  %331  %478
+%480 = OpCompositeExtract  %9  %476 1
+%481 = OpFConvert  %331  %480
+%482 = OpCompositeExtract  %9  %476 2
+%483 = OpFConvert  %331  %482
+%484 = OpCompositeExtract  %9  %476 3
+%485 = OpFConvert  %331  %484
+%486 = OpCompositeConstruct  %477  %479 %481 %483 %485
+%487 = OpCompositeExtract  %331  %486 0
+%488 = OpFConvert  %9  %487
+%489 = OpCompositeExtract  %331  %486 1
+%490 = OpFConvert  %9  %489
+%491 = OpCompositeExtract  %331  %486 2
+%492 = OpFConvert  %9  %491
+%493 = OpCompositeExtract  %331  %486 3
+%494 = OpFConvert  %9  %493
+%495 = OpCompositeConstruct  %18  %488 %490 %492 %494
+%496 = OpAccessChain  %259  %54 %261
+OpStore %496 %495
+%497 = OpLoad  %3  %58
+OpReturnValue %497
+OpFunctionEnd
+%499 = OpFunction  %2  None %500
+%498 = OpLabel
+%501 = OpAccessChain  %47  %28 %48
+%502 = OpAccessChain  %50  %31 %48
+%503 = OpAccessChain  %52  %34 %48
+%504 = OpAccessChain  %50  %37 %48
+%505 = OpAccessChain  %52  %40 %48
+OpBranch %507
+%507 = OpLabel
+%508 = OpFunctionCall  %3  %45 %506
+%510 = OpAccessChain  %84  %504 %509
+OpStore %510 %508
+OpReturn
+OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/spv/image.spvasm b/naga/tests/out/spv/image.spvasm
index 7a34bed86e0..94d8521267e 100644
--- a/naga/tests/out/spv/image.spvasm
+++ b/naga/tests/out/spv/image.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.1
 ; Generator: rspirv
-; Bound: 546
+; Bound: 545
 OpCapability Shader
 OpCapability Image1D
 OpCapability Sampled1D
@@ -10,19 +10,19 @@ OpCapability ImageQuery
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
 OpEntryPoint GLCompute %99 "main" %96
-OpEntryPoint GLCompute %190 "depth_load" %188
-OpEntryPoint Vertex %210 "queries" %208
-OpEntryPoint Vertex %262 "levels_queries" %261
-OpEntryPoint Fragment %293 "texture_sample" %292
-OpEntryPoint Fragment %439 "texture_sample_comparison" %437
-OpEntryPoint Fragment %495 "gather" %494
-OpEntryPoint Fragment %529 "depth_no_comparison" %528
+OpEntryPoint GLCompute %189 "depth_load" %187
+OpEntryPoint Vertex %209 "queries" %207
+OpEntryPoint Vertex %261 "levels_queries" %260
+OpEntryPoint Fragment %292 "texture_sample" %291
+OpEntryPoint Fragment %438 "texture_sample_comparison" %436
+OpEntryPoint Fragment %494 "gather" %493
+OpEntryPoint Fragment %528 "depth_no_comparison" %527
 OpExecutionMode %99 LocalSize 16 1 1
-OpExecutionMode %190 LocalSize 16 1 1
-OpExecutionMode %293 OriginUpperLeft
-OpExecutionMode %439 OriginUpperLeft
-OpExecutionMode %495 OriginUpperLeft
-OpExecutionMode %529 OriginUpperLeft
+OpExecutionMode %189 LocalSize 16 1 1
+OpExecutionMode %292 OriginUpperLeft
+OpExecutionMode %438 OriginUpperLeft
+OpExecutionMode %494 OriginUpperLeft
+OpExecutionMode %528 OriginUpperLeft
 OpName %31 "image_mipmapped_src"
 OpName %33 "image_multisampled_src"
 OpName %35 "image_depth_multisampled_src"
@@ -50,16 +50,16 @@ OpName %76 "lhs"
 OpName %77 "rhs"
 OpName %96 "local_id"
 OpName %99 "main"
-OpName %188 "local_id"
-OpName %190 "depth_load"
-OpName %210 "queries"
-OpName %262 "levels_queries"
-OpName %293 "texture_sample"
-OpName %306 "a"
-OpName %439 "texture_sample_comparison"
-OpName %444 "a"
-OpName %495 "gather"
-OpName %529 "depth_no_comparison"
+OpName %187 "local_id"
+OpName %189 "depth_load"
+OpName %209 "queries"
+OpName %261 "levels_queries"
+OpName %292 "texture_sample"
+OpName %305 "a"
+OpName %438 "texture_sample_comparison"
+OpName %443 "a"
+OpName %494 "gather"
+OpName %528 "depth_no_comparison"
 OpDecorate %31 DescriptorSet 0
 OpDecorate %31 Binding 0
 OpDecorate %33 DescriptorSet 0
@@ -108,13 +108,13 @@ OpDecorate %70 Binding 3
 OpDecorate %72 DescriptorSet 1
 OpDecorate %72 Binding 4
 OpDecorate %96 BuiltIn LocalInvocationId
-OpDecorate %188 BuiltIn LocalInvocationId
-OpDecorate %208 BuiltIn Position
-OpDecorate %261 BuiltIn Position
-OpDecorate %292 Location 0
-OpDecorate %437 Location 0
-OpDecorate %494 Location 0
-OpDecorate %528 Location 0
+OpDecorate %187 BuiltIn LocalInvocationId
+OpDecorate %207 BuiltIn Position
+OpDecorate %260 BuiltIn Position
+OpDecorate %291 Location 0
+OpDecorate %436 Location 0
+OpDecorate %493 Location 0
+OpDecorate %527 Location 0
 %2 = OpTypeVoid
 %4 = OpTypeInt 32 0
 %3 = OpTypeImage %4 2D 0 0 0 1 Unknown
@@ -206,40 +206,40 @@ OpDecorate %528 Location 0
 %111 = OpTypeVector %4 2
 %119 = OpTypeVector %4 4
 %130 = OpTypeVector %14 3
-%188 = OpVariable  %97  Input
-%209 = OpTypePointer Output %23
-%208 = OpVariable  %209  Output
-%219 = OpConstant  %4  0
-%261 = OpVariable  %209  Output
-%292 = OpVariable  %209  Output
-%299 = OpConstant  %7  0.5
-%300 = OpTypeVector %7 2
-%301 = OpConstantComposite  %300  %299 %299
-%302 = OpTypeVector %7 3
-%303 = OpConstantComposite  %302  %299 %299 %299
-%304 = OpConstant  %7  2.3
-%305 = OpConstant  %7  2.0
-%307 = OpTypePointer Function %23
-%308 = OpConstantNull  %23
-%311 = OpTypeSampledImage %15
-%316 = OpTypeSampledImage %16
-%337 = OpTypeSampledImage %18
-%398 = OpTypeSampledImage %20
-%438 = OpTypePointer Output %7
-%437 = OpVariable  %438  Output
-%445 = OpTypePointer Function %7
-%446 = OpConstantNull  %7
-%448 = OpTypeSampledImage %25
-%453 = OpTypeSampledImage %26
-%466 = OpTypeSampledImage %27
-%473 = OpConstant  %7  0.0
-%494 = OpVariable  %209  Output
-%505 = OpConstant  %4  1
-%508 = OpConstant  %4  3
-%513 = OpTypeSampledImage %3
-%516 = OpTypeVector %14 4
-%517 = OpTypeSampledImage %17
-%528 = OpVariable  %209  Output
+%187 = OpVariable  %97  Input
+%208 = OpTypePointer Output %23
+%207 = OpVariable  %208  Output
+%218 = OpConstant  %4  0
+%260 = OpVariable  %208  Output
+%291 = OpVariable  %208  Output
+%298 = OpConstant  %7  0.5
+%299 = OpTypeVector %7 2
+%300 = OpConstantComposite  %299  %298 %298
+%301 = OpTypeVector %7 3
+%302 = OpConstantComposite  %301  %298 %298 %298
+%303 = OpConstant  %7  2.3
+%304 = OpConstant  %7  2.0
+%306 = OpTypePointer Function %23
+%307 = OpConstantNull  %23
+%310 = OpTypeSampledImage %15
+%315 = OpTypeSampledImage %16
+%336 = OpTypeSampledImage %18
+%397 = OpTypeSampledImage %20
+%437 = OpTypePointer Output %7
+%436 = OpVariable  %437  Output
+%444 = OpTypePointer Function %7
+%445 = OpConstantNull  %7
+%447 = OpTypeSampledImage %25
+%452 = OpTypeSampledImage %26
+%465 = OpTypeSampledImage %27
+%472 = OpConstant  %7  0.0
+%493 = OpVariable  %208  Output
+%504 = OpConstant  %4  1
+%507 = OpConstant  %4  3
+%512 = OpTypeSampledImage %3
+%515 = OpTypeVector %14 4
+%516 = OpTypeSampledImage %17
+%527 = OpVariable  %208  Output
 %74 = OpFunction  %13  None %75
 %76 = OpFunctionParameter  %13
 %77 = OpFunctionParameter  %13
@@ -322,404 +322,404 @@ OpBranch %110
 %169 = OpCompositeConstruct  %12  %162 %168
 %170 = OpImageFetch  %119  %104 %169 Lod %167
 %171 = OpCompositeExtract  %4  %98 0
-%173 = OpCompositeExtract  %4  %98 2
-%174 = OpBitcast  %14  %173
-%175 = OpImageFetch  %119  %105 %171 Lod %174
-%176 = OpCompositeExtract  %14  %116 0
-%177 = OpIAdd  %119  %120 %123
-%178 = OpIAdd  %119  %177 %124
-%179 = OpIAdd  %119  %178 %132
-%180 = OpIAdd  %119  %179 %139
-OpImageWrite %106 %176 %180
-%181 = OpCompositeExtract  %14  %116 0
-%182 = OpBitcast  %4  %181
-%183 = OpIAdd  %119  %148 %152
-%184 = OpIAdd  %119  %183 %154
-%185 = OpIAdd  %119  %184 %161
-%186 = OpIAdd  %119  %185 %170
-OpImageWrite %106 %182 %186
+%172 = OpCompositeExtract  %4  %98 2
+%173 = OpBitcast  %14  %172
+%174 = OpImageFetch  %119  %105 %171 Lod %173
+%175 = OpCompositeExtract  %14  %116 0
+%176 = OpIAdd  %119  %120 %123
+%177 = OpIAdd  %119  %176 %124
+%178 = OpIAdd  %119  %177 %132
+%179 = OpIAdd  %119  %178 %139
+OpImageWrite %106 %175 %179
+%180 = OpCompositeExtract  %14  %116 0
+%181 = OpBitcast  %4  %180
+%182 = OpIAdd  %119  %148 %152
+%183 = OpIAdd  %119  %182 %154
+%184 = OpIAdd  %119  %183 %161
+%185 = OpIAdd  %119  %184 %170
+OpImageWrite %106 %181 %185
 OpReturn
 OpFunctionEnd
-%190 = OpFunction  %2  None %100
-%187 = OpLabel
-%189 = OpLoad  %12  %188
-%191 = OpLoad  %6  %35
-%192 = OpLoad  %8  %37
-%193 = OpLoad  %10  %45
-OpBranch %194
-%194 = OpLabel
-%195 = OpImageQuerySize  %111  %192
-%196 = OpVectorShuffle  %111  %189 %189 0 1
-%197 = OpIMul  %111  %195 %196
-%198 = OpBitcast  %13  %197
-%199 = OpFunctionCall  %13  %74 %198 %109
-%200 = OpCompositeExtract  %4  %189 2
-%201 = OpBitcast  %14  %200
-%202 = OpImageFetch  %23  %191 %199 Sample %201
-%203 = OpCompositeExtract  %7  %202 0
-%204 = OpCompositeExtract  %14  %199 0
-%205 = OpConvertFToU  %4  %203
-%206 = OpCompositeConstruct  %119  %205 %205 %205 %205
-OpImageWrite %193 %204 %206
+%189 = OpFunction  %2  None %100
+%186 = OpLabel
+%188 = OpLoad  %12  %187
+%190 = OpLoad  %6  %35
+%191 = OpLoad  %8  %37
+%192 = OpLoad  %10  %45
+OpBranch %193
+%193 = OpLabel
+%194 = OpImageQuerySize  %111  %191
+%195 = OpVectorShuffle  %111  %188 %188 0 1
+%196 = OpIMul  %111  %194 %195
+%197 = OpBitcast  %13  %196
+%198 = OpFunctionCall  %13  %74 %197 %109
+%199 = OpCompositeExtract  %4  %188 2
+%200 = OpBitcast  %14  %199
+%201 = OpImageFetch  %23  %190 %198 Sample %200
+%202 = OpCompositeExtract  %7  %201 0
+%203 = OpCompositeExtract  %14  %198 0
+%204 = OpConvertFToU  %4  %202
+%205 = OpCompositeConstruct  %119  %204 %204 %204 %204
+OpImageWrite %192 %203 %205
 OpReturn
 OpFunctionEnd
-%210 = OpFunction  %2  None %100
-%207 = OpLabel
-%211 = OpLoad  %15  %47
-%212 = OpLoad  %16  %49
-%213 = OpLoad  %18  %54
-%214 = OpLoad  %19  %56
-%215 = OpLoad  %20  %58
-%216 = OpLoad  %21  %60
-%217 = OpLoad  %22  %62
-OpBranch %218
-%218 = OpLabel
-%220 = OpImageQuerySizeLod  %4  %211 %219
-%221 = OpBitcast  %14  %220
-%222 = OpImageQuerySizeLod  %4  %211 %221
-%223 = OpImageQuerySizeLod  %111  %212 %219
-%224 = OpImageQuerySizeLod  %111  %212 %29
-%225 = OpImageQuerySizeLod  %12  %213 %219
-%226 = OpVectorShuffle  %111  %225 %225 0 1
-%227 = OpImageQuerySizeLod  %12  %213 %29
-%228 = OpVectorShuffle  %111  %227 %227 0 1
-%229 = OpImageQuerySizeLod  %111  %214 %219
-%230 = OpImageQuerySizeLod  %111  %214 %29
-%231 = OpImageQuerySizeLod  %12  %215 %219
-%232 = OpVectorShuffle  %111  %231 %231 0 0
-%233 = OpImageQuerySizeLod  %12  %215 %29
-%234 = OpVectorShuffle  %111  %233 %233 0 0
-%235 = OpImageQuerySizeLod  %12  %216 %219
-%236 = OpImageQuerySizeLod  %12  %216 %29
-%237 = OpImageQuerySize  %111  %217
-%238 = OpCompositeExtract  %4  %223 1
-%239 = OpIAdd  %4  %220 %238
-%240 = OpCompositeExtract  %4  %224 1
-%241 = OpIAdd  %4  %239 %240
-%242 = OpCompositeExtract  %4  %226 1
-%243 = OpIAdd  %4  %241 %242
-%244 = OpCompositeExtract  %4  %228 1
-%245 = OpIAdd  %4  %243 %244
-%246 = OpCompositeExtract  %4  %229 1
-%247 = OpIAdd  %4  %245 %246
-%248 = OpCompositeExtract  %4  %230 1
-%249 = OpIAdd  %4  %247 %248
-%250 = OpCompositeExtract  %4  %232 1
-%251 = OpIAdd  %4  %249 %250
-%252 = OpCompositeExtract  %4  %234 1
-%253 = OpIAdd  %4  %251 %252
-%254 = OpCompositeExtract  %4  %235 2
-%255 = OpIAdd  %4  %253 %254
-%256 = OpCompositeExtract  %4  %236 2
-%257 = OpIAdd  %4  %255 %256
-%258 = OpConvertUToF  %7  %257
-%259 = OpCompositeConstruct  %23  %258 %258 %258 %258
-OpStore %208 %259
+%209 = OpFunction  %2  None %100
+%206 = OpLabel
+%210 = OpLoad  %15  %47
+%211 = OpLoad  %16  %49
+%212 = OpLoad  %18  %54
+%213 = OpLoad  %19  %56
+%214 = OpLoad  %20  %58
+%215 = OpLoad  %21  %60
+%216 = OpLoad  %22  %62
+OpBranch %217
+%217 = OpLabel
+%219 = OpImageQuerySizeLod  %4  %210 %218
+%220 = OpBitcast  %14  %219
+%221 = OpImageQuerySizeLod  %4  %210 %220
+%222 = OpImageQuerySizeLod  %111  %211 %218
+%223 = OpImageQuerySizeLod  %111  %211 %29
+%224 = OpImageQuerySizeLod  %12  %212 %218
+%225 = OpVectorShuffle  %111  %224 %224 0 1
+%226 = OpImageQuerySizeLod  %12  %212 %29
+%227 = OpVectorShuffle  %111  %226 %226 0 1
+%228 = OpImageQuerySizeLod  %111  %213 %218
+%229 = OpImageQuerySizeLod  %111  %213 %29
+%230 = OpImageQuerySizeLod  %12  %214 %218
+%231 = OpVectorShuffle  %111  %230 %230 0 0
+%232 = OpImageQuerySizeLod  %12  %214 %29
+%233 = OpVectorShuffle  %111  %232 %232 0 0
+%234 = OpImageQuerySizeLod  %12  %215 %218
+%235 = OpImageQuerySizeLod  %12  %215 %29
+%236 = OpImageQuerySize  %111  %216
+%237 = OpCompositeExtract  %4  %222 1
+%238 = OpIAdd  %4  %219 %237
+%239 = OpCompositeExtract  %4  %223 1
+%240 = OpIAdd  %4  %238 %239
+%241 = OpCompositeExtract  %4  %225 1
+%242 = OpIAdd  %4  %240 %241
+%243 = OpCompositeExtract  %4  %227 1
+%244 = OpIAdd  %4  %242 %243
+%245 = OpCompositeExtract  %4  %228 1
+%246 = OpIAdd  %4  %244 %245
+%247 = OpCompositeExtract  %4  %229 1
+%248 = OpIAdd  %4  %246 %247
+%249 = OpCompositeExtract  %4  %231 1
+%250 = OpIAdd  %4  %248 %249
+%251 = OpCompositeExtract  %4  %233 1
+%252 = OpIAdd  %4  %250 %251
+%253 = OpCompositeExtract  %4  %234 2
+%254 = OpIAdd  %4  %252 %253
+%255 = OpCompositeExtract  %4  %235 2
+%256 = OpIAdd  %4  %254 %255
+%257 = OpConvertUToF  %7  %256
+%258 = OpCompositeConstruct  %23  %257 %257 %257 %257
+OpStore %207 %258
 OpReturn
 OpFunctionEnd
-%262 = OpFunction  %2  None %100
-%260 = OpLabel
-%263 = OpLoad  %16  %49
-%264 = OpLoad  %18  %54
-%265 = OpLoad  %19  %56
-%266 = OpLoad  %20  %58
-%267 = OpLoad  %21  %60
-%268 = OpLoad  %22  %62
-OpBranch %269
-%269 = OpLabel
-%270 = OpImageQueryLevels  %4  %263
-%271 = OpImageQuerySizeLod  %12  %264 %219
-%272 = OpCompositeExtract  %4  %271 2
-%273 = OpImageQueryLevels  %4  %264
-%274 = OpImageQuerySizeLod  %12  %264 %219
-%275 = OpCompositeExtract  %4  %274 2
+%261 = OpFunction  %2  None %100
+%259 = OpLabel
+%262 = OpLoad  %16  %49
+%263 = OpLoad  %18  %54
+%264 = OpLoad  %19  %56
+%265 = OpLoad  %20  %58
+%266 = OpLoad  %21  %60
+%267 = OpLoad  %22  %62
+OpBranch %268
+%268 = OpLabel
+%269 = OpImageQueryLevels  %4  %262
+%270 = OpImageQuerySizeLod  %12  %263 %218
+%271 = OpCompositeExtract  %4  %270 2
+%272 = OpImageQueryLevels  %4  %263
+%273 = OpImageQuerySizeLod  %12  %263 %218
+%274 = OpCompositeExtract  %4  %273 2
+%275 = OpImageQueryLevels  %4  %264
 %276 = OpImageQueryLevels  %4  %265
-%277 = OpImageQueryLevels  %4  %266
-%278 = OpImageQuerySizeLod  %12  %266 %219
-%279 = OpCompositeExtract  %4  %278 2
-%280 = OpImageQueryLevels  %4  %267
-%281 = OpImageQuerySamples  %4  %268
-%282 = OpIAdd  %4  %272 %279
-%283 = OpIAdd  %4  %282 %281
-%284 = OpIAdd  %4  %283 %270
-%285 = OpIAdd  %4  %284 %273
-%286 = OpIAdd  %4  %285 %280
+%277 = OpImageQuerySizeLod  %12  %265 %218
+%278 = OpCompositeExtract  %4  %277 2
+%279 = OpImageQueryLevels  %4  %266
+%280 = OpImageQuerySamples  %4  %267
+%281 = OpIAdd  %4  %271 %278
+%282 = OpIAdd  %4  %281 %280
+%283 = OpIAdd  %4  %282 %269
+%284 = OpIAdd  %4  %283 %272
+%285 = OpIAdd  %4  %284 %279
+%286 = OpIAdd  %4  %285 %275
 %287 = OpIAdd  %4  %286 %276
-%288 = OpIAdd  %4  %287 %277
-%289 = OpConvertUToF  %7  %288
-%290 = OpCompositeConstruct  %23  %289 %289 %289 %289
-OpStore %261 %290
+%288 = OpConvertUToF  %7  %287
+%289 = OpCompositeConstruct  %23  %288 %288 %288 %288
+OpStore %260 %289
 OpReturn
 OpFunctionEnd
-%293 = OpFunction  %2  None %100
-%291 = OpLabel
-%306 = OpVariable  %307  Function %308
-%294 = OpLoad  %15  %47
-%295 = OpLoad  %16  %49
-%296 = OpLoad  %18  %54
-%297 = OpLoad  %20  %58
-%298 = OpLoad  %24  %64
-OpBranch %309
-%309 = OpLabel
-%310 = OpCompositeExtract  %7  %301 0
-%312 = OpSampledImage  %311  %294 %298
-%313 = OpImageSampleImplicitLod  %23  %312 %310
-%314 = OpLoad  %23  %306
-%315 = OpFAdd  %23  %314 %313
-OpStore %306 %315
-%317 = OpSampledImage  %316  %295 %298
-%318 = OpImageSampleImplicitLod  %23  %317 %301
-%319 = OpLoad  %23  %306
-%320 = OpFAdd  %23  %319 %318
-OpStore %306 %320
-%321 = OpSampledImage  %316  %295 %298
-%322 = OpImageSampleImplicitLod  %23  %321 %301 ConstOffset %30
-%323 = OpLoad  %23  %306
-%324 = OpFAdd  %23  %323 %322
-OpStore %306 %324
-%325 = OpSampledImage  %316  %295 %298
-%326 = OpImageSampleExplicitLod  %23  %325 %301 Lod %304
-%327 = OpLoad  %23  %306
-%328 = OpFAdd  %23  %327 %326
-OpStore %306 %328
-%329 = OpSampledImage  %316  %295 %298
-%330 = OpImageSampleExplicitLod  %23  %329 %301 Lod|ConstOffset %304 %30
-%331 = OpLoad  %23  %306
-%332 = OpFAdd  %23  %331 %330
-OpStore %306 %332
-%333 = OpSampledImage  %316  %295 %298
-%334 = OpImageSampleImplicitLod  %23  %333 %301 Bias|ConstOffset %305 %30
-%335 = OpLoad  %23  %306
-%336 = OpFAdd  %23  %335 %334
-OpStore %306 %336
-%338 = OpConvertUToF  %7  %219
-%339 = OpCompositeConstruct  %302  %301 %338
-%340 = OpSampledImage  %337  %296 %298
-%341 = OpImageSampleImplicitLod  %23  %340 %339
-%342 = OpLoad  %23  %306
-%343 = OpFAdd  %23  %342 %341
-OpStore %306 %343
-%344 = OpConvertUToF  %7  %219
-%345 = OpCompositeConstruct  %302  %301 %344
-%346 = OpSampledImage  %337  %296 %298
-%347 = OpImageSampleImplicitLod  %23  %346 %345 ConstOffset %30
-%348 = OpLoad  %23  %306
-%349 = OpFAdd  %23  %348 %347
-OpStore %306 %349
-%350 = OpConvertUToF  %7  %219
-%351 = OpCompositeConstruct  %302  %301 %350
-%352 = OpSampledImage  %337  %296 %298
-%353 = OpImageSampleExplicitLod  %23  %352 %351 Lod %304
-%354 = OpLoad  %23  %306
-%355 = OpFAdd  %23  %354 %353
-OpStore %306 %355
-%356 = OpConvertUToF  %7  %219
-%357 = OpCompositeConstruct  %302  %301 %356
-%358 = OpSampledImage  %337  %296 %298
-%359 = OpImageSampleExplicitLod  %23  %358 %357 Lod|ConstOffset %304 %30
-%360 = OpLoad  %23  %306
-%361 = OpFAdd  %23  %360 %359
-OpStore %306 %361
-%362 = OpConvertUToF  %7  %219
-%363 = OpCompositeConstruct  %302  %301 %362
-%364 = OpSampledImage  %337  %296 %298
-%365 = OpImageSampleImplicitLod  %23  %364 %363 Bias|ConstOffset %305 %30
-%366 = OpLoad  %23  %306
-%367 = OpFAdd  %23  %366 %365
-OpStore %306 %367
-%368 = OpConvertSToF  %7  %81
-%369 = OpCompositeConstruct  %302  %301 %368
-%370 = OpSampledImage  %337  %296 %298
-%371 = OpImageSampleImplicitLod  %23  %370 %369
-%372 = OpLoad  %23  %306
-%373 = OpFAdd  %23  %372 %371
-OpStore %306 %373
-%374 = OpConvertSToF  %7  %81
-%375 = OpCompositeConstruct  %302  %301 %374
-%376 = OpSampledImage  %337  %296 %298
-%377 = OpImageSampleImplicitLod  %23  %376 %375 ConstOffset %30
-%378 = OpLoad  %23  %306
-%379 = OpFAdd  %23  %378 %377
-OpStore %306 %379
-%380 = OpConvertSToF  %7  %81
-%381 = OpCompositeConstruct  %302  %301 %380
-%382 = OpSampledImage  %337  %296 %298
-%383 = OpImageSampleExplicitLod  %23  %382 %381 Lod %304
-%384 = OpLoad  %23  %306
-%385 = OpFAdd  %23  %384 %383
-OpStore %306 %385
-%386 = OpConvertSToF  %7  %81
-%387 = OpCompositeConstruct  %302  %301 %386
-%388 = OpSampledImage  %337  %296 %298
-%389 = OpImageSampleExplicitLod  %23  %388 %387 Lod|ConstOffset %304 %30
-%390 = OpLoad  %23  %306
-%391 = OpFAdd  %23  %390 %389
-OpStore %306 %391
-%392 = OpConvertSToF  %7  %81
-%393 = OpCompositeConstruct  %302  %301 %392
-%394 = OpSampledImage  %337  %296 %298
-%395 = OpImageSampleImplicitLod  %23  %394 %393 Bias|ConstOffset %305 %30
-%396 = OpLoad  %23  %306
-%397 = OpFAdd  %23  %396 %395
-OpStore %306 %397
-%399 = OpConvertUToF  %7  %219
-%400 = OpCompositeConstruct  %23  %303 %399
-%401 = OpSampledImage  %398  %297 %298
-%402 = OpImageSampleImplicitLod  %23  %401 %400
-%403 = OpLoad  %23  %306
-%404 = OpFAdd  %23  %403 %402
-OpStore %306 %404
-%405 = OpConvertUToF  %7  %219
-%406 = OpCompositeConstruct  %23  %303 %405
-%407 = OpSampledImage  %398  %297 %298
-%408 = OpImageSampleExplicitLod  %23  %407 %406 Lod %304
-%409 = OpLoad  %23  %306
-%410 = OpFAdd  %23  %409 %408
-OpStore %306 %410
-%411 = OpConvertUToF  %7  %219
-%412 = OpCompositeConstruct  %23  %303 %411
-%413 = OpSampledImage  %398  %297 %298
-%414 = OpImageSampleImplicitLod  %23  %413 %412 Bias %305
-%415 = OpLoad  %23  %306
-%416 = OpFAdd  %23  %415 %414
-OpStore %306 %416
-%417 = OpConvertSToF  %7  %81
-%418 = OpCompositeConstruct  %23  %303 %417
-%419 = OpSampledImage  %398  %297 %298
-%420 = OpImageSampleImplicitLod  %23  %419 %418
-%421 = OpLoad  %23  %306
-%422 = OpFAdd  %23  %421 %420
-OpStore %306 %422
-%423 = OpConvertSToF  %7  %81
-%424 = OpCompositeConstruct  %23  %303 %423
-%425 = OpSampledImage  %398  %297 %298
-%426 = OpImageSampleExplicitLod  %23  %425 %424 Lod %304
-%427 = OpLoad  %23  %306
-%428 = OpFAdd  %23  %427 %426
-OpStore %306 %428
-%429 = OpConvertSToF  %7  %81
-%430 = OpCompositeConstruct  %23  %303 %429
-%431 = OpSampledImage  %398  %297 %298
-%432 = OpImageSampleImplicitLod  %23  %431 %430 Bias %305
-%433 = OpLoad  %23  %306
-%434 = OpFAdd  %23  %433 %432
-OpStore %306 %434
-%435 = OpLoad  %23  %306
-OpStore %292 %435
+%292 = OpFunction  %2  None %100
+%290 = OpLabel
+%305 = OpVariable  %306  Function %307
+%293 = OpLoad  %15  %47
+%294 = OpLoad  %16  %49
+%295 = OpLoad  %18  %54
+%296 = OpLoad  %20  %58
+%297 = OpLoad  %24  %64
+OpBranch %308
+%308 = OpLabel
+%309 = OpCompositeExtract  %7  %300 0
+%311 = OpSampledImage  %310  %293 %297
+%312 = OpImageSampleImplicitLod  %23  %311 %309
+%313 = OpLoad  %23  %305
+%314 = OpFAdd  %23  %313 %312
+OpStore %305 %314
+%316 = OpSampledImage  %315  %294 %297
+%317 = OpImageSampleImplicitLod  %23  %316 %300
+%318 = OpLoad  %23  %305
+%319 = OpFAdd  %23  %318 %317
+OpStore %305 %319
+%320 = OpSampledImage  %315  %294 %297
+%321 = OpImageSampleImplicitLod  %23  %320 %300 ConstOffset %30
+%322 = OpLoad  %23  %305
+%323 = OpFAdd  %23  %322 %321
+OpStore %305 %323
+%324 = OpSampledImage  %315  %294 %297
+%325 = OpImageSampleExplicitLod  %23  %324 %300 Lod %303
+%326 = OpLoad  %23  %305
+%327 = OpFAdd  %23  %326 %325
+OpStore %305 %327
+%328 = OpSampledImage  %315  %294 %297
+%329 = OpImageSampleExplicitLod  %23  %328 %300 Lod|ConstOffset %303 %30
+%330 = OpLoad  %23  %305
+%331 = OpFAdd  %23  %330 %329
+OpStore %305 %331
+%332 = OpSampledImage  %315  %294 %297
+%333 = OpImageSampleImplicitLod  %23  %332 %300 Bias|ConstOffset %304 %30
+%334 = OpLoad  %23  %305
+%335 = OpFAdd  %23  %334 %333
+OpStore %305 %335
+%337 = OpConvertUToF  %7  %218
+%338 = OpCompositeConstruct  %301  %300 %337
+%339 = OpSampledImage  %336  %295 %297
+%340 = OpImageSampleImplicitLod  %23  %339 %338
+%341 = OpLoad  %23  %305
+%342 = OpFAdd  %23  %341 %340
+OpStore %305 %342
+%343 = OpConvertUToF  %7  %218
+%344 = OpCompositeConstruct  %301  %300 %343
+%345 = OpSampledImage  %336  %295 %297
+%346 = OpImageSampleImplicitLod  %23  %345 %344 ConstOffset %30
+%347 = OpLoad  %23  %305
+%348 = OpFAdd  %23  %347 %346
+OpStore %305 %348
+%349 = OpConvertUToF  %7  %218
+%350 = OpCompositeConstruct  %301  %300 %349
+%351 = OpSampledImage  %336  %295 %297
+%352 = OpImageSampleExplicitLod  %23  %351 %350 Lod %303
+%353 = OpLoad  %23  %305
+%354 = OpFAdd  %23  %353 %352
+OpStore %305 %354
+%355 = OpConvertUToF  %7  %218
+%356 = OpCompositeConstruct  %301  %300 %355
+%357 = OpSampledImage  %336  %295 %297
+%358 = OpImageSampleExplicitLod  %23  %357 %356 Lod|ConstOffset %303 %30
+%359 = OpLoad  %23  %305
+%360 = OpFAdd  %23  %359 %358
+OpStore %305 %360
+%361 = OpConvertUToF  %7  %218
+%362 = OpCompositeConstruct  %301  %300 %361
+%363 = OpSampledImage  %336  %295 %297
+%364 = OpImageSampleImplicitLod  %23  %363 %362 Bias|ConstOffset %304 %30
+%365 = OpLoad  %23  %305
+%366 = OpFAdd  %23  %365 %364
+OpStore %305 %366
+%367 = OpConvertSToF  %7  %81
+%368 = OpCompositeConstruct  %301  %300 %367
+%369 = OpSampledImage  %336  %295 %297
+%370 = OpImageSampleImplicitLod  %23  %369 %368
+%371 = OpLoad  %23  %305
+%372 = OpFAdd  %23  %371 %370
+OpStore %305 %372
+%373 = OpConvertSToF  %7  %81
+%374 = OpCompositeConstruct  %301  %300 %373
+%375 = OpSampledImage  %336  %295 %297
+%376 = OpImageSampleImplicitLod  %23  %375 %374 ConstOffset %30
+%377 = OpLoad  %23  %305
+%378 = OpFAdd  %23  %377 %376
+OpStore %305 %378
+%379 = OpConvertSToF  %7  %81
+%380 = OpCompositeConstruct  %301  %300 %379
+%381 = OpSampledImage  %336  %295 %297
+%382 = OpImageSampleExplicitLod  %23  %381 %380 Lod %303
+%383 = OpLoad  %23  %305
+%384 = OpFAdd  %23  %383 %382
+OpStore %305 %384
+%385 = OpConvertSToF  %7  %81
+%386 = OpCompositeConstruct  %301  %300 %385
+%387 = OpSampledImage  %336  %295 %297
+%388 = OpImageSampleExplicitLod  %23  %387 %386 Lod|ConstOffset %303 %30
+%389 = OpLoad  %23  %305
+%390 = OpFAdd  %23  %389 %388
+OpStore %305 %390
+%391 = OpConvertSToF  %7  %81
+%392 = OpCompositeConstruct  %301  %300 %391
+%393 = OpSampledImage  %336  %295 %297
+%394 = OpImageSampleImplicitLod  %23  %393 %392 Bias|ConstOffset %304 %30
+%395 = OpLoad  %23  %305
+%396 = OpFAdd  %23  %395 %394
+OpStore %305 %396
+%398 = OpConvertUToF  %7  %218
+%399 = OpCompositeConstruct  %23  %302 %398
+%400 = OpSampledImage  %397  %296 %297
+%401 = OpImageSampleImplicitLod  %23  %400 %399
+%402 = OpLoad  %23  %305
+%403 = OpFAdd  %23  %402 %401
+OpStore %305 %403
+%404 = OpConvertUToF  %7  %218
+%405 = OpCompositeConstruct  %23  %302 %404
+%406 = OpSampledImage  %397  %296 %297
+%407 = OpImageSampleExplicitLod  %23  %406 %405 Lod %303
+%408 = OpLoad  %23  %305
+%409 = OpFAdd  %23  %408 %407
+OpStore %305 %409
+%410 = OpConvertUToF  %7  %218
+%411 = OpCompositeConstruct  %23  %302 %410
+%412 = OpSampledImage  %397  %296 %297
+%413 = OpImageSampleImplicitLod  %23  %412 %411 Bias %304
+%414 = OpLoad  %23  %305
+%415 = OpFAdd  %23  %414 %413
+OpStore %305 %415
+%416 = OpConvertSToF  %7  %81
+%417 = OpCompositeConstruct  %23  %302 %416
+%418 = OpSampledImage  %397  %296 %297
+%419 = OpImageSampleImplicitLod  %23  %418 %417
+%420 = OpLoad  %23  %305
+%421 = OpFAdd  %23  %420 %419
+OpStore %305 %421
+%422 = OpConvertSToF  %7  %81
+%423 = OpCompositeConstruct  %23  %302 %422
+%424 = OpSampledImage  %397  %296 %297
+%425 = OpImageSampleExplicitLod  %23  %424 %423 Lod %303
+%426 = OpLoad  %23  %305
+%427 = OpFAdd  %23  %426 %425
+OpStore %305 %427
+%428 = OpConvertSToF  %7  %81
+%429 = OpCompositeConstruct  %23  %302 %428
+%430 = OpSampledImage  %397  %296 %297
+%431 = OpImageSampleImplicitLod  %23  %430 %429 Bias %304
+%432 = OpLoad  %23  %305
+%433 = OpFAdd  %23  %432 %431
+OpStore %305 %433
+%434 = OpLoad  %23  %305
+OpStore %291 %434
 OpReturn
 OpFunctionEnd
-%439 = OpFunction  %2  None %100
-%436 = OpLabel
-%444 = OpVariable  %445  Function %446
-%440 = OpLoad  %24  %66
-%441 = OpLoad  %25  %68
-%442 = OpLoad  %26  %70
-%443 = OpLoad  %27  %72
-OpBranch %447
-%447 = OpLabel
-%449 = OpSampledImage  %448  %441 %440
-%450 = OpImageSampleDrefImplicitLod  %7  %449 %301 %299
-%451 = OpLoad  %7  %444
-%452 = OpFAdd  %7  %451 %450
-OpStore %444 %452
-%454 = OpConvertUToF  %7  %219
-%455 = OpCompositeConstruct  %302  %301 %454
-%456 = OpSampledImage  %453  %442 %440
-%457 = OpImageSampleDrefImplicitLod  %7  %456 %455 %299
-%458 = OpLoad  %7  %444
-%459 = OpFAdd  %7  %458 %457
-OpStore %444 %459
-%460 = OpConvertSToF  %7  %81
-%461 = OpCompositeConstruct  %302  %301 %460
-%462 = OpSampledImage  %453  %442 %440
-%463 = OpImageSampleDrefImplicitLod  %7  %462 %461 %299
-%464 = OpLoad  %7  %444
-%465 = OpFAdd  %7  %464 %463
-OpStore %444 %465
-%467 = OpSampledImage  %466  %443 %440
-%468 = OpImageSampleDrefImplicitLod  %7  %467 %303 %299
-%469 = OpLoad  %7  %444
-%470 = OpFAdd  %7  %469 %468
-OpStore %444 %470
-%471 = OpSampledImage  %448  %441 %440
-%472 = OpImageSampleDrefExplicitLod  %7  %471 %301 %299 Lod %473
-%474 = OpLoad  %7  %444
-%475 = OpFAdd  %7  %474 %472
-OpStore %444 %475
-%476 = OpConvertUToF  %7  %219
-%477 = OpCompositeConstruct  %302  %301 %476
-%478 = OpSampledImage  %453  %442 %440
-%479 = OpImageSampleDrefExplicitLod  %7  %478 %477 %299 Lod %473
-%480 = OpLoad  %7  %444
-%481 = OpFAdd  %7  %480 %479
-OpStore %444 %481
-%482 = OpConvertSToF  %7  %81
-%483 = OpCompositeConstruct  %302  %301 %482
-%484 = OpSampledImage  %453  %442 %440
-%485 = OpImageSampleDrefExplicitLod  %7  %484 %483 %299 Lod %473
-%486 = OpLoad  %7  %444
-%487 = OpFAdd  %7  %486 %485
-OpStore %444 %487
-%488 = OpSampledImage  %466  %443 %440
-%489 = OpImageSampleDrefExplicitLod  %7  %488 %303 %299 Lod %473
-%490 = OpLoad  %7  %444
-%491 = OpFAdd  %7  %490 %489
-OpStore %444 %491
-%492 = OpLoad  %7  %444
-OpStore %437 %492
+%438 = OpFunction  %2  None %100
+%435 = OpLabel
+%443 = OpVariable  %444  Function %445
+%439 = OpLoad  %24  %66
+%440 = OpLoad  %25  %68
+%441 = OpLoad  %26  %70
+%442 = OpLoad  %27  %72
+OpBranch %446
+%446 = OpLabel
+%448 = OpSampledImage  %447  %440 %439
+%449 = OpImageSampleDrefImplicitLod  %7  %448 %300 %298
+%450 = OpLoad  %7  %443
+%451 = OpFAdd  %7  %450 %449
+OpStore %443 %451
+%453 = OpConvertUToF  %7  %218
+%454 = OpCompositeConstruct  %301  %300 %453
+%455 = OpSampledImage  %452  %441 %439
+%456 = OpImageSampleDrefImplicitLod  %7  %455 %454 %298
+%457 = OpLoad  %7  %443
+%458 = OpFAdd  %7  %457 %456
+OpStore %443 %458
+%459 = OpConvertSToF  %7  %81
+%460 = OpCompositeConstruct  %301  %300 %459
+%461 = OpSampledImage  %452  %441 %439
+%462 = OpImageSampleDrefImplicitLod  %7  %461 %460 %298
+%463 = OpLoad  %7  %443
+%464 = OpFAdd  %7  %463 %462
+OpStore %443 %464
+%466 = OpSampledImage  %465  %442 %439
+%467 = OpImageSampleDrefImplicitLod  %7  %466 %302 %298
+%468 = OpLoad  %7  %443
+%469 = OpFAdd  %7  %468 %467
+OpStore %443 %469
+%470 = OpSampledImage  %447  %440 %439
+%471 = OpImageSampleDrefExplicitLod  %7  %470 %300 %298 Lod %472
+%473 = OpLoad  %7  %443
+%474 = OpFAdd  %7  %473 %471
+OpStore %443 %474
+%475 = OpConvertUToF  %7  %218
+%476 = OpCompositeConstruct  %301  %300 %475
+%477 = OpSampledImage  %452  %441 %439
+%478 = OpImageSampleDrefExplicitLod  %7  %477 %476 %298 Lod %472
+%479 = OpLoad  %7  %443
+%480 = OpFAdd  %7  %479 %478
+OpStore %443 %480
+%481 = OpConvertSToF  %7  %81
+%482 = OpCompositeConstruct  %301  %300 %481
+%483 = OpSampledImage  %452  %441 %439
+%484 = OpImageSampleDrefExplicitLod  %7  %483 %482 %298 Lod %472
+%485 = OpLoad  %7  %443
+%486 = OpFAdd  %7  %485 %484
+OpStore %443 %486
+%487 = OpSampledImage  %465  %442 %439
+%488 = OpImageSampleDrefExplicitLod  %7  %487 %302 %298 Lod %472
+%489 = OpLoad  %7  %443
+%490 = OpFAdd  %7  %489 %488
+OpStore %443 %490
+%491 = OpLoad  %7  %443
+OpStore %436 %491
 OpReturn
 OpFunctionEnd
-%495 = OpFunction  %2  None %100
-%493 = OpLabel
-%496 = OpLoad  %16  %49
-%497 = OpLoad  %3  %51
-%498 = OpLoad  %17  %52
-%499 = OpLoad  %24  %64
-%500 = OpLoad  %24  %66
-%501 = OpLoad  %25  %68
-OpBranch %502
-%502 = OpLabel
-%503 = OpSampledImage  %316  %496 %499
-%504 = OpImageGather  %23  %503 %301 %505
-%506 = OpSampledImage  %316  %496 %499
-%507 = OpImageGather  %23  %506 %301 %508 ConstOffset %30
-%509 = OpSampledImage  %448  %501 %500
-%510 = OpImageDrefGather  %23  %509 %301 %299
-%511 = OpSampledImage  %448  %501 %500
-%512 = OpImageDrefGather  %23  %511 %301 %299 ConstOffset %30
-%514 = OpSampledImage  %513  %497 %499
-%515 = OpImageGather  %119  %514 %301 %219
-%518 = OpSampledImage  %517  %498 %499
-%519 = OpImageGather  %516  %518 %301 %219
-%520 = OpConvertUToF  %23  %515
-%521 = OpConvertSToF  %23  %519
-%522 = OpFAdd  %23  %520 %521
-%523 = OpFAdd  %23  %504 %507
-%524 = OpFAdd  %23  %523 %510
-%525 = OpFAdd  %23  %524 %512
-%526 = OpFAdd  %23  %525 %522
-OpStore %494 %526
+%494 = OpFunction  %2  None %100
+%492 = OpLabel
+%495 = OpLoad  %16  %49
+%496 = OpLoad  %3  %51
+%497 = OpLoad  %17  %52
+%498 = OpLoad  %24  %64
+%499 = OpLoad  %24  %66
+%500 = OpLoad  %25  %68
+OpBranch %501
+%501 = OpLabel
+%502 = OpSampledImage  %315  %495 %498
+%503 = OpImageGather  %23  %502 %300 %504
+%505 = OpSampledImage  %315  %495 %498
+%506 = OpImageGather  %23  %505 %300 %507 ConstOffset %30
+%508 = OpSampledImage  %447  %500 %499
+%509 = OpImageDrefGather  %23  %508 %300 %298
+%510 = OpSampledImage  %447  %500 %499
+%511 = OpImageDrefGather  %23  %510 %300 %298 ConstOffset %30
+%513 = OpSampledImage  %512  %496 %498
+%514 = OpImageGather  %119  %513 %300 %218
+%517 = OpSampledImage  %516  %497 %498
+%518 = OpImageGather  %515  %517 %300 %218
+%519 = OpConvertUToF  %23  %514
+%520 = OpConvertSToF  %23  %518
+%521 = OpFAdd  %23  %519 %520
+%522 = OpFAdd  %23  %503 %506
+%523 = OpFAdd  %23  %522 %509
+%524 = OpFAdd  %23  %523 %511
+%525 = OpFAdd  %23  %524 %521
+OpStore %493 %525
 OpReturn
 OpFunctionEnd
-%529 = OpFunction  %2  None %100
-%527 = OpLabel
-%530 = OpLoad  %24  %64
-%531 = OpLoad  %25  %68
-OpBranch %532
-%532 = OpLabel
-%533 = OpSampledImage  %448  %531 %530
-%534 = OpImageSampleImplicitLod  %23  %533 %301
-%535 = OpCompositeExtract  %7  %534 0
-%536 = OpSampledImage  %448  %531 %530
-%537 = OpImageGather  %23  %536 %301 %219
-%538 = OpSampledImage  %448  %531 %530
-%540 = OpConvertSToF  %7  %29
-%539 = OpImageSampleExplicitLod  %23  %538 %301 Lod %540
-%541 = OpCompositeExtract  %7  %539 0
-%542 = OpCompositeConstruct  %23  %535 %535 %535 %535
-%543 = OpFAdd  %23  %542 %537
-%544 = OpCompositeConstruct  %23  %541 %541 %541 %541
-%545 = OpFAdd  %23  %543 %544
-OpStore %528 %545
+%528 = OpFunction  %2  None %100
+%526 = OpLabel
+%529 = OpLoad  %24  %64
+%530 = OpLoad  %25  %68
+OpBranch %531
+%531 = OpLabel
+%532 = OpSampledImage  %447  %530 %529
+%533 = OpImageSampleImplicitLod  %23  %532 %300
+%534 = OpCompositeExtract  %7  %533 0
+%535 = OpSampledImage  %447  %530 %529
+%536 = OpImageGather  %23  %535 %300 %218
+%537 = OpSampledImage  %447  %530 %529
+%539 = OpConvertSToF  %7  %29
+%538 = OpImageSampleExplicitLod  %23  %537 %300 Lod %539
+%540 = OpCompositeExtract  %7  %538 0
+%541 = OpCompositeConstruct  %23  %534 %534 %534 %534
+%542 = OpFAdd  %23  %541 %536
+%543 = OpCompositeConstruct  %23  %540 %540 %540 %540
+%544 = OpFAdd  %23  %542 %543
+OpStore %527 %544
 OpReturn
 OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/wgsl/extra.wgsl b/naga/tests/out/wgsl/extra.wgsl
index 25f894e7f5d..574c35ab0b4 100644
--- a/naga/tests/out/wgsl/extra.wgsl
+++ b/naga/tests/out/wgsl/extra.wgsl
@@ -1,6 +1,6 @@
 struct PushConstants {
     index: u32,
-    double: vec2<f64>,
+    double: vec2<f32>,
 }
 
 struct FragmentIn {
diff --git a/naga/tests/out/wgsl/f16-glsl.comp.wgsl b/naga/tests/out/wgsl/f16-glsl.comp.wgsl
new file mode 100644
index 00000000000..e12dcb14086
--- /dev/null
+++ b/naga/tests/out/wgsl/f16-glsl.comp.wgsl
@@ -0,0 +1,47 @@
+enable f16;
+
+struct A {
+    a_1_: f16,
+    a_vec2_: vec2<f16>,
+    a_vec3_: vec3<f16>,
+    a_vec4_: vec4<f16>,
+}
+
+struct B {
+    b_1_: f16,
+    b_vec2_: vec2<f16>,
+    b_vec3_: vec3<f16>,
+    b_vec4_: vec4<f16>,
+    b_mat2_: mat2x2<f16>,
+    b_mat2x3_: mat2x3<f16>,
+    b_mat2x4_: mat2x4<f16>,
+    b_mat3x2_: mat3x2<f16>,
+    b_mat3_: mat3x3<f16>,
+    b_mat3x4_: mat3x4<f16>,
+    b_mat4x2_: mat4x2<f16>,
+    b_mat4x3_: mat4x3<f16>,
+    b_mat4_: mat4x4<f16>,
+}
+
+@group(0) @binding(0) 
+var<uniform> global: A;
+@group(0) @binding(1) 
+var<storage, read_write> global_1: B;
+
+fn main_1() {
+    let _e16 = global.a_1_;
+    global_1.b_1_ = _e16;
+    let _e17 = global.a_vec2_;
+    global_1.b_vec2_ = _e17;
+    let _e18 = global.a_vec3_;
+    global_1.b_vec3_ = _e18;
+    let _e19 = global.a_vec4_;
+    global_1.b_vec4_ = _e19;
+    return;
+}
+
+@compute @workgroup_size(1, 1, 1) 
+fn main() {
+    main_1();
+    return;
+}
diff --git a/naga/tests/out/wgsl/f16-spv.wgsl b/naga/tests/out/wgsl/f16-spv.wgsl
new file mode 100644
index 00000000000..4c29ace8c34
--- /dev/null
+++ b/naga/tests/out/wgsl/f16-spv.wgsl
@@ -0,0 +1,46 @@
+enable f16;
+
+struct B {
+    b_1_: f16,
+    b_vec2_: vec2<f16>,
+    b_vec3_: vec3<f16>,
+    b_vec4_: vec4<f16>,
+    b_mat2_: mat2x2<f16>,
+    b_mat2x3_: mat2x3<f16>,
+    b_mat2x4_: mat2x4<f16>,
+    b_mat3x2_: mat3x2<f16>,
+    b_mat3_: mat3x3<f16>,
+    b_mat3x4_: mat3x4<f16>,
+    b_mat4x2_: mat4x2<f16>,
+    b_mat4x3_: mat4x3<f16>,
+    b_mat4_: mat4x4<f16>,
+}
+
+struct A {
+    a_1_: f16,
+    a_vec2_: vec2<f16>,
+    a_vec3_: vec3<f16>,
+    a_vec4_: vec4<f16>,
+}
+
+@group(0) @binding(1) 
+var<storage, read_write> unnamed: B;
+@group(0) @binding(0) 
+var<uniform> unnamed_1: A;
+
+fn main_1() {
+    let _e3 = unnamed_1.a_1_;
+    unnamed.b_1_ = _e3;
+    let _e6 = unnamed_1.a_vec2_;
+    unnamed.b_vec2_ = _e6;
+    let _e9 = unnamed_1.a_vec3_;
+    unnamed.b_vec3_ = _e9;
+    let _e12 = unnamed_1.a_vec4_;
+    unnamed.b_vec4_ = _e12;
+    return;
+}
+
+@compute @workgroup_size(1, 1, 1) 
+fn main() {
+    main_1();
+}
diff --git a/naga/tests/out/wgsl/f16.wgsl b/naga/tests/out/wgsl/f16.wgsl
new file mode 100644
index 00000000000..a45edce3b58
--- /dev/null
+++ b/naga/tests/out/wgsl/f16.wgsl
@@ -0,0 +1,167 @@
+enable f16;
+
+struct UniformCompatible {
+    val_u32_: u32,
+    val_i32_: i32,
+    val_f32_: f32,
+    val_f16_: f16,
+    val_f16_2_: vec2<f16>,
+    val_f16_3_: vec3<f16>,
+    val_f16_4_: vec4<f16>,
+    final_value: f16,
+    val_mat2x2_: mat2x2<f16>,
+    val_mat2x3_: mat2x3<f16>,
+    val_mat2x4_: mat2x4<f16>,
+    val_mat3x2_: mat3x2<f16>,
+    val_mat3x3_: mat3x3<f16>,
+    val_mat3x4_: mat3x4<f16>,
+    val_mat4x2_: mat4x2<f16>,
+    val_mat4x3_: mat4x3<f16>,
+    val_mat4x4_: mat4x4<f16>,
+}
+
+struct StorageCompatible {
+    val_f16_array_2_: array<f16, 2>,
+}
+
+struct LayoutTest {
+    scalar1_: f16,
+    scalar2_: f16,
+    v3_: vec3<f16>,
+    tuck_in: f16,
+    scalar4_: f16,
+    larger: u32,
+}
+
+const constant_variable: f16 = 15.203125h;
+
+var<private> private_variable: f16 = 1h;
+@group(0) @binding(0) 
+var<uniform> input_uniform: UniformCompatible;
+@group(0) @binding(1) 
+var<storage> input_storage: UniformCompatible;
+@group(0) @binding(2) 
+var<storage> input_arrays: StorageCompatible;
+@group(0) @binding(3) 
+var<storage, read_write> output: UniformCompatible;
+@group(0) @binding(4) 
+var<storage, read_write> output_arrays: StorageCompatible;
+
+fn f16_function(x: f16) -> f16 {
+    var val: f16 = 15.203125h;
+
+    let _e4 = val;
+    val = (_e4 + -33344h);
+    let _e6 = val;
+    let _e9 = val;
+    val = (_e9 + (_e6 + 5h));
+    let _e13 = input_uniform.val_f32_;
+    let _e14 = val;
+    let _e18 = val;
+    val = (_e18 + f16((_e13 + f32(_e14))));
+    let _e22 = input_uniform.val_f16_;
+    let _e25 = val;
+    val = (_e25 + vec3(_e22).z);
+    let _e31 = input_uniform.val_f16_;
+    let _e34 = input_storage.val_f16_;
+    output.val_f16_ = (_e31 + _e34);
+    let _e40 = input_uniform.val_f16_2_;
+    let _e43 = input_storage.val_f16_2_;
+    output.val_f16_2_ = (_e40 + _e43);
+    let _e49 = input_uniform.val_f16_3_;
+    let _e52 = input_storage.val_f16_3_;
+    output.val_f16_3_ = (_e49 + _e52);
+    let _e58 = input_uniform.val_f16_4_;
+    let _e61 = input_storage.val_f16_4_;
+    output.val_f16_4_ = (_e58 + _e61);
+    let _e67 = input_uniform.val_mat2x2_;
+    let _e70 = input_storage.val_mat2x2_;
+    output.val_mat2x2_ = (_e67 + _e70);
+    let _e76 = input_uniform.val_mat2x3_;
+    let _e79 = input_storage.val_mat2x3_;
+    output.val_mat2x3_ = (_e76 + _e79);
+    let _e85 = input_uniform.val_mat2x4_;
+    let _e88 = input_storage.val_mat2x4_;
+    output.val_mat2x4_ = (_e85 + _e88);
+    let _e94 = input_uniform.val_mat3x2_;
+    let _e97 = input_storage.val_mat3x2_;
+    output.val_mat3x2_ = (_e94 + _e97);
+    let _e103 = input_uniform.val_mat3x3_;
+    let _e106 = input_storage.val_mat3x3_;
+    output.val_mat3x3_ = (_e103 + _e106);
+    let _e112 = input_uniform.val_mat3x4_;
+    let _e115 = input_storage.val_mat3x4_;
+    output.val_mat3x4_ = (_e112 + _e115);
+    let _e121 = input_uniform.val_mat4x2_;
+    let _e124 = input_storage.val_mat4x2_;
+    output.val_mat4x2_ = (_e121 + _e124);
+    let _e130 = input_uniform.val_mat4x3_;
+    let _e133 = input_storage.val_mat4x3_;
+    output.val_mat4x3_ = (_e130 + _e133);
+    let _e139 = input_uniform.val_mat4x4_;
+    let _e142 = input_storage.val_mat4x4_;
+    output.val_mat4x4_ = (_e139 + _e142);
+    let _e148 = input_arrays.val_f16_array_2_;
+    output_arrays.val_f16_array_2_ = _e148;
+    let _e149 = val;
+    let _e151 = val;
+    val = (_e151 + abs(_e149));
+    let _e153 = val;
+    let _e154 = val;
+    let _e155 = val;
+    let _e157 = val;
+    val = (_e157 + clamp(_e153, _e154, _e155));
+    let _e159 = val;
+    let _e161 = val;
+    let _e164 = val;
+    val = (_e164 + dot(vec2(_e159), vec2(_e161)));
+    let _e166 = val;
+    let _e167 = val;
+    let _e169 = val;
+    val = (_e169 + max(_e166, _e167));
+    let _e171 = val;
+    let _e172 = val;
+    let _e174 = val;
+    val = (_e174 + min(_e171, _e172));
+    let _e176 = val;
+    let _e178 = val;
+    val = (_e178 + sign(_e176));
+    let _e181 = val;
+    val = (_e181 + 1h);
+    let _e185 = input_uniform.val_f16_2_;
+    let float_vec2_ = vec2<f32>(_e185);
+    output.val_f16_2_ = vec2<f16>(float_vec2_);
+    let _e192 = input_uniform.val_f16_3_;
+    let float_vec3_ = vec3<f32>(_e192);
+    output.val_f16_3_ = vec3<f16>(float_vec3_);
+    let _e199 = input_uniform.val_f16_4_;
+    let float_vec4_ = vec4<f32>(_e199);
+    output.val_f16_4_ = vec4<f16>(float_vec4_);
+    let _e208 = input_uniform.val_mat2x2_;
+    output.val_mat2x2_ = mat2x2<f16>(mat2x2<f32>(_e208));
+    let _e215 = input_uniform.val_mat2x3_;
+    output.val_mat2x3_ = mat2x3<f16>(mat2x3<f32>(_e215));
+    let _e222 = input_uniform.val_mat2x4_;
+    output.val_mat2x4_ = mat2x4<f16>(mat2x4<f32>(_e222));
+    let _e229 = input_uniform.val_mat3x2_;
+    output.val_mat3x2_ = mat3x2<f16>(mat3x2<f32>(_e229));
+    let _e236 = input_uniform.val_mat3x3_;
+    output.val_mat3x3_ = mat3x3<f16>(mat3x3<f32>(_e236));
+    let _e243 = input_uniform.val_mat3x4_;
+    output.val_mat3x4_ = mat3x4<f16>(mat3x4<f32>(_e243));
+    let _e250 = input_uniform.val_mat4x2_;
+    output.val_mat4x2_ = mat4x2<f16>(mat4x2<f32>(_e250));
+    let _e257 = input_uniform.val_mat4x3_;
+    output.val_mat4x3_ = mat4x3<f16>(mat4x3<f32>(_e257));
+    let _e264 = input_uniform.val_mat4x4_;
+    output.val_mat4x4_ = mat4x4<f16>(mat4x4<f32>(_e264));
+    let _e267 = val;
+    return _e267;
+}
+
+@compute @workgroup_size(1, 1, 1) 
+fn main() {
+    let _e3 = f16_function(2h);
+    output.final_value = _e3;
+    return;
+}
diff --git a/naga/tests/snapshots.rs b/naga/tests/snapshots.rs
index cc56f5e053c..f87c9c335ad 100644
--- a/naga/tests/snapshots.rs
+++ b/naga/tests/snapshots.rs
@@ -929,6 +929,10 @@ fn convert_wgsl() {
             "int64",
             Targets::SPIRV | Targets::HLSL | Targets::WGSL | Targets::METAL,
         ),
+        (
+            "f16",
+            Targets::SPIRV | Targets::HLSL | Targets::WGSL | Targets::METAL,
+        ),
         (
             "subgroup-operations",
             Targets::SPIRV | Targets::METAL | Targets::GLSL | Targets::HLSL | Targets::WGSL,
@@ -1134,6 +1138,7 @@ fn convert_spv_all() {
     convert_spv("atomic_i_decrement", false, Targets::WGSL);
     convert_spv("atomic_i_add_sub", false, Targets::WGSL);
     convert_spv("atomic_global_struct_field_vertex", false, Targets::WGSL);
+    convert_spv("f16-spv", false, Targets::WGSL);
     convert_spv(
         "fetch_depth",
         false,
diff --git a/naga/tests/spirv_capabilities.rs b/naga/tests/spirv_capabilities.rs
index f221c7896e3..2d46e37f72d 100644
--- a/naga/tests/spirv_capabilities.rs
+++ b/naga/tests/spirv_capabilities.rs
@@ -208,3 +208,8 @@ fn int64() {
         "#,
     );
 }
+
+#[test]
+fn float16() {
+    require(&[Ca::Float16], "enable f16; fn f(x: f16) { }");
+}
diff --git a/naga/tests/wgsl_errors.rs b/naga/tests/wgsl_errors.rs
index 2dcd0588cac..34e1e18ece8 100644
--- a/naga/tests/wgsl_errors.rs
+++ b/naga/tests/wgsl_errors.rs
@@ -3,9 +3,11 @@ Tests for the WGSL front end.
 */
 #![cfg(feature = "wgsl-in")]
 
+#[track_caller]
 fn check(input: &str, snapshot: &str) {
     let output = naga::front::wgsl::parse_str(input)
-        .expect_err("expected parser error")
+        .map(|_| panic!("expected parser error, but parsing succeeded!"))
+        .unwrap_err()
         .emit_to_string(input);
     if output != snapshot {
         for diff in diff::lines(snapshot, &output) {
@@ -19,6 +21,19 @@ fn check(input: &str, snapshot: &str) {
     }
 }
 
+#[track_caller]
+fn check_success(input: &str) {
+    match naga::front::wgsl::parse_str(input) {
+        Ok(_) => {}
+        Err(err) => {
+            panic!(
+                "expected success, but parsing failed with:\n{}",
+                err.emit_to_string(input)
+            );
+        }
+    }
+}
+
 #[test]
 fn very_negative_integers() {
     // wgpu#4492
@@ -824,6 +839,50 @@ fn matrix_constructor_inferred() {
     );
 }
 
+#[test]
+fn float16_requires_enable() {
+    check(
+        r#"
+            const a: f16 = 1.0;
+        "#,
+        r#"error: the `f16` language extension is not enabled
+  ┌─ wgsl:2:22
+  │
+2 │             const a: f16 = 1.0;
+  │                      ^^^ the `f16` language extension is needed for this functionality, but it is not currently enabled.
+  │
+  = note: You can enable this extension by adding `enable f16;` at the top of the shader.
+
+"#,
+    );
+
+    check(
+        r#"
+            const a = 1.0h;
+        "#,
+        r#"error: the `f16` language extension is not enabled
+  ┌─ wgsl:2:23
+  │
+2 │             const a = 1.0h;
+  │                       ^^^^ the `f16` language extension is needed for this functionality, but it is not currently enabled.
+  │
+  = note: You can enable this extension by adding `enable f16;` at the top of the shader.
+
+"#,
+    );
+}
+
+#[test]
+fn multiple_enables_valid() {
+    check_success(
+        r#"
+            enable f16;
+            enable f16;
+            const a: f16 = 1.0h;
+        "#,
+    );
+}
+
 /// Check the result of validating a WGSL program against a pattern.
 ///
 /// Unless you are generating code programmatically, the
@@ -900,6 +959,7 @@ macro_rules! check_validation {
     }
 }
 
+#[track_caller]
 fn validation_error(
     source: &str,
     caps: naga::valid::Capabilities,
@@ -928,6 +988,53 @@ fn int64_capability() {
     }
 }
 
+#[test]
+fn float16_capability() {
+    check_validation! {
+        "enable f16; var input: f16;",
+        "enable f16; var input: vec2<f16>;":
+        Err(naga::valid::ValidationError::Type {
+            source: naga::valid::TypeError::WidthError(naga::valid::WidthError::MissingCapability {flag: "FLOAT16",..}),
+            ..
+        })
+    }
+}
+
+#[test]
+fn float16_in_push_constant() {
+    check_validation! {
+        "enable f16; var<push_constant> input: f16;",
+        "enable f16; var<push_constant> input: vec2<f16>;",
+        "enable f16; var<push_constant> input: mat4x4<f16>;",
+        "enable f16; struct S { a: f16 }; var<push_constant> input: S;",
+        "enable f16; struct S1 { a: f16 }; struct S2 { a : S1 } var<push_constant> input: S2;":
+        Err(naga::valid::ValidationError::GlobalVariable {
+            source: naga::valid::GlobalVariableError::InvalidPushConstantType(
+                naga::valid::PushConstantError::InvalidScalar(
+                    naga::Scalar::F16
+                )
+            ),
+            ..
+        }),
+        naga::valid::Capabilities::SHADER_FLOAT16 | naga::valid::Capabilities::PUSH_CONSTANT
+    }
+}
+
+#[test]
+fn float16_in_atomic() {
+    check_validation! {
+        "enable f16; var<storage> a: atomic<f16>;":
+        Err(naga::valid::ValidationError::Type {
+            source: naga::valid::TypeError::InvalidAtomicWidth(
+                naga::ScalarKind::Float,
+                2
+            ),
+            ..
+        }),
+        naga::valid::Capabilities::SHADER_FLOAT16
+    }
+}
+
 #[test]
 fn invalid_arrays() {
     check_validation! {
diff --git a/naga/xtask/src/validate.rs b/naga/xtask/src/validate.rs
index be6a9006309..8eb72b3ea8d 100644
--- a/naga/xtask/src/validate.rs
+++ b/naga/xtask/src/validate.rs
@@ -1,5 +1,5 @@
 use std::{
-    io::{BufRead, BufReader},
+    io::{BufRead, BufReader, Write},
     path::Path,
     process::Stdio,
 };
@@ -214,17 +214,23 @@ fn validate_spirv(path: &Path, spirv_as: &str, spirv_val: &str) -> anyhow::Resul
     else {
         bail!("no {expected_header_prefix:?} header found in {path:?}");
     };
-    let file = open_file(path)?;
     let mut spirv_as_cmd = EasyCommand::new(spirv_as, |cmd| {
-        cmd.stdin(Stdio::from(file))
-            .stdout(Stdio::piped())
+        cmd.stdout(Stdio::piped())
             .arg("--target-env")
             .arg(format!("spv{version}"))
-            .args(["-", "-o", "-"])
+            .args([path.to_str().unwrap(), "-o", "-"])
     });
-    let child = spirv_as_cmd
-        .spawn()
-        .with_context(|| format!("failed to spawn {spirv_as_cmd:?}"))?;
+    let assembled_spirv = spirv_as_cmd
+        .output()
+        .with_context(|| format!("Failed to run {spirv_as_cmd}"))?;
+
+    if !assembled_spirv.status.success() {
+        bail!(
+            "Failed to assemble {path:?} with {spirv_as_cmd}:\n{}",
+            String::from_utf8_lossy(&assembled_spirv.stderr)
+        );
+    }
+
     let error_message = || {
         format!(
             "Failed to validate {path:?}.
@@ -234,9 +240,26 @@ Note: Labels and line numbers will not match the input file.
             path.display(),
         )
     };
-    EasyCommand::new(spirv_val, |cmd| cmd.stdin(child.stdout.unwrap()))
-        .success()
-        .with_context(error_message)
+    let mut spirv_val_command = EasyCommand::new(spirv_val, |cmd| cmd.stdin(Stdio::piped()));
+    let mut spirv_val_process = spirv_val_command
+        .spawn()
+        .with_context(|| format!("Failed to run {spirv_val_command}"))?;
+
+    spirv_val_process
+        .stdin
+        .as_mut()
+        .unwrap()
+        .write_all(&assembled_spirv.stdout)?;
+
+    let spirv_val_output = spirv_val_process
+        .wait()
+        .with_context(|| format!("Failed to wait for {spirv_val_command}"))?;
+
+    if !spirv_val_output.success() {
+        bail!("{}", error_message());
+    }
+
+    Ok(())
 }
 
 fn validate_metal(path: &Path, xcrun: &str) -> anyhow::Result<()> {
@@ -257,22 +280,17 @@ fn validate_metal(path: &Path, xcrun: &str) -> anyhow::Result<()> {
     } else {
         format!("-std={language}")
     };
-    let file = open_file(path)?;
     EasyCommand::new(xcrun, |cmd| {
-        cmd.stdin(Stdio::from(file))
-            .args(["-sdk", "macosx", "metal", "-mmacosx-version-min=10.11"])
+        cmd.args(["-sdk", "macosx", "metal", "-mmacosx-version-min=10.11"])
             .arg(std_arg)
-            .args(["-x", "metal", "-", "-o", "/dev/null"])
+            .args(["-x", "metal", &*path.to_string_lossy(), "-o", "/dev/null"])
     })
     .success()
 }
 
 fn validate_glsl(path: &Path, type_arg: &str, glslang_validator: &str) -> anyhow::Result<()> {
-    let file = open_file(path)?;
     EasyCommand::new(glslang_validator, |cmd| {
-        cmd.stdin(Stdio::from(file))
-            .args(["--stdin", "-S"])
-            .arg(type_arg)
+        cmd.args([&*path.to_string_lossy(), "-S"]).arg(type_arg)
     })
     .success()
 }
diff --git a/tests/Cargo.toml b/tests/Cargo.toml
index 5ee691ba05e..bc2659da8a3 100644
--- a/tests/Cargo.toml
+++ b/tests/Cargo.toml
@@ -43,6 +43,7 @@ cfg-if.workspace = true
 ctor.workspace = true
 futures-lite.workspace = true
 glam.workspace = true
+half = { workspace = true, features = ["bytemuck"] }
 itertools.workspace = true
 image.workspace = true
 libtest-mimic.workspace = true
diff --git a/tests/tests/shader/mod.rs b/tests/tests/shader/mod.rs
index 9a3bae0d401..15d4c5e5cf7 100644
--- a/tests/tests/shader/mod.rs
+++ b/tests/tests/shader/mod.rs
@@ -143,8 +143,8 @@ impl ShaderTest {
             body,
             input_type: String::from("CustomStruct"),
             output_type: String::from("array<u32>"),
-            input_values: bytemuck::cast_slice(input_values).to_vec(),
-            output_values: vec![bytemuck::cast_slice(output_values).to_vec()],
+            input_values: bytemuck::pod_collect_to_vec(input_values),
+            output_values: vec![bytemuck::pod_collect_to_vec(output_values)],
             output_comparison_fn: Self::default_comparison_function::<O>,
             output_initialization: u32::MAX,
             failures: Backends::empty(),
diff --git a/tests/tests/shader/struct_layout.rs b/tests/tests/shader/struct_layout.rs
index 38a040fcad5..25bd32bf77f 100644
--- a/tests/tests/shader/struct_layout.rs
+++ b/tests/tests/shader/struct_layout.rs
@@ -5,6 +5,60 @@ use wgpu::{Backends, DownlevelFlags, Features, Limits};
 use crate::shader::{shader_input_output_test, InputStorageType, ShaderTest, MAX_BUFFER_SIZE};
 use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters};
 
+#[gpu_test]
+static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            // Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/wgpu/issues/4371
+            .expect_fail(
+                FailureCase::backend(wgpu::Backends::VULKAN)
+                    .validation_error("a matrix with stride 8 not satisfying alignment to 16"),
+            )
+            .limits(Limits::downlevel_defaults()),
+    )
+    .run_async(|ctx| {
+        shader_input_output_test(
+            ctx,
+            InputStorageType::Uniform,
+            create_struct_layout_tests(InputStorageType::Uniform),
+        )
+    });
+
+#[gpu_test]
+static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            .limits(Limits::downlevel_defaults()),
+    )
+    .run_async(|ctx| {
+        shader_input_output_test(
+            ctx,
+            InputStorageType::Storage,
+            create_struct_layout_tests(InputStorageType::Storage),
+        )
+    });
+
+#[gpu_test]
+static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .features(Features::PUSH_CONSTANTS)
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            .limits(Limits {
+                max_push_constant_size: MAX_BUFFER_SIZE as u32,
+                ..Limits::downlevel_defaults()
+            }),
+    )
+    .run_async(|ctx| {
+        shader_input_output_test(
+            ctx,
+            InputStorageType::PushConstant,
+            create_struct_layout_tests(InputStorageType::PushConstant),
+        )
+    });
+
 fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec<ShaderTest> {
     let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect();
 
@@ -253,6 +307,57 @@ fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec<ShaderTest>
     tests
 }
 
+#[gpu_test]
+static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .features(Features::SHADER_INT64)
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            .limits(Limits::downlevel_defaults()),
+    )
+    .run_async(|ctx| {
+        shader_input_output_test(
+            ctx,
+            InputStorageType::Storage,
+            create_64bit_struct_layout_tests(),
+        )
+    });
+
+#[gpu_test]
+static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .features(Features::SHADER_INT64)
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            .limits(Limits::downlevel_defaults()),
+    )
+    .run_async(|ctx| {
+        shader_input_output_test(
+            ctx,
+            InputStorageType::Storage,
+            create_64bit_struct_layout_tests(),
+        )
+    });
+
+#[gpu_test]
+static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS)
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            .limits(Limits {
+                max_push_constant_size: MAX_BUFFER_SIZE as u32,
+                ..Limits::downlevel_defaults()
+            }),
+    )
+    .run_async(|ctx| {
+        shader_input_output_test(
+            ctx,
+            InputStorageType::PushConstant,
+            create_64bit_struct_layout_tests(),
+        )
+    });
+
 fn create_64bit_struct_layout_tests() -> Vec<ShaderTest> {
     let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect();
 
@@ -356,29 +461,26 @@ fn create_64bit_struct_layout_tests() -> Vec<ShaderTest> {
 }
 
 #[gpu_test]
-static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
+static UNIFORM_INPUT_F16: GpuTestConfiguration = GpuTestConfiguration::new()
     .parameters(
         TestParameters::default()
+            .features(Features::SHADER_F16)
             .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
-            // Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/wgpu/issues/4371
-            .expect_fail(
-                FailureCase::backend(wgpu::Backends::VULKAN)
-                    .validation_error("a matrix with stride 8 not satisfying alignment to 16"),
-            )
             .limits(Limits::downlevel_defaults()),
     )
     .run_async(|ctx| {
         shader_input_output_test(
             ctx,
-            InputStorageType::Uniform,
-            create_struct_layout_tests(InputStorageType::Uniform),
+            InputStorageType::Storage,
+            create_16bit_struct_layout_test(),
         )
     });
 
 #[gpu_test]
-static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
+static STORAGE_INPUT_F16: GpuTestConfiguration = GpuTestConfiguration::new()
     .parameters(
         TestParameters::default()
+            .features(Features::SHADER_F16)
             .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
             .limits(Limits::downlevel_defaults()),
     )
@@ -386,76 +488,205 @@ static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
         shader_input_output_test(
             ctx,
             InputStorageType::Storage,
-            create_struct_layout_tests(InputStorageType::Storage),
+            create_16bit_struct_layout_test(),
         )
     });
 
-#[gpu_test]
-static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
-    .parameters(
-        TestParameters::default()
-            .features(Features::PUSH_CONSTANTS)
-            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
-            .limits(Limits {
-                max_push_constant_size: MAX_BUFFER_SIZE as u32,
-                ..Limits::downlevel_defaults()
-            }),
-    )
-    .run_async(|ctx| {
-        shader_input_output_test(
-            ctx,
-            InputStorageType::PushConstant,
-            create_struct_layout_tests(InputStorageType::PushConstant),
-        )
-    });
+fn create_16bit_struct_layout_test() -> Vec<ShaderTest> {
+    let mut tests = Vec::new();
 
-#[gpu_test]
-static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
-    .parameters(
-        TestParameters::default()
-            .features(Features::SHADER_INT64)
-            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
-            .limits(Limits::downlevel_defaults()),
-    )
-    .run_async(|ctx| {
-        shader_input_output_test(
-            ctx,
-            InputStorageType::Storage,
-            create_64bit_struct_layout_tests(),
-        )
-    });
+    fn f16asu16(f32: f32) -> u16 {
+        half::f16::from_f32(f32).to_bits()
+    }
 
-#[gpu_test]
-static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
-    .parameters(
-        TestParameters::default()
-            .features(Features::SHADER_INT64)
-            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
-            .limits(Limits::downlevel_defaults()),
-    )
-    .run_async(|ctx| {
-        shader_input_output_test(
-            ctx,
-            InputStorageType::Storage,
-            create_64bit_struct_layout_tests(),
-        )
-    });
+    // 16 bit alignment tests
+    {
+        let members =
+            "scalar1: f16, scalar2: f16, v3: vec3<f16>, tuck_in: f16, scalar4: f16, larger: u32";
+        let direct = String::from(
+            "\
+            output[0] = u32(input.scalar1);
+            output[1] = u32(input.scalar2);
+            output[2] = u32(input.v3.x);
+            output[3] = u32(input.v3.y);
+            output[4] = u32(input.v3.z);
+            output[5] = u32(input.tuck_in);
+            output[6] = u32(input.scalar4);
+            output[7] = u32(extractBits(input.larger, 0u, 16u));
+            output[8] = u32(extractBits(input.larger, 16u, 16u));
+        ",
+        );
 
-#[gpu_test]
-static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
-    .parameters(
-        TestParameters::default()
-            .features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS)
-            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
-            .limits(Limits {
-                max_push_constant_size: MAX_BUFFER_SIZE as u32,
-                ..Limits::downlevel_defaults()
-            }),
-    )
-    .run_async(|ctx| {
-        shader_input_output_test(
-            ctx,
-            InputStorageType::PushConstant,
-            create_64bit_struct_layout_tests(),
-        )
-    });
+        tests.push(ShaderTest::new(
+            "f16 alignment".into(),
+            members.into(),
+            direct,
+            &[
+                f16asu16(0.0),
+                f16asu16(1.0),
+                f16asu16(2.0),
+                f16asu16(3.0),
+                f16asu16(4.0),
+                f16asu16(5.0),
+                f16asu16(6.0),
+                f16asu16(7.0),
+                f16asu16(8.0),
+                f16asu16(9.0),
+                10_u16,
+                11_u16,
+                // Some extra values to help debug if the test fails.
+                12_u16,
+                13_u16,
+                14_u16,
+                15_u16,
+                16_u16,
+                17_u16,
+                18_u16,
+                19_u16,
+                20_u16,
+            ],
+            &[
+                0, // scalar1
+                1, // scalar2
+                4, 5, 6,  // v3
+                7,  // tuck_in
+                8,  // scalar4
+                10, // larger[0..16]
+                11, // larger[16..32]
+            ],
+        ));
+    }
+
+    // Matrix tests
+    {
+        let members = "m2: mat2x2h, m3: mat3x3h, m4: mat4x4h";
+        let direct = String::from(
+            "\
+            output[0] = u32(input.m2[0].x);
+            output[1] = u32(input.m2[0].y);
+            output[2] = u32(input.m2[1].x);
+            output[3] = u32(input.m2[1].y);
+
+            output[4] = u32(input.m3[0].x);
+            output[5] = u32(input.m3[0].y);
+            output[6] = u32(input.m3[0].z);
+            output[7] = u32(input.m3[1].x);
+            output[8] = u32(input.m3[1].y);
+            output[9] = u32(input.m3[1].z);
+            output[10] = u32(input.m3[2].x);
+            output[11] = u32(input.m3[2].y);
+            output[12] = u32(input.m3[2].z);
+
+            output[13] = u32(input.m4[0].x);
+            output[14] = u32(input.m4[0].y);
+            output[15] = u32(input.m4[0].z);
+            output[16] = u32(input.m4[0].w);
+            output[17] = u32(input.m4[1].x);
+            output[18] = u32(input.m4[1].y);
+            output[19] = u32(input.m4[1].z);
+            output[20] = u32(input.m4[1].w);
+            output[21] = u32(input.m4[2].x);
+            output[22] = u32(input.m4[2].y);
+            output[23] = u32(input.m4[2].z);
+            output[24] = u32(input.m4[2].w);
+            output[25] = u32(input.m4[3].x);
+            output[26] = u32(input.m4[3].y);
+            output[27] = u32(input.m4[3].z);
+            output[28] = u32(input.m4[3].w);
+        ",
+        );
+
+        tests.push(ShaderTest::new(
+            "f16 matrix alignment".into(),
+            members.into(),
+            direct,
+            &(0..32).map(|x| f16asu16(x as f32)).collect::<Vec<_>>(),
+            &[
+                0, 1, // m2[0]
+                2, 3, // m2[1]
+                //
+                4, 5, 6, // m3[0]
+                8, 9, 10, // m3[1]
+                12, 13, 14, // m3[2]
+                //
+                16, 17, 18, 19, // m4[0]
+                20, 21, 22, 23, // m4[1]
+                24, 25, 26, 27, // m4[2]
+                28, 29, 30, 31, // m4[3]
+            ],
+        ));
+    }
+
+    // // Nested struct and array test.
+    // //
+    // // This tries to exploit all the weird edge cases of the struct layout algorithm.
+    // // We dont go as all-out as the other nested struct test because
+    // // all our primitives are twice as wide and we have only so much buffer to spare.
+    // {
+    //     let header = String::from(
+    //         "struct Inner { scalar: u64, scalar32: u32, member: array<vec3<u64>, 2> }",
+    //     );
+    //     let members = String::from("inner: Inner");
+    //     let direct = String::from(
+    //         "\
+
+    //     ",
+    //     );
+
+    //     tests.push(
+    //         ShaderTest::new(
+    //             String::from("nested struct and array"),
+    //             members,
+    //             direct,
+    //             &input_values,
+    //             &[
+    //                 0, 1, // inner.scalar
+    //                 2, // inner.scalar32
+    //                 8, 9, 10, 11, 12, 13, // inner.member[0]
+    //                 16, 17, 18, 19, 20, 21, // inner.member[1]
+    //             ],
+    //         )
+    //         .header(header),
+    //     );
+    // }
+    // {
+    //     let header = String::from("struct Inner { scalar32: u32, scalar: u64, scalar32_2: u32 }");
+    //     let members = String::from("inner: Inner, vector: vec3<i64>");
+    //     let direct = String::from(
+    //         "\
+    //         output[0] = bitcast<u32>(input.inner.scalar32);
+    //         output[1] = u32(bitcast<u64>(input.inner.scalar) & 0xFFFFFFFF);
+    //         output[2] = u32((bitcast<u64>(input.inner.scalar) >> 32) & 0xFFFFFFFF);
+    //         output[3] = bitcast<u32>(input.inner.scalar32_2);
+    //         output[4] = u32(bitcast<u64>(input.vector.x) & 0xFFFFFFFF);
+    //         output[5] = u32((bitcast<u64>(input.vector.x) >> 32) & 0xFFFFFFFF);
+    //         output[6] = u32(bitcast<u64>(input.vector.y) & 0xFFFFFFFF);
+    //         output[7] = u32((bitcast<u64>(input.vector.y) >> 32) & 0xFFFFFFFF);
+    //         output[8] = u32(bitcast<u64>(input.vector.z) & 0xFFFFFFFF);
+    //         output[9] = u32((bitcast<u64>(input.vector.z) >> 32) & 0xFFFFFFFF);
+    //     ",
+    //     );
+
+    //     tests.push(
+    //         ShaderTest::new(
+    //             String::from("nested struct and array"),
+    //             members,
+    //             direct,
+    //             &input_values,
+    //             &[
+    //                 0, // inner.scalar32
+    //                 2, 3, // inner.scalar
+    //                 4, // inner.scalar32_2
+    //                 8, 9, 10, 11, 12, 13, // vector
+    //             ],
+    //         )
+    //         .header(header),
+    //     );
+    // }
+
+    // Insert `enable f16;` header
+    tests
+        .into_iter()
+        .map(|test| test.header("enable f16;".into()))
+        .collect()
+}
diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index bb2ffd6b033..94bfbf4258e 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -382,6 +382,10 @@ pub fn create_validator(
         features.contains(wgt::Features::PUSH_CONSTANTS),
     );
     caps.set(Caps::FLOAT64, features.contains(wgt::Features::SHADER_F64));
+    caps.set(
+        Caps::SHADER_FLOAT16,
+        features.contains(wgt::Features::SHADER_F16),
+    );
     caps.set(
         Caps::PRIMITIVE_INDEX,
         features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX),
diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs
index 370b842aded..53b67d238d6 100644
--- a/wgpu-hal/src/dx12/adapter.rs
+++ b/wgpu-hal/src/dx12/adapter.rs
@@ -1,5 +1,5 @@
 use std::{
-    mem::{size_of, size_of_val},
+    mem::{self, size_of, size_of_val},
     ptr,
     sync::Arc,
     thread,
@@ -430,6 +430,24 @@ impl super::Adapter {
                 && features1.Int64ShaderOps.as_bool(),
         );
 
+        let float16_supported = {
+            let mut features4: Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS4 =
+                unsafe { mem::zeroed() };
+            let hr = unsafe {
+                device.CheckFeatureSupport(
+                    Direct3D12::D3D12_FEATURE_D3D12_OPTIONS4, // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_feature#syntax
+                    ptr::from_mut(&mut features4).cast(),
+                    size_of::<Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS4>() as _,
+                )
+            };
+            hr.is_ok() && features4.Native16BitShaderOpsSupported.as_bool()
+        };
+
+        features.set(
+            wgt::Features::SHADER_F16,
+            shader_model >= naga::back::hlsl::ShaderModel::V6_2 && float16_supported,
+        );
+
         features.set(
             wgt::Features::TEXTURE_INT64_ATOMIC,
             shader_model >= naga::back::hlsl::ShaderModel::V6_6
@@ -617,7 +635,7 @@ impl crate::Adapter for super::Adapter {
 
     unsafe fn open(
         &self,
-        _features: wgt::Features,
+        features: wgt::Features,
         limits: &wgt::Limits,
         memory_hints: &wgt::MemoryHints,
     ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> {
@@ -638,6 +656,7 @@ impl crate::Adapter for super::Adapter {
         let device = super::Device::new(
             self.device.clone(),
             queue.clone(),
+            features,
             limits,
             memory_hints,
             self.private_caps,
diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index 9cbb6cd14e5..3876b76dae1 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -32,9 +32,11 @@ use crate::{
 const NAGA_LOCATION_SEMANTIC: &[u8] = b"LOC\0";
 
 impl super::Device {
+    #[allow(clippy::too_many_arguments)]
     pub(super) fn new(
         raw: Direct3D12::ID3D12Device,
         present_queue: Direct3D12::ID3D12CommandQueue,
+        features: wgt::Features,
         limits: &wgt::Limits,
         memory_hints: &wgt::MemoryHints,
         private_caps: super::PrivateCapabilities,
@@ -177,6 +179,7 @@ impl super::Device {
                 event: Event::create(false, false)?,
             },
             private_caps,
+            features,
             shared: Arc::new(shared),
             rtv_pool: Mutex::new(rtv_pool),
             dsv_pool: Mutex::new(descriptor::CpuPool::new(
diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index d5d6843c39c..1611fa64cf8 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -627,6 +627,7 @@ pub struct Device {
     present_queue: Direct3D12::ID3D12CommandQueue,
     idler: Idler,
     private_caps: PrivateCapabilities,
+    features: wgt::Features,
     shared: Arc<DeviceShared>,
     // CPU only pools
     rtv_pool: Mutex<descriptor::CpuPool>,
diff --git a/wgpu-hal/src/dx12/shader_compilation.rs b/wgpu-hal/src/dx12/shader_compilation.rs
index 957bd8c3c48..90a1b36741d 100644
--- a/wgpu-hal/src/dx12/shader_compilation.rs
+++ b/wgpu-hal/src/dx12/shader_compilation.rs
@@ -268,7 +268,7 @@ pub(super) fn compile_dxc(
     let raw_ep = OPCWSTR::new(raw_ep);
     let full_stage = OPCWSTR::new(full_stage);
 
-    let mut compile_args = arrayvec::ArrayVec::<PCWSTR, 12>::new_const();
+    let mut compile_args = arrayvec::ArrayVec::<PCWSTR, 13>::new_const();
 
     if let Some(source_name) = source_name.as_ref() {
         compile_args.push(source_name.ptr())
@@ -298,6 +298,10 @@ pub(super) fn compile_dxc(
         compile_args.push(Dxc::DXC_ARG_SKIP_OPTIMIZATIONS);
     }
 
+    if device.features.contains(wgt::Features::SHADER_F16) {
+        compile_args.push(windows::core::w!("-enable-16bit-types"));
+    }
+
     let buffer = Dxc::DxcBuffer {
         Ptr: source.as_ptr().cast(),
         Size: source.len(),
diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index 8b41be69fcb..ce1aa94e346 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -378,6 +378,7 @@ impl PhysicalDeviceFeatures {
                     vk::PhysicalDeviceShaderFloat16Int8Features::default().shader_float16(true),
                     vk::PhysicalDevice16BitStorageFeatures::default()
                         .storage_buffer16_bit_access(true)
+                        .storage_input_output16(true)
                         .uniform_and_storage_buffer16_bit_access(true),
                 ))
             } else {
@@ -668,7 +669,8 @@ impl PhysicalDeviceFeatures {
                 F::SHADER_F16,
                 f16_i8.shader_float16 != 0
                     && bit16.storage_buffer16_bit_access != 0
-                    && bit16.uniform_and_storage_buffer16_bit_access != 0,
+                    && bit16.uniform_and_storage_buffer16_bit_access != 0
+                    && bit16.storage_input_output16 != 0,
             );
         }
 
@@ -1846,6 +1848,10 @@ impl super::Adapter {
                 capabilities.push(spv::Capability::Int64);
             }
 
+            if features.contains(wgt::Features::SHADER_F16) {
+                capabilities.push(spv::Capability::Float16);
+            }
+
             if features.intersects(
                 wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS
                     | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX
diff --git a/wgpu-info/src/report.rs b/wgpu-info/src/report.rs
index 954908b09e5..c453b21e06d 100644
--- a/wgpu-info/src/report.rs
+++ b/wgpu-info/src/report.rs
@@ -3,7 +3,8 @@ use std::io;
 use hashbrown::HashMap;
 use serde::{Deserialize, Serialize};
 use wgpu::{
-    AdapterInfo, DownlevelCapabilities, Features, Limits, TextureFormat, TextureFormatFeatures,
+    AdapterInfo, DownlevelCapabilities, Dx12Compiler, Features, Limits, TextureFormat,
+    TextureFormatFeatures,
 };
 
 use crate::texture;
@@ -20,6 +21,7 @@ impl GpuReport {
     pub fn generate() -> Self {
         let instance = wgpu::Instance::new(&{
             let mut desc = wgpu::InstanceDescriptor::from_env_or_default();
+            desc.backend_options.dx12.shader_compiler = Dx12Compiler::StaticDxc;
             desc.flags = wgpu::InstanceFlags::debugging().with_env();
             desc
         });