From 9c0628039c615953d362f45622eaf5428dd71793 Mon Sep 17 00:00:00 2001
From: Connor Fitzgerald <connorwadefitzgerald@gmail.com>
Date: Wed, 19 Feb 2025 21:50:37 -0500
Subject: [PATCH] Implement F16 Support

Co-authored-by: FL33TW00D <fleetwoodpersonal@gmail.com>
Co-authored-by: ErichDonGubler <erichdongubler@gmail.com>
---
 Cargo.lock                                    |  24 +
 Cargo.toml                                    |   2 +
 naga/Cargo.toml                               |  11 +-
 naga/src/back/glsl/mod.rs                     |   3 +
 naga/src/back/hlsl/writer.rs                  |   1 +
 naga/src/back/msl/writer.rs                   |  33 +-
 naga/src/back/spv/block.rs                    | 386 +++++---
 naga/src/back/spv/instructions.rs             |   4 +
 naga/src/back/spv/writer.rs                   |  26 +
 naga/src/back/wgsl/writer.rs                  |  45 +
 naga/src/front/glsl/error.rs                  |  12 +-
 naga/src/front/glsl/offset.rs                 |  24 +-
 naga/src/front/glsl/types.rs                  |   5 +
 naga/src/front/spv/mod.rs                     |   5 +
 naga/src/front/wgsl/error.rs                  |  16 +-
 naga/src/front/wgsl/lower/mod.rs              |   1 +
 naga/src/front/wgsl/parse/conv.rs             |  27 +-
 .../wgsl/parse/directive/enable_extension.rs  |  45 +-
 naga/src/front/wgsl/parse/lexer.rs            |  40 +-
 naga/src/front/wgsl/parse/mod.rs              | 189 +++-
 naga/src/front/wgsl/parse/number.rs           |  23 +-
 naga/src/lib.rs                               |   2 +
 naga/src/proc/constant_evaluator.rs           |  91 +-
 naga/src/proc/mod.rs                          |   4 +
 naga/src/proc/type_methods.rs                 |   4 +
 naga/src/valid/expression.rs                  |   2 +-
 naga/src/valid/interface.rs                   |   7 +-
 naga/src/valid/mod.rs                         |   4 +-
 naga/src/valid/type.rs                        |  71 +-
 naga/tests/in/extra.wgsl                      |   2 +-
 naga/tests/in/f16.param.ron                   |  24 +
 naga/tests/in/f16.wgsl                        | 127 +++
 naga/tests/in/glsl/f16-glsl.comp              |  57 ++
 naga/tests/in/glsl/f16-glsl.param.ron         |   3 +
 naga/tests/in/spv/f16-spv.comp                |  57 ++
 naga/tests/in/spv/f16-spv.param.ron           |   3 +
 naga/tests/in/spv/f16-spv.spv                 | Bin 0 -> 2268 bytes
 naga/tests/in/spv/f16-spv.spvasm              | 130 +++
 naga/tests/out/hlsl/f16.hlsl                  | 351 +++++++
 naga/tests/out/hlsl/f16.ron                   |  12 +
 naga/tests/out/msl/extra.msl                  |   2 +-
 naga/tests/out/msl/f16.msl                    | 177 ++++
 .../spv/atomicCompareExchange-int64.spvasm    | 164 ++--
 naga/tests/out/spv/extra.spvasm               | 118 ++-
 naga/tests/out/spv/f16.spvasm                 | 633 +++++++++++++
 naga/tests/out/spv/image.spvasm               | 896 +++++++++---------
 naga/tests/out/wgsl/extra.wgsl                |   2 +-
 naga/tests/out/wgsl/f16-glsl.comp.wgsl        |  47 +
 naga/tests/out/wgsl/f16-spv.wgsl              |  46 +
 naga/tests/out/wgsl/f16.wgsl                  | 167 ++++
 naga/tests/spirv_capabilities.rs              |   5 +
 naga/tests/wgsl_errors.rs                     | 109 ++-
 tests/Cargo.toml                              |   1 +
 tests/tests/shader/mod.rs                     |   4 +-
 tests/tests/shader/struct_layout.rs           | 383 ++++++--
 wgpu-core/src/device/mod.rs                   |   4 +
 wgpu-hal/src/dx12/adapter.rs                  |  23 +-
 wgpu-hal/src/dx12/device.rs                   |   3 +
 wgpu-hal/src/dx12/mod.rs                      |   1 +
 wgpu-hal/src/dx12/shader_compilation.rs       |   6 +-
 wgpu-hal/src/vulkan/adapter.rs                |   8 +-
 61 files changed, 3746 insertions(+), 926 deletions(-)
 create mode 100644 naga/tests/in/f16.param.ron
 create mode 100644 naga/tests/in/f16.wgsl
 create mode 100644 naga/tests/in/glsl/f16-glsl.comp
 create mode 100644 naga/tests/in/glsl/f16-glsl.param.ron
 create mode 100644 naga/tests/in/spv/f16-spv.comp
 create mode 100644 naga/tests/in/spv/f16-spv.param.ron
 create mode 100644 naga/tests/in/spv/f16-spv.spv
 create mode 100644 naga/tests/in/spv/f16-spv.spvasm
 create mode 100644 naga/tests/out/hlsl/f16.hlsl
 create mode 100644 naga/tests/out/hlsl/f16.ron
 create mode 100644 naga/tests/out/msl/f16.msl
 create mode 100644 naga/tests/out/spv/f16.spvasm
 create mode 100644 naga/tests/out/wgsl/f16-glsl.comp.wgsl
 create mode 100644 naga/tests/out/wgsl/f16-spv.wgsl
 create mode 100644 naga/tests/out/wgsl/f16.wgsl

diff --git a/Cargo.lock b/Cargo.lock
index 5e2422cc54..84c28f3c33 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1757,6 +1757,20 @@ dependencies = [
  "crunchy",
 ]
 
+[[package]]
+name = "half-2"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c36518ae13d88b7cbdc61401256df5d9fc27921ea66353a16660869b47af8864"
+dependencies = [
+ "arbitrary",
+ "bytemuck",
+ "cfg-if",
+ "crunchy",
+ "num-traits",
+ "serde",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.14.5"
@@ -2137,6 +2151,12 @@ dependencies = [
  "windows-targets 0.48.5",
 ]
 
+[[package]]
+name = "libm"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
+
 [[package]]
 name = "libredox"
 version = "0.1.3"
@@ -2337,12 +2357,14 @@ dependencies = [
  "codespan-reporting",
  "diff",
  "env_logger",
+ "half-2",
  "hashbrown",
  "hexf-parse",
  "hlsl-snapshots",
  "indexmap",
  "itertools 0.13.0",
  "log",
+ "num-traits",
  "petgraph 0.7.1",
  "pp-rs",
  "ron",
@@ -2517,6 +2539,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
 dependencies = [
  "autocfg",
+ "libm",
 ]
 
 [[package]]
@@ -4704,6 +4727,7 @@ dependencies = [
  "env_logger",
  "futures-lite",
  "glam",
+ "half-2",
  "image",
  "itertools 0.13.0",
  "js-sys",
diff --git a/Cargo.toml b/Cargo.toml
index 61d9c74a3a..c86bef44af 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -100,6 +100,8 @@ flume = "0.11"
 futures-lite = "2"
 getrandom = "0.2"
 glam = "0.29"
+# TODO: Use `half` directly again after <https://github.com/starkat99/half-rs/pull/111> is released upstream.
+half = { package = "half-2", version = "2.4.1" }
 hashbrown = { version = "0.14.5", default-features = false, features = [
     "ahash",
     "inline-more",
diff --git a/naga/Cargo.toml b/naga/Cargo.toml
index e0f609e787..a84ab9d2ba 100644
--- a/naga/Cargo.toml
+++ b/naga/Cargo.toml
@@ -41,10 +41,17 @@ msl-out = []
 ## If you want to enable MSL output it regardless of the target platform, use `naga/msl-out`.
 msl-out-if-target-apple = []
 
-serialize = ["dep:serde", "bitflags/serde", "hashbrown/serde", "indexmap/serde"]
+serialize = [
+    "dep:serde",
+    "bitflags/serde",
+    "half/serde",
+    "hashbrown/serde",
+    "indexmap/serde",
+]
 deserialize = [
     "dep:serde",
     "bitflags/serde",
+    "half/serde",
     "hashbrown/serde",
     "indexmap/serde",
 ]
@@ -84,9 +91,11 @@ termcolor = { version = "1.4.1" }
 # https://github.com/brendanzab/codespan/commit/e99c867339a877731437e7ee6a903a3d03b5439e
 codespan-reporting = { version = "0.11.0" }
 hashbrown.workspace = true
+half = { workspace = true, features = ["arbitrary", "num-traits"] }
 rustc-hash.workspace = true
 indexmap.workspace = true
 log = "0.4"
+num-traits = "0.2"
 strum = { workspace = true, optional = true }
 spirv = { version = "0.3", optional = true }
 thiserror.workspace = true
diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs
index 0798fac82d..3c93084a2f 100644
--- a/naga/src/back/glsl/mod.rs
+++ b/naga/src/back/glsl/mod.rs
@@ -2669,6 +2669,9 @@ impl<'a, W: Write> Writer<'a, W> {
                     // decimal part even it's zero which is needed for a valid glsl float constant
                     crate::Literal::F64(value) => write!(self.out, "{value:?}LF")?,
                     crate::Literal::F32(value) => write!(self.out, "{value:?}")?,
+                    crate::Literal::F16(_) => {
+                        return Err(Error::Custom("GLSL has no 16-bit float type".into()));
+                    }
                     // Unsigned integers need a `u` at the end
                     //
                     // While `core` doesn't necessarily need it, it's allowed and since `es` needs it we
diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs
index a8283388ce..eac1cae574 100644
--- a/naga/src/back/hlsl/writer.rs
+++ b/naga/src/back/hlsl/writer.rs
@@ -2634,6 +2634,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                 // decimal part even it's zero
                 crate::Literal::F64(value) => write!(self.out, "{value:?}L")?,
                 crate::Literal::F32(value) => write!(self.out, "{value:?}")?,
+                crate::Literal::F16(value) => write!(self.out, "{value:?}h")?,
                 crate::Literal::U32(value) => write!(self.out, "{value}u")?,
                 // HLSL has no suffix for explicit i32 literals, but not using any suffix
                 // makes the type ambiguous which prevents overload resolution from
diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs
index bc77a7f6d0..0fe26270f8 100644
--- a/naga/src/back/msl/writer.rs
+++ b/naga/src/back/msl/writer.rs
@@ -5,6 +5,8 @@ use crate::{
     proc::{self, index, ExpressionKindTracker, NameKey, TypeResolution},
     valid, FastHashMap, FastHashSet,
 };
+use half::f16;
+use num_traits::real::Real;
 #[cfg(test)]
 use std::ptr;
 use std::{
@@ -174,9 +176,11 @@ impl Display for TypeContext<'_> {
                 write!(out, "{}::atomic_{}", NAMESPACE, scalar.to_msl_name())
             }
             crate::TypeInner::Vector { size, scalar } => put_numeric_type(out, scalar, &[size]),
-            crate::TypeInner::Matrix { columns, rows, .. } => {
-                put_numeric_type(out, crate::Scalar::F32, &[rows, columns])
-            }
+            crate::TypeInner::Matrix {
+                columns,
+                rows,
+                scalar,
+            } => put_numeric_type(out, scalar, &[rows, columns]),
             crate::TypeInner::Pointer { base, space } => {
                 let sub = Self {
                     handle: base,
@@ -413,8 +417,12 @@ impl crate::Scalar {
         match self {
             Self {
                 kind: Sk::Float,
-                width: _,
+                width: 4,
             } => "float",
+            Self {
+                kind: Sk::Float,
+                width: 2,
+            } => "half",
             Self {
                 kind: Sk::Sint,
                 width: 4,
@@ -471,7 +479,7 @@ fn should_pack_struct_member(
     match *ty_inner {
         crate::TypeInner::Vector {
             size: crate::VectorSize::Tri,
-            scalar: scalar @ crate::Scalar { width: 4, .. },
+            scalar: scalar @ crate::Scalar { width: 4 | 2, .. },
         } if is_tight => Some(scalar),
         _ => None,
     }
@@ -1446,6 +1454,21 @@ impl<W: Write> Writer<W> {
                 crate::Literal::F64(_) => {
                     return Err(Error::CapabilityNotSupported(valid::Capabilities::FLOAT64))
                 }
+                crate::Literal::F16(value) => {
+                    if value.is_infinite() {
+                        let sign = if value.is_sign_negative() { "-" } else { "" };
+                        write!(self.out, "{sign}INFINITY")?;
+                    } else if value.is_nan() {
+                        write!(self.out, "NAN")?;
+                    } else {
+                        let suffix = if value.fract() == f16::from_f32(0.0) {
+                            ".0h"
+                        } else {
+                            "h"
+                        };
+                        write!(self.out, "{value}{suffix}")?;
+                    }
+                }
                 crate::Literal::F32(value) => {
                     if value.is_infinite() {
                         let sign = if value.is_sign_negative() { "-" } else { "" };
diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs
index 6b26c1c2aa..9c85608b70 100644
--- a/naga/src/back/spv/block.rs
+++ b/naga/src/back/spv/block.rs
@@ -8,6 +8,7 @@ use super::{
     WriterFlags,
 };
 use crate::{arena::Handle, proc::index::GuardedIndex, Statement};
+use arrayvec::ArrayVec;
 use spirv::Word;
 
 fn get_dimension(type_inner: &crate::TypeInner) -> Dimension {
@@ -1476,159 +1477,7 @@ impl BlockContext<'_> {
                 expr,
                 kind,
                 convert,
-            } => {
-                use crate::ScalarKind as Sk;
-
-                let expr_id = self.cached[expr];
-                let (src_scalar, src_size, is_matrix) =
-                    match *self.fun_info[expr].ty.inner_with(&self.ir_module.types) {
-                        crate::TypeInner::Scalar(scalar) => (scalar, None, false),
-                        crate::TypeInner::Vector { scalar, size } => (scalar, Some(size), false),
-                        crate::TypeInner::Matrix { scalar, .. } => (scalar, None, true),
-                        ref other => {
-                            log::error!("As source {:?}", other);
-                            return Err(Error::Validation("Unexpected Expression::As source"));
-                        }
-                    };
-
-                enum Cast {
-                    Identity,
-                    Unary(spirv::Op),
-                    Binary(spirv::Op, Word),
-                    Ternary(spirv::Op, Word, Word),
-                }
-
-                let cast = if is_matrix {
-                    // we only support identity casts for matrices
-                    Cast::Unary(spirv::Op::CopyObject)
-                } else {
-                    match (src_scalar.kind, kind, convert) {
-                        // Filter out identity casts. Some Adreno drivers are
-                        // confused by no-op OpBitCast instructions.
-                        (src_kind, kind, convert)
-                            if src_kind == kind
-                                && convert.filter(|&width| width != src_scalar.width).is_none() =>
-                        {
-                            Cast::Identity
-                        }
-                        (Sk::Bool, Sk::Bool, _) => Cast::Unary(spirv::Op::CopyObject),
-                        (_, _, None) => Cast::Unary(spirv::Op::Bitcast),
-                        // casting to a bool - generate `OpXxxNotEqual`
-                        (_, Sk::Bool, Some(_)) => {
-                            let op = match src_scalar.kind {
-                                Sk::Sint | Sk::Uint => spirv::Op::INotEqual,
-                                Sk::Float => spirv::Op::FUnordNotEqual,
-                                Sk::Bool | Sk::AbstractInt | Sk::AbstractFloat => unreachable!(),
-                            };
-                            let zero_scalar_id =
-                                self.writer.get_constant_scalar_with(0, src_scalar)?;
-                            let zero_id = match src_size {
-                                Some(size) => {
-                                    let ty = LocalType::Numeric(NumericType::Vector {
-                                        size,
-                                        scalar: src_scalar,
-                                    })
-                                    .into();
-
-                                    self.temp_list.clear();
-                                    self.temp_list.resize(size as _, zero_scalar_id);
-
-                                    self.writer.get_constant_composite(ty, &self.temp_list)
-                                }
-                                None => zero_scalar_id,
-                            };
-
-                            Cast::Binary(op, zero_id)
-                        }
-                        // casting from a bool - generate `OpSelect`
-                        (Sk::Bool, _, Some(dst_width)) => {
-                            let dst_scalar = crate::Scalar {
-                                kind,
-                                width: dst_width,
-                            };
-                            let zero_scalar_id =
-                                self.writer.get_constant_scalar_with(0, dst_scalar)?;
-                            let one_scalar_id =
-                                self.writer.get_constant_scalar_with(1, dst_scalar)?;
-                            let (accept_id, reject_id) = match src_size {
-                                Some(size) => {
-                                    let ty = LocalType::Numeric(NumericType::Vector {
-                                        size,
-                                        scalar: dst_scalar,
-                                    })
-                                    .into();
-
-                                    self.temp_list.clear();
-                                    self.temp_list.resize(size as _, zero_scalar_id);
-
-                                    let vec0_id =
-                                        self.writer.get_constant_composite(ty, &self.temp_list);
-
-                                    self.temp_list.fill(one_scalar_id);
-
-                                    let vec1_id =
-                                        self.writer.get_constant_composite(ty, &self.temp_list);
-
-                                    (vec1_id, vec0_id)
-                                }
-                                None => (one_scalar_id, zero_scalar_id),
-                            };
-
-                            Cast::Ternary(spirv::Op::Select, accept_id, reject_id)
-                        }
-                        (Sk::Float, Sk::Uint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToU),
-                        (Sk::Float, Sk::Sint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToS),
-                        (Sk::Float, Sk::Float, Some(dst_width))
-                            if src_scalar.width != dst_width =>
-                        {
-                            Cast::Unary(spirv::Op::FConvert)
-                        }
-                        (Sk::Sint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertSToF),
-                        (Sk::Sint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
-                            Cast::Unary(spirv::Op::SConvert)
-                        }
-                        (Sk::Uint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertUToF),
-                        (Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
-                            Cast::Unary(spirv::Op::UConvert)
-                        }
-                        (Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
-                            Cast::Unary(spirv::Op::SConvert)
-                        }
-                        (Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
-                            Cast::Unary(spirv::Op::UConvert)
-                        }
-                        // We assume it's either an identity cast, or int-uint.
-                        _ => Cast::Unary(spirv::Op::Bitcast),
-                    }
-                };
-
-                let id = self.gen_id();
-                let instruction = match cast {
-                    Cast::Identity => None,
-                    Cast::Unary(op) => Some(Instruction::unary(op, result_type_id, id, expr_id)),
-                    Cast::Binary(op, operand) => Some(Instruction::binary(
-                        op,
-                        result_type_id,
-                        id,
-                        expr_id,
-                        operand,
-                    )),
-                    Cast::Ternary(op, op1, op2) => Some(Instruction::ternary(
-                        op,
-                        result_type_id,
-                        id,
-                        expr_id,
-                        op1,
-                        op2,
-                    )),
-                };
-                if let Some(instruction) = instruction {
-                    block.body.push(instruction);
-                    id
-                } else {
-                    expr_id
-                }
-            }
+            } => self.write_as_expression(expr, convert, kind, block, result_type_id)?,
             crate::Expression::ImageLoad {
                 image,
                 coordinate,
@@ -1784,6 +1633,237 @@ impl BlockContext<'_> {
         Ok(())
     }
 
+    /// Helper which focuses on generating the `As` expressions and the various conversions
+    /// that need to happen because of that.
+    fn write_as_expression(
+        &mut self,
+        expr: Handle<crate::Expression>,
+        convert: Option<u8>,
+        kind: crate::ScalarKind,
+
+        block: &mut Block,
+        result_type_id: u32,
+    ) -> Result<u32, Error> {
+        use crate::ScalarKind as Sk;
+        let expr_id = self.cached[expr];
+        let ty = self.fun_info[expr].ty.inner_with(&self.ir_module.types);
+
+        // Matrix casts needs special treatment in SPIR-V, as the cast functions
+        // can take vectors or scalars, but not matrices. In order to cast a matrix
+        // we need to cast each column of the matrix individually and construct a new
+        // matrix from the converted columns.
+        if let crate::TypeInner::Matrix {
+            columns,
+            rows,
+            scalar,
+        } = *ty
+        {
+            let Some(convert) = convert else {
+                // No conversion needs to be done, passes through.
+                return Ok(expr_id);
+            };
+
+            if convert == scalar.width {
+                // No conversion needs to be done, passes through.
+                return Ok(expr_id);
+            }
+
+            if kind != Sk::Float {
+                // Only float conversions are supported for matrices.
+                return Err(Error::Validation("Matrices must be floats"));
+            }
+
+            // Type of each extracted column
+            let column_src_ty =
+                self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector {
+                    size: rows,
+                    scalar,
+                })));
+
+            // Type of the column after conversion
+            let column_dst_ty =
+                self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector {
+                    size: rows,
+                    scalar: crate::Scalar {
+                        kind,
+                        width: convert,
+                    },
+                })));
+
+            let mut components = ArrayVec::<Word, 4>::new();
+
+            for column in 0..columns as usize {
+                let column_id = self.gen_id();
+                block.body.push(Instruction::composite_extract(
+                    column_src_ty,
+                    column_id,
+                    expr_id,
+                    &[column as u32],
+                ));
+
+                let column_conv_id = self.gen_id();
+                block.body.push(Instruction::unary(
+                    spirv::Op::FConvert,
+                    column_dst_ty,
+                    column_conv_id,
+                    column_id,
+                ));
+
+                components.push(column_conv_id);
+            }
+
+            let construct_id = self.gen_id();
+
+            block.body.push(Instruction::composite_construct(
+                result_type_id,
+                construct_id,
+                &components,
+            ));
+
+            return Ok(construct_id);
+        }
+
+        let (src_scalar, src_size) = match *ty {
+            crate::TypeInner::Scalar(scalar) => (scalar, None),
+            crate::TypeInner::Vector { scalar, size } => (scalar, Some(size)),
+            ref other => {
+                log::error!("As source {:?}", other);
+                return Err(Error::Validation("Unexpected Expression::As source"));
+            }
+        };
+
+        enum Cast {
+            Identity,
+            Unary(spirv::Op),
+            Binary(spirv::Op, Word),
+            Ternary(spirv::Op, Word, Word),
+        }
+        let cast = match (src_scalar.kind, kind, convert) {
+            // Filter out identity casts. Some Adreno drivers are
+            // confused by no-op OpBitCast instructions.
+            (src_kind, kind, convert)
+                if src_kind == kind
+                    && convert.filter(|&width| width != src_scalar.width).is_none() =>
+            {
+                Cast::Identity
+            }
+            (Sk::Bool, Sk::Bool, _) => Cast::Unary(spirv::Op::CopyObject),
+            (_, _, None) => Cast::Unary(spirv::Op::Bitcast),
+            // casting to a bool - generate `OpXxxNotEqual`
+            (_, Sk::Bool, Some(_)) => {
+                let op = match src_scalar.kind {
+                    Sk::Sint | Sk::Uint => spirv::Op::INotEqual,
+                    Sk::Float => spirv::Op::FUnordNotEqual,
+                    Sk::Bool | Sk::AbstractInt | Sk::AbstractFloat => unreachable!(),
+                };
+                let zero_scalar_id = self.writer.get_constant_scalar_with(0, src_scalar)?;
+                let zero_id = match src_size {
+                    Some(size) => {
+                        let ty = LocalType::Numeric(NumericType::Vector {
+                            size,
+                            scalar: src_scalar,
+                        })
+                        .into();
+
+                        self.temp_list.clear();
+                        self.temp_list.resize(size as _, zero_scalar_id);
+
+                        self.writer.get_constant_composite(ty, &self.temp_list)
+                    }
+                    None => zero_scalar_id,
+                };
+
+                Cast::Binary(op, zero_id)
+            }
+            // casting from a bool - generate `OpSelect`
+            (Sk::Bool, _, Some(dst_width)) => {
+                let dst_scalar = crate::Scalar {
+                    kind,
+                    width: dst_width,
+                };
+                let zero_scalar_id = self.writer.get_constant_scalar_with(0, dst_scalar)?;
+                let one_scalar_id = self.writer.get_constant_scalar_with(1, dst_scalar)?;
+                let (accept_id, reject_id) = match src_size {
+                    Some(size) => {
+                        let ty = LocalType::Numeric(NumericType::Vector {
+                            size,
+                            scalar: dst_scalar,
+                        })
+                        .into();
+
+                        self.temp_list.clear();
+                        self.temp_list.resize(size as _, zero_scalar_id);
+
+                        let vec0_id = self.writer.get_constant_composite(ty, &self.temp_list);
+
+                        self.temp_list.fill(one_scalar_id);
+
+                        let vec1_id = self.writer.get_constant_composite(ty, &self.temp_list);
+
+                        (vec1_id, vec0_id)
+                    }
+                    None => (one_scalar_id, zero_scalar_id),
+                };
+
+                Cast::Ternary(spirv::Op::Select, accept_id, reject_id)
+            }
+            (Sk::Float, Sk::Uint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToU),
+            (Sk::Float, Sk::Sint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToS),
+            (Sk::Float, Sk::Float, Some(dst_width)) if src_scalar.width != dst_width => {
+                Cast::Unary(spirv::Op::FConvert)
+            }
+            (Sk::Sint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertSToF),
+            (Sk::Sint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
+                Cast::Unary(spirv::Op::SConvert)
+            }
+            (Sk::Uint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertUToF),
+            (Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
+                Cast::Unary(spirv::Op::UConvert)
+            }
+            (Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
+                Cast::Unary(spirv::Op::SConvert)
+            }
+            (Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
+                Cast::Unary(spirv::Op::UConvert)
+            }
+            // We assume it's either an identity cast, or int-uint.
+            _ => Cast::Unary(spirv::Op::Bitcast),
+        };
+        Ok(match cast {
+            Cast::Identity => expr_id,
+            Cast::Unary(op) => {
+                let id = self.gen_id();
+                block
+                    .body
+                    .push(Instruction::unary(op, result_type_id, id, expr_id));
+                id
+            }
+            Cast::Binary(op, operand) => {
+                let id = self.gen_id();
+                block.body.push(Instruction::binary(
+                    op,
+                    result_type_id,
+                    id,
+                    expr_id,
+                    operand,
+                ));
+                id
+            }
+            Cast::Ternary(op, op1, op2) => {
+                let id = self.gen_id();
+                block.body.push(Instruction::ternary(
+                    op,
+                    result_type_id,
+                    id,
+                    expr_id,
+                    op1,
+                    op2,
+                ));
+                id
+            }
+        })
+    }
+
     /// Build an `OpAccessChain` instruction.
     ///
     /// Emit any needed bounds-checking expressions to `block`.
diff --git a/naga/src/back/spv/instructions.rs b/naga/src/back/spv/instructions.rs
index 57bfb4e9f3..bb4b4f98c1 100644
--- a/naga/src/back/spv/instructions.rs
+++ b/naga/src/back/spv/instructions.rs
@@ -406,6 +406,10 @@ impl super::Instruction {
         instruction
     }
 
+    pub(super) fn constant_16bit(result_type_id: Word, id: Word, low: Word) -> Self {
+        Self::constant(result_type_id, id, &[low])
+    }
+
     pub(super) fn constant_32bit(result_type_id: Word, id: Word, value: Word) -> Self {
         Self::constant(result_type_id, id, &[value])
     }
diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs
index cc0c227bec..56f9985790 100644
--- a/naga/src/back/spv/writer.rs
+++ b/naga/src/back/spv/writer.rs
@@ -1049,6 +1049,15 @@ impl Writer {
                 if bits == 64 {
                     self.capabilities_used.insert(spirv::Capability::Float64);
                 }
+                if bits == 16 {
+                    self.capabilities_used.insert(spirv::Capability::Float16);
+                    self.capabilities_used
+                        .insert(spirv::Capability::StorageBuffer16BitAccess);
+                    self.capabilities_used
+                        .insert(spirv::Capability::UniformAndStorageBuffer16BitAccess);
+                    self.capabilities_used
+                        .insert(spirv::Capability::StorageInputOutput16);
+                }
                 Instruction::type_float(id, bits)
             }
             Sk::Bool => Instruction::type_bool(id),
@@ -1117,6 +1126,19 @@ impl Writer {
                 )?;
                 self.use_extension("SPV_EXT_shader_atomic_float_add");
             }
+            // 16 bit floating-point support requires Float16 capability
+            crate::TypeInner::Matrix {
+                scalar: crate::Scalar::F16,
+                ..
+            }
+            | crate::TypeInner::Vector {
+                scalar: crate::Scalar::F16,
+                ..
+            }
+            | crate::TypeInner::Scalar(crate::Scalar::F16) => {
+                self.require_any("16 bit floating-point", &[spirv::Capability::Float16])?;
+                self.use_extension("SPV_KHR_16bit_storage");
+            }
             _ => {}
         }
         Ok(())
@@ -1410,6 +1432,10 @@ impl Writer {
                 Instruction::constant_64bit(type_id, id, bits as u32, (bits >> 32) as u32)
             }
             crate::Literal::F32(value) => Instruction::constant_32bit(type_id, id, value.to_bits()),
+            crate::Literal::F16(value) => {
+                let low = value.to_bits();
+                Instruction::constant_16bit(type_id, id, low as u32)
+            }
             crate::Literal::U32(value) => Instruction::constant_32bit(type_id, id, value),
             crate::Literal::I32(value) => Instruction::constant_32bit(type_id, id, value as u32),
             crate::Literal::U64(value) => {
diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs
index a7cd8f95c9..cbcf107985 100644
--- a/naga/src/back/wgsl/writer.rs
+++ b/naga/src/back/wgsl/writer.rs
@@ -1,3 +1,5 @@
+use hashbrown::HashSet;
+
 use super::Error;
 use crate::back::wgsl::polyfill::InversePolyfill;
 use crate::{
@@ -125,6 +127,9 @@ impl<W: Write> Writer<W> {
             }
         }
 
+        // Write all `enable` declarations
+        self.write_enable_declarations(module)?;
+
         // Write all structs
         for (handle, ty) in module.types.iter() {
             if let TypeInner::Struct { ref members, .. } = ty.inner {
@@ -217,6 +222,41 @@ impl<W: Write> Writer<W> {
         Ok(())
     }
 
+    /// Helper method which writes all the `enable` declarations
+    /// needed for a module.
+    fn write_enable_declarations(&mut self, module: &Module) -> BackendResult {
+        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+        enum WrittenDeclarations {
+            F16,
+        }
+
+        let mut written_declarations = HashSet::new();
+
+        // Write all the `enable` declarations
+        for (_, ty) in module.types.iter() {
+            match ty.inner {
+                TypeInner::Scalar(scalar)
+                | TypeInner::Vector { scalar, .. }
+                | TypeInner::Matrix { scalar, .. } => {
+                    if scalar == crate::Scalar::F16
+                        && !written_declarations.contains(&WrittenDeclarations::F16)
+                    {
+                        writeln!(self.out, "enable f16;")?;
+                        written_declarations.insert(WrittenDeclarations::F16);
+                    }
+                }
+                _ => {}
+            }
+        }
+
+        if !written_declarations.is_empty() {
+            // Empty line for readability
+            writeln!(self.out)?;
+        }
+
+        Ok(())
+    }
+
     /// Helper method used to write struct name
     ///
     /// # Notes
@@ -1262,6 +1302,7 @@ impl<W: Write> Writer<W> {
 
         match expressions[expr] {
             Expression::Literal(literal) => match literal {
+                crate::Literal::F16(value) => write!(self.out, "{value}h")?,
                 crate::Literal::F32(value) => write!(self.out, "{value}f")?,
                 crate::Literal::U32(value) => write!(self.out, "{value}u")?,
                 crate::Literal::I32(value) => {
@@ -2026,6 +2067,10 @@ const fn scalar_kind_str(scalar: crate::Scalar) -> &'static str {
             kind: Sk::Float,
             width: 4,
         } => "f32",
+        Scalar {
+            kind: Sk::Float,
+            width: 2,
+        } => "f16",
         Scalar {
             kind: Sk::Sint,
             width: 4,
diff --git a/naga/src/front/glsl/error.rs b/naga/src/front/glsl/error.rs
index 92962db00d..201ad5333d 100644
--- a/naga/src/front/glsl/error.rs
+++ b/naga/src/front/glsl/error.rs
@@ -102,9 +102,15 @@ pub enum ErrorKind {
     /// Unsupported matrix of the form matCx2
     ///
     /// Our IR expects matrices of the form matCx2 to have a stride of 8 however
-    /// matrices in the std140 layout have a stride of at least 16
-    #[error("unsupported matrix of the form matCx2 in std140 block layout")]
-    UnsupportedMatrixTypeInStd140,
+    /// matrices in the std140 layout have a stride of at least 16.
+    #[error("unsupported matrix of the form matCx2 (in this case mat{columns}x2) in std140 block layout. See https://github.com/gfx-rs/wgpu/issues/4375")]
+    UnsupportedMatrixWithTwoRowsInStd140 { columns: u8 },
+    /// Unsupported matrix of the form f16matCxR
+    ///
+    /// Our IR expects matrices of the form f16matCxR to have a stride of 4/8/8 depending on row-count,
+    /// however matrices in the std140 layout have a stride of at least 16.
+    #[error("unsupported matrix of the form f16matCxR (in this case f16mat{columns}x{rows}) in std140 block layout. See https://github.com/gfx-rs/wgpu/issues/4375")]
+    UnsupportedF16MatrixInStd140 { columns: u8, rows: u8 },
     /// A variable with the same name already exists in the current scope.
     #[error("Variable already declared: {0}")]
     VariableAlreadyDeclared(String),
diff --git a/naga/src/front/glsl/offset.rs b/naga/src/front/glsl/offset.rs
index 6e8d5ada10..d3d29ca428 100644
--- a/naga/src/front/glsl/offset.rs
+++ b/naga/src/front/glsl/offset.rs
@@ -120,11 +120,25 @@ pub fn calculate_offset(
             }
 
             // See comment on the error kind
-            if StructLayout::Std140 == layout && rows == crate::VectorSize::Bi {
-                errors.push(Error {
-                    kind: ErrorKind::UnsupportedMatrixTypeInStd140,
-                    meta,
-                });
+            if StructLayout::Std140 == layout {
+                // Do the f16 test first, as it's more specific
+                if scalar == Scalar::F16 {
+                    errors.push(Error {
+                        kind: ErrorKind::UnsupportedF16MatrixInStd140 {
+                            columns: columns as u8,
+                            rows: rows as u8,
+                        },
+                        meta,
+                    });
+                }
+                if rows == crate::VectorSize::Bi {
+                    errors.push(Error {
+                        kind: ErrorKind::UnsupportedMatrixWithTwoRowsInStd140 {
+                            columns: columns as u8,
+                        },
+                        meta,
+                    });
+                }
             }
 
             (align, align * columns as u32)
diff --git a/naga/src/front/glsl/types.rs b/naga/src/front/glsl/types.rs
index ad5e188fd9..d4bd5a69fa 100644
--- a/naga/src/front/glsl/types.rs
+++ b/naga/src/front/glsl/types.rs
@@ -10,6 +10,10 @@ pub fn parse_type(type_name: &str) -> Option<Type> {
             name: None,
             inner: TypeInner::Scalar(Scalar::BOOL),
         }),
+        "float16_t" => Some(Type {
+            name: None,
+            inner: TypeInner::Scalar(Scalar::F16),
+        }),
         "float" => Some(Type {
             name: None,
             inner: TypeInner::Scalar(Scalar::F32),
@@ -40,6 +44,7 @@ pub fn parse_type(type_name: &str) -> Option<Type> {
                     "i" => Scalar::I32,
                     "u" => Scalar::U32,
                     "d" => Scalar::F64,
+                    "f16" => Scalar::F16,
                     _ => return None,
                 })
             }
diff --git a/naga/src/front/spv/mod.rs b/naga/src/front/spv/mod.rs
index b8087fc8b0..473011dbf2 100644
--- a/naga/src/front/spv/mod.rs
+++ b/naga/src/front/spv/mod.rs
@@ -36,6 +36,7 @@ mod null;
 use convert::*;
 pub use error::Error;
 use function::*;
+use half::f16;
 
 use crate::{
     arena::{Arena, Handle, UniqueArena},
@@ -80,6 +81,7 @@ pub const SUPPORTED_EXTENSIONS: &[&str] = &[
     "SPV_KHR_vulkan_memory_model",
     "SPV_KHR_multiview",
     "SPV_EXT_shader_atomic_float_add",
+    "SPV_KHR_16bit_storage",
 ];
 pub const SUPPORTED_EXT_SETS: &[&str] = &["GLSL.std.450"];
 
@@ -5598,6 +5600,9 @@ impl<I: Iterator<Item = u32>> Frontend<I> {
             }) => {
                 let low = self.next()?;
                 match width {
+                    // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#Literal
+                    // If a numeric type’s bit width is less than 32-bits, the value appears in the low-order bits of the word.
+                    2 => crate::Literal::F16(f16::from_bits(low as u16)),
                     4 => crate::Literal::F32(f32::from_bits(low)),
                     8 => {
                         inst.expect(5)?;
diff --git a/naga/src/front/wgsl/error.rs b/naga/src/front/wgsl/error.rs
index 7bdbf12d2c..504c0a1f90 100644
--- a/naga/src/front/wgsl/error.rs
+++ b/naga/src/front/wgsl/error.rs
@@ -146,8 +146,6 @@ pub enum NumberError {
     Invalid,
     #[error("numeric literal not representable by target type")]
     NotRepresentable,
-    #[error("unimplemented f16 type")]
-    UnimplementedF16,
 }
 
 #[derive(Copy, Clone, Debug, PartialEq)]
@@ -1008,19 +1006,18 @@ impl<'a> Error<'a> {
                 )],
             },
             Error::EnableExtensionNotEnabled { kind, span } => ParseError {
-                message: format!("`{}` enable-extension is not enabled", kind.to_ident()),
+                message: format!("the `{}` language extension is not enabled", kind.to_ident()),
                 labels: vec![(
                     span,
                     format!(
                         concat!(
-                            "the `{}` enable-extension is needed for this functionality, ",
-                            "but it is not currently enabled"
+                            "the `{0}` language extension is needed for this functionality, ",
+                            "but it is not currently enabled."
                         ),
                         kind.to_ident()
                     )
                     .into(),
                 )],
-                #[allow(irrefutable_let_patterns)]
                 notes: if let EnableExtension::Unimplemented(kind) = kind {
                     vec![format!(
                         concat!(
@@ -1032,7 +1029,12 @@ impl<'a> Error<'a> {
                         kind.tracking_issue_num()
                     )]
                 } else {
-                    vec![]
+                    vec![
+                        format!(
+                            "You can enable this extension by adding `enable {};` at the top of the shader.",
+                            kind.to_ident()
+                        ),
+                    ]
                 },
             },
             Error::LanguageExtensionNotYetImplemented { kind, span } => ParseError {
diff --git a/naga/src/front/wgsl/lower/mod.rs b/naga/src/front/wgsl/lower/mod.rs
index 7cd3ef90f8..14b969a685 100644
--- a/naga/src/front/wgsl/lower/mod.rs
+++ b/naga/src/front/wgsl/lower/mod.rs
@@ -1886,6 +1886,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> {
         let expr: Typed<crate::Expression> = match *expr {
             ast::Expression::Literal(literal) => {
                 let literal = match literal {
+                    ast::Literal::Number(Number::F16(f)) => crate::Literal::F16(f),
                     ast::Literal::Number(Number::F32(f)) => crate::Literal::F32(f),
                     ast::Literal::Number(Number::I32(i)) => crate::Literal::I32(i),
                     ast::Literal::Number(Number::U32(u)) => crate::Literal::U32(u),
diff --git a/naga/src/front/wgsl/parse/conv.rs b/naga/src/front/wgsl/parse/conv.rs
index 00c19d877d..1e07ff39e6 100644
--- a/naga/src/front/wgsl/parse/conv.rs
+++ b/naga/src/front/wgsl/parse/conv.rs
@@ -1,4 +1,7 @@
 use super::Error;
+use crate::front::wgsl::parse::directive::enable_extension::{
+    EnableExtensions, ImplementedEnableExtension,
+};
 use crate::front::wgsl::Scalar;
 use crate::Span;
 
@@ -112,10 +115,17 @@ pub fn map_storage_format(word: &str, span: Span) -> Result<crate::StorageFormat
     })
 }
 
-pub fn get_scalar_type(word: &str) -> Option<Scalar> {
+pub fn get_scalar_type(
+    enable_extensions: &EnableExtensions,
+    span: Span,
+    word: &str,
+) -> Result<Option<Scalar>, Error<'static>> {
     use crate::ScalarKind as Sk;
-    match word {
-        // "f16" => Some(Scalar { kind: Sk::Float, width: 2 }),
+    let scalar = match word {
+        "f16" => Some(Scalar {
+            kind: Sk::Float,
+            width: 2,
+        }),
         "f32" => Some(Scalar {
             kind: Sk::Float,
             width: 4,
@@ -145,7 +155,18 @@ pub fn get_scalar_type(word: &str) -> Option<Scalar> {
             width: crate::BOOL_WIDTH,
         }),
         _ => None,
+    };
+
+    if matches!(scalar, Some(Scalar::F16))
+        && !enable_extensions.contains(ImplementedEnableExtension::F16)
+    {
+        return Err(Error::EnableExtensionNotEnabled {
+            span,
+            kind: ImplementedEnableExtension::F16.into(),
+        });
     }
+
+    Ok(scalar)
 }
 
 pub fn map_derivative(word: &str) -> Option<(crate::DerivativeAxis, crate::DerivativeControl)> {
diff --git a/naga/src/front/wgsl/parse/directive/enable_extension.rs b/naga/src/front/wgsl/parse/directive/enable_extension.rs
index 147ec0b5e0..3efa3b5283 100644
--- a/naga/src/front/wgsl/parse/directive/enable_extension.rs
+++ b/naga/src/front/wgsl/parse/directive/enable_extension.rs
@@ -5,24 +5,29 @@ use crate::{front::wgsl::error::Error, Span};
 
 /// Tracks the status of every enable-extension known to Naga.
 #[derive(Clone, Debug, Eq, PartialEq)]
-pub struct EnableExtensions {}
+pub struct EnableExtensions {
+    /// Whether `enable f16;` was written earlier in the shader module.
+    f16: bool,
+}
 
 impl EnableExtensions {
     pub(crate) const fn empty() -> Self {
-        Self {}
+        Self { f16: false }
     }
 
     /// Add an enable-extension to the set requested by a module.
-    #[allow(unreachable_code)]
     pub(crate) fn add(&mut self, ext: ImplementedEnableExtension) {
-        let _field: &mut bool = match ext {};
-        *_field = true;
+        let field = match ext {
+            ImplementedEnableExtension::F16 => &mut self.f16,
+        };
+        *field = true;
     }
 
     /// Query whether an enable-extension tracked here has been requested.
-    #[allow(unused)]
     pub(crate) const fn contains(&self, ext: ImplementedEnableExtension) -> bool {
-        match ext {}
+        match ext {
+            ImplementedEnableExtension::F16 => self.f16,
+        }
     }
 }
 
@@ -37,7 +42,6 @@ impl Default for EnableExtensions {
 /// WGSL spec.: <https://www.w3.org/TR/WGSL/#enable-extensions-sec>
 #[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
 pub enum EnableExtension {
-    #[allow(unused)]
     Implemented(ImplementedEnableExtension),
     Unimplemented(UnimplementedEnableExtension),
 }
@@ -50,7 +54,7 @@ impl EnableExtension {
     /// Convert from a sentinel word in WGSL into its associated [`EnableExtension`], if possible.
     pub(crate) fn from_ident(word: &str, span: Span) -> Result<Self, Error<'_>> {
         Ok(match word {
-            Self::F16 => Self::Unimplemented(UnimplementedEnableExtension::F16),
+            Self::F16 => Self::Implemented(ImplementedEnableExtension::F16),
             Self::CLIP_DISTANCES => {
                 Self::Unimplemented(UnimplementedEnableExtension::ClipDistances)
             }
@@ -64,9 +68,10 @@ impl EnableExtension {
     /// Maps this [`EnableExtension`] into the sentinel word associated with it in WGSL.
     pub const fn to_ident(self) -> &'static str {
         match self {
-            Self::Implemented(kind) => match kind {},
+            Self::Implemented(kind) => match kind {
+                ImplementedEnableExtension::F16 => Self::F16,
+            },
             Self::Unimplemented(kind) => match kind {
-                UnimplementedEnableExtension::F16 => Self::F16,
                 UnimplementedEnableExtension::ClipDistances => Self::CLIP_DISTANCES,
                 UnimplementedEnableExtension::DualSourceBlending => Self::DUAL_SOURCE_BLENDING,
             },
@@ -76,17 +81,24 @@ impl EnableExtension {
 
 /// A variant of [`EnableExtension::Implemented`].
 #[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
-pub enum ImplementedEnableExtension {}
-
-/// A variant of [`EnableExtension::Unimplemented`].
-#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
-pub enum UnimplementedEnableExtension {
+pub enum ImplementedEnableExtension {
     /// Enables `f16`/`half` primitive support in all shader languages.
     ///
     /// In the WGSL standard, this corresponds to [`enable f16;`].
     ///
     /// [`enable f16;`]: https://www.w3.org/TR/WGSL/#extension-f16
     F16,
+}
+
+impl From<ImplementedEnableExtension> for EnableExtension {
+    fn from(value: ImplementedEnableExtension) -> Self {
+        Self::Implemented(value)
+    }
+}
+
+/// A variant of [`EnableExtension::Unimplemented`].
+#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
+pub enum UnimplementedEnableExtension {
     /// Enables the `clip_distances` variable in WGSL.
     ///
     /// In the WGSL standard, this corresponds to [`enable clip_distances;`].
@@ -104,7 +116,6 @@ pub enum UnimplementedEnableExtension {
 impl UnimplementedEnableExtension {
     pub(crate) const fn tracking_issue_num(self) -> u16 {
         match self {
-            Self::F16 => 4384,
             Self::ClipDistances => 6236,
             Self::DualSourceBlending => 6402,
         }
diff --git a/naga/src/front/wgsl/parse/lexer.rs b/naga/src/front/wgsl/parse/lexer.rs
index d55720972e..8d64f52ac6 100644
--- a/naga/src/front/wgsl/parse/lexer.rs
+++ b/naga/src/front/wgsl/parse/lexer.rs
@@ -408,14 +408,17 @@ impl<'a> Lexer<'a> {
     /// Parses a generic scalar type, for example `<f32>`.
     pub(in crate::front::wgsl) fn next_scalar_generic(&mut self) -> Result<Scalar, Error<'a>> {
         self.expect_generic_paren('<')?;
-        let pair = match self.next() {
+        let (scalar, _span) = match self.next() {
             (Token::Word(word), span) => {
-                conv::get_scalar_type(word).ok_or(Error::UnknownScalarType(span))
+                conv::get_scalar_type(&self.enable_extensions, span, word)?
+                    .map(|scalar| (scalar, span))
+                    .ok_or(Error::UnknownScalarType(span))?
             }
-            (_, span) => Err(Error::UnknownScalarType(span)),
-        }?;
+            (_, span) => return Err(Error::UnknownScalarType(span)),
+        };
+
         self.expect_generic_paren('>')?;
-        Ok(pair)
+        Ok(scalar)
     }
 
     /// Parses a generic scalar type, for example `<f32>`.
@@ -425,14 +428,18 @@ impl<'a> Lexer<'a> {
         &mut self,
     ) -> Result<(Scalar, Span), Error<'a>> {
         self.expect_generic_paren('<')?;
-        let pair = match self.next() {
-            (Token::Word(word), span) => conv::get_scalar_type(word)
-                .map(|scalar| (scalar, span))
-                .ok_or(Error::UnknownScalarType(span)),
-            (_, span) => Err(Error::UnknownScalarType(span)),
-        }?;
+
+        let (scalar, span) = match self.next() {
+            (Token::Word(word), span) => {
+                conv::get_scalar_type(&self.enable_extensions, span, word)?
+                    .map(|scalar| (scalar, span))
+                    .ok_or(Error::UnknownScalarType(span))?
+            }
+            (_, span) => return Err(Error::UnknownScalarType(span)),
+        };
+
         self.expect_generic_paren('>')?;
-        Ok(pair)
+        Ok((scalar, span))
     }
 
     pub(in crate::front::wgsl) fn next_storage_access(
@@ -493,6 +500,7 @@ fn sub_test(source: &str, expected_tokens: &[Token]) {
 
 #[test]
 fn test_numbers() {
+    use half::f16;
     // WGSL spec examples //
 
     // decimal integer
@@ -517,14 +525,16 @@ fn test_numbers() {
             Token::Number(Ok(Number::AbstractFloat(0.01))),
             Token::Number(Ok(Number::AbstractFloat(12.34))),
             Token::Number(Ok(Number::F32(0.))),
-            Token::Number(Err(NumberError::UnimplementedF16)),
+            Token::Number(Ok(Number::F16(f16::from_f32(0.)))),
             Token::Number(Ok(Number::AbstractFloat(0.001))),
             Token::Number(Ok(Number::AbstractFloat(43.75))),
             Token::Number(Ok(Number::F32(16.))),
             Token::Number(Ok(Number::AbstractFloat(0.1875))),
-            Token::Number(Err(NumberError::UnimplementedF16)),
+            // https://github.com/gfx-rs/wgpu/issues/7046
+            Token::Number(Err(NumberError::NotRepresentable)), // Should be 0.75
             Token::Number(Ok(Number::AbstractFloat(0.12109375))),
-            Token::Number(Err(NumberError::UnimplementedF16)),
+            // https://github.com/gfx-rs/wgpu/issues/7046
+            Token::Number(Err(NumberError::NotRepresentable)), // Should be 12.5
         ],
     );
 
diff --git a/naga/src/front/wgsl/parse/mod.rs b/naga/src/front/wgsl/parse/mod.rs
index 23b2984e75..f4115014b8 100644
--- a/naga/src/front/wgsl/parse/mod.rs
+++ b/naga/src/front/wgsl/parse/mod.rs
@@ -3,9 +3,7 @@ use crate::diagnostic_filter::{
     ShouldConflictOnFullDuplicate, StandardFilterableTriggeringRule,
 };
 use crate::front::wgsl::error::{DiagnosticAttributeNotSupportedPosition, Error, ExpectedToken};
-use crate::front::wgsl::parse::directive::enable_extension::{
-    EnableExtension, EnableExtensions, UnimplementedEnableExtension,
-};
+use crate::front::wgsl::parse::directive::enable_extension::{EnableExtension, EnableExtensions};
 use crate::front::wgsl::parse::directive::language_extension::LanguageExtension;
 use crate::front::wgsl::parse::directive::DirectiveKind;
 use crate::front::wgsl::parse::lexer::{Lexer, Token};
@@ -346,7 +344,7 @@ impl Parser {
         span: Span,
         ctx: &mut ExpressionContext<'a, '_, '_>,
     ) -> Result<Option<ast::ConstructorType<'a>>, Error<'a>> {
-        if let Some(scalar) = conv::get_scalar_type(word) {
+        if let Some(scalar) = conv::get_scalar_type(&lexer.enable_extensions, span, word)? {
             return Ok(Some(ast::ConstructorType::Scalar(scalar)));
         }
 
@@ -375,6 +373,13 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "vec2h" => {
+                return Ok(Some(ast::ConstructorType::Vector {
+                    size: crate::VectorSize::Bi,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "vec3" => ast::ConstructorType::PartialVector {
                 size: crate::VectorSize::Tri,
             },
@@ -399,6 +404,13 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "vec3h" => {
+                return Ok(Some(ast::ConstructorType::Vector {
+                    size: crate::VectorSize::Tri,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "vec4" => ast::ConstructorType::PartialVector {
                 size: crate::VectorSize::Quad,
             },
@@ -423,6 +435,13 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "vec4h" => {
+                return Ok(Some(ast::ConstructorType::Vector {
+                    size: crate::VectorSize::Quad,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat2x2" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Bi,
                 rows: crate::VectorSize::Bi,
@@ -435,6 +454,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat2x2h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Bi,
+                    rows: crate::VectorSize::Bi,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat2x3" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Bi,
                 rows: crate::VectorSize::Tri,
@@ -447,6 +474,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat2x3h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Bi,
+                    rows: crate::VectorSize::Tri,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat2x4" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Bi,
                 rows: crate::VectorSize::Quad,
@@ -459,6 +494,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat2x4h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Bi,
+                    rows: crate::VectorSize::Quad,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat3x2" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Tri,
                 rows: crate::VectorSize::Bi,
@@ -471,6 +514,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat3x2h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Tri,
+                    rows: crate::VectorSize::Bi,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat3x3" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Tri,
                 rows: crate::VectorSize::Tri,
@@ -483,6 +534,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat3x3h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Tri,
+                    rows: crate::VectorSize::Tri,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat3x4" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Tri,
                 rows: crate::VectorSize::Quad,
@@ -495,6 +554,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat3x4h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Tri,
+                    rows: crate::VectorSize::Quad,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat4x2" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Quad,
                 rows: crate::VectorSize::Bi,
@@ -507,6 +574,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat4x2h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Quad,
+                    rows: crate::VectorSize::Bi,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat4x3" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Quad,
                 rows: crate::VectorSize::Tri,
@@ -519,6 +594,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat4x3h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Quad,
+                    rows: crate::VectorSize::Tri,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat4x4" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Quad,
                 rows: crate::VectorSize::Quad,
@@ -531,6 +614,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat4x4h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Quad,
+                    rows: crate::VectorSize::Quad,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "array" => ast::ConstructorType::PartialArray,
             "atomic"
             | "binding_array"
@@ -725,15 +816,17 @@ impl Parser {
             }
             (Token::Number(res), span) => {
                 let _ = lexer.next();
-                let num = res.map_err(|err| match err {
-                    super::error::NumberError::UnimplementedF16 => {
-                        Error::EnableExtensionNotEnabled {
-                            kind: EnableExtension::Unimplemented(UnimplementedEnableExtension::F16),
+                let num = res.map_err(|err| Error::BadNumber(span, err))?;
+
+                if let Some(enable_extension) = num.required_enable_extension() {
+                    if !lexer.enable_extensions.contains(enable_extension) {
+                        return Err(Error::EnableExtensionNotEnabled {
+                            kind: enable_extension.into(),
                             span,
-                        }
+                        });
                     }
-                    err => Error::BadNumber(span, err),
-                })?;
+                }
+
                 ast::Expression::Literal(ast::Literal::Number(num))
             }
             (Token::Word("RAY_FLAG_NONE"), _) => {
@@ -1268,9 +1361,10 @@ impl Parser {
         &mut self,
         lexer: &mut Lexer<'a>,
         word: &'a str,
+        span: Span,
         ctx: &mut ExpressionContext<'a, '_, '_>,
     ) -> Result<Option<ast::Type<'a>>, Error<'a>> {
-        if let Some(scalar) = conv::get_scalar_type(word) {
+        if let Some(scalar) = conv::get_scalar_type(&lexer.enable_extensions, span, word)? {
             return Ok(Some(ast::Type::Scalar(scalar)));
         }
 
@@ -1298,6 +1392,11 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "vec2h" => ast::Type::Vector {
+                size: crate::VectorSize::Bi,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "vec3" => {
                 let (ty, ty_span) = self.singular_generic(lexer, ctx)?;
                 ast::Type::Vector {
@@ -1321,6 +1420,11 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "vec3h" => ast::Type::Vector {
+                size: crate::VectorSize::Tri,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "vec4" => {
                 let (ty, ty_span) = self.singular_generic(lexer, ctx)?;
                 ast::Type::Vector {
@@ -1344,6 +1448,11 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "vec4h" => ast::Type::Vector {
+                size: crate::VectorSize::Quad,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat2x2" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Bi)?
             }
@@ -1353,6 +1462,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat2x2h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Bi,
+                rows: crate::VectorSize::Bi,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat2x3" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Tri)?
             }
@@ -1362,6 +1477,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat2x3h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Bi,
+                rows: crate::VectorSize::Tri,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat2x4" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Quad)?
             }
@@ -1371,6 +1492,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat2x4h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Bi,
+                rows: crate::VectorSize::Quad,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat3x2" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Bi)?
             }
@@ -1380,6 +1507,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat3x2h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Tri,
+                rows: crate::VectorSize::Bi,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat3x3" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Tri)?
             }
@@ -1389,6 +1522,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat3x3h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Tri,
+                rows: crate::VectorSize::Tri,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat3x4" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Quad)?
             }
@@ -1398,6 +1537,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat3x4h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Tri,
+                rows: crate::VectorSize::Quad,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat4x2" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Bi)?
             }
@@ -1407,6 +1552,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat4x2h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Quad,
+                rows: crate::VectorSize::Bi,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat4x3" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Tri)?
             }
@@ -1416,6 +1567,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat4x3h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Quad,
+                rows: crate::VectorSize::Tri,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat4x4" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Quad)?
             }
@@ -1425,6 +1582,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat4x4h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Quad,
+                rows: crate::VectorSize::Quad,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "atomic" => {
                 let scalar = lexer.next_scalar_generic()?;
                 ast::Type::Atomic(scalar)
@@ -1681,7 +1844,7 @@ impl Parser {
 
             let (name, span) = lexer.next_ident_with_span()?;
 
-            let ty = match this.type_decl_impl(lexer, name, ctx)? {
+            let ty = match this.type_decl_impl(lexer, name, span, ctx)? {
                 Some(ty) => ty,
                 None => {
                     ctx.unresolved.insert(ast::Dependency {
diff --git a/naga/src/front/wgsl/parse/number.rs b/naga/src/front/wgsl/parse/number.rs
index 72795de6b4..87dfb0a02c 100644
--- a/naga/src/front/wgsl/parse/number.rs
+++ b/naga/src/front/wgsl/parse/number.rs
@@ -1,5 +1,7 @@
 use crate::front::wgsl::error::NumberError;
+use crate::front::wgsl::parse::directive::enable_extension::ImplementedEnableExtension;
 use crate::front::wgsl::parse::lexer::Token;
+use half::f16;
 
 /// When using this type assume no Abstract Int/Float for now
 #[derive(Copy, Clone, Debug, PartialEq)]
@@ -16,12 +18,23 @@ pub enum Number {
     I64(i64),
     /// Concrete u64
     U64(u64),
+    /// Concrete f16
+    F16(f16),
     /// Concrete f32
     F32(f32),
     /// Concrete f64
     F64(f64),
 }
 
+impl Number {
+    pub(super) const fn required_enable_extension(&self) -> Option<ImplementedEnableExtension> {
+        match *self {
+            Number::F16(_) => Some(ImplementedEnableExtension::F16),
+            _ => None,
+        }
+    }
+}
+
 pub(in crate::front::wgsl) fn consume_number(input: &str) -> (Token<'_>, &str) {
     let (result, rest) = parse(input);
     (Token::Number(result), rest)
@@ -367,7 +380,8 @@ fn parse_hex_float(input: &str, kind: Option<FloatKind>) -> Result<Number, Numbe
             // can only be ParseHexfErrorKind::Inexact but we can't check since it's private
             _ => Err(NumberError::NotRepresentable),
         },
-        Some(FloatKind::F16) => Err(NumberError::UnimplementedF16),
+        // TODO: f16 is not supported by hexf_parse
+        Some(FloatKind::F16) => Err(NumberError::NotRepresentable),
         Some(FloatKind::F32) => match hexf_parse::parse_hexf32(input, false) {
             Ok(num) => Ok(Number::F32(num)),
             // can only be ParseHexfErrorKind::Inexact but we can't check since it's private
@@ -403,7 +417,12 @@ fn parse_dec_float(input: &str, kind: Option<FloatKind>) -> Result<Number, Numbe
                 .then_some(Number::F64(num))
                 .ok_or(NumberError::NotRepresentable)
         }
-        Some(FloatKind::F16) => Err(NumberError::UnimplementedF16),
+        Some(FloatKind::F16) => {
+            let num = input.parse::<f16>().unwrap(); // will never fail
+            num.is_finite()
+                .then_some(Number::F16(num))
+                .ok_or(NumberError::NotRepresentable)
+        }
     }
 }
 
diff --git a/naga/src/lib.rs b/naga/src/lib.rs
index 2e917d34e0..d78bdfb7af 100644
--- a/naga/src/lib.rs
+++ b/naga/src/lib.rs
@@ -271,6 +271,7 @@ pub use crate::span::{SourceLocation, Span, SpanContext, WithSpan};
 #[cfg(feature = "arbitrary")]
 use arbitrary::Arbitrary;
 use diagnostic_filter::DiagnosticFilterNode;
+use half::f16;
 #[cfg(feature = "deserialize")]
 use serde::Deserialize;
 #[cfg(feature = "serialize")]
@@ -895,6 +896,7 @@ pub enum Literal {
     F64(f64),
     /// May not be NaN or infinity.
     F32(f32),
+    F16(f16),
     U32(u32),
     I32(i32),
     U64(u64),
diff --git a/naga/src/proc/constant_evaluator.rs b/naga/src/proc/constant_evaluator.rs
index be7c503a01..ed713bf921 100644
--- a/naga/src/proc/constant_evaluator.rs
+++ b/naga/src/proc/constant_evaluator.rs
@@ -1,6 +1,8 @@
 use std::iter;
 
 use arrayvec::ArrayVec;
+use half::f16;
+use num_traits::{real::Real, FromPrimitive, One, ToPrimitive, Zero};
 
 use crate::{
     arena::{Arena, Handle, HandleVec, UniqueArena},
@@ -199,6 +201,7 @@ gen_component_wise_extractor! {
     literals: [
         AbstractFloat => AbstractFloat: f64,
         F32 => F32: f32,
+        F16 => F16: f16,
         AbstractInt => AbstractInt: i64,
         U32 => U32: u32,
         I32 => I32: i32,
@@ -219,6 +222,7 @@ gen_component_wise_extractor! {
     literals: [
         AbstractFloat => Abstract: f64,
         F32 => F32: f32,
+        F16 => F16: f16,
     ],
     scalar_kinds: [
         Float,
@@ -244,6 +248,7 @@ gen_component_wise_extractor! {
         AbstractFloat => AbstractFloat: f64,
         AbstractInt => AbstractInt: i64,
         F32 => F32: f32,
+        F16 => F16: f16,
         I32 => I32: i32,
     ],
     scalar_kinds: [
@@ -1103,6 +1108,7 @@ impl<'a> ConstantEvaluator<'a> {
                 component_wise_scalar(self, span, [arg], |args| match args {
                     Scalar::AbstractFloat([e]) => Ok(Scalar::AbstractFloat([e.abs()])),
                     Scalar::F32([e]) => Ok(Scalar::F32([e.abs()])),
+                    Scalar::F16([e]) => Ok(Scalar::F16([e.abs()])),
                     Scalar::AbstractInt([e]) => Ok(Scalar::AbstractInt([e.abs()])),
                     Scalar::I32([e]) => Ok(Scalar::I32([e.wrapping_abs()])),
                     Scalar::U32([e]) => Ok(Scalar::U32([e])), // TODO: just re-use the expression, ezpz
@@ -1134,9 +1140,13 @@ impl<'a> ConstantEvaluator<'a> {
                     }
                 )
             }
-            crate::MathFunction::Saturate => {
-                component_wise_float!(self, span, [arg], |e| { Ok([e.clamp(0., 1.)]) })
-            }
+            crate::MathFunction::Saturate => component_wise_float(self, span, [arg], |e| match e {
+                Float::F16([e]) => Ok(Float::F16(
+                    [e.clamp(f16::from_f32(0.0), f16::from_f32(1.0))],
+                )),
+                Float::F32([e]) => Ok(Float::F32([e.clamp(0., 1.)])),
+                Float::Abstract([e]) => Ok(Float::Abstract([e.clamp(0., 1.)])),
+            }),
 
             // trigonometry
             crate::MathFunction::Cos => {
@@ -1213,6 +1223,9 @@ impl<'a> ConstantEvaluator<'a> {
                 component_wise_float(self, span, [arg], |e| match e {
                     Float::Abstract([e]) => Ok(Float::Abstract([round_ties_even(e)])),
                     Float::F32([e]) => Ok(Float::F32([(round_ties_even(e as f64) as f32)])),
+                    Float::F16([e]) => {
+                        Ok(Float::F16([(f16::from_f64(round_ties_even(f64::from(e))))]))
+                    }
                 })
             }
             crate::MathFunction::Fract => {
@@ -1258,15 +1271,27 @@ impl<'a> ConstantEvaluator<'a> {
                 )
             }
             crate::MathFunction::Step => {
-                component_wise_float!(self, span, [arg, arg1.unwrap()], |edge, x| {
-                    Ok([if edge <= x { 1.0 } else { 0.0 }])
+                component_wise_float(self, span, [arg, arg1.unwrap()], |x| match x {
+                    Float::Abstract([edge, x]) => {
+                        Ok(Float::Abstract([if edge <= x { 1.0 } else { 0.0 }]))
+                    }
+                    Float::F32([edge, x]) => Ok(Float::F32([if edge <= x { 1.0 } else { 0.0 }])),
+                    Float::F16([edge, x]) => Ok(Float::F16([if edge <= x {
+                        f16::one()
+                    } else {
+                        f16::zero()
+                    }])),
                 })
             }
             crate::MathFunction::Sqrt => {
                 component_wise_float!(self, span, [arg], |e| { Ok([e.sqrt()]) })
             }
             crate::MathFunction::InverseSqrt => {
-                component_wise_float!(self, span, [arg], |e| { Ok([1. / e.sqrt()]) })
+                component_wise_float(self, span, [arg], |e| match e {
+                    Float::Abstract([e]) => Ok(Float::Abstract([1. / e.sqrt()])),
+                    Float::F32([e]) => Ok(Float::F32([1. / e.sqrt()])),
+                    Float::F16([e]) => Ok(Float::F16([f16::from_f32(1. / f32::from(e).sqrt())])),
+                })
             }
 
             // bits
@@ -1561,6 +1586,7 @@ impl<'a> ConstantEvaluator<'a> {
                         Literal::I32(v) => v,
                         Literal::U32(v) => v as i32,
                         Literal::F32(v) => v as i32,
+                        Literal::F16(v) => f16::to_i32(&v).unwrap(), //Only None on NaN or Inf
                         Literal::Bool(v) => v as i32,
                         Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
                             return make_error();
@@ -1572,6 +1598,7 @@ impl<'a> ConstantEvaluator<'a> {
                         Literal::I32(v) => v as u32,
                         Literal::U32(v) => v,
                         Literal::F32(v) => v as u32,
+                        Literal::F16(v) => f16::to_u32(&v).unwrap(), //Only None on NaN or Inf
                         Literal::Bool(v) => v as u32,
                         Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
                             return make_error();
@@ -1587,6 +1614,7 @@ impl<'a> ConstantEvaluator<'a> {
                         Literal::F64(v) => v as i64,
                         Literal::I64(v) => v,
                         Literal::U64(v) => v as i64,
+                        Literal::F16(v) => f16::to_i64(&v).unwrap(), //Only None on NaN or Inf
                         Literal::AbstractInt(v) => i64::try_from_abstract(v)?,
                         Literal::AbstractFloat(v) => i64::try_from_abstract(v)?,
                     }),
@@ -1598,9 +1626,22 @@ impl<'a> ConstantEvaluator<'a> {
                         Literal::F64(v) => v as u64,
                         Literal::I64(v) => v as u64,
                         Literal::U64(v) => v,
+                        Literal::F16(v) => f16::to_u64(&v).unwrap(), //Only None on NaN or Inf
                         Literal::AbstractInt(v) => u64::try_from_abstract(v)?,
                         Literal::AbstractFloat(v) => u64::try_from_abstract(v)?,
                     }),
+                    Sc::F16 => Literal::F16(match literal {
+                        Literal::F16(v) => v,
+                        Literal::F32(v) => f16::from_f32(v),
+                        Literal::F64(v) => f16::from_f64(v),
+                        Literal::Bool(v) => f16::from_u32(v as u32).unwrap(),
+                        Literal::I64(v) => f16::from_i64(v).unwrap(),
+                        Literal::U64(v) => f16::from_u64(v).unwrap(),
+                        Literal::I32(v) => f16::from_i32(v).unwrap(),
+                        Literal::U32(v) => f16::from_u32(v).unwrap(),
+                        Literal::AbstractFloat(v) => f16::try_from_abstract(v)?,
+                        Literal::AbstractInt(v) => f16::try_from_abstract(v)?,
+                    }),
                     Sc::F32 => Literal::F32(match literal {
                         Literal::I32(v) => v as f32,
                         Literal::U32(v) => v as f32,
@@ -1609,12 +1650,14 @@ impl<'a> ConstantEvaluator<'a> {
                         Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
                             return make_error();
                         }
+                        Literal::F16(v) => f16::to_f32(v),
                         Literal::AbstractInt(v) => f32::try_from_abstract(v)?,
                         Literal::AbstractFloat(v) => f32::try_from_abstract(v)?,
                     }),
                     Sc::F64 => Literal::F64(match literal {
                         Literal::I32(v) => v as f64,
                         Literal::U32(v) => v as f64,
+                        Literal::F16(v) => f16::to_f64(v),
                         Literal::F32(v) => v as f64,
                         Literal::F64(v) => v,
                         Literal::Bool(v) => v as u32 as f64,
@@ -1626,6 +1669,7 @@ impl<'a> ConstantEvaluator<'a> {
                         Literal::I32(v) => v != 0,
                         Literal::U32(v) => v != 0,
                         Literal::F32(v) => v != 0.0,
+                        Literal::F16(v) => v != f16::zero(),
                         Literal::Bool(v) => v,
                         Literal::F64(_)
                         | Literal::I64(_)
@@ -1780,6 +1824,7 @@ impl<'a> ConstantEvaluator<'a> {
                     Literal::I32(v) => Literal::I32(v.wrapping_neg()),
                     Literal::I64(v) => Literal::I64(v.wrapping_neg()),
                     Literal::F32(v) => Literal::F32(-v),
+                    Literal::F16(v) => Literal::F16(-v),
                     Literal::AbstractInt(v) => Literal::AbstractInt(v.wrapping_neg()),
                     Literal::AbstractFloat(v) => Literal::AbstractFloat(-v),
                     _ => return Err(ConstantEvaluatorError::InvalidUnaryOpArg),
@@ -1928,6 +1973,14 @@ impl<'a> ConstantEvaluator<'a> {
                                 _ => return Err(ConstantEvaluatorError::InvalidBinaryOpArgs),
                             })
                         }
+                        (Literal::F16(a), Literal::F16(b)) => Literal::F16(match op {
+                            BinaryOperator::Add => a + b,
+                            BinaryOperator::Subtract => a - b,
+                            BinaryOperator::Multiply => a * b,
+                            BinaryOperator::Divide => a / b,
+                            BinaryOperator::Modulo => a % b,
+                            _ => return Err(ConstantEvaluatorError::InvalidBinaryOpArgs),
+                        }),
                         (Literal::AbstractInt(a), Literal::AbstractInt(b)) => {
                             Literal::AbstractInt(match op {
                                 BinaryOperator::Add => a.checked_add(b).ok_or_else(|| {
@@ -2532,6 +2585,32 @@ impl TryFromAbstract<f64> for u64 {
     }
 }
 
+impl TryFromAbstract<f64> for f16 {
+    fn try_from_abstract(value: f64) -> Result<f16, ConstantEvaluatorError> {
+        let f = f16::from_f64(value);
+        if f.is_infinite() {
+            return Err(ConstantEvaluatorError::AutomaticConversionLossy {
+                value: format!("{value:?}"),
+                to_type: "f16",
+            });
+        }
+        Ok(f)
+    }
+}
+
+impl TryFromAbstract<i64> for f16 {
+    fn try_from_abstract(value: i64) -> Result<f16, ConstantEvaluatorError> {
+        let f = f16::from_i64(value);
+        if f.is_none() {
+            return Err(ConstantEvaluatorError::AutomaticConversionLossy {
+                value: format!("{value:?}"),
+                to_type: "f16",
+            });
+        }
+        Ok(f.unwrap())
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use std::vec;
diff --git a/naga/src/proc/mod.rs b/naga/src/proc/mod.rs
index a6a19f70ed..ef095cff39 100644
--- a/naga/src/proc/mod.rs
+++ b/naga/src/proc/mod.rs
@@ -79,6 +79,7 @@ impl From<super::StorageFormat> for super::Scalar {
 pub enum HashableLiteral {
     F64(u64),
     F32(u32),
+    F16(u16),
     U32(u32),
     I32(i32),
     U64(u64),
@@ -93,6 +94,7 @@ impl From<crate::Literal> for HashableLiteral {
         match l {
             crate::Literal::F64(v) => Self::F64(v.to_bits()),
             crate::Literal::F32(v) => Self::F32(v.to_bits()),
+            crate::Literal::F16(v) => Self::F16(v.to_bits()),
             crate::Literal::U32(v) => Self::U32(v),
             crate::Literal::I32(v) => Self::I32(v),
             crate::Literal::U64(v) => Self::U64(v),
@@ -131,6 +133,7 @@ impl crate::Literal {
         match *self {
             Self::F64(_) | Self::I64(_) | Self::U64(_) => 8,
             Self::F32(_) | Self::U32(_) | Self::I32(_) => 4,
+            Self::F16(_) => 2,
             Self::Bool(_) => crate::BOOL_WIDTH,
             Self::AbstractInt(_) | Self::AbstractFloat(_) => crate::ABSTRACT_WIDTH,
         }
@@ -139,6 +142,7 @@ impl crate::Literal {
         match *self {
             Self::F64(_) => crate::Scalar::F64,
             Self::F32(_) => crate::Scalar::F32,
+            Self::F16(_) => crate::Scalar::F16,
             Self::U32(_) => crate::Scalar::U32,
             Self::I32(_) => crate::Scalar::I32,
             Self::U64(_) => crate::Scalar::U64,
diff --git a/naga/src/proc/type_methods.rs b/naga/src/proc/type_methods.rs
index 351f4c5368..9b44f05db5 100644
--- a/naga/src/proc/type_methods.rs
+++ b/naga/src/proc/type_methods.rs
@@ -20,6 +20,10 @@ impl crate::ScalarKind {
 }
 
 impl crate::Scalar {
+    pub const F16: Self = Self {
+        kind: crate::ScalarKind::Float,
+        width: 2,
+    };
     pub const I32: Self = Self {
         kind: crate::ScalarKind::Sint,
         width: 4,
diff --git a/naga/src/valid/expression.rs b/naga/src/valid/expression.rs
index b0c54a3df4..15e01b661c 100644
--- a/naga/src/valid/expression.rs
+++ b/naga/src/valid/expression.rs
@@ -1767,7 +1767,7 @@ impl super::Validator {
     }
 
     pub fn validate_literal(&self, literal: crate::Literal) -> Result<(), LiteralError> {
-        self.check_width(literal.scalar())?;
+        let _ = self.check_width(literal.scalar())?;
         check_literal_value(literal)?;
 
         Ok(())
diff --git a/naga/src/valid/interface.rs b/naga/src/valid/interface.rs
index a6fff61d82..3ae16b232b 100644
--- a/naga/src/valid/interface.rs
+++ b/naga/src/valid/interface.rs
@@ -1,6 +1,6 @@
 use super::{
     analyzer::{FunctionInfo, GlobalUse},
-    Capabilities, Disalignment, FunctionError, ModuleInfo,
+    Capabilities, Disalignment, FunctionError, ModuleInfo, PushConstantError,
 };
 use crate::arena::{Handle, UniqueArena};
 
@@ -39,6 +39,8 @@ pub enum GlobalVariableError {
     InitializerNotAllowed(crate::AddressSpace),
     #[error("Storage address space doesn't support write-only access")]
     StorageAddressSpaceWriteOnlyNotSupported,
+    #[error("Type is not valid for use as a push constant")]
+    InvalidPushConstantType(#[source] PushConstantError),
 }
 
 #[derive(Clone, Debug, thiserror::Error)]
@@ -569,6 +571,9 @@ impl super::Validator {
                         Capabilities::PUSH_CONSTANT,
                     ));
                 }
+                if let Err(ref err) = type_info.push_constant_compatibility {
+                    return Err(GlobalVariableError::InvalidPushConstantType(err.clone()));
+                }
                 (
                     TypeFlags::DATA
                         | TypeFlags::COPY
diff --git a/naga/src/valid/mod.rs b/naga/src/valid/mod.rs
index 8560404703..3ffed6a8e4 100644
--- a/naga/src/valid/mod.rs
+++ b/naga/src/valid/mod.rs
@@ -28,7 +28,7 @@ pub use expression::{check_literal_value, LiteralError};
 pub use expression::{ConstExpressionError, ExpressionError};
 pub use function::{CallError, FunctionError, LocalVariableError};
 pub use interface::{EntryPointError, GlobalVariableError, VaryingError};
-pub use r#type::{Disalignment, TypeError, TypeFlags, WidthError};
+pub use r#type::{Disalignment, PushConstantError, TypeError, TypeFlags, WidthError};
 
 use self::handles::InvalidHandleError;
 
@@ -158,6 +158,8 @@ bitflags::bitflags! {
         const TEXTURE_ATOMIC = 1 << 23;
         /// Support for atomic operations on 64-bit images.
         const TEXTURE_INT64_ATOMIC = 1 << 24;
+        /// Support for 16-bit floating-point types.
+        const SHADER_FLOAT16 = 1 << 25;
     }
 }
 
diff --git a/naga/src/valid/type.rs b/naga/src/valid/type.rs
index 8c6825b842..837e00e239 100644
--- a/naga/src/valid/type.rs
+++ b/naga/src/valid/type.rs
@@ -167,8 +167,16 @@ pub enum WidthError {
     Abstract,
 }
 
+#[derive(Clone, Debug, thiserror::Error)]
+#[cfg_attr(test, derive(PartialEq))]
+pub enum PushConstantError {
+    #[error("The scalar type {0:?} is not supported in push constants")]
+    InvalidScalar(crate::Scalar),
+}
+
 // Only makes sense if `flags.contains(HOST_SHAREABLE)`
 type LayoutCompatibility = Result<Alignment, (Handle<crate::Type>, Disalignment)>;
+type PushConstantCompatibility = Result<(), PushConstantError>;
 
 fn check_member_layout(
     accum: &mut LayoutCompatibility,
@@ -219,6 +227,7 @@ pub(super) struct TypeInfo {
     pub flags: TypeFlags,
     pub uniform_layout: LayoutCompatibility,
     pub storage_layout: LayoutCompatibility,
+    pub push_constant_compatibility: PushConstantCompatibility,
 }
 
 impl TypeInfo {
@@ -227,6 +236,7 @@ impl TypeInfo {
             flags: TypeFlags::empty(),
             uniform_layout: Ok(Alignment::ONE),
             storage_layout: Ok(Alignment::ONE),
+            push_constant_compatibility: Ok(()),
         }
     }
 
@@ -235,6 +245,7 @@ impl TypeInfo {
             flags,
             uniform_layout: Ok(alignment),
             storage_layout: Ok(alignment),
+            push_constant_compatibility: Ok(()),
         }
     }
 }
@@ -248,11 +259,15 @@ impl super::Validator {
         }
     }
 
-    pub(super) const fn check_width(&self, scalar: crate::Scalar) -> Result<(), WidthError> {
+    pub(super) const fn check_width(
+        &self,
+        scalar: crate::Scalar,
+    ) -> Result<PushConstantCompatibility, WidthError> {
+        let mut push_constant_compatibility = Ok(());
         let good = match scalar.kind {
             crate::ScalarKind::Bool => scalar.width == crate::BOOL_WIDTH,
-            crate::ScalarKind::Float => {
-                if scalar.width == 8 {
+            crate::ScalarKind::Float => match scalar.width {
+                8 => {
                     if !self.capabilities.contains(Capabilities::FLOAT64) {
                         return Err(WidthError::MissingCapability {
                             name: "f64",
@@ -260,10 +275,21 @@ impl super::Validator {
                         });
                     }
                     true
-                } else {
-                    scalar.width == 4
                 }
-            }
+                2 => {
+                    if !self.capabilities.contains(Capabilities::SHADER_FLOAT16) {
+                        return Err(WidthError::MissingCapability {
+                            name: "f16",
+                            flag: "FLOAT16",
+                        });
+                    }
+
+                    push_constant_compatibility = Err(PushConstantError::InvalidScalar(scalar));
+
+                    true
+                }
+                _ => scalar.width == 4,
+            },
             crate::ScalarKind::Sint => {
                 if scalar.width == 8 {
                     if !self.capabilities.contains(Capabilities::SHADER_INT64) {
@@ -295,7 +321,7 @@ impl super::Validator {
             }
         };
         if good {
-            Ok(())
+            Ok(push_constant_compatibility)
         } else {
             Err(WidthError::Invalid(scalar.kind, scalar.width))
         }
@@ -315,13 +341,13 @@ impl super::Validator {
         use crate::TypeInner as Ti;
         Ok(match gctx.types[handle].inner {
             Ti::Scalar(scalar) => {
-                self.check_width(scalar)?;
+                let push_constant_compatibility = self.check_width(scalar)?;
                 let shareable = if scalar.kind.is_numeric() {
                     TypeFlags::IO_SHAREABLE | TypeFlags::HOST_SHAREABLE
                 } else {
                     TypeFlags::empty()
                 };
-                TypeInfo::new(
+                let mut type_info = TypeInfo::new(
                     TypeFlags::DATA
                         | TypeFlags::SIZED
                         | TypeFlags::COPY
@@ -330,16 +356,18 @@ impl super::Validator {
                         | TypeFlags::CREATION_RESOLVED
                         | shareable,
                     Alignment::from_width(scalar.width),
-                )
+                );
+                type_info.push_constant_compatibility = push_constant_compatibility;
+                type_info
             }
             Ti::Vector { size, scalar } => {
-                self.check_width(scalar)?;
+                let push_constant_compatibility = self.check_width(scalar)?;
                 let shareable = if scalar.kind.is_numeric() {
                     TypeFlags::IO_SHAREABLE | TypeFlags::HOST_SHAREABLE
                 } else {
                     TypeFlags::empty()
                 };
-                TypeInfo::new(
+                let mut type_info = TypeInfo::new(
                     TypeFlags::DATA
                         | TypeFlags::SIZED
                         | TypeFlags::COPY
@@ -348,7 +376,9 @@ impl super::Validator {
                         | TypeFlags::CREATION_RESOLVED
                         | shareable,
                     Alignment::from(size) * Alignment::from_width(scalar.width),
-                )
+                );
+                type_info.push_constant_compatibility = push_constant_compatibility;
+                type_info
             }
             Ti::Matrix {
                 columns: _,
@@ -358,8 +388,8 @@ impl super::Validator {
                 if scalar.kind != crate::ScalarKind::Float {
                     return Err(TypeError::MatrixElementNotFloat);
                 }
-                self.check_width(scalar)?;
-                TypeInfo::new(
+                let push_constant_compatibility = self.check_width(scalar)?;
+                let mut type_info = TypeInfo::new(
                     TypeFlags::DATA
                         | TypeFlags::SIZED
                         | TypeFlags::COPY
@@ -368,7 +398,9 @@ impl super::Validator {
                         | TypeFlags::CONSTRUCTIBLE
                         | TypeFlags::CREATION_RESOLVED,
                     Alignment::from(rows) * Alignment::from_width(scalar.width),
-                )
+                );
+                type_info.push_constant_compatibility = push_constant_compatibility;
+                type_info
             }
             Ti::Atomic(scalar) => {
                 match scalar {
@@ -463,7 +495,7 @@ impl super::Validator {
                 // However, some cases are trivial: All our implicit base types
                 // are DATA and SIZED, so we can never return
                 // `InvalidPointerBase` or `InvalidPointerToUnsized`.
-                self.check_width(scalar)?;
+                let _ = self.check_width(scalar)?;
 
                 // `Validator::validate_function` actually checks the address
                 // space of pointer arguments explicitly before checking the
@@ -549,6 +581,7 @@ impl super::Validator {
                     flags: base_info.flags & type_info_mask,
                     uniform_layout,
                     storage_layout,
+                    push_constant_compatibility: base_info.push_constant_compatibility.clone(),
                 }
             }
             Ti::Struct { ref members, span } => {
@@ -629,6 +662,10 @@ impl super::Validator {
                         base_info.storage_layout,
                         handle,
                     );
+                    if base_info.push_constant_compatibility.is_err() {
+                        ti.push_constant_compatibility =
+                            base_info.push_constant_compatibility.clone();
+                    }
 
                     // Validate rule: If a structure member itself has a structure type S,
                     // then the number of bytes between the start of that member and
diff --git a/naga/tests/in/extra.wgsl b/naga/tests/in/extra.wgsl
index ef68f4aa80..1a7ab91ff5 100644
--- a/naga/tests/in/extra.wgsl
+++ b/naga/tests/in/extra.wgsl
@@ -1,6 +1,6 @@
 struct PushConstants {
     index: u32,
-    double: vec2<f64>,
+    double: vec2<f32>,
 }
 var<push_constant> pc: PushConstants;
 
diff --git a/naga/tests/in/f16.param.ron b/naga/tests/in/f16.param.ron
new file mode 100644
index 0000000000..4a6f36e279
--- /dev/null
+++ b/naga/tests/in/f16.param.ron
@@ -0,0 +1,24 @@
+(
+	// No GLSL support for f16
+	targets: "SPIRV | METAL | HLSL | WGSL",
+	god_mode: true,
+	spv: (
+		version: (1, 0),
+	),
+	hlsl: (
+		shader_model: V6_2,
+		binding_map: {},
+		fake_missing_bindings: true,
+		special_constants_binding: Some((space: 1, register: 0)),
+		push_constants_target: Some((space: 0, register: 0)),
+		zero_initialize_workgroup_memory: true,
+	),
+	msl: (
+		lang_version: (1, 0),
+		per_entry_point_map: {},
+		inline_samplers: [],
+		spirv_cross_compatibility: false,
+		fake_missing_bindings: true,
+		zero_initialize_workgroup_memory: true,
+	),
+)
diff --git a/naga/tests/in/f16.wgsl b/naga/tests/in/f16.wgsl
new file mode 100644
index 0000000000..dbef9aedfc
--- /dev/null
+++ b/naga/tests/in/f16.wgsl
@@ -0,0 +1,127 @@
+enable f16;
+
+var<private> private_variable: f16 = 1h;
+const constant_variable: f16 = f16(15.2);
+
+struct UniformCompatible {
+   // Other types
+   val_u32: u32,
+   val_i32: i32,
+   val_f32: f32,
+
+   // f16
+   val_f16: f16,
+   val_f16_2: vec2<f16>,
+   val_f16_3: vec3<f16>,
+   val_f16_4: vec4<f16>,
+   final_value: f16,
+
+   val_mat2x2: mat2x2<f16>,
+   val_mat2x3: mat2x3<f16>,
+   val_mat2x4: mat2x4<f16>,
+   val_mat3x2: mat3x2<f16>,
+   val_mat3x3: mat3x3<f16>,
+   val_mat3x4: mat3x4<f16>,
+   val_mat4x2: mat4x2<f16>,
+   val_mat4x3: mat4x3<f16>,
+   val_mat4x4: mat4x4<f16>,
+}
+
+struct StorageCompatible {
+   val_f16_array_2: array<f16, 2>,
+}
+
+struct LayoutTest {
+   scalar1: f16, scalar2: f16, v3: vec3<f16>, tuck_in: f16, scalar4: f16, larger: u32
+}
+
+@group(0) @binding(0)
+var<uniform> input_uniform: UniformCompatible;
+
+@group(0) @binding(1)
+var<storage> input_storage: UniformCompatible;
+
+@group(0) @binding(2)
+var<storage> input_arrays: StorageCompatible;
+
+@group(0) @binding(3)
+var<storage, read_write> output: UniformCompatible;
+
+@group(0) @binding(4)
+var<storage, read_write> output_arrays: StorageCompatible;
+
+fn f16_function(x: f16) -> f16 {
+   var val: f16 = f16(constant_variable);
+   // A number too big for f16
+   val += 1h - 33333h;
+   // Constructing an f16 from an AbstractInt
+   val += val + f16(5.);
+   // Constructing a f16 from other types and other types from f16.
+   val += f16(input_uniform.val_f32 + f32(val));
+   // Constructing a vec3<i64> from a i64
+   val += vec3<f16>(input_uniform.val_f16).z;
+
+   // Reading/writing to a uniform/storage buffer
+   output.val_f16 = input_uniform.val_f16 + input_storage.val_f16;
+   output.val_f16_2 = input_uniform.val_f16_2 + input_storage.val_f16_2;
+   output.val_f16_3 = input_uniform.val_f16_3 + input_storage.val_f16_3;
+   output.val_f16_4 = input_uniform.val_f16_4 + input_storage.val_f16_4;
+
+   output.val_mat2x2 = input_uniform.val_mat2x2 + input_storage.val_mat2x2;
+   output.val_mat2x3 = input_uniform.val_mat2x3 + input_storage.val_mat2x3;
+   output.val_mat2x4 = input_uniform.val_mat2x4 + input_storage.val_mat2x4;
+   output.val_mat3x2 = input_uniform.val_mat3x2 + input_storage.val_mat3x2;
+   output.val_mat3x3 = input_uniform.val_mat3x3 + input_storage.val_mat3x3;
+   output.val_mat3x4 = input_uniform.val_mat3x4 + input_storage.val_mat3x4;
+   output.val_mat4x2 = input_uniform.val_mat4x2 + input_storage.val_mat4x2;
+   output.val_mat4x3 = input_uniform.val_mat4x3 + input_storage.val_mat4x3;
+   output.val_mat4x4 = input_uniform.val_mat4x4 + input_storage.val_mat4x4;
+
+   output_arrays.val_f16_array_2 = input_arrays.val_f16_array_2;
+
+   // We make sure not to use 32 in these arguments, so it's clear in the results which are builtin
+   // constants based on the size of the type, and which are arguments.
+
+   // Numeric functions
+   val += abs(val);
+   val += clamp(val, val, val);
+   val += dot(vec2(val), vec2(val));
+   val += max(val, val);
+   val += min(val, val);
+   val += sign(val);
+   
+   val += f16(1.0);
+
+   // We use the shorthand aliases here to ensure the aliases
+   // work correctly.
+
+   // Cast vectors to/from f32
+   let float_vec2 = vec2f(input_uniform.val_f16_2);
+   output.val_f16_2 = vec2h(float_vec2);
+
+   let float_vec3 = vec3f(input_uniform.val_f16_3);
+   output.val_f16_3 = vec3h(float_vec3);
+
+   let float_vec4 = vec4f(input_uniform.val_f16_4);
+   output.val_f16_4 = vec4h(float_vec4);
+
+   // Cast matrices to/from f32
+   output.val_mat2x2 = mat2x2h(mat2x2f(input_uniform.val_mat2x2));
+   output.val_mat2x3 = mat2x3h(mat2x3f(input_uniform.val_mat2x3));
+   output.val_mat2x4 = mat2x4h(mat2x4f(input_uniform.val_mat2x4));
+   output.val_mat3x2 = mat3x2h(mat3x2f(input_uniform.val_mat3x2));
+   output.val_mat3x3 = mat3x3h(mat3x3f(input_uniform.val_mat3x3));
+   output.val_mat3x4 = mat3x4h(mat3x4f(input_uniform.val_mat3x4));
+   output.val_mat4x2 = mat4x2h(mat4x2f(input_uniform.val_mat4x2));
+   output.val_mat4x3 = mat4x3h(mat4x3f(input_uniform.val_mat4x3));
+   output.val_mat4x4 = mat4x4h(mat4x4f(input_uniform.val_mat4x4));
+
+   // Make sure all the variables are used.
+   return val;
+}
+
+@compute @workgroup_size(1)
+fn main() {
+   output.final_value = f16_function(2h);
+}
+
diff --git a/naga/tests/in/glsl/f16-glsl.comp b/naga/tests/in/glsl/f16-glsl.comp
new file mode 100644
index 0000000000..af8c89cb8a
--- /dev/null
+++ b/naga/tests/in/glsl/f16-glsl.comp
@@ -0,0 +1,57 @@
+#version 460
+
+#extension GL_AMD_gpu_shader_half_float: enable 
+
+layout(set = 0, binding = 0) uniform A {
+    float16_t a_1;
+    f16vec2 a_vec2;
+    f16vec3 a_vec3;
+    f16vec4 a_vec4;
+    // So the rules here are particularly nasty for any f16 matries in uniform buffers
+    // as the stride is always rounded up to 16, meaning that _every_ f16 matrix in a uniform
+    // buffer is over-aligned to what naga-ir wants.
+    // 
+    // This is https://github.com/gfx-rs/wgpu/issues/4375.
+
+    // f16mat2 a_mat2;
+    // f16mat2x3 a_mat2x3;
+    // f16mat2x4 a_mat2x4;
+    // f16mat3x2 a_mat3x2;
+    // f16mat3 a_mat3;
+    // f16mat3x4 a_mat3x4;
+    // f16mat4x2 a_mat4x2;
+    // f16mat4x3 a_mat4x3;
+    // f16mat4 a_mat4;
+};
+
+layout(set = 0, binding = 1) buffer B {
+    float16_t b_1;
+    f16vec2 b_vec2;
+    f16vec3 b_vec3;
+    f16vec4 b_vec4;
+    f16mat2 b_mat2;
+    f16mat2x3 b_mat2x3;
+    f16mat2x4 b_mat2x4;
+    f16mat3x2 b_mat3x2;
+    f16mat3 b_mat3;
+    f16mat3x4 b_mat3x4;
+    f16mat4x2 b_mat4x2;
+    f16mat4x3 b_mat4x3;
+    f16mat4 b_mat4;
+};
+
+void main() {
+    b_1 = a_1;
+    b_vec2 = a_vec2;
+    b_vec3 = a_vec3;
+    b_vec4 = a_vec4;
+    // b_mat2 = a_mat2;
+    // b_mat2x3 = a_mat2x3;
+    // b_mat2x4 = a_mat2x4;
+    // b_mat3x2 = a_mat3x2;
+    // b_mat3 = a_mat3;
+    // b_mat3x4 = a_mat3x4;
+    // b_mat4x2 = a_mat4x2;
+    // b_mat4x3 = a_mat4x3;
+    // b_mat4 = a_mat4;
+}
diff --git a/naga/tests/in/glsl/f16-glsl.param.ron b/naga/tests/in/glsl/f16-glsl.param.ron
new file mode 100644
index 0000000000..c70ada9939
--- /dev/null
+++ b/naga/tests/in/glsl/f16-glsl.param.ron
@@ -0,0 +1,3 @@
+(
+    god_mode: true,
+)
\ No newline at end of file
diff --git a/naga/tests/in/spv/f16-spv.comp b/naga/tests/in/spv/f16-spv.comp
new file mode 100644
index 0000000000..af8c89cb8a
--- /dev/null
+++ b/naga/tests/in/spv/f16-spv.comp
@@ -0,0 +1,57 @@
+#version 460
+
+#extension GL_AMD_gpu_shader_half_float: enable 
+
+layout(set = 0, binding = 0) uniform A {
+    float16_t a_1;
+    f16vec2 a_vec2;
+    f16vec3 a_vec3;
+    f16vec4 a_vec4;
+    // So the rules here are particularly nasty for any f16 matries in uniform buffers
+    // as the stride is always rounded up to 16, meaning that _every_ f16 matrix in a uniform
+    // buffer is over-aligned to what naga-ir wants.
+    // 
+    // This is https://github.com/gfx-rs/wgpu/issues/4375.
+
+    // f16mat2 a_mat2;
+    // f16mat2x3 a_mat2x3;
+    // f16mat2x4 a_mat2x4;
+    // f16mat3x2 a_mat3x2;
+    // f16mat3 a_mat3;
+    // f16mat3x4 a_mat3x4;
+    // f16mat4x2 a_mat4x2;
+    // f16mat4x3 a_mat4x3;
+    // f16mat4 a_mat4;
+};
+
+layout(set = 0, binding = 1) buffer B {
+    float16_t b_1;
+    f16vec2 b_vec2;
+    f16vec3 b_vec3;
+    f16vec4 b_vec4;
+    f16mat2 b_mat2;
+    f16mat2x3 b_mat2x3;
+    f16mat2x4 b_mat2x4;
+    f16mat3x2 b_mat3x2;
+    f16mat3 b_mat3;
+    f16mat3x4 b_mat3x4;
+    f16mat4x2 b_mat4x2;
+    f16mat4x3 b_mat4x3;
+    f16mat4 b_mat4;
+};
+
+void main() {
+    b_1 = a_1;
+    b_vec2 = a_vec2;
+    b_vec3 = a_vec3;
+    b_vec4 = a_vec4;
+    // b_mat2 = a_mat2;
+    // b_mat2x3 = a_mat2x3;
+    // b_mat2x4 = a_mat2x4;
+    // b_mat3x2 = a_mat3x2;
+    // b_mat3 = a_mat3;
+    // b_mat3x4 = a_mat3x4;
+    // b_mat4x2 = a_mat4x2;
+    // b_mat4x3 = a_mat4x3;
+    // b_mat4 = a_mat4;
+}
diff --git a/naga/tests/in/spv/f16-spv.param.ron b/naga/tests/in/spv/f16-spv.param.ron
new file mode 100644
index 0000000000..c70ada9939
--- /dev/null
+++ b/naga/tests/in/spv/f16-spv.param.ron
@@ -0,0 +1,3 @@
+(
+    god_mode: true,
+)
\ No newline at end of file
diff --git a/naga/tests/in/spv/f16-spv.spv b/naga/tests/in/spv/f16-spv.spv
new file mode 100644
index 0000000000000000000000000000000000000000..7e7f1763ef881940b76fbd21675bd01ed56055d7
GIT binary patch
literal 2268
zcmZ9M>rPWa6op4hxynuC;tdM*jy57lOpHd1rX<#Ykob3UdI6f0OQ4YY&qwf4d=MYV
zKPIm4oU=LOnPzwPUbEJoJ$t4t^pyvlyXHpSn5(#COu0TG&M|+TO6Fb7BW}>`?R@i}
zw|4#NM!ntj=iQUD=%6L;s2hki)!JUIa^7uL)?d9K+qf$*#cLC;sE>2~vZJVd3_d9x
zvC_vJ_QeYN^lARBpx+I<YRzwc`RET$FZ}so)NGyk!>F_G_d6$1S2S@(;8eG_tF=$Q
zaeAuSzIR%_(>`wbX8Www=(aCaXulf@c<tjx=b{-BieiNI6;;Qy@_&#~DL$j_R}~lI
zn0o8}vekGAC&8#a#?~^7PQ=)HhOt-OA4T0*I}qoiM+w%G8g7CQWVkY9xGpf^ul1BA
z%RO|3XWZc{toL7z5yRIlN3!Q=hNrb8=evG|JDs@TW)w5<=QVH2=odaCO8Z4jzoMXD
zVDu}-(td%_uNYIM<h`w!QI$Wfnf0x--{57wGz?=OFnVkl`+(6K!^i_R8Dq&Eqv0Nm
zLrkB;T=?jybt`=vXhI!6x^O=*bS0VTxZ6-ioY#S&<A-HBIH8Uh3+p6C_g-c?>Irqk
z@WMLr(e21g$NLF&#CaVUx<i@icweE8IA13ix*syr@%};`aehBAbU$UL<9&uY;=B$F
z-7i^H9peoz$#^#wFUx1}=IovFCh!^P8HVNGoN3@$Z@~&Out^zv@wVBUVX^H!tzQT4
zc1bF9__OkO59sHl=WN8R5wjODd}8i|n0GO+Z|gsNVtR<iVtRwd;xYN`K|c87n&DeK
zAs@cQ)CJ%6<NqDL#W&=`Cth$9@$Z@X?2cHYM(Uy#_GeG_AvgKRQBsDRnyHijgg0r-
zU4qRhp4D8EaW^YzJSU&ApssKq`Mt?|OL%hrBrh@d1jhQE)JJQ17v-}L>m|)_Ig_p5
zPchaXq&|JI^(XS_A?r`mb$YR(H%Bh|#enli2yc8--_&wfMtz*^t(eqxPX=zB1^M&_
z&O2oxhQoSE^OB4loP!+HzAOVbPFX%V;E;nD4(puhp^O}yl^mS!kqq28kL8mC4mpV7
hu+F(xWaOY9<e&%7WZ=emE}tB5$UzL}uWsPA>>v8>igW+~

literal 0
HcmV?d00001

diff --git a/naga/tests/in/spv/f16-spv.spvasm b/naga/tests/in/spv/f16-spv.spvasm
new file mode 100644
index 0000000000..806de32754
--- /dev/null
+++ b/naga/tests/in/spv/f16-spv.spvasm
@@ -0,0 +1,130 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Google Shaderc over Glslang; 11
+; Bound: 46
+; Schema: 0
+               OpCapability Shader
+               OpCapability StorageBuffer16BitAccess
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpExtension "SPV_KHR_16bit_storage"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpSource GLSL 460
+               OpSourceExtension "GL_AMD_gpu_shader_half_float"
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpName %main "main"
+               OpName %B "B"
+               OpMemberName %B 0 "b_1"
+               OpMemberName %B 1 "b_vec2"
+               OpMemberName %B 2 "b_vec3"
+               OpMemberName %B 3 "b_vec4"
+               OpMemberName %B 4 "b_mat2"
+               OpMemberName %B 5 "b_mat2x3"
+               OpMemberName %B 6 "b_mat2x4"
+               OpMemberName %B 7 "b_mat3x2"
+               OpMemberName %B 8 "b_mat3"
+               OpMemberName %B 9 "b_mat3x4"
+               OpMemberName %B 10 "b_mat4x2"
+               OpMemberName %B 11 "b_mat4x3"
+               OpMemberName %B 12 "b_mat4"
+               OpName %_ ""
+               OpName %A "A"
+               OpMemberName %A 0 "a_1"
+               OpMemberName %A 1 "a_vec2"
+               OpMemberName %A 2 "a_vec3"
+               OpMemberName %A 3 "a_vec4"
+               OpName %__0 ""
+               OpDecorate %B BufferBlock
+               OpMemberDecorate %B 0 Offset 0
+               OpMemberDecorate %B 1 Offset 4
+               OpMemberDecorate %B 2 Offset 8
+               OpMemberDecorate %B 3 Offset 16
+               OpMemberDecorate %B 4 ColMajor
+               OpMemberDecorate %B 4 MatrixStride 4
+               OpMemberDecorate %B 4 Offset 24
+               OpMemberDecorate %B 5 ColMajor
+               OpMemberDecorate %B 5 MatrixStride 8
+               OpMemberDecorate %B 5 Offset 32
+               OpMemberDecorate %B 6 ColMajor
+               OpMemberDecorate %B 6 MatrixStride 8
+               OpMemberDecorate %B 6 Offset 48
+               OpMemberDecorate %B 7 ColMajor
+               OpMemberDecorate %B 7 MatrixStride 4
+               OpMemberDecorate %B 7 Offset 64
+               OpMemberDecorate %B 8 ColMajor
+               OpMemberDecorate %B 8 MatrixStride 8
+               OpMemberDecorate %B 8 Offset 80
+               OpMemberDecorate %B 9 ColMajor
+               OpMemberDecorate %B 9 MatrixStride 8
+               OpMemberDecorate %B 9 Offset 104
+               OpMemberDecorate %B 10 ColMajor
+               OpMemberDecorate %B 10 MatrixStride 4
+               OpMemberDecorate %B 10 Offset 128
+               OpMemberDecorate %B 11 ColMajor
+               OpMemberDecorate %B 11 MatrixStride 8
+               OpMemberDecorate %B 11 Offset 144
+               OpMemberDecorate %B 12 ColMajor
+               OpMemberDecorate %B 12 MatrixStride 8
+               OpMemberDecorate %B 12 Offset 176
+               OpDecorate %_ Binding 1
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %A Block
+               OpMemberDecorate %A 0 Offset 0
+               OpMemberDecorate %A 1 Offset 4
+               OpMemberDecorate %A 2 Offset 8
+               OpMemberDecorate %A 3 Offset 16
+               OpDecorate %__0 Binding 0
+               OpDecorate %__0 DescriptorSet 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+     %v3half = OpTypeVector %half 3
+     %v4half = OpTypeVector %half 4
+ %mat2v2half = OpTypeMatrix %v2half 2
+ %mat2v3half = OpTypeMatrix %v3half 2
+ %mat2v4half = OpTypeMatrix %v4half 2
+ %mat3v2half = OpTypeMatrix %v2half 3
+ %mat3v3half = OpTypeMatrix %v3half 3
+ %mat3v4half = OpTypeMatrix %v4half 3
+ %mat4v2half = OpTypeMatrix %v2half 4
+ %mat4v3half = OpTypeMatrix %v3half 4
+ %mat4v4half = OpTypeMatrix %v4half 4
+          %B = OpTypeStruct %half %v2half %v3half %v4half %mat2v2half %mat2v3half %mat2v4half %mat3v2half %mat3v3half %mat3v4half %mat4v2half %mat4v3half %mat4v4half
+%_ptr_Uniform_B = OpTypePointer Uniform %B
+          %_ = OpVariable %_ptr_Uniform_B Uniform
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+          %A = OpTypeStruct %half %v2half %v3half %v4half
+%_ptr_Uniform_A = OpTypePointer Uniform %A
+        %__0 = OpVariable %_ptr_Uniform_A Uniform
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+      %int_3 = OpConstant %int 3
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %28 = OpAccessChain %_ptr_Uniform_half %__0 %int_0
+         %29 = OpLoad %half %28
+         %30 = OpAccessChain %_ptr_Uniform_half %_ %int_0
+               OpStore %30 %29
+         %33 = OpAccessChain %_ptr_Uniform_v2half %__0 %int_1
+         %34 = OpLoad %v2half %33
+         %35 = OpAccessChain %_ptr_Uniform_v2half %_ %int_1
+               OpStore %35 %34
+         %38 = OpAccessChain %_ptr_Uniform_v3half %__0 %int_2
+         %39 = OpLoad %v3half %38
+         %40 = OpAccessChain %_ptr_Uniform_v3half %_ %int_2
+               OpStore %40 %39
+         %43 = OpAccessChain %_ptr_Uniform_v4half %__0 %int_3
+         %44 = OpLoad %v4half %43
+         %45 = OpAccessChain %_ptr_Uniform_v4half %_ %int_3
+               OpStore %45 %44
+               OpReturn
+               OpFunctionEnd
diff --git a/naga/tests/out/hlsl/f16.hlsl b/naga/tests/out/hlsl/f16.hlsl
new file mode 100644
index 0000000000..05e2387212
--- /dev/null
+++ b/naga/tests/out/hlsl/f16.hlsl
@@ -0,0 +1,351 @@
+struct NagaConstants {
+    int first_vertex;
+    int first_instance;
+    uint other;
+};
+ConstantBuffer<NagaConstants> _NagaConstants: register(b0, space1);
+
+struct UniformCompatible {
+    uint val_u32_;
+    int val_i32_;
+    float val_f32_;
+    half val_f16_;
+    half2 val_f16_2_;
+    int _pad5_0;
+    half3 val_f16_3_;
+    half4 val_f16_4_;
+    half final_value;
+    half2 val_mat2x2__0; half2 val_mat2x2__1;
+    int _pad9_0;
+    row_major half2x3 val_mat2x3_;
+    row_major half2x4 val_mat2x4_;
+    half2 val_mat3x2__0; half2 val_mat3x2__1; half2 val_mat3x2__2;
+    int _pad12_0;
+    row_major half3x3 val_mat3x3_;
+    row_major half3x4 val_mat3x4_;
+    half2 val_mat4x2__0; half2 val_mat4x2__1; half2 val_mat4x2__2; half2 val_mat4x2__3;
+    row_major half4x3 val_mat4x3_;
+    row_major half4x4 val_mat4x4_;
+};
+
+struct StorageCompatible {
+    half val_f16_array_2_[2];
+};
+
+struct LayoutTest {
+    half scalar1_;
+    half scalar2_;
+    int _pad2_0;
+    half3 v3_;
+    half tuck_in;
+    half scalar4_;
+    uint larger;
+};
+
+static const half constant_variable = 15.203125h;
+
+static half private_variable = 1.0h;
+cbuffer input_uniform : register(b0) { UniformCompatible input_uniform; }
+ByteAddressBuffer input_storage : register(t1);
+ByteAddressBuffer input_arrays : register(t2);
+RWByteAddressBuffer output : register(u3);
+RWByteAddressBuffer output_arrays : register(u4);
+
+half2x2 GetMatval_mat2x2_OnUniformCompatible(UniformCompatible obj) {
+    return half2x2(obj.val_mat2x2__0, obj.val_mat2x2__1);
+}
+
+void SetMatval_mat2x2_OnUniformCompatible(UniformCompatible obj, half2x2 mat) {
+    obj.val_mat2x2__0 = mat[0];
+    obj.val_mat2x2__1 = mat[1];
+}
+
+void SetMatVecval_mat2x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) {
+    switch(mat_idx) {
+    case 0: { obj.val_mat2x2__0 = vec; break; }
+    case 1: { obj.val_mat2x2__1 = vec; break; }
+    }
+}
+
+void SetMatScalarval_mat2x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) {
+    switch(mat_idx) {
+    case 0: { obj.val_mat2x2__0[vec_idx] = scalar; break; }
+    case 1: { obj.val_mat2x2__1[vec_idx] = scalar; break; }
+    }
+}
+
+half3x2 GetMatval_mat3x2_OnUniformCompatible(UniformCompatible obj) {
+    return half3x2(obj.val_mat3x2__0, obj.val_mat3x2__1, obj.val_mat3x2__2);
+}
+
+void SetMatval_mat3x2_OnUniformCompatible(UniformCompatible obj, half3x2 mat) {
+    obj.val_mat3x2__0 = mat[0];
+    obj.val_mat3x2__1 = mat[1];
+    obj.val_mat3x2__2 = mat[2];
+}
+
+void SetMatVecval_mat3x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) {
+    switch(mat_idx) {
+    case 0: { obj.val_mat3x2__0 = vec; break; }
+    case 1: { obj.val_mat3x2__1 = vec; break; }
+    case 2: { obj.val_mat3x2__2 = vec; break; }
+    }
+}
+
+void SetMatScalarval_mat3x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) {
+    switch(mat_idx) {
+    case 0: { obj.val_mat3x2__0[vec_idx] = scalar; break; }
+    case 1: { obj.val_mat3x2__1[vec_idx] = scalar; break; }
+    case 2: { obj.val_mat3x2__2[vec_idx] = scalar; break; }
+    }
+}
+
+half4x2 GetMatval_mat4x2_OnUniformCompatible(UniformCompatible obj) {
+    return half4x2(obj.val_mat4x2__0, obj.val_mat4x2__1, obj.val_mat4x2__2, obj.val_mat4x2__3);
+}
+
+void SetMatval_mat4x2_OnUniformCompatible(UniformCompatible obj, half4x2 mat) {
+    obj.val_mat4x2__0 = mat[0];
+    obj.val_mat4x2__1 = mat[1];
+    obj.val_mat4x2__2 = mat[2];
+    obj.val_mat4x2__3 = mat[3];
+}
+
+void SetMatVecval_mat4x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) {
+    switch(mat_idx) {
+    case 0: { obj.val_mat4x2__0 = vec; break; }
+    case 1: { obj.val_mat4x2__1 = vec; break; }
+    case 2: { obj.val_mat4x2__2 = vec; break; }
+    case 3: { obj.val_mat4x2__3 = vec; break; }
+    }
+}
+
+void SetMatScalarval_mat4x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) {
+    switch(mat_idx) {
+    case 0: { obj.val_mat4x2__0[vec_idx] = scalar; break; }
+    case 1: { obj.val_mat4x2__1[vec_idx] = scalar; break; }
+    case 2: { obj.val_mat4x2__2[vec_idx] = scalar; break; }
+    case 3: { obj.val_mat4x2__3[vec_idx] = scalar; break; }
+    }
+}
+
+typedef half ret_Constructarray2_half_[2];
+ret_Constructarray2_half_ Constructarray2_half_(half arg0, half arg1) {
+    half ret[2] = { arg0, arg1 };
+    return ret;
+}
+
+half f16_function(half x)
+{
+    half val = 15.203125h;
+
+    half _e4 = val;
+    val = (_e4 + -33344.0h);
+    half _e6 = val;
+    half _e9 = val;
+    val = (_e9 + (_e6 + 5.0h));
+    float _e13 = input_uniform.val_f32_;
+    half _e14 = val;
+    half _e18 = val;
+    val = (_e18 + half((_e13 + float(_e14))));
+    half _e22 = input_uniform.val_f16_;
+    half _e25 = val;
+    val = (_e25 + (_e22).xxx.z);
+    half _e31 = input_uniform.val_f16_;
+    half _e34 = input_storage.Load<half>(12);
+    output.Store(12, (_e31 + _e34));
+    half2 _e40 = input_uniform.val_f16_2_;
+    half2 _e43 = input_storage.Load<half2>(16);
+    output.Store(16, (_e40 + _e43));
+    half3 _e49 = input_uniform.val_f16_3_;
+    half3 _e52 = input_storage.Load<half3>(24);
+    output.Store(24, (_e49 + _e52));
+    half4 _e58 = input_uniform.val_f16_4_;
+    half4 _e61 = input_storage.Load<half4>(32);
+    output.Store(32, (_e58 + _e61));
+    half2x2 _e67 = GetMatval_mat2x2_OnUniformCompatible(input_uniform);
+    half2x2 _e70 = half2x2(input_storage.Load<half2>(44+0), input_storage.Load<half2>(44+4));
+    {
+        half2x2 _value2 = (_e67 + _e70);
+        output.Store(44+0, _value2[0]);
+        output.Store(44+4, _value2[1]);
+    }
+    half2x3 _e76 = input_uniform.val_mat2x3_;
+    half2x3 _e79 = half2x3(input_storage.Load<half3>(56+0), input_storage.Load<half3>(56+8));
+    {
+        half2x3 _value2 = (_e76 + _e79);
+        output.Store(56+0, _value2[0]);
+        output.Store(56+8, _value2[1]);
+    }
+    half2x4 _e85 = input_uniform.val_mat2x4_;
+    half2x4 _e88 = half2x4(input_storage.Load<half4>(72+0), input_storage.Load<half4>(72+8));
+    {
+        half2x4 _value2 = (_e85 + _e88);
+        output.Store(72+0, _value2[0]);
+        output.Store(72+8, _value2[1]);
+    }
+    half3x2 _e94 = GetMatval_mat3x2_OnUniformCompatible(input_uniform);
+    half3x2 _e97 = half3x2(input_storage.Load<half2>(88+0), input_storage.Load<half2>(88+4), input_storage.Load<half2>(88+8));
+    {
+        half3x2 _value2 = (_e94 + _e97);
+        output.Store(88+0, _value2[0]);
+        output.Store(88+4, _value2[1]);
+        output.Store(88+8, _value2[2]);
+    }
+    half3x3 _e103 = input_uniform.val_mat3x3_;
+    half3x3 _e106 = half3x3(input_storage.Load<half3>(104+0), input_storage.Load<half3>(104+8), input_storage.Load<half3>(104+16));
+    {
+        half3x3 _value2 = (_e103 + _e106);
+        output.Store(104+0, _value2[0]);
+        output.Store(104+8, _value2[1]);
+        output.Store(104+16, _value2[2]);
+    }
+    half3x4 _e112 = input_uniform.val_mat3x4_;
+    half3x4 _e115 = half3x4(input_storage.Load<half4>(128+0), input_storage.Load<half4>(128+8), input_storage.Load<half4>(128+16));
+    {
+        half3x4 _value2 = (_e112 + _e115);
+        output.Store(128+0, _value2[0]);
+        output.Store(128+8, _value2[1]);
+        output.Store(128+16, _value2[2]);
+    }
+    half4x2 _e121 = GetMatval_mat4x2_OnUniformCompatible(input_uniform);
+    half4x2 _e124 = half4x2(input_storage.Load<half2>(152+0), input_storage.Load<half2>(152+4), input_storage.Load<half2>(152+8), input_storage.Load<half2>(152+12));
+    {
+        half4x2 _value2 = (_e121 + _e124);
+        output.Store(152+0, _value2[0]);
+        output.Store(152+4, _value2[1]);
+        output.Store(152+8, _value2[2]);
+        output.Store(152+12, _value2[3]);
+    }
+    half4x3 _e130 = input_uniform.val_mat4x3_;
+    half4x3 _e133 = half4x3(input_storage.Load<half3>(168+0), input_storage.Load<half3>(168+8), input_storage.Load<half3>(168+16), input_storage.Load<half3>(168+24));
+    {
+        half4x3 _value2 = (_e130 + _e133);
+        output.Store(168+0, _value2[0]);
+        output.Store(168+8, _value2[1]);
+        output.Store(168+16, _value2[2]);
+        output.Store(168+24, _value2[3]);
+    }
+    half4x4 _e139 = input_uniform.val_mat4x4_;
+    half4x4 _e142 = half4x4(input_storage.Load<half4>(200+0), input_storage.Load<half4>(200+8), input_storage.Load<half4>(200+16), input_storage.Load<half4>(200+24));
+    {
+        half4x4 _value2 = (_e139 + _e142);
+        output.Store(200+0, _value2[0]);
+        output.Store(200+8, _value2[1]);
+        output.Store(200+16, _value2[2]);
+        output.Store(200+24, _value2[3]);
+    }
+    half _e148[2] = Constructarray2_half_(input_arrays.Load<half>(0+0), input_arrays.Load<half>(0+2));
+    {
+        half _value2[2] = _e148;
+        output_arrays.Store(0+0, _value2[0]);
+        output_arrays.Store(0+2, _value2[1]);
+    }
+    half _e149 = val;
+    half _e151 = val;
+    val = (_e151 + abs(_e149));
+    half _e153 = val;
+    half _e154 = val;
+    half _e155 = val;
+    half _e157 = val;
+    val = (_e157 + clamp(_e153, _e154, _e155));
+    half _e159 = val;
+    half _e161 = val;
+    half _e164 = val;
+    val = (_e164 + dot((_e159).xx, (_e161).xx));
+    half _e166 = val;
+    half _e167 = val;
+    half _e169 = val;
+    val = (_e169 + max(_e166, _e167));
+    half _e171 = val;
+    half _e172 = val;
+    half _e174 = val;
+    val = (_e174 + min(_e171, _e172));
+    half _e176 = val;
+    half _e178 = val;
+    val = (_e178 + sign(_e176));
+    half _e181 = val;
+    val = (_e181 + 1.0h);
+    half2 _e185 = input_uniform.val_f16_2_;
+    float2 float_vec2_ = float2(_e185);
+    output.Store(16, half2(float_vec2_));
+    half3 _e192 = input_uniform.val_f16_3_;
+    float3 float_vec3_ = float3(_e192);
+    output.Store(24, half3(float_vec3_));
+    half4 _e199 = input_uniform.val_f16_4_;
+    float4 float_vec4_ = float4(_e199);
+    output.Store(32, half4(float_vec4_));
+    half2x2 _e208 = GetMatval_mat2x2_OnUniformCompatible(input_uniform);
+    {
+        half2x2 _value2 = half2x2(float2x2(_e208));
+        output.Store(44+0, _value2[0]);
+        output.Store(44+4, _value2[1]);
+    }
+    half2x3 _e215 = input_uniform.val_mat2x3_;
+    {
+        half2x3 _value2 = half2x3(float2x3(_e215));
+        output.Store(56+0, _value2[0]);
+        output.Store(56+8, _value2[1]);
+    }
+    half2x4 _e222 = input_uniform.val_mat2x4_;
+    {
+        half2x4 _value2 = half2x4(float2x4(_e222));
+        output.Store(72+0, _value2[0]);
+        output.Store(72+8, _value2[1]);
+    }
+    half3x2 _e229 = GetMatval_mat3x2_OnUniformCompatible(input_uniform);
+    {
+        half3x2 _value2 = half3x2(float3x2(_e229));
+        output.Store(88+0, _value2[0]);
+        output.Store(88+4, _value2[1]);
+        output.Store(88+8, _value2[2]);
+    }
+    half3x3 _e236 = input_uniform.val_mat3x3_;
+    {
+        half3x3 _value2 = half3x3(float3x3(_e236));
+        output.Store(104+0, _value2[0]);
+        output.Store(104+8, _value2[1]);
+        output.Store(104+16, _value2[2]);
+    }
+    half3x4 _e243 = input_uniform.val_mat3x4_;
+    {
+        half3x4 _value2 = half3x4(float3x4(_e243));
+        output.Store(128+0, _value2[0]);
+        output.Store(128+8, _value2[1]);
+        output.Store(128+16, _value2[2]);
+    }
+    half4x2 _e250 = GetMatval_mat4x2_OnUniformCompatible(input_uniform);
+    {
+        half4x2 _value2 = half4x2(float4x2(_e250));
+        output.Store(152+0, _value2[0]);
+        output.Store(152+4, _value2[1]);
+        output.Store(152+8, _value2[2]);
+        output.Store(152+12, _value2[3]);
+    }
+    half4x3 _e257 = input_uniform.val_mat4x3_;
+    {
+        half4x3 _value2 = half4x3(float4x3(_e257));
+        output.Store(168+0, _value2[0]);
+        output.Store(168+8, _value2[1]);
+        output.Store(168+16, _value2[2]);
+        output.Store(168+24, _value2[3]);
+    }
+    half4x4 _e264 = input_uniform.val_mat4x4_;
+    {
+        half4x4 _value2 = half4x4(float4x4(_e264));
+        output.Store(200+0, _value2[0]);
+        output.Store(200+8, _value2[1]);
+        output.Store(200+16, _value2[2]);
+        output.Store(200+24, _value2[3]);
+    }
+    half _e267 = val;
+    return _e267;
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    const half _e3 = f16_function(2.0h);
+    output.Store(40, _e3);
+    return;
+}
diff --git a/naga/tests/out/hlsl/f16.ron b/naga/tests/out/hlsl/f16.ron
new file mode 100644
index 0000000000..b396a4626e
--- /dev/null
+++ b/naga/tests/out/hlsl/f16.ron
@@ -0,0 +1,12 @@
+(
+    vertex:[
+    ],
+    fragment:[
+    ],
+    compute:[
+        (
+            entry_point:"main",
+            target_profile:"cs_6_2",
+        ),
+    ],
+)
diff --git a/naga/tests/out/msl/extra.msl b/naga/tests/out/msl/extra.msl
index 8288dfad92..4d6bb568f3 100644
--- a/naga/tests/out/msl/extra.msl
+++ b/naga/tests/out/msl/extra.msl
@@ -6,7 +6,7 @@ using metal::uint;
 
 struct PushConstants {
     uint index;
-    char _pad1[12];
+    char _pad1[4];
     metal::float2 double_;
 };
 struct FragmentIn {
diff --git a/naga/tests/out/msl/f16.msl b/naga/tests/out/msl/f16.msl
new file mode 100644
index 0000000000..40be58e901
--- /dev/null
+++ b/naga/tests/out/msl/f16.msl
@@ -0,0 +1,177 @@
+// language: metal1.0
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using metal::uint;
+
+struct UniformCompatible {
+    uint val_u32_;
+    int val_i32_;
+    float val_f32_;
+    half val_f16_;
+    char _pad4[2];
+    metal::half2 val_f16_2_;
+    char _pad5[4];
+    metal::half3 val_f16_3_;
+    metal::half4 val_f16_4_;
+    half final_value;
+    char _pad8[2];
+    metal::half2x2 val_mat2x2_;
+    char _pad9[4];
+    metal::half2x3 val_mat2x3_;
+    metal::half2x4 val_mat2x4_;
+    metal::half3x2 val_mat3x2_;
+    char _pad12[4];
+    metal::half3x3 val_mat3x3_;
+    metal::half3x4 val_mat3x4_;
+    metal::half4x2 val_mat4x2_;
+    metal::half4x3 val_mat4x3_;
+    metal::half4x4 val_mat4x4_;
+};
+struct type_16 {
+    half inner[2];
+};
+struct StorageCompatible {
+    type_16 val_f16_array_2_;
+};
+struct LayoutTest {
+    half scalar1_;
+    half scalar2_;
+    char _pad2[4];
+    metal::packed_half3 v3_;
+    half tuck_in;
+    half scalar4_;
+    char _pad5[2];
+    uint larger;
+};
+constant half constant_variable = 15.203125h;
+
+half f16_function(
+    half x,
+    constant UniformCompatible& input_uniform,
+    device UniformCompatible const& input_storage,
+    device StorageCompatible const& input_arrays,
+    device UniformCompatible& output,
+    device StorageCompatible& output_arrays
+) {
+    half val = 15.203125h;
+    half _e4 = val;
+    val = _e4 + -33344.0h;
+    half _e6 = val;
+    half _e9 = val;
+    val = _e9 + (_e6 + 5.0h);
+    float _e13 = input_uniform.val_f32_;
+    half _e14 = val;
+    half _e18 = val;
+    val = _e18 + static_cast<half>(_e13 + static_cast<float>(_e14));
+    half _e22 = input_uniform.val_f16_;
+    half _e25 = val;
+    val = _e25 + metal::half3(_e22).z;
+    half _e31 = input_uniform.val_f16_;
+    half _e34 = input_storage.val_f16_;
+    output.val_f16_ = _e31 + _e34;
+    metal::half2 _e40 = input_uniform.val_f16_2_;
+    metal::half2 _e43 = input_storage.val_f16_2_;
+    output.val_f16_2_ = _e40 + _e43;
+    metal::half3 _e49 = input_uniform.val_f16_3_;
+    metal::half3 _e52 = input_storage.val_f16_3_;
+    output.val_f16_3_ = _e49 + _e52;
+    metal::half4 _e58 = input_uniform.val_f16_4_;
+    metal::half4 _e61 = input_storage.val_f16_4_;
+    output.val_f16_4_ = _e58 + _e61;
+    metal::half2x2 _e67 = input_uniform.val_mat2x2_;
+    metal::half2x2 _e70 = input_storage.val_mat2x2_;
+    output.val_mat2x2_ = _e67 + _e70;
+    metal::half2x3 _e76 = input_uniform.val_mat2x3_;
+    metal::half2x3 _e79 = input_storage.val_mat2x3_;
+    output.val_mat2x3_ = _e76 + _e79;
+    metal::half2x4 _e85 = input_uniform.val_mat2x4_;
+    metal::half2x4 _e88 = input_storage.val_mat2x4_;
+    output.val_mat2x4_ = _e85 + _e88;
+    metal::half3x2 _e94 = input_uniform.val_mat3x2_;
+    metal::half3x2 _e97 = input_storage.val_mat3x2_;
+    output.val_mat3x2_ = _e94 + _e97;
+    metal::half3x3 _e103 = input_uniform.val_mat3x3_;
+    metal::half3x3 _e106 = input_storage.val_mat3x3_;
+    output.val_mat3x3_ = _e103 + _e106;
+    metal::half3x4 _e112 = input_uniform.val_mat3x4_;
+    metal::half3x4 _e115 = input_storage.val_mat3x4_;
+    output.val_mat3x4_ = _e112 + _e115;
+    metal::half4x2 _e121 = input_uniform.val_mat4x2_;
+    metal::half4x2 _e124 = input_storage.val_mat4x2_;
+    output.val_mat4x2_ = _e121 + _e124;
+    metal::half4x3 _e130 = input_uniform.val_mat4x3_;
+    metal::half4x3 _e133 = input_storage.val_mat4x3_;
+    output.val_mat4x3_ = _e130 + _e133;
+    metal::half4x4 _e139 = input_uniform.val_mat4x4_;
+    metal::half4x4 _e142 = input_storage.val_mat4x4_;
+    output.val_mat4x4_ = _e139 + _e142;
+    type_16 _e148 = input_arrays.val_f16_array_2_;
+    output_arrays.val_f16_array_2_ = _e148;
+    half _e149 = val;
+    half _e151 = val;
+    val = _e151 + metal::abs(_e149);
+    half _e153 = val;
+    half _e154 = val;
+    half _e155 = val;
+    half _e157 = val;
+    val = _e157 + metal::clamp(_e153, _e154, _e155);
+    half _e159 = val;
+    half _e161 = val;
+    half _e164 = val;
+    val = _e164 + metal::dot(metal::half2(_e159), metal::half2(_e161));
+    half _e166 = val;
+    half _e167 = val;
+    half _e169 = val;
+    val = _e169 + metal::max(_e166, _e167);
+    half _e171 = val;
+    half _e172 = val;
+    half _e174 = val;
+    val = _e174 + metal::min(_e171, _e172);
+    half _e176 = val;
+    half _e178 = val;
+    val = _e178 + metal::sign(_e176);
+    half _e181 = val;
+    val = _e181 + 1.0h;
+    metal::half2 _e185 = input_uniform.val_f16_2_;
+    metal::float2 float_vec2_ = static_cast<metal::float2>(_e185);
+    output.val_f16_2_ = static_cast<metal::half2>(float_vec2_);
+    metal::half3 _e192 = input_uniform.val_f16_3_;
+    metal::float3 float_vec3_ = static_cast<metal::float3>(_e192);
+    output.val_f16_3_ = static_cast<metal::half3>(float_vec3_);
+    metal::half4 _e199 = input_uniform.val_f16_4_;
+    metal::float4 float_vec4_ = static_cast<metal::float4>(_e199);
+    output.val_f16_4_ = static_cast<metal::half4>(float_vec4_);
+    metal::half2x2 _e208 = input_uniform.val_mat2x2_;
+    output.val_mat2x2_ = metal::half2x2(metal::float2x2(_e208));
+    metal::half2x3 _e215 = input_uniform.val_mat2x3_;
+    output.val_mat2x3_ = metal::half2x3(metal::float2x3(_e215));
+    metal::half2x4 _e222 = input_uniform.val_mat2x4_;
+    output.val_mat2x4_ = metal::half2x4(metal::float2x4(_e222));
+    metal::half3x2 _e229 = input_uniform.val_mat3x2_;
+    output.val_mat3x2_ = metal::half3x2(metal::float3x2(_e229));
+    metal::half3x3 _e236 = input_uniform.val_mat3x3_;
+    output.val_mat3x3_ = metal::half3x3(metal::float3x3(_e236));
+    metal::half3x4 _e243 = input_uniform.val_mat3x4_;
+    output.val_mat3x4_ = metal::half3x4(metal::float3x4(_e243));
+    metal::half4x2 _e250 = input_uniform.val_mat4x2_;
+    output.val_mat4x2_ = metal::half4x2(metal::float4x2(_e250));
+    metal::half4x3 _e257 = input_uniform.val_mat4x3_;
+    output.val_mat4x3_ = metal::half4x3(metal::float4x3(_e257));
+    metal::half4x4 _e264 = input_uniform.val_mat4x4_;
+    output.val_mat4x4_ = metal::half4x4(metal::float4x4(_e264));
+    half _e267 = val;
+    return _e267;
+}
+
+kernel void main_(
+  constant UniformCompatible& input_uniform [[user(fake0)]]
+, device UniformCompatible const& input_storage [[user(fake0)]]
+, device StorageCompatible const& input_arrays [[user(fake0)]]
+, device UniformCompatible& output [[user(fake0)]]
+, device StorageCompatible& output_arrays [[user(fake0)]]
+) {
+    half _e3 = f16_function(2.0h, input_uniform, input_storage, input_arrays, output, output_arrays);
+    output.final_value = _e3;
+    return;
+}
diff --git a/naga/tests/out/spv/atomicCompareExchange-int64.spvasm b/naga/tests/out/spv/atomicCompareExchange-int64.spvasm
index f174ad3b38..7fa0c3f0de 100644
--- a/naga/tests/out/spv/atomicCompareExchange-int64.spvasm
+++ b/naga/tests/out/spv/atomicCompareExchange-int64.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.0
 ; Generator: rspirv
-; Bound: 125
+; Bound: 123
 OpCapability Shader
 OpCapability Int64Atomics
 OpCapability Int64
@@ -9,9 +9,9 @@ OpExtension "SPV_KHR_storage_buffer_storage_class"
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
 OpEntryPoint GLCompute %19 "test_atomic_compare_exchange_i64"
-OpEntryPoint GLCompute %77 "test_atomic_compare_exchange_u64"
+OpEntryPoint GLCompute %76 "test_atomic_compare_exchange_u64"
 OpExecutionMode %19 LocalSize 1 1 1
-OpExecutionMode %77 LocalSize 1 1 1
+OpExecutionMode %76 LocalSize 1 1 1
 OpDecorate %5 ArrayStride 8
 OpDecorate %8 ArrayStride 8
 OpMemberDecorate %10 0 Offset 0
@@ -57,12 +57,12 @@ OpMemberDecorate %16 0 Offset 0
 %51 = OpTypeInt 32 1
 %50 = OpConstant  %51  1
 %52 = OpConstant  %3  64
-%78 = OpTypePointer StorageBuffer %8
-%80 = OpConstant  %7  10
-%83 = OpTypePointer Function %7
-%84 = OpConstantNull  %7
-%86 = OpConstantNull  %9
-%99 = OpTypePointer StorageBuffer %7
+%77 = OpTypePointer StorageBuffer %8
+%79 = OpConstant  %7  10
+%82 = OpTypePointer Function %7
+%83 = OpConstantNull  %7
+%85 = OpConstantNull  %9
+%98 = OpTypePointer StorageBuffer %7
 %19 = OpFunction  %2  None %20
 %18 = OpLabel
 %27 = OpVariable  %28  Function %22
@@ -106,16 +106,16 @@ OpBranch %61
 %61 = OpLabel
 %63 = OpLoad  %4  %29
 %64 = OpIAdd  %4  %63 %25
-%66 = OpLoad  %3  %27
-%67 = OpLoad  %4  %29
-%69 = OpAccessChain  %47  %23 %66
-%70 = OpAtomicCompareExchange  %4  %69 %50 %52 %52 %64 %67
-%71 = OpIEqual  %9  %70 %67
-%68 = OpCompositeConstruct  %10  %70 %71
-%72 = OpCompositeExtract  %4  %68 0
-OpStore %29 %72
-%73 = OpCompositeExtract  %9  %68 1
-OpStore %32 %73
+%65 = OpLoad  %3  %27
+%66 = OpLoad  %4  %29
+%68 = OpAccessChain  %47  %23 %65
+%69 = OpAtomicCompareExchange  %4  %68 %50 %52 %52 %64 %66
+%70 = OpIEqual  %9  %69 %66
+%67 = OpCompositeConstruct  %10  %69 %70
+%71 = OpCompositeExtract  %4  %67 0
+OpStore %29 %71
+%72 = OpCompositeExtract  %9  %67 1
+OpStore %32 %72
 OpBranch %62
 %62 = OpLabel
 OpBranch %56
@@ -126,80 +126,80 @@ OpBranch %45
 %45 = OpLabel
 OpBranch %39
 %39 = OpLabel
-%74 = OpLoad  %3  %27
-%75 = OpIAdd  %3  %74 %26
-OpStore %27 %75
+%73 = OpLoad  %3  %27
+%74 = OpIAdd  %3  %73 %26
+OpStore %27 %74
 OpBranch %36
 %37 = OpLabel
 OpReturn
 OpFunctionEnd
-%77 = OpFunction  %2  None %20
-%76 = OpLabel
-%81 = OpVariable  %28  Function %22
-%82 = OpVariable  %83  Function %84
-%85 = OpVariable  %33  Function %86
-%79 = OpAccessChain  %78  %15 %22
+%76 = OpFunction  %2  None %20
+%75 = OpLabel
+%80 = OpVariable  %28  Function %22
+%81 = OpVariable  %82  Function %83
+%84 = OpVariable  %33  Function %85
+%78 = OpAccessChain  %77  %15 %22
+OpBranch %86
+%86 = OpLabel
 OpBranch %87
 %87 = OpLabel
-OpBranch %88
-%88 = OpLabel
-OpLoopMerge %89 %91 None
-OpBranch %90
-%90 = OpLabel
-%92 = OpLoad  %3  %81
-%93 = OpULessThan  %9  %92 %6
-OpSelectionMerge %94 None
-OpBranchConditional %93 %94 %95
-%95 = OpLabel
+OpLoopMerge %88 %90 None
 OpBranch %89
+%89 = OpLabel
+%91 = OpLoad  %3  %80
+%92 = OpULessThan  %9  %91 %6
+OpSelectionMerge %93 None
+OpBranchConditional %92 %93 %94
 %94 = OpLabel
-OpBranch %96
-%96 = OpLabel
-%98 = OpLoad  %3  %81
-%100 = OpAccessChain  %99  %79 %98
-%101 = OpAtomicLoad  %7  %100 %50 %52
-OpStore %82 %101
-OpStore %85 %24
-OpBranch %102
-%102 = OpLabel
-OpLoopMerge %103 %105 None
-OpBranch %104
-%104 = OpLabel
-%106 = OpLoad  %9  %85
-%107 = OpLogicalNot  %9  %106
-OpSelectionMerge %108 None
-OpBranchConditional %107 %108 %109
-%109 = OpLabel
+OpBranch %88
+%93 = OpLabel
+OpBranch %95
+%95 = OpLabel
+%97 = OpLoad  %3  %80
+%99 = OpAccessChain  %98  %78 %97
+%100 = OpAtomicLoad  %7  %99 %50 %52
+OpStore %81 %100
+OpStore %84 %24
+OpBranch %101
+%101 = OpLabel
+OpLoopMerge %102 %104 None
 OpBranch %103
+%103 = OpLabel
+%105 = OpLoad  %9  %84
+%106 = OpLogicalNot  %9  %105
+OpSelectionMerge %107 None
+OpBranchConditional %106 %107 %108
 %108 = OpLabel
+OpBranch %102
+%107 = OpLabel
+OpBranch %109
+%109 = OpLabel
+%111 = OpLoad  %7  %81
+%112 = OpIAdd  %7  %111 %79
+%113 = OpLoad  %3  %80
+%114 = OpLoad  %7  %81
+%116 = OpAccessChain  %98  %78 %113
+%117 = OpAtomicCompareExchange  %7  %116 %50 %52 %52 %112 %114
+%118 = OpIEqual  %9  %117 %114
+%115 = OpCompositeConstruct  %11  %117 %118
+%119 = OpCompositeExtract  %7  %115 0
+OpStore %81 %119
+%120 = OpCompositeExtract  %9  %115 1
+OpStore %84 %120
 OpBranch %110
 %110 = OpLabel
-%112 = OpLoad  %7  %82
-%113 = OpIAdd  %7  %112 %80
-%115 = OpLoad  %3  %81
-%116 = OpLoad  %7  %82
-%118 = OpAccessChain  %99  %79 %115
-%119 = OpAtomicCompareExchange  %7  %118 %50 %52 %52 %113 %116
-%120 = OpIEqual  %9  %119 %116
-%117 = OpCompositeConstruct  %11  %119 %120
-%121 = OpCompositeExtract  %7  %117 0
-OpStore %82 %121
-%122 = OpCompositeExtract  %9  %117 1
-OpStore %85 %122
-OpBranch %111
-%111 = OpLabel
-OpBranch %105
-%105 = OpLabel
-OpBranch %102
-%103 = OpLabel
-OpBranch %97
-%97 = OpLabel
-OpBranch %91
-%91 = OpLabel
-%123 = OpLoad  %3  %81
-%124 = OpIAdd  %3  %123 %26
-OpStore %81 %124
-OpBranch %88
-%89 = OpLabel
+OpBranch %104
+%104 = OpLabel
+OpBranch %101
+%102 = OpLabel
+OpBranch %96
+%96 = OpLabel
+OpBranch %90
+%90 = OpLabel
+%121 = OpLoad  %3  %80
+%122 = OpIAdd  %3  %121 %26
+OpStore %80 %122
+OpBranch %87
+%88 = OpLabel
 OpReturn
 OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/spv/extra.spvasm b/naga/tests/out/spv/extra.spvasm
index 9c434a8ce2..0e84427bad 100644
--- a/naga/tests/out/spv/extra.spvasm
+++ b/naga/tests/out/spv/extra.spvasm
@@ -1,76 +1,74 @@
 ; SPIR-V
 ; Version: 1.2
 ; Generator: rspirv
-; Bound: 48
+; Bound: 47
 OpCapability Shader
-OpCapability Float64
 OpCapability Geometry
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
-OpEntryPoint Fragment %23 "main" %15 %18 %21
-OpExecutionMode %23 OriginUpperLeft
+OpEntryPoint Fragment %22 "main" %14 %17 %20
+OpExecutionMode %22 OriginUpperLeft
 OpMemberDecorate %6 0 Offset 0
-OpMemberDecorate %6 1 Offset 16
-OpMemberDecorate %9 0 Offset 0
-OpMemberDecorate %9 1 Offset 16
-OpDecorate %11 Block
-OpMemberDecorate %11 0 Offset 0
-OpDecorate %15 Location 0
-OpDecorate %18 BuiltIn PrimitiveId
-OpDecorate %18 Flat
-OpDecorate %21 Location 0
+OpMemberDecorate %6 1 Offset 8
+OpMemberDecorate %8 0 Offset 0
+OpMemberDecorate %8 1 Offset 16
+OpDecorate %10 Block
+OpMemberDecorate %10 0 Offset 0
+OpDecorate %14 Location 0
+OpDecorate %17 BuiltIn PrimitiveId
+OpDecorate %17 Flat
+OpDecorate %20 Location 0
 %2 = OpTypeVoid
 %3 = OpTypeInt 32 0
-%5 = OpTypeFloat 64
+%5 = OpTypeFloat 32
 %4 = OpTypeVector %5 2
 %6 = OpTypeStruct %3 %4
-%8 = OpTypeFloat 32
-%7 = OpTypeVector %8 4
-%9 = OpTypeStruct %7 %3
-%11 = OpTypeStruct %6
-%12 = OpTypePointer PushConstant %11
-%10 = OpVariable  %12  PushConstant
-%16 = OpTypePointer Input %7
-%15 = OpVariable  %16  Input
-%19 = OpTypePointer Input %3
-%18 = OpVariable  %19  Input
-%22 = OpTypePointer Output %7
-%21 = OpVariable  %22  Output
-%24 = OpTypeFunction %2
-%25 = OpTypePointer PushConstant %6
-%26 = OpConstant  %3  0
-%28 = OpConstant  %8  1.0
-%29 = OpTypeVector %8 3
-%30 = OpConstantComposite  %29  %28 %28 %28
-%33 = OpTypePointer PushConstant %3
-%36 = OpTypeBool
-%23 = OpFunction  %2  None %24
-%13 = OpLabel
-%17 = OpLoad  %7  %15
-%20 = OpLoad  %3  %18
-%14 = OpCompositeConstruct  %9  %17 %20
-%27 = OpAccessChain  %25  %10 %26
-OpBranch %31
-%31 = OpLabel
-%32 = OpCompositeExtract  %3  %14 1
-%34 = OpAccessChain  %33  %27 %26
-%35 = OpLoad  %3  %34
-%37 = OpIEqual  %36  %32 %35
-OpSelectionMerge %38 None
-OpBranchConditional %37 %39 %40
-%39 = OpLabel
-%41 = OpCompositeExtract  %7  %14 0
-OpStore %21 %41
+%7 = OpTypeVector %5 4
+%8 = OpTypeStruct %7 %3
+%10 = OpTypeStruct %6
+%11 = OpTypePointer PushConstant %10
+%9 = OpVariable  %11  PushConstant
+%15 = OpTypePointer Input %7
+%14 = OpVariable  %15  Input
+%18 = OpTypePointer Input %3
+%17 = OpVariable  %18  Input
+%21 = OpTypePointer Output %7
+%20 = OpVariable  %21  Output
+%23 = OpTypeFunction %2
+%24 = OpTypePointer PushConstant %6
+%25 = OpConstant  %3  0
+%27 = OpConstant  %5  1.0
+%28 = OpTypeVector %5 3
+%29 = OpConstantComposite  %28  %27 %27 %27
+%32 = OpTypePointer PushConstant %3
+%35 = OpTypeBool
+%22 = OpFunction  %2  None %23
+%12 = OpLabel
+%16 = OpLoad  %7  %14
+%19 = OpLoad  %3  %17
+%13 = OpCompositeConstruct  %8  %16 %19
+%26 = OpAccessChain  %24  %9 %25
+OpBranch %30
+%30 = OpLabel
+%31 = OpCompositeExtract  %3  %13 1
+%33 = OpAccessChain  %32  %26 %25
+%34 = OpLoad  %3  %33
+%36 = OpIEqual  %35  %31 %34
+OpSelectionMerge %37 None
+OpBranchConditional %36 %38 %39
+%38 = OpLabel
+%40 = OpCompositeExtract  %7  %13 0
+OpStore %20 %40
 OpReturn
-%40 = OpLabel
-%42 = OpCompositeExtract  %7  %14 0
-%43 = OpVectorShuffle  %29  %42 %42 0 1 2
-%44 = OpFSub  %29  %30 %43
-%45 = OpCompositeExtract  %7  %14 0
-%46 = OpCompositeExtract  %8  %45 3
-%47 = OpCompositeConstruct  %7  %44 %46
-OpStore %21 %47
+%39 = OpLabel
+%41 = OpCompositeExtract  %7  %13 0
+%42 = OpVectorShuffle  %28  %41 %41 0 1 2
+%43 = OpFSub  %28  %29 %42
+%44 = OpCompositeExtract  %7  %13 0
+%45 = OpCompositeExtract  %5  %44 3
+%46 = OpCompositeConstruct  %7  %43 %45
+OpStore %20 %46
 OpReturn
-%38 = OpLabel
+%37 = OpLabel
 OpReturn
 OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/spv/f16.spvasm b/naga/tests/out/spv/f16.spvasm
new file mode 100644
index 0000000000..3d8fcea7a7
--- /dev/null
+++ b/naga/tests/out/spv/f16.spvasm
@@ -0,0 +1,633 @@
+; SPIR-V
+; Version: 1.0
+; Generator: rspirv
+; Bound: 511
+OpCapability Shader
+OpCapability Float16
+OpCapability StorageBuffer16BitAccess
+OpCapability UniformAndStorageBuffer16BitAccess
+OpCapability StorageInputOutput16
+OpExtension "SPV_KHR_storage_buffer_storage_class"
+OpExtension "SPV_KHR_16bit_storage"
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint GLCompute %499 "main"
+OpExecutionMode %499 LocalSize 1 1 1
+OpMemberDecorate %19 0 Offset 0
+OpMemberDecorate %19 1 Offset 4
+OpMemberDecorate %19 2 Offset 8
+OpMemberDecorate %19 3 Offset 12
+OpMemberDecorate %19 4 Offset 16
+OpMemberDecorate %19 5 Offset 24
+OpMemberDecorate %19 6 Offset 32
+OpMemberDecorate %19 7 Offset 40
+OpMemberDecorate %19 8 Offset 44
+OpMemberDecorate %19 8 ColMajor
+OpMemberDecorate %19 8 MatrixStride 4
+OpMemberDecorate %19 9 Offset 56
+OpMemberDecorate %19 9 ColMajor
+OpMemberDecorate %19 9 MatrixStride 8
+OpMemberDecorate %19 10 Offset 72
+OpMemberDecorate %19 10 ColMajor
+OpMemberDecorate %19 10 MatrixStride 8
+OpMemberDecorate %19 11 Offset 88
+OpMemberDecorate %19 11 ColMajor
+OpMemberDecorate %19 11 MatrixStride 4
+OpMemberDecorate %19 12 Offset 104
+OpMemberDecorate %19 12 ColMajor
+OpMemberDecorate %19 12 MatrixStride 8
+OpMemberDecorate %19 13 Offset 128
+OpMemberDecorate %19 13 ColMajor
+OpMemberDecorate %19 13 MatrixStride 8
+OpMemberDecorate %19 14 Offset 152
+OpMemberDecorate %19 14 ColMajor
+OpMemberDecorate %19 14 MatrixStride 4
+OpMemberDecorate %19 15 Offset 168
+OpMemberDecorate %19 15 ColMajor
+OpMemberDecorate %19 15 MatrixStride 8
+OpMemberDecorate %19 16 Offset 200
+OpMemberDecorate %19 16 ColMajor
+OpMemberDecorate %19 16 MatrixStride 8
+OpDecorate %20 ArrayStride 2
+OpMemberDecorate %22 0 Offset 0
+OpMemberDecorate %23 0 Offset 0
+OpMemberDecorate %23 1 Offset 2
+OpMemberDecorate %23 2 Offset 8
+OpMemberDecorate %23 3 Offset 14
+OpMemberDecorate %23 4 Offset 16
+OpMemberDecorate %23 5 Offset 20
+OpDecorate %28 DescriptorSet 0
+OpDecorate %28 Binding 0
+OpDecorate %29 Block
+OpMemberDecorate %29 0 Offset 0
+OpDecorate %31 NonWritable
+OpDecorate %31 DescriptorSet 0
+OpDecorate %31 Binding 1
+OpDecorate %32 Block
+OpMemberDecorate %32 0 Offset 0
+OpDecorate %34 NonWritable
+OpDecorate %34 DescriptorSet 0
+OpDecorate %34 Binding 2
+OpDecorate %35 Block
+OpMemberDecorate %35 0 Offset 0
+OpDecorate %37 DescriptorSet 0
+OpDecorate %37 Binding 3
+OpDecorate %38 Block
+OpMemberDecorate %38 0 Offset 0
+OpDecorate %40 DescriptorSet 0
+OpDecorate %40 Binding 4
+OpDecorate %41 Block
+OpMemberDecorate %41 0 Offset 0
+%2 = OpTypeVoid
+%3 = OpTypeFloat 16
+%4 = OpTypeInt 32 0
+%5 = OpTypeInt 32 1
+%6 = OpTypeFloat 32
+%7 = OpTypeVector %3 2
+%8 = OpTypeVector %3 3
+%9 = OpTypeVector %3 4
+%10 = OpTypeMatrix %7 2
+%11 = OpTypeMatrix %8 2
+%12 = OpTypeMatrix %9 2
+%13 = OpTypeMatrix %7 3
+%14 = OpTypeMatrix %8 3
+%15 = OpTypeMatrix %9 3
+%16 = OpTypeMatrix %7 4
+%17 = OpTypeMatrix %8 4
+%18 = OpTypeMatrix %9 4
+%19 = OpTypeStruct %4 %5 %6 %3 %7 %8 %9 %3 %10 %11 %12 %13 %14 %15 %16 %17 %18
+%21 = OpConstant  %4  2
+%20 = OpTypeArray %3 %21
+%22 = OpTypeStruct %20
+%23 = OpTypeStruct %3 %3 %8 %3 %3 %4
+%24 = OpConstant  %3  2.1524e-41
+%25 = OpConstant  %3  2.7121e-41
+%27 = OpTypePointer Private %3
+%26 = OpVariable  %27  Private %24
+%29 = OpTypeStruct %19
+%30 = OpTypePointer Uniform %29
+%28 = OpVariable  %30  Uniform
+%32 = OpTypeStruct %19
+%33 = OpTypePointer StorageBuffer %32
+%31 = OpVariable  %33  StorageBuffer
+%35 = OpTypeStruct %22
+%36 = OpTypePointer StorageBuffer %35
+%34 = OpVariable  %36  StorageBuffer
+%38 = OpTypeStruct %19
+%39 = OpTypePointer StorageBuffer %38
+%37 = OpVariable  %39  StorageBuffer
+%41 = OpTypeStruct %22
+%42 = OpTypePointer StorageBuffer %41
+%40 = OpVariable  %42  StorageBuffer
+%46 = OpTypeFunction %3 %3
+%47 = OpTypePointer Uniform %19
+%48 = OpConstant  %4  0
+%50 = OpTypePointer StorageBuffer %19
+%52 = OpTypePointer StorageBuffer %22
+%56 = OpConstant  %3  8.8991e-41
+%57 = OpConstant  %3  2.4753e-41
+%59 = OpTypePointer Function %3
+%67 = OpTypePointer Uniform %6
+%76 = OpTypePointer Uniform %3
+%77 = OpConstant  %4  3
+%84 = OpTypePointer StorageBuffer %3
+%91 = OpTypePointer StorageBuffer %7
+%92 = OpTypePointer Uniform %7
+%93 = OpConstant  %4  4
+%100 = OpTypePointer StorageBuffer %8
+%101 = OpTypePointer Uniform %8
+%102 = OpConstant  %4  5
+%109 = OpTypePointer StorageBuffer %9
+%110 = OpTypePointer Uniform %9
+%111 = OpConstant  %4  6
+%118 = OpTypePointer StorageBuffer %10
+%119 = OpTypePointer Uniform %10
+%120 = OpConstant  %4  8
+%133 = OpTypePointer StorageBuffer %11
+%134 = OpTypePointer Uniform %11
+%135 = OpConstant  %4  9
+%148 = OpTypePointer StorageBuffer %12
+%149 = OpTypePointer Uniform %12
+%150 = OpConstant  %4  10
+%163 = OpTypePointer StorageBuffer %13
+%164 = OpTypePointer Uniform %13
+%165 = OpConstant  %4  11
+%181 = OpTypePointer StorageBuffer %14
+%182 = OpTypePointer Uniform %14
+%183 = OpConstant  %4  12
+%199 = OpTypePointer StorageBuffer %15
+%200 = OpTypePointer Uniform %15
+%201 = OpConstant  %4  13
+%217 = OpTypePointer StorageBuffer %16
+%218 = OpTypePointer Uniform %16
+%219 = OpConstant  %4  14
+%238 = OpTypePointer StorageBuffer %17
+%239 = OpTypePointer Uniform %17
+%240 = OpConstant  %4  15
+%259 = OpTypePointer StorageBuffer %18
+%260 = OpTypePointer Uniform %18
+%261 = OpConstant  %4  16
+%280 = OpTypePointer StorageBuffer %20
+%319 = OpTypeVector %6 2
+%325 = OpTypeVector %6 3
+%331 = OpTypeVector %6 4
+%337 = OpTypeMatrix %319 2
+%351 = OpTypeMatrix %325 2
+%365 = OpTypeMatrix %331 2
+%379 = OpTypeMatrix %319 3
+%397 = OpTypeMatrix %325 3
+%415 = OpTypeMatrix %331 3
+%433 = OpTypeMatrix %319 4
+%455 = OpTypeMatrix %325 4
+%477 = OpTypeMatrix %331 4
+%500 = OpTypeFunction %2
+%506 = OpConstant  %3  2.2959e-41
+%509 = OpConstant  %4  7
+%45 = OpFunction  %3  None %46
+%44 = OpFunctionParameter  %3
+%43 = OpLabel
+%58 = OpVariable  %59  Function %25
+%49 = OpAccessChain  %47  %28 %48
+%51 = OpAccessChain  %50  %31 %48
+%53 = OpAccessChain  %52  %34 %48
+%54 = OpAccessChain  %50  %37 %48
+%55 = OpAccessChain  %52  %40 %48
+OpBranch %60
+%60 = OpLabel
+%61 = OpLoad  %3  %58
+%62 = OpFAdd  %3  %61 %56
+OpStore %58 %62
+%63 = OpLoad  %3  %58
+%64 = OpFAdd  %3  %63 %57
+%65 = OpLoad  %3  %58
+%66 = OpFAdd  %3  %65 %64
+OpStore %58 %66
+%68 = OpAccessChain  %67  %49 %21
+%69 = OpLoad  %6  %68
+%70 = OpLoad  %3  %58
+%71 = OpFConvert  %6  %70
+%72 = OpFAdd  %6  %69 %71
+%73 = OpFConvert  %3  %72
+%74 = OpLoad  %3  %58
+%75 = OpFAdd  %3  %74 %73
+OpStore %58 %75
+%78 = OpAccessChain  %76  %49 %77
+%79 = OpLoad  %3  %78
+%80 = OpCompositeConstruct  %8  %79 %79 %79
+%81 = OpCompositeExtract  %3  %80 2
+%82 = OpLoad  %3  %58
+%83 = OpFAdd  %3  %82 %81
+OpStore %58 %83
+%85 = OpAccessChain  %76  %49 %77
+%86 = OpLoad  %3  %85
+%87 = OpAccessChain  %84  %51 %77
+%88 = OpLoad  %3  %87
+%89 = OpFAdd  %3  %86 %88
+%90 = OpAccessChain  %84  %54 %77
+OpStore %90 %89
+%94 = OpAccessChain  %92  %49 %93
+%95 = OpLoad  %7  %94
+%96 = OpAccessChain  %91  %51 %93
+%97 = OpLoad  %7  %96
+%98 = OpFAdd  %7  %95 %97
+%99 = OpAccessChain  %91  %54 %93
+OpStore %99 %98
+%103 = OpAccessChain  %101  %49 %102
+%104 = OpLoad  %8  %103
+%105 = OpAccessChain  %100  %51 %102
+%106 = OpLoad  %8  %105
+%107 = OpFAdd  %8  %104 %106
+%108 = OpAccessChain  %100  %54 %102
+OpStore %108 %107
+%112 = OpAccessChain  %110  %49 %111
+%113 = OpLoad  %9  %112
+%114 = OpAccessChain  %109  %51 %111
+%115 = OpLoad  %9  %114
+%116 = OpFAdd  %9  %113 %115
+%117 = OpAccessChain  %109  %54 %111
+OpStore %117 %116
+%121 = OpAccessChain  %119  %49 %120
+%122 = OpLoad  %10  %121
+%123 = OpAccessChain  %118  %51 %120
+%124 = OpLoad  %10  %123
+%126 = OpCompositeExtract  %7  %122 0
+%127 = OpCompositeExtract  %7  %124 0
+%128 = OpFAdd  %7  %126 %127
+%129 = OpCompositeExtract  %7  %122 1
+%130 = OpCompositeExtract  %7  %124 1
+%131 = OpFAdd  %7  %129 %130
+%125 = OpCompositeConstruct  %10  %128 %131
+%132 = OpAccessChain  %118  %54 %120
+OpStore %132 %125
+%136 = OpAccessChain  %134  %49 %135
+%137 = OpLoad  %11  %136
+%138 = OpAccessChain  %133  %51 %135
+%139 = OpLoad  %11  %138
+%141 = OpCompositeExtract  %8  %137 0
+%142 = OpCompositeExtract  %8  %139 0
+%143 = OpFAdd  %8  %141 %142
+%144 = OpCompositeExtract  %8  %137 1
+%145 = OpCompositeExtract  %8  %139 1
+%146 = OpFAdd  %8  %144 %145
+%140 = OpCompositeConstruct  %11  %143 %146
+%147 = OpAccessChain  %133  %54 %135
+OpStore %147 %140
+%151 = OpAccessChain  %149  %49 %150
+%152 = OpLoad  %12  %151
+%153 = OpAccessChain  %148  %51 %150
+%154 = OpLoad  %12  %153
+%156 = OpCompositeExtract  %9  %152 0
+%157 = OpCompositeExtract  %9  %154 0
+%158 = OpFAdd  %9  %156 %157
+%159 = OpCompositeExtract  %9  %152 1
+%160 = OpCompositeExtract  %9  %154 1
+%161 = OpFAdd  %9  %159 %160
+%155 = OpCompositeConstruct  %12  %158 %161
+%162 = OpAccessChain  %148  %54 %150
+OpStore %162 %155
+%166 = OpAccessChain  %164  %49 %165
+%167 = OpLoad  %13  %166
+%168 = OpAccessChain  %163  %51 %165
+%169 = OpLoad  %13  %168
+%171 = OpCompositeExtract  %7  %167 0
+%172 = OpCompositeExtract  %7  %169 0
+%173 = OpFAdd  %7  %171 %172
+%174 = OpCompositeExtract  %7  %167 1
+%175 = OpCompositeExtract  %7  %169 1
+%176 = OpFAdd  %7  %174 %175
+%177 = OpCompositeExtract  %7  %167 2
+%178 = OpCompositeExtract  %7  %169 2
+%179 = OpFAdd  %7  %177 %178
+%170 = OpCompositeConstruct  %13  %173 %176 %179
+%180 = OpAccessChain  %163  %54 %165
+OpStore %180 %170
+%184 = OpAccessChain  %182  %49 %183
+%185 = OpLoad  %14  %184
+%186 = OpAccessChain  %181  %51 %183
+%187 = OpLoad  %14  %186
+%189 = OpCompositeExtract  %8  %185 0
+%190 = OpCompositeExtract  %8  %187 0
+%191 = OpFAdd  %8  %189 %190
+%192 = OpCompositeExtract  %8  %185 1
+%193 = OpCompositeExtract  %8  %187 1
+%194 = OpFAdd  %8  %192 %193
+%195 = OpCompositeExtract  %8  %185 2
+%196 = OpCompositeExtract  %8  %187 2
+%197 = OpFAdd  %8  %195 %196
+%188 = OpCompositeConstruct  %14  %191 %194 %197
+%198 = OpAccessChain  %181  %54 %183
+OpStore %198 %188
+%202 = OpAccessChain  %200  %49 %201
+%203 = OpLoad  %15  %202
+%204 = OpAccessChain  %199  %51 %201
+%205 = OpLoad  %15  %204
+%207 = OpCompositeExtract  %9  %203 0
+%208 = OpCompositeExtract  %9  %205 0
+%209 = OpFAdd  %9  %207 %208
+%210 = OpCompositeExtract  %9  %203 1
+%211 = OpCompositeExtract  %9  %205 1
+%212 = OpFAdd  %9  %210 %211
+%213 = OpCompositeExtract  %9  %203 2
+%214 = OpCompositeExtract  %9  %205 2
+%215 = OpFAdd  %9  %213 %214
+%206 = OpCompositeConstruct  %15  %209 %212 %215
+%216 = OpAccessChain  %199  %54 %201
+OpStore %216 %206
+%220 = OpAccessChain  %218  %49 %219
+%221 = OpLoad  %16  %220
+%222 = OpAccessChain  %217  %51 %219
+%223 = OpLoad  %16  %222
+%225 = OpCompositeExtract  %7  %221 0
+%226 = OpCompositeExtract  %7  %223 0
+%227 = OpFAdd  %7  %225 %226
+%228 = OpCompositeExtract  %7  %221 1
+%229 = OpCompositeExtract  %7  %223 1
+%230 = OpFAdd  %7  %228 %229
+%231 = OpCompositeExtract  %7  %221 2
+%232 = OpCompositeExtract  %7  %223 2
+%233 = OpFAdd  %7  %231 %232
+%234 = OpCompositeExtract  %7  %221 3
+%235 = OpCompositeExtract  %7  %223 3
+%236 = OpFAdd  %7  %234 %235
+%224 = OpCompositeConstruct  %16  %227 %230 %233 %236
+%237 = OpAccessChain  %217  %54 %219
+OpStore %237 %224
+%241 = OpAccessChain  %239  %49 %240
+%242 = OpLoad  %17  %241
+%243 = OpAccessChain  %238  %51 %240
+%244 = OpLoad  %17  %243
+%246 = OpCompositeExtract  %8  %242 0
+%247 = OpCompositeExtract  %8  %244 0
+%248 = OpFAdd  %8  %246 %247
+%249 = OpCompositeExtract  %8  %242 1
+%250 = OpCompositeExtract  %8  %244 1
+%251 = OpFAdd  %8  %249 %250
+%252 = OpCompositeExtract  %8  %242 2
+%253 = OpCompositeExtract  %8  %244 2
+%254 = OpFAdd  %8  %252 %253
+%255 = OpCompositeExtract  %8  %242 3
+%256 = OpCompositeExtract  %8  %244 3
+%257 = OpFAdd  %8  %255 %256
+%245 = OpCompositeConstruct  %17  %248 %251 %254 %257
+%258 = OpAccessChain  %238  %54 %240
+OpStore %258 %245
+%262 = OpAccessChain  %260  %49 %261
+%263 = OpLoad  %18  %262
+%264 = OpAccessChain  %259  %51 %261
+%265 = OpLoad  %18  %264
+%267 = OpCompositeExtract  %9  %263 0
+%268 = OpCompositeExtract  %9  %265 0
+%269 = OpFAdd  %9  %267 %268
+%270 = OpCompositeExtract  %9  %263 1
+%271 = OpCompositeExtract  %9  %265 1
+%272 = OpFAdd  %9  %270 %271
+%273 = OpCompositeExtract  %9  %263 2
+%274 = OpCompositeExtract  %9  %265 2
+%275 = OpFAdd  %9  %273 %274
+%276 = OpCompositeExtract  %9  %263 3
+%277 = OpCompositeExtract  %9  %265 3
+%278 = OpFAdd  %9  %276 %277
+%266 = OpCompositeConstruct  %18  %269 %272 %275 %278
+%279 = OpAccessChain  %259  %54 %261
+OpStore %279 %266
+%281 = OpAccessChain  %280  %53 %48
+%282 = OpLoad  %20  %281
+%283 = OpAccessChain  %280  %55 %48
+OpStore %283 %282
+%284 = OpLoad  %3  %58
+%285 = OpExtInst  %3  %1 FAbs %284
+%286 = OpLoad  %3  %58
+%287 = OpFAdd  %3  %286 %285
+OpStore %58 %287
+%288 = OpLoad  %3  %58
+%289 = OpLoad  %3  %58
+%290 = OpLoad  %3  %58
+%291 = OpExtInst  %3  %1 FClamp %288 %289 %290
+%292 = OpLoad  %3  %58
+%293 = OpFAdd  %3  %292 %291
+OpStore %58 %293
+%294 = OpLoad  %3  %58
+%295 = OpCompositeConstruct  %7  %294 %294
+%296 = OpLoad  %3  %58
+%297 = OpCompositeConstruct  %7  %296 %296
+%298 = OpDot  %3  %295 %297
+%299 = OpLoad  %3  %58
+%300 = OpFAdd  %3  %299 %298
+OpStore %58 %300
+%301 = OpLoad  %3  %58
+%302 = OpLoad  %3  %58
+%303 = OpExtInst  %3  %1 FMax %301 %302
+%304 = OpLoad  %3  %58
+%305 = OpFAdd  %3  %304 %303
+OpStore %58 %305
+%306 = OpLoad  %3  %58
+%307 = OpLoad  %3  %58
+%308 = OpExtInst  %3  %1 FMin %306 %307
+%309 = OpLoad  %3  %58
+%310 = OpFAdd  %3  %309 %308
+OpStore %58 %310
+%311 = OpLoad  %3  %58
+%312 = OpExtInst  %3  %1 FSign %311
+%313 = OpLoad  %3  %58
+%314 = OpFAdd  %3  %313 %312
+OpStore %58 %314
+%315 = OpLoad  %3  %58
+%316 = OpFAdd  %3  %315 %24
+OpStore %58 %316
+%317 = OpAccessChain  %92  %49 %93
+%318 = OpLoad  %7  %317
+%320 = OpFConvert  %319  %318
+%321 = OpFConvert  %7  %320
+%322 = OpAccessChain  %91  %54 %93
+OpStore %322 %321
+%323 = OpAccessChain  %101  %49 %102
+%324 = OpLoad  %8  %323
+%326 = OpFConvert  %325  %324
+%327 = OpFConvert  %8  %326
+%328 = OpAccessChain  %100  %54 %102
+OpStore %328 %327
+%329 = OpAccessChain  %110  %49 %111
+%330 = OpLoad  %9  %329
+%332 = OpFConvert  %331  %330
+%333 = OpFConvert  %9  %332
+%334 = OpAccessChain  %109  %54 %111
+OpStore %334 %333
+%335 = OpAccessChain  %119  %49 %120
+%336 = OpLoad  %10  %335
+%338 = OpCompositeExtract  %7  %336 0
+%339 = OpFConvert  %319  %338
+%340 = OpCompositeExtract  %7  %336 1
+%341 = OpFConvert  %319  %340
+%342 = OpCompositeConstruct  %337  %339 %341
+%343 = OpCompositeExtract  %319  %342 0
+%344 = OpFConvert  %7  %343
+%345 = OpCompositeExtract  %319  %342 1
+%346 = OpFConvert  %7  %345
+%347 = OpCompositeConstruct  %10  %344 %346
+%348 = OpAccessChain  %118  %54 %120
+OpStore %348 %347
+%349 = OpAccessChain  %134  %49 %135
+%350 = OpLoad  %11  %349
+%352 = OpCompositeExtract  %8  %350 0
+%353 = OpFConvert  %325  %352
+%354 = OpCompositeExtract  %8  %350 1
+%355 = OpFConvert  %325  %354
+%356 = OpCompositeConstruct  %351  %353 %355
+%357 = OpCompositeExtract  %325  %356 0
+%358 = OpFConvert  %8  %357
+%359 = OpCompositeExtract  %325  %356 1
+%360 = OpFConvert  %8  %359
+%361 = OpCompositeConstruct  %11  %358 %360
+%362 = OpAccessChain  %133  %54 %135
+OpStore %362 %361
+%363 = OpAccessChain  %149  %49 %150
+%364 = OpLoad  %12  %363
+%366 = OpCompositeExtract  %9  %364 0
+%367 = OpFConvert  %331  %366
+%368 = OpCompositeExtract  %9  %364 1
+%369 = OpFConvert  %331  %368
+%370 = OpCompositeConstruct  %365  %367 %369
+%371 = OpCompositeExtract  %331  %370 0
+%372 = OpFConvert  %9  %371
+%373 = OpCompositeExtract  %331  %370 1
+%374 = OpFConvert  %9  %373
+%375 = OpCompositeConstruct  %12  %372 %374
+%376 = OpAccessChain  %148  %54 %150
+OpStore %376 %375
+%377 = OpAccessChain  %164  %49 %165
+%378 = OpLoad  %13  %377
+%380 = OpCompositeExtract  %7  %378 0
+%381 = OpFConvert  %319  %380
+%382 = OpCompositeExtract  %7  %378 1
+%383 = OpFConvert  %319  %382
+%384 = OpCompositeExtract  %7  %378 2
+%385 = OpFConvert  %319  %384
+%386 = OpCompositeConstruct  %379  %381 %383 %385
+%387 = OpCompositeExtract  %319  %386 0
+%388 = OpFConvert  %7  %387
+%389 = OpCompositeExtract  %319  %386 1
+%390 = OpFConvert  %7  %389
+%391 = OpCompositeExtract  %319  %386 2
+%392 = OpFConvert  %7  %391
+%393 = OpCompositeConstruct  %13  %388 %390 %392
+%394 = OpAccessChain  %163  %54 %165
+OpStore %394 %393
+%395 = OpAccessChain  %182  %49 %183
+%396 = OpLoad  %14  %395
+%398 = OpCompositeExtract  %8  %396 0
+%399 = OpFConvert  %325  %398
+%400 = OpCompositeExtract  %8  %396 1
+%401 = OpFConvert  %325  %400
+%402 = OpCompositeExtract  %8  %396 2
+%403 = OpFConvert  %325  %402
+%404 = OpCompositeConstruct  %397  %399 %401 %403
+%405 = OpCompositeExtract  %325  %404 0
+%406 = OpFConvert  %8  %405
+%407 = OpCompositeExtract  %325  %404 1
+%408 = OpFConvert  %8  %407
+%409 = OpCompositeExtract  %325  %404 2
+%410 = OpFConvert  %8  %409
+%411 = OpCompositeConstruct  %14  %406 %408 %410
+%412 = OpAccessChain  %181  %54 %183
+OpStore %412 %411
+%413 = OpAccessChain  %200  %49 %201
+%414 = OpLoad  %15  %413
+%416 = OpCompositeExtract  %9  %414 0
+%417 = OpFConvert  %331  %416
+%418 = OpCompositeExtract  %9  %414 1
+%419 = OpFConvert  %331  %418
+%420 = OpCompositeExtract  %9  %414 2
+%421 = OpFConvert  %331  %420
+%422 = OpCompositeConstruct  %415  %417 %419 %421
+%423 = OpCompositeExtract  %331  %422 0
+%424 = OpFConvert  %9  %423
+%425 = OpCompositeExtract  %331  %422 1
+%426 = OpFConvert  %9  %425
+%427 = OpCompositeExtract  %331  %422 2
+%428 = OpFConvert  %9  %427
+%429 = OpCompositeConstruct  %15  %424 %426 %428
+%430 = OpAccessChain  %199  %54 %201
+OpStore %430 %429
+%431 = OpAccessChain  %218  %49 %219
+%432 = OpLoad  %16  %431
+%434 = OpCompositeExtract  %7  %432 0
+%435 = OpFConvert  %319  %434
+%436 = OpCompositeExtract  %7  %432 1
+%437 = OpFConvert  %319  %436
+%438 = OpCompositeExtract  %7  %432 2
+%439 = OpFConvert  %319  %438
+%440 = OpCompositeExtract  %7  %432 3
+%441 = OpFConvert  %319  %440
+%442 = OpCompositeConstruct  %433  %435 %437 %439 %441
+%443 = OpCompositeExtract  %319  %442 0
+%444 = OpFConvert  %7  %443
+%445 = OpCompositeExtract  %319  %442 1
+%446 = OpFConvert  %7  %445
+%447 = OpCompositeExtract  %319  %442 2
+%448 = OpFConvert  %7  %447
+%449 = OpCompositeExtract  %319  %442 3
+%450 = OpFConvert  %7  %449
+%451 = OpCompositeConstruct  %16  %444 %446 %448 %450
+%452 = OpAccessChain  %217  %54 %219
+OpStore %452 %451
+%453 = OpAccessChain  %239  %49 %240
+%454 = OpLoad  %17  %453
+%456 = OpCompositeExtract  %8  %454 0
+%457 = OpFConvert  %325  %456
+%458 = OpCompositeExtract  %8  %454 1
+%459 = OpFConvert  %325  %458
+%460 = OpCompositeExtract  %8  %454 2
+%461 = OpFConvert  %325  %460
+%462 = OpCompositeExtract  %8  %454 3
+%463 = OpFConvert  %325  %462
+%464 = OpCompositeConstruct  %455  %457 %459 %461 %463
+%465 = OpCompositeExtract  %325  %464 0
+%466 = OpFConvert  %8  %465
+%467 = OpCompositeExtract  %325  %464 1
+%468 = OpFConvert  %8  %467
+%469 = OpCompositeExtract  %325  %464 2
+%470 = OpFConvert  %8  %469
+%471 = OpCompositeExtract  %325  %464 3
+%472 = OpFConvert  %8  %471
+%473 = OpCompositeConstruct  %17  %466 %468 %470 %472
+%474 = OpAccessChain  %238  %54 %240
+OpStore %474 %473
+%475 = OpAccessChain  %260  %49 %261
+%476 = OpLoad  %18  %475
+%478 = OpCompositeExtract  %9  %476 0
+%479 = OpFConvert  %331  %478
+%480 = OpCompositeExtract  %9  %476 1
+%481 = OpFConvert  %331  %480
+%482 = OpCompositeExtract  %9  %476 2
+%483 = OpFConvert  %331  %482
+%484 = OpCompositeExtract  %9  %476 3
+%485 = OpFConvert  %331  %484
+%486 = OpCompositeConstruct  %477  %479 %481 %483 %485
+%487 = OpCompositeExtract  %331  %486 0
+%488 = OpFConvert  %9  %487
+%489 = OpCompositeExtract  %331  %486 1
+%490 = OpFConvert  %9  %489
+%491 = OpCompositeExtract  %331  %486 2
+%492 = OpFConvert  %9  %491
+%493 = OpCompositeExtract  %331  %486 3
+%494 = OpFConvert  %9  %493
+%495 = OpCompositeConstruct  %18  %488 %490 %492 %494
+%496 = OpAccessChain  %259  %54 %261
+OpStore %496 %495
+%497 = OpLoad  %3  %58
+OpReturnValue %497
+OpFunctionEnd
+%499 = OpFunction  %2  None %500
+%498 = OpLabel
+%501 = OpAccessChain  %47  %28 %48
+%502 = OpAccessChain  %50  %31 %48
+%503 = OpAccessChain  %52  %34 %48
+%504 = OpAccessChain  %50  %37 %48
+%505 = OpAccessChain  %52  %40 %48
+OpBranch %507
+%507 = OpLabel
+%508 = OpFunctionCall  %3  %45 %506
+%510 = OpAccessChain  %84  %504 %509
+OpStore %510 %508
+OpReturn
+OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/spv/image.spvasm b/naga/tests/out/spv/image.spvasm
index 6b5ef8c890..8982e2bf6e 100644
--- a/naga/tests/out/spv/image.spvasm
+++ b/naga/tests/out/spv/image.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.1
 ; Generator: rspirv
-; Bound: 547
+; Bound: 546
 OpCapability Shader
 OpCapability Image1D
 OpCapability Sampled1D
@@ -10,19 +10,19 @@ OpCapability ImageQuery
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
 OpEntryPoint GLCompute %100 "main" %97
-OpEntryPoint GLCompute %191 "depth_load" %189
-OpEntryPoint Vertex %211 "queries" %209
-OpEntryPoint Vertex %263 "levels_queries" %262
-OpEntryPoint Fragment %294 "texture_sample" %293
-OpEntryPoint Fragment %440 "texture_sample_comparison" %438
-OpEntryPoint Fragment %496 "gather" %495
-OpEntryPoint Fragment %530 "depth_no_comparison" %529
+OpEntryPoint GLCompute %190 "depth_load" %188
+OpEntryPoint Vertex %210 "queries" %208
+OpEntryPoint Vertex %262 "levels_queries" %261
+OpEntryPoint Fragment %293 "texture_sample" %292
+OpEntryPoint Fragment %439 "texture_sample_comparison" %437
+OpEntryPoint Fragment %495 "gather" %494
+OpEntryPoint Fragment %529 "depth_no_comparison" %528
 OpExecutionMode %100 LocalSize 16 1 1
-OpExecutionMode %191 LocalSize 16 1 1
-OpExecutionMode %294 OriginUpperLeft
-OpExecutionMode %440 OriginUpperLeft
-OpExecutionMode %496 OriginUpperLeft
-OpExecutionMode %530 OriginUpperLeft
+OpExecutionMode %190 LocalSize 16 1 1
+OpExecutionMode %293 OriginUpperLeft
+OpExecutionMode %439 OriginUpperLeft
+OpExecutionMode %495 OriginUpperLeft
+OpExecutionMode %529 OriginUpperLeft
 %3 = OpString "image.wgsl"
 OpSource Unknown 0 %3 "@group(0) @binding(0)
 var image_mipmapped_src: texture_2d<u32>;
@@ -245,16 +245,16 @@ OpName %77 "lhs"
 OpName %78 "rhs"
 OpName %97 "local_id"
 OpName %100 "main"
-OpName %189 "local_id"
-OpName %191 "depth_load"
-OpName %211 "queries"
-OpName %263 "levels_queries"
-OpName %294 "texture_sample"
-OpName %307 "a"
-OpName %440 "texture_sample_comparison"
-OpName %445 "a"
-OpName %496 "gather"
-OpName %530 "depth_no_comparison"
+OpName %188 "local_id"
+OpName %190 "depth_load"
+OpName %210 "queries"
+OpName %262 "levels_queries"
+OpName %293 "texture_sample"
+OpName %306 "a"
+OpName %439 "texture_sample_comparison"
+OpName %444 "a"
+OpName %495 "gather"
+OpName %529 "depth_no_comparison"
 OpDecorate %32 DescriptorSet 0
 OpDecorate %32 Binding 0
 OpDecorate %34 DescriptorSet 0
@@ -303,13 +303,13 @@ OpDecorate %71 Binding 3
 OpDecorate %73 DescriptorSet 1
 OpDecorate %73 Binding 4
 OpDecorate %97 BuiltIn LocalInvocationId
-OpDecorate %189 BuiltIn LocalInvocationId
-OpDecorate %209 BuiltIn Position
-OpDecorate %262 BuiltIn Position
-OpDecorate %293 Location 0
-OpDecorate %438 Location 0
-OpDecorate %495 Location 0
-OpDecorate %529 Location 0
+OpDecorate %188 BuiltIn LocalInvocationId
+OpDecorate %208 BuiltIn Position
+OpDecorate %261 BuiltIn Position
+OpDecorate %292 Location 0
+OpDecorate %437 Location 0
+OpDecorate %494 Location 0
+OpDecorate %528 Location 0
 %2 = OpTypeVoid
 %5 = OpTypeInt 32 0
 %4 = OpTypeImage %5 2D 0 0 0 1 Unknown
@@ -401,40 +401,40 @@ OpDecorate %529 Location 0
 %112 = OpTypeVector %5 2
 %120 = OpTypeVector %5 4
 %131 = OpTypeVector %15 3
-%189 = OpVariable  %98  Input
-%210 = OpTypePointer Output %24
-%209 = OpVariable  %210  Output
-%220 = OpConstant  %5  0
-%262 = OpVariable  %210  Output
-%293 = OpVariable  %210  Output
-%300 = OpConstant  %8  0.5
-%301 = OpTypeVector %8 2
-%302 = OpConstantComposite  %301  %300 %300
-%303 = OpTypeVector %8 3
-%304 = OpConstantComposite  %303  %300 %300 %300
-%305 = OpConstant  %8  2.3
-%306 = OpConstant  %8  2.0
-%308 = OpTypePointer Function %24
-%309 = OpConstantNull  %24
-%312 = OpTypeSampledImage %16
-%317 = OpTypeSampledImage %17
-%338 = OpTypeSampledImage %19
-%399 = OpTypeSampledImage %21
-%439 = OpTypePointer Output %8
-%438 = OpVariable  %439  Output
-%446 = OpTypePointer Function %8
-%447 = OpConstantNull  %8
-%449 = OpTypeSampledImage %26
-%454 = OpTypeSampledImage %27
-%467 = OpTypeSampledImage %28
-%474 = OpConstant  %8  0.0
-%495 = OpVariable  %210  Output
-%506 = OpConstant  %5  1
-%509 = OpConstant  %5  3
-%514 = OpTypeSampledImage %4
-%517 = OpTypeVector %15 4
-%518 = OpTypeSampledImage %18
-%529 = OpVariable  %210  Output
+%188 = OpVariable  %98  Input
+%209 = OpTypePointer Output %24
+%208 = OpVariable  %209  Output
+%219 = OpConstant  %5  0
+%261 = OpVariable  %209  Output
+%292 = OpVariable  %209  Output
+%299 = OpConstant  %8  0.5
+%300 = OpTypeVector %8 2
+%301 = OpConstantComposite  %300  %299 %299
+%302 = OpTypeVector %8 3
+%303 = OpConstantComposite  %302  %299 %299 %299
+%304 = OpConstant  %8  2.3
+%305 = OpConstant  %8  2.0
+%307 = OpTypePointer Function %24
+%308 = OpConstantNull  %24
+%311 = OpTypeSampledImage %16
+%316 = OpTypeSampledImage %17
+%337 = OpTypeSampledImage %19
+%398 = OpTypeSampledImage %21
+%438 = OpTypePointer Output %8
+%437 = OpVariable  %438  Output
+%445 = OpTypePointer Function %8
+%446 = OpConstantNull  %8
+%448 = OpTypeSampledImage %26
+%453 = OpTypeSampledImage %27
+%466 = OpTypeSampledImage %28
+%473 = OpConstant  %8  0.0
+%494 = OpVariable  %209  Output
+%505 = OpConstant  %5  1
+%508 = OpConstant  %5  3
+%513 = OpTypeSampledImage %4
+%516 = OpTypeVector %15 4
+%517 = OpTypeSampledImage %18
+%528 = OpVariable  %209  Output
 %75 = OpFunction  %14  None %76
 %77 = OpFunctionParameter  %14
 %78 = OpFunctionParameter  %14
@@ -536,520 +536,520 @@ OpLine %3 34 19
 %171 = OpImageFetch  %120  %105 %170 Lod %168
 OpLine %3 35 19
 %172 = OpCompositeExtract  %5  %99 0
-%174 = OpCompositeExtract  %5  %99 2
-%175 = OpBitcast  %15  %174
-%176 = OpImageFetch  %120  %106 %172 Lod %175
+%173 = OpCompositeExtract  %5  %99 2
+%174 = OpBitcast  %15  %173
+%175 = OpImageFetch  %120  %106 %172 Lod %174
 OpLine %3 37 29
-%177 = OpCompositeExtract  %15  %117 0
-%178 = OpIAdd  %120  %121 %124
-%179 = OpIAdd  %120  %178 %125
-%180 = OpIAdd  %120  %179 %133
-%181 = OpIAdd  %120  %180 %140
+%176 = OpCompositeExtract  %15  %117 0
+%177 = OpIAdd  %120  %121 %124
+%178 = OpIAdd  %120  %177 %125
+%179 = OpIAdd  %120  %178 %133
+%180 = OpIAdd  %120  %179 %140
 OpLine %3 37 5
-OpImageWrite %107 %177 %181
+OpImageWrite %107 %176 %180
 OpLine %3 39 29
-%182 = OpCompositeExtract  %15  %117 0
-%183 = OpBitcast  %5  %182
-%184 = OpIAdd  %120  %149 %153
-%185 = OpIAdd  %120  %184 %155
-%186 = OpIAdd  %120  %185 %162
-%187 = OpIAdd  %120  %186 %171
+%181 = OpCompositeExtract  %15  %117 0
+%182 = OpBitcast  %5  %181
+%183 = OpIAdd  %120  %149 %153
+%184 = OpIAdd  %120  %183 %155
+%185 = OpIAdd  %120  %184 %162
+%186 = OpIAdd  %120  %185 %171
 OpLine %3 39 5
-OpImageWrite %107 %183 %187
+OpImageWrite %107 %182 %186
 OpReturn
 OpFunctionEnd
-%191 = OpFunction  %2  None %101
-%188 = OpLabel
-%190 = OpLoad  %13  %189
-%192 = OpLoad  %7  %36
-%193 = OpLoad  %9  %38
-%194 = OpLoad  %11  %46
-OpBranch %195
-%195 = OpLabel
+%190 = OpFunction  %2  None %101
+%187 = OpLabel
+%189 = OpLoad  %13  %188
+%191 = OpLoad  %7  %36
+%192 = OpLoad  %9  %38
+%193 = OpLoad  %11  %46
+OpBranch %194
+%194 = OpLabel
 OpLine %3 44 26
-%196 = OpImageQuerySize  %112  %193
+%195 = OpImageQuerySize  %112  %192
 OpLine %3 45 27
-%197 = OpVectorShuffle  %112  %190 %190 0 1
-%198 = OpIMul  %112  %196 %197
-%199 = OpBitcast  %14  %198
+%196 = OpVectorShuffle  %112  %189 %189 0 1
+%197 = OpIMul  %112  %195 %196
+%198 = OpBitcast  %14  %197
 OpLine %3 45 27
-%200 = OpFunctionCall  %14  %75 %199 %110
+%199 = OpFunctionCall  %14  %75 %198 %110
 OpLine %3 46 20
-%201 = OpCompositeExtract  %5  %190 2
-%202 = OpBitcast  %15  %201
-%203 = OpImageFetch  %24  %192 %200 Sample %202
-%204 = OpCompositeExtract  %8  %203 0
+%200 = OpCompositeExtract  %5  %189 2
+%201 = OpBitcast  %15  %200
+%202 = OpImageFetch  %24  %191 %199 Sample %201
+%203 = OpCompositeExtract  %8  %202 0
 OpLine %3 47 29
-%205 = OpCompositeExtract  %15  %200 0
-%206 = OpConvertFToU  %5  %204
-%207 = OpCompositeConstruct  %120  %206 %206 %206 %206
+%204 = OpCompositeExtract  %15  %199 0
+%205 = OpConvertFToU  %5  %203
+%206 = OpCompositeConstruct  %120  %205 %205 %205 %205
 OpLine %3 47 5
-OpImageWrite %194 %205 %207
+OpImageWrite %193 %204 %206
 OpReturn
 OpFunctionEnd
-%211 = OpFunction  %2  None %101
-%208 = OpLabel
-%212 = OpLoad  %16  %48
-%213 = OpLoad  %17  %50
-%214 = OpLoad  %19  %55
-%215 = OpLoad  %20  %57
-%216 = OpLoad  %21  %59
-%217 = OpLoad  %22  %61
-%218 = OpLoad  %23  %63
-OpBranch %219
-%219 = OpLabel
+%210 = OpFunction  %2  None %101
+%207 = OpLabel
+%211 = OpLoad  %16  %48
+%212 = OpLoad  %17  %50
+%213 = OpLoad  %19  %55
+%214 = OpLoad  %20  %57
+%215 = OpLoad  %21  %59
+%216 = OpLoad  %22  %61
+%217 = OpLoad  %23  %63
+OpBranch %218
+%218 = OpLabel
 OpLine %3 72 18
-%221 = OpImageQuerySizeLod  %5  %212 %220
+%220 = OpImageQuerySizeLod  %5  %211 %219
 OpLine %3 73 22
-%222 = OpBitcast  %15  %221
-%223 = OpImageQuerySizeLod  %5  %212 %222
+%221 = OpBitcast  %15  %220
+%222 = OpImageQuerySizeLod  %5  %211 %221
 OpLine %3 74 18
-%224 = OpImageQuerySizeLod  %112  %213 %220
+%223 = OpImageQuerySizeLod  %112  %212 %219
 OpLine %3 75 22
-%225 = OpImageQuerySizeLod  %112  %213 %30
+%224 = OpImageQuerySizeLod  %112  %212 %30
 OpLine %3 76 24
-%226 = OpImageQuerySizeLod  %13  %214 %220
-%227 = OpVectorShuffle  %112  %226 %226 0 1
+%225 = OpImageQuerySizeLod  %13  %213 %219
+%226 = OpVectorShuffle  %112  %225 %225 0 1
 OpLine %3 77 28
-%228 = OpImageQuerySizeLod  %13  %214 %30
-%229 = OpVectorShuffle  %112  %228 %228 0 1
+%227 = OpImageQuerySizeLod  %13  %213 %30
+%228 = OpVectorShuffle  %112  %227 %227 0 1
 OpLine %3 78 20
-%230 = OpImageQuerySizeLod  %112  %215 %220
+%229 = OpImageQuerySizeLod  %112  %214 %219
 OpLine %3 79 24
-%231 = OpImageQuerySizeLod  %112  %215 %30
+%230 = OpImageQuerySizeLod  %112  %214 %30
 OpLine %3 80 26
-%232 = OpImageQuerySizeLod  %13  %216 %220
-%233 = OpVectorShuffle  %112  %232 %232 0 0
+%231 = OpImageQuerySizeLod  %13  %215 %219
+%232 = OpVectorShuffle  %112  %231 %231 0 0
 OpLine %3 81 30
-%234 = OpImageQuerySizeLod  %13  %216 %30
-%235 = OpVectorShuffle  %112  %234 %234 0 0
+%233 = OpImageQuerySizeLod  %13  %215 %30
+%234 = OpVectorShuffle  %112  %233 %233 0 0
 OpLine %3 82 18
-%236 = OpImageQuerySizeLod  %13  %217 %220
+%235 = OpImageQuerySizeLod  %13  %216 %219
 OpLine %3 83 22
-%237 = OpImageQuerySizeLod  %13  %217 %30
+%236 = OpImageQuerySizeLod  %13  %216 %30
 OpLine %3 84 21
-%238 = OpImageQuerySize  %112  %218
+%237 = OpImageQuerySize  %112  %217
 OpLine %3 86 15
-%239 = OpCompositeExtract  %5  %224 1
-%240 = OpIAdd  %5  %221 %239
-%241 = OpCompositeExtract  %5  %225 1
-%242 = OpIAdd  %5  %240 %241
-%243 = OpCompositeExtract  %5  %227 1
-%244 = OpIAdd  %5  %242 %243
-%245 = OpCompositeExtract  %5  %229 1
-%246 = OpIAdd  %5  %244 %245
-%247 = OpCompositeExtract  %5  %230 1
-%248 = OpIAdd  %5  %246 %247
-%249 = OpCompositeExtract  %5  %231 1
-%250 = OpIAdd  %5  %248 %249
-%251 = OpCompositeExtract  %5  %233 1
-%252 = OpIAdd  %5  %250 %251
-%253 = OpCompositeExtract  %5  %235 1
-%254 = OpIAdd  %5  %252 %253
-%255 = OpCompositeExtract  %5  %236 2
-%256 = OpIAdd  %5  %254 %255
-%257 = OpCompositeExtract  %5  %237 2
-%258 = OpIAdd  %5  %256 %257
+%238 = OpCompositeExtract  %5  %223 1
+%239 = OpIAdd  %5  %220 %238
+%240 = OpCompositeExtract  %5  %224 1
+%241 = OpIAdd  %5  %239 %240
+%242 = OpCompositeExtract  %5  %226 1
+%243 = OpIAdd  %5  %241 %242
+%244 = OpCompositeExtract  %5  %228 1
+%245 = OpIAdd  %5  %243 %244
+%246 = OpCompositeExtract  %5  %229 1
+%247 = OpIAdd  %5  %245 %246
+%248 = OpCompositeExtract  %5  %230 1
+%249 = OpIAdd  %5  %247 %248
+%250 = OpCompositeExtract  %5  %232 1
+%251 = OpIAdd  %5  %249 %250
+%252 = OpCompositeExtract  %5  %234 1
+%253 = OpIAdd  %5  %251 %252
+%254 = OpCompositeExtract  %5  %235 2
+%255 = OpIAdd  %5  %253 %254
+%256 = OpCompositeExtract  %5  %236 2
+%257 = OpIAdd  %5  %255 %256
 OpLine %3 89 12
-%259 = OpConvertUToF  %8  %258
-%260 = OpCompositeConstruct  %24  %259 %259 %259 %259
-OpStore %209 %260
+%258 = OpConvertUToF  %8  %257
+%259 = OpCompositeConstruct  %24  %258 %258 %258 %258
+OpStore %208 %259
 OpReturn
 OpFunctionEnd
-%263 = OpFunction  %2  None %101
-%261 = OpLabel
-%264 = OpLoad  %17  %50
-%265 = OpLoad  %19  %55
-%266 = OpLoad  %20  %57
-%267 = OpLoad  %21  %59
-%268 = OpLoad  %22  %61
-%269 = OpLoad  %23  %63
-OpBranch %270
-%270 = OpLabel
+%262 = OpFunction  %2  None %101
+%260 = OpLabel
+%263 = OpLoad  %17  %50
+%264 = OpLoad  %19  %55
+%265 = OpLoad  %20  %57
+%266 = OpLoad  %21  %59
+%267 = OpLoad  %22  %61
+%268 = OpLoad  %23  %63
+OpBranch %269
+%269 = OpLabel
 OpLine %3 94 25
-%271 = OpImageQueryLevels  %5  %264
+%270 = OpImageQueryLevels  %5  %263
 OpLine %3 95 25
-%272 = OpImageQuerySizeLod  %13  %265 %220
-%273 = OpCompositeExtract  %5  %272 2
+%271 = OpImageQuerySizeLod  %13  %264 %219
+%272 = OpCompositeExtract  %5  %271 2
 OpLine %3 96 31
-%274 = OpImageQueryLevels  %5  %265
+%273 = OpImageQueryLevels  %5  %264
 OpLine %3 97 31
-%275 = OpImageQuerySizeLod  %13  %265 %220
-%276 = OpCompositeExtract  %5  %275 2
+%274 = OpImageQuerySizeLod  %13  %264 %219
+%275 = OpCompositeExtract  %5  %274 2
 OpLine %3 98 27
-%277 = OpImageQueryLevels  %5  %266
+%276 = OpImageQueryLevels  %5  %265
 OpLine %3 99 33
-%278 = OpImageQueryLevels  %5  %267
+%277 = OpImageQueryLevels  %5  %266
 OpLine %3 100 27
-%279 = OpImageQuerySizeLod  %13  %267 %220
-%280 = OpCompositeExtract  %5  %279 2
+%278 = OpImageQuerySizeLod  %13  %266 %219
+%279 = OpCompositeExtract  %5  %278 2
 OpLine %3 101 25
-%281 = OpImageQueryLevels  %5  %268
+%280 = OpImageQueryLevels  %5  %267
 OpLine %3 102 26
-%282 = OpImageQuerySamples  %5  %269
+%281 = OpImageQuerySamples  %5  %268
 OpLine %3 104 15
-%283 = OpIAdd  %5  %273 %280
-%284 = OpIAdd  %5  %283 %282
-%285 = OpIAdd  %5  %284 %271
-%286 = OpIAdd  %5  %285 %274
-%287 = OpIAdd  %5  %286 %281
+%282 = OpIAdd  %5  %272 %279
+%283 = OpIAdd  %5  %282 %281
+%284 = OpIAdd  %5  %283 %270
+%285 = OpIAdd  %5  %284 %273
+%286 = OpIAdd  %5  %285 %280
+%287 = OpIAdd  %5  %286 %276
 %288 = OpIAdd  %5  %287 %277
-%289 = OpIAdd  %5  %288 %278
 OpLine %3 106 12
-%290 = OpConvertUToF  %8  %289
-%291 = OpCompositeConstruct  %24  %290 %290 %290 %290
-OpStore %262 %291
+%289 = OpConvertUToF  %8  %288
+%290 = OpCompositeConstruct  %24  %289 %289 %289 %289
+OpStore %261 %290
 OpReturn
 OpFunctionEnd
-%294 = OpFunction  %2  None %101
-%292 = OpLabel
-%307 = OpVariable  %308  Function %309
-%295 = OpLoad  %16  %48
-%296 = OpLoad  %17  %50
-%297 = OpLoad  %19  %55
-%298 = OpLoad  %21  %59
-%299 = OpLoad  %25  %65
-OpBranch %310
-%310 = OpLabel
+%293 = OpFunction  %2  None %101
+%291 = OpLabel
+%306 = OpVariable  %307  Function %308
+%294 = OpLoad  %16  %48
+%295 = OpLoad  %17  %50
+%296 = OpLoad  %19  %55
+%297 = OpLoad  %21  %59
+%298 = OpLoad  %25  %65
+OpBranch %309
+%309 = OpLabel
 OpLine %3 114 14
 OpLine %3 115 15
 OpLine %3 118 5
-%311 = OpCompositeExtract  %8  %302 0
-%313 = OpSampledImage  %312  %295 %299
-%314 = OpImageSampleImplicitLod  %24  %313 %311
-%315 = OpLoad  %24  %307
-%316 = OpFAdd  %24  %315 %314
+%310 = OpCompositeExtract  %8  %301 0
+%312 = OpSampledImage  %311  %294 %298
+%313 = OpImageSampleImplicitLod  %24  %312 %310
+%314 = OpLoad  %24  %306
+%315 = OpFAdd  %24  %314 %313
 OpLine %3 118 5
-OpStore %307 %316
+OpStore %306 %315
 OpLine %3 119 5
-%318 = OpSampledImage  %317  %296 %299
-%319 = OpImageSampleImplicitLod  %24  %318 %302
-%320 = OpLoad  %24  %307
-%321 = OpFAdd  %24  %320 %319
+%317 = OpSampledImage  %316  %295 %298
+%318 = OpImageSampleImplicitLod  %24  %317 %301
+%319 = OpLoad  %24  %306
+%320 = OpFAdd  %24  %319 %318
 OpLine %3 119 5
-OpStore %307 %321
+OpStore %306 %320
 OpLine %3 120 5
-%322 = OpSampledImage  %317  %296 %299
-%323 = OpImageSampleImplicitLod  %24  %322 %302 ConstOffset %31
-%324 = OpLoad  %24  %307
-%325 = OpFAdd  %24  %324 %323
+%321 = OpSampledImage  %316  %295 %298
+%322 = OpImageSampleImplicitLod  %24  %321 %301 ConstOffset %31
+%323 = OpLoad  %24  %306
+%324 = OpFAdd  %24  %323 %322
 OpLine %3 120 5
-OpStore %307 %325
+OpStore %306 %324
 OpLine %3 121 5
-%326 = OpSampledImage  %317  %296 %299
-%327 = OpImageSampleExplicitLod  %24  %326 %302 Lod %305
-%328 = OpLoad  %24  %307
-%329 = OpFAdd  %24  %328 %327
+%325 = OpSampledImage  %316  %295 %298
+%326 = OpImageSampleExplicitLod  %24  %325 %301 Lod %304
+%327 = OpLoad  %24  %306
+%328 = OpFAdd  %24  %327 %326
 OpLine %3 121 5
-OpStore %307 %329
+OpStore %306 %328
 OpLine %3 122 5
-%330 = OpSampledImage  %317  %296 %299
-%331 = OpImageSampleExplicitLod  %24  %330 %302 Lod|ConstOffset %305 %31
-%332 = OpLoad  %24  %307
-%333 = OpFAdd  %24  %332 %331
+%329 = OpSampledImage  %316  %295 %298
+%330 = OpImageSampleExplicitLod  %24  %329 %301 Lod|ConstOffset %304 %31
+%331 = OpLoad  %24  %306
+%332 = OpFAdd  %24  %331 %330
 OpLine %3 122 5
-OpStore %307 %333
+OpStore %306 %332
 OpLine %3 123 5
-%334 = OpSampledImage  %317  %296 %299
-%335 = OpImageSampleImplicitLod  %24  %334 %302 Bias|ConstOffset %306 %31
-%336 = OpLoad  %24  %307
-%337 = OpFAdd  %24  %336 %335
+%333 = OpSampledImage  %316  %295 %298
+%334 = OpImageSampleImplicitLod  %24  %333 %301 Bias|ConstOffset %305 %31
+%335 = OpLoad  %24  %306
+%336 = OpFAdd  %24  %335 %334
 OpLine %3 123 5
-OpStore %307 %337
+OpStore %306 %336
 OpLine %3 124 5
-%339 = OpConvertUToF  %8  %220
-%340 = OpCompositeConstruct  %303  %302 %339
-%341 = OpSampledImage  %338  %297 %299
-%342 = OpImageSampleImplicitLod  %24  %341 %340
-%343 = OpLoad  %24  %307
-%344 = OpFAdd  %24  %343 %342
+%338 = OpConvertUToF  %8  %219
+%339 = OpCompositeConstruct  %302  %301 %338
+%340 = OpSampledImage  %337  %296 %298
+%341 = OpImageSampleImplicitLod  %24  %340 %339
+%342 = OpLoad  %24  %306
+%343 = OpFAdd  %24  %342 %341
 OpLine %3 124 5
-OpStore %307 %344
+OpStore %306 %343
 OpLine %3 125 5
-%345 = OpConvertUToF  %8  %220
-%346 = OpCompositeConstruct  %303  %302 %345
-%347 = OpSampledImage  %338  %297 %299
-%348 = OpImageSampleImplicitLod  %24  %347 %346 ConstOffset %31
-%349 = OpLoad  %24  %307
-%350 = OpFAdd  %24  %349 %348
+%344 = OpConvertUToF  %8  %219
+%345 = OpCompositeConstruct  %302  %301 %344
+%346 = OpSampledImage  %337  %296 %298
+%347 = OpImageSampleImplicitLod  %24  %346 %345 ConstOffset %31
+%348 = OpLoad  %24  %306
+%349 = OpFAdd  %24  %348 %347
 OpLine %3 125 5
-OpStore %307 %350
+OpStore %306 %349
 OpLine %3 126 5
-%351 = OpConvertUToF  %8  %220
-%352 = OpCompositeConstruct  %303  %302 %351
-%353 = OpSampledImage  %338  %297 %299
-%354 = OpImageSampleExplicitLod  %24  %353 %352 Lod %305
-%355 = OpLoad  %24  %307
-%356 = OpFAdd  %24  %355 %354
+%350 = OpConvertUToF  %8  %219
+%351 = OpCompositeConstruct  %302  %301 %350
+%352 = OpSampledImage  %337  %296 %298
+%353 = OpImageSampleExplicitLod  %24  %352 %351 Lod %304
+%354 = OpLoad  %24  %306
+%355 = OpFAdd  %24  %354 %353
 OpLine %3 126 5
-OpStore %307 %356
+OpStore %306 %355
 OpLine %3 127 5
-%357 = OpConvertUToF  %8  %220
-%358 = OpCompositeConstruct  %303  %302 %357
-%359 = OpSampledImage  %338  %297 %299
-%360 = OpImageSampleExplicitLod  %24  %359 %358 Lod|ConstOffset %305 %31
-%361 = OpLoad  %24  %307
-%362 = OpFAdd  %24  %361 %360
+%356 = OpConvertUToF  %8  %219
+%357 = OpCompositeConstruct  %302  %301 %356
+%358 = OpSampledImage  %337  %296 %298
+%359 = OpImageSampleExplicitLod  %24  %358 %357 Lod|ConstOffset %304 %31
+%360 = OpLoad  %24  %306
+%361 = OpFAdd  %24  %360 %359
 OpLine %3 127 5
-OpStore %307 %362
+OpStore %306 %361
 OpLine %3 128 5
-%363 = OpConvertUToF  %8  %220
-%364 = OpCompositeConstruct  %303  %302 %363
-%365 = OpSampledImage  %338  %297 %299
-%366 = OpImageSampleImplicitLod  %24  %365 %364 Bias|ConstOffset %306 %31
-%367 = OpLoad  %24  %307
-%368 = OpFAdd  %24  %367 %366
+%362 = OpConvertUToF  %8  %219
+%363 = OpCompositeConstruct  %302  %301 %362
+%364 = OpSampledImage  %337  %296 %298
+%365 = OpImageSampleImplicitLod  %24  %364 %363 Bias|ConstOffset %305 %31
+%366 = OpLoad  %24  %306
+%367 = OpFAdd  %24  %366 %365
 OpLine %3 128 5
-OpStore %307 %368
+OpStore %306 %367
 OpLine %3 129 5
-%369 = OpConvertSToF  %8  %82
-%370 = OpCompositeConstruct  %303  %302 %369
-%371 = OpSampledImage  %338  %297 %299
-%372 = OpImageSampleImplicitLod  %24  %371 %370
-%373 = OpLoad  %24  %307
-%374 = OpFAdd  %24  %373 %372
+%368 = OpConvertSToF  %8  %82
+%369 = OpCompositeConstruct  %302  %301 %368
+%370 = OpSampledImage  %337  %296 %298
+%371 = OpImageSampleImplicitLod  %24  %370 %369
+%372 = OpLoad  %24  %306
+%373 = OpFAdd  %24  %372 %371
 OpLine %3 129 5
-OpStore %307 %374
+OpStore %306 %373
 OpLine %3 130 5
-%375 = OpConvertSToF  %8  %82
-%376 = OpCompositeConstruct  %303  %302 %375
-%377 = OpSampledImage  %338  %297 %299
-%378 = OpImageSampleImplicitLod  %24  %377 %376 ConstOffset %31
-%379 = OpLoad  %24  %307
-%380 = OpFAdd  %24  %379 %378
+%374 = OpConvertSToF  %8  %82
+%375 = OpCompositeConstruct  %302  %301 %374
+%376 = OpSampledImage  %337  %296 %298
+%377 = OpImageSampleImplicitLod  %24  %376 %375 ConstOffset %31
+%378 = OpLoad  %24  %306
+%379 = OpFAdd  %24  %378 %377
 OpLine %3 130 5
-OpStore %307 %380
+OpStore %306 %379
 OpLine %3 131 5
-%381 = OpConvertSToF  %8  %82
-%382 = OpCompositeConstruct  %303  %302 %381
-%383 = OpSampledImage  %338  %297 %299
-%384 = OpImageSampleExplicitLod  %24  %383 %382 Lod %305
-%385 = OpLoad  %24  %307
-%386 = OpFAdd  %24  %385 %384
+%380 = OpConvertSToF  %8  %82
+%381 = OpCompositeConstruct  %302  %301 %380
+%382 = OpSampledImage  %337  %296 %298
+%383 = OpImageSampleExplicitLod  %24  %382 %381 Lod %304
+%384 = OpLoad  %24  %306
+%385 = OpFAdd  %24  %384 %383
 OpLine %3 131 5
-OpStore %307 %386
+OpStore %306 %385
 OpLine %3 132 5
-%387 = OpConvertSToF  %8  %82
-%388 = OpCompositeConstruct  %303  %302 %387
-%389 = OpSampledImage  %338  %297 %299
-%390 = OpImageSampleExplicitLod  %24  %389 %388 Lod|ConstOffset %305 %31
-%391 = OpLoad  %24  %307
-%392 = OpFAdd  %24  %391 %390
+%386 = OpConvertSToF  %8  %82
+%387 = OpCompositeConstruct  %302  %301 %386
+%388 = OpSampledImage  %337  %296 %298
+%389 = OpImageSampleExplicitLod  %24  %388 %387 Lod|ConstOffset %304 %31
+%390 = OpLoad  %24  %306
+%391 = OpFAdd  %24  %390 %389
 OpLine %3 132 5
-OpStore %307 %392
+OpStore %306 %391
 OpLine %3 133 5
-%393 = OpConvertSToF  %8  %82
-%394 = OpCompositeConstruct  %303  %302 %393
-%395 = OpSampledImage  %338  %297 %299
-%396 = OpImageSampleImplicitLod  %24  %395 %394 Bias|ConstOffset %306 %31
-%397 = OpLoad  %24  %307
-%398 = OpFAdd  %24  %397 %396
+%392 = OpConvertSToF  %8  %82
+%393 = OpCompositeConstruct  %302  %301 %392
+%394 = OpSampledImage  %337  %296 %298
+%395 = OpImageSampleImplicitLod  %24  %394 %393 Bias|ConstOffset %305 %31
+%396 = OpLoad  %24  %306
+%397 = OpFAdd  %24  %396 %395
 OpLine %3 133 5
-OpStore %307 %398
+OpStore %306 %397
 OpLine %3 134 5
-%400 = OpConvertUToF  %8  %220
-%401 = OpCompositeConstruct  %24  %304 %400
-%402 = OpSampledImage  %399  %298 %299
-%403 = OpImageSampleImplicitLod  %24  %402 %401
-%404 = OpLoad  %24  %307
-%405 = OpFAdd  %24  %404 %403
+%399 = OpConvertUToF  %8  %219
+%400 = OpCompositeConstruct  %24  %303 %399
+%401 = OpSampledImage  %398  %297 %298
+%402 = OpImageSampleImplicitLod  %24  %401 %400
+%403 = OpLoad  %24  %306
+%404 = OpFAdd  %24  %403 %402
 OpLine %3 134 5
-OpStore %307 %405
+OpStore %306 %404
 OpLine %3 135 5
-%406 = OpConvertUToF  %8  %220
-%407 = OpCompositeConstruct  %24  %304 %406
-%408 = OpSampledImage  %399  %298 %299
-%409 = OpImageSampleExplicitLod  %24  %408 %407 Lod %305
-%410 = OpLoad  %24  %307
-%411 = OpFAdd  %24  %410 %409
+%405 = OpConvertUToF  %8  %219
+%406 = OpCompositeConstruct  %24  %303 %405
+%407 = OpSampledImage  %398  %297 %298
+%408 = OpImageSampleExplicitLod  %24  %407 %406 Lod %304
+%409 = OpLoad  %24  %306
+%410 = OpFAdd  %24  %409 %408
 OpLine %3 135 5
-OpStore %307 %411
+OpStore %306 %410
 OpLine %3 136 5
-%412 = OpConvertUToF  %8  %220
-%413 = OpCompositeConstruct  %24  %304 %412
-%414 = OpSampledImage  %399  %298 %299
-%415 = OpImageSampleImplicitLod  %24  %414 %413 Bias %306
-%416 = OpLoad  %24  %307
-%417 = OpFAdd  %24  %416 %415
+%411 = OpConvertUToF  %8  %219
+%412 = OpCompositeConstruct  %24  %303 %411
+%413 = OpSampledImage  %398  %297 %298
+%414 = OpImageSampleImplicitLod  %24  %413 %412 Bias %305
+%415 = OpLoad  %24  %306
+%416 = OpFAdd  %24  %415 %414
 OpLine %3 136 5
-OpStore %307 %417
+OpStore %306 %416
 OpLine %3 137 5
-%418 = OpConvertSToF  %8  %82
-%419 = OpCompositeConstruct  %24  %304 %418
-%420 = OpSampledImage  %399  %298 %299
-%421 = OpImageSampleImplicitLod  %24  %420 %419
-%422 = OpLoad  %24  %307
-%423 = OpFAdd  %24  %422 %421
+%417 = OpConvertSToF  %8  %82
+%418 = OpCompositeConstruct  %24  %303 %417
+%419 = OpSampledImage  %398  %297 %298
+%420 = OpImageSampleImplicitLod  %24  %419 %418
+%421 = OpLoad  %24  %306
+%422 = OpFAdd  %24  %421 %420
 OpLine %3 137 5
-OpStore %307 %423
+OpStore %306 %422
 OpLine %3 138 5
-%424 = OpConvertSToF  %8  %82
-%425 = OpCompositeConstruct  %24  %304 %424
-%426 = OpSampledImage  %399  %298 %299
-%427 = OpImageSampleExplicitLod  %24  %426 %425 Lod %305
-%428 = OpLoad  %24  %307
-%429 = OpFAdd  %24  %428 %427
+%423 = OpConvertSToF  %8  %82
+%424 = OpCompositeConstruct  %24  %303 %423
+%425 = OpSampledImage  %398  %297 %298
+%426 = OpImageSampleExplicitLod  %24  %425 %424 Lod %304
+%427 = OpLoad  %24  %306
+%428 = OpFAdd  %24  %427 %426
 OpLine %3 138 5
-OpStore %307 %429
+OpStore %306 %428
 OpLine %3 139 5
-%430 = OpConvertSToF  %8  %82
-%431 = OpCompositeConstruct  %24  %304 %430
-%432 = OpSampledImage  %399  %298 %299
-%433 = OpImageSampleImplicitLod  %24  %432 %431 Bias %306
-%434 = OpLoad  %24  %307
-%435 = OpFAdd  %24  %434 %433
+%429 = OpConvertSToF  %8  %82
+%430 = OpCompositeConstruct  %24  %303 %429
+%431 = OpSampledImage  %398  %297 %298
+%432 = OpImageSampleImplicitLod  %24  %431 %430 Bias %305
+%433 = OpLoad  %24  %306
+%434 = OpFAdd  %24  %433 %432
 OpLine %3 139 5
-OpStore %307 %435
+OpStore %306 %434
 OpLine %3 1 1
-%436 = OpLoad  %24  %307
-OpStore %293 %436
+%435 = OpLoad  %24  %306
+OpStore %292 %435
 OpReturn
 OpFunctionEnd
-%440 = OpFunction  %2  None %101
-%437 = OpLabel
-%445 = OpVariable  %446  Function %447
-%441 = OpLoad  %25  %67
-%442 = OpLoad  %26  %69
-%443 = OpLoad  %27  %71
-%444 = OpLoad  %28  %73
-OpBranch %448
-%448 = OpLabel
+%439 = OpFunction  %2  None %101
+%436 = OpLabel
+%444 = OpVariable  %445  Function %446
+%440 = OpLoad  %25  %67
+%441 = OpLoad  %26  %69
+%442 = OpLoad  %27  %71
+%443 = OpLoad  %28  %73
+OpBranch %447
+%447 = OpLabel
 OpLine %3 154 14
 OpLine %3 155 15
 OpLine %3 158 5
-%450 = OpSampledImage  %449  %442 %441
-%451 = OpImageSampleDrefImplicitLod  %8  %450 %302 %300
-%452 = OpLoad  %8  %445
-%453 = OpFAdd  %8  %452 %451
+%449 = OpSampledImage  %448  %441 %440
+%450 = OpImageSampleDrefImplicitLod  %8  %449 %301 %299
+%451 = OpLoad  %8  %444
+%452 = OpFAdd  %8  %451 %450
 OpLine %3 158 5
-OpStore %445 %453
+OpStore %444 %452
 OpLine %3 159 5
-%455 = OpConvertUToF  %8  %220
-%456 = OpCompositeConstruct  %303  %302 %455
-%457 = OpSampledImage  %454  %443 %441
-%458 = OpImageSampleDrefImplicitLod  %8  %457 %456 %300
-%459 = OpLoad  %8  %445
-%460 = OpFAdd  %8  %459 %458
+%454 = OpConvertUToF  %8  %219
+%455 = OpCompositeConstruct  %302  %301 %454
+%456 = OpSampledImage  %453  %442 %440
+%457 = OpImageSampleDrefImplicitLod  %8  %456 %455 %299
+%458 = OpLoad  %8  %444
+%459 = OpFAdd  %8  %458 %457
 OpLine %3 159 5
-OpStore %445 %460
+OpStore %444 %459
 OpLine %3 160 5
-%461 = OpConvertSToF  %8  %82
-%462 = OpCompositeConstruct  %303  %302 %461
-%463 = OpSampledImage  %454  %443 %441
-%464 = OpImageSampleDrefImplicitLod  %8  %463 %462 %300
-%465 = OpLoad  %8  %445
-%466 = OpFAdd  %8  %465 %464
+%460 = OpConvertSToF  %8  %82
+%461 = OpCompositeConstruct  %302  %301 %460
+%462 = OpSampledImage  %453  %442 %440
+%463 = OpImageSampleDrefImplicitLod  %8  %462 %461 %299
+%464 = OpLoad  %8  %444
+%465 = OpFAdd  %8  %464 %463
 OpLine %3 160 5
-OpStore %445 %466
+OpStore %444 %465
 OpLine %3 161 5
-%468 = OpSampledImage  %467  %444 %441
-%469 = OpImageSampleDrefImplicitLod  %8  %468 %304 %300
-%470 = OpLoad  %8  %445
-%471 = OpFAdd  %8  %470 %469
+%467 = OpSampledImage  %466  %443 %440
+%468 = OpImageSampleDrefImplicitLod  %8  %467 %303 %299
+%469 = OpLoad  %8  %444
+%470 = OpFAdd  %8  %469 %468
 OpLine %3 161 5
-OpStore %445 %471
+OpStore %444 %470
 OpLine %3 162 5
-%472 = OpSampledImage  %449  %442 %441
-%473 = OpImageSampleDrefExplicitLod  %8  %472 %302 %300 Lod %474
-%475 = OpLoad  %8  %445
-%476 = OpFAdd  %8  %475 %473
+%471 = OpSampledImage  %448  %441 %440
+%472 = OpImageSampleDrefExplicitLod  %8  %471 %301 %299 Lod %473
+%474 = OpLoad  %8  %444
+%475 = OpFAdd  %8  %474 %472
 OpLine %3 162 5
-OpStore %445 %476
+OpStore %444 %475
 OpLine %3 163 5
-%477 = OpConvertUToF  %8  %220
-%478 = OpCompositeConstruct  %303  %302 %477
-%479 = OpSampledImage  %454  %443 %441
-%480 = OpImageSampleDrefExplicitLod  %8  %479 %478 %300 Lod %474
-%481 = OpLoad  %8  %445
-%482 = OpFAdd  %8  %481 %480
+%476 = OpConvertUToF  %8  %219
+%477 = OpCompositeConstruct  %302  %301 %476
+%478 = OpSampledImage  %453  %442 %440
+%479 = OpImageSampleDrefExplicitLod  %8  %478 %477 %299 Lod %473
+%480 = OpLoad  %8  %444
+%481 = OpFAdd  %8  %480 %479
 OpLine %3 163 5
-OpStore %445 %482
+OpStore %444 %481
 OpLine %3 164 5
-%483 = OpConvertSToF  %8  %82
-%484 = OpCompositeConstruct  %303  %302 %483
-%485 = OpSampledImage  %454  %443 %441
-%486 = OpImageSampleDrefExplicitLod  %8  %485 %484 %300 Lod %474
-%487 = OpLoad  %8  %445
-%488 = OpFAdd  %8  %487 %486
+%482 = OpConvertSToF  %8  %82
+%483 = OpCompositeConstruct  %302  %301 %482
+%484 = OpSampledImage  %453  %442 %440
+%485 = OpImageSampleDrefExplicitLod  %8  %484 %483 %299 Lod %473
+%486 = OpLoad  %8  %444
+%487 = OpFAdd  %8  %486 %485
 OpLine %3 164 5
-OpStore %445 %488
+OpStore %444 %487
 OpLine %3 165 5
-%489 = OpSampledImage  %467  %444 %441
-%490 = OpImageSampleDrefExplicitLod  %8  %489 %304 %300 Lod %474
-%491 = OpLoad  %8  %445
-%492 = OpFAdd  %8  %491 %490
+%488 = OpSampledImage  %466  %443 %440
+%489 = OpImageSampleDrefExplicitLod  %8  %488 %303 %299 Lod %473
+%490 = OpLoad  %8  %444
+%491 = OpFAdd  %8  %490 %489
 OpLine %3 165 5
-OpStore %445 %492
+OpStore %444 %491
 OpLine %3 1 1
-%493 = OpLoad  %8  %445
-OpStore %438 %493
+%492 = OpLoad  %8  %444
+OpStore %437 %492
 OpReturn
 OpFunctionEnd
-%496 = OpFunction  %2  None %101
-%494 = OpLabel
-%497 = OpLoad  %17  %50
-%498 = OpLoad  %4  %52
-%499 = OpLoad  %18  %53
-%500 = OpLoad  %25  %65
-%501 = OpLoad  %25  %67
-%502 = OpLoad  %26  %69
-OpBranch %503
-%503 = OpLabel
+%495 = OpFunction  %2  None %101
+%493 = OpLabel
+%496 = OpLoad  %17  %50
+%497 = OpLoad  %4  %52
+%498 = OpLoad  %18  %53
+%499 = OpLoad  %25  %65
+%500 = OpLoad  %25  %67
+%501 = OpLoad  %26  %69
+OpBranch %502
+%502 = OpLabel
 OpLine %3 171 14
 OpLine %3 173 15
-%504 = OpSampledImage  %317  %497 %500
-%505 = OpImageGather  %24  %504 %302 %506
+%503 = OpSampledImage  %316  %496 %499
+%504 = OpImageGather  %24  %503 %301 %505
 OpLine %3 174 22
-%507 = OpSampledImage  %317  %497 %500
-%508 = OpImageGather  %24  %507 %302 %509 ConstOffset %31
+%506 = OpSampledImage  %316  %496 %499
+%507 = OpImageGather  %24  %506 %301 %508 ConstOffset %31
 OpLine %3 175 21
-%510 = OpSampledImage  %449  %502 %501
-%511 = OpImageDrefGather  %24  %510 %302 %300
+%509 = OpSampledImage  %448  %501 %500
+%510 = OpImageDrefGather  %24  %509 %301 %299
 OpLine %3 176 28
-%512 = OpSampledImage  %449  %502 %501
-%513 = OpImageDrefGather  %24  %512 %302 %300 ConstOffset %31
+%511 = OpSampledImage  %448  %501 %500
+%512 = OpImageDrefGather  %24  %511 %301 %299 ConstOffset %31
 OpLine %3 178 13
-%515 = OpSampledImage  %514  %498 %500
-%516 = OpImageGather  %120  %515 %302 %220
+%514 = OpSampledImage  %513  %497 %499
+%515 = OpImageGather  %120  %514 %301 %219
 OpLine %3 179 13
-%519 = OpSampledImage  %518  %499 %500
-%520 = OpImageGather  %517  %519 %302 %220
+%518 = OpSampledImage  %517  %498 %499
+%519 = OpImageGather  %516  %518 %301 %219
 OpLine %3 180 13
-%521 = OpConvertUToF  %24  %516
-%522 = OpConvertSToF  %24  %520
-%523 = OpFAdd  %24  %521 %522
+%520 = OpConvertUToF  %24  %515
+%521 = OpConvertSToF  %24  %519
+%522 = OpFAdd  %24  %520 %521
 OpLine %3 182 12
-%524 = OpFAdd  %24  %505 %508
-%525 = OpFAdd  %24  %524 %511
-%526 = OpFAdd  %24  %525 %513
-%527 = OpFAdd  %24  %526 %523
-OpStore %495 %527
+%523 = OpFAdd  %24  %504 %507
+%524 = OpFAdd  %24  %523 %510
+%525 = OpFAdd  %24  %524 %512
+%526 = OpFAdd  %24  %525 %522
+OpStore %494 %526
 OpReturn
 OpFunctionEnd
-%530 = OpFunction  %2  None %101
-%528 = OpLabel
-%531 = OpLoad  %25  %65
-%532 = OpLoad  %26  %69
-OpBranch %533
-%533 = OpLabel
+%529 = OpFunction  %2  None %101
+%527 = OpLabel
+%530 = OpLoad  %25  %65
+%531 = OpLoad  %26  %69
+OpBranch %532
+%532 = OpLabel
 OpLine %3 187 14
 OpLine %3 189 15
-%534 = OpSampledImage  %449  %532 %531
-%535 = OpImageSampleImplicitLod  %24  %534 %302
-%536 = OpCompositeExtract  %8  %535 0
+%533 = OpSampledImage  %448  %531 %530
+%534 = OpImageSampleImplicitLod  %24  %533 %301
+%535 = OpCompositeExtract  %8  %534 0
 OpLine %3 190 22
-%537 = OpSampledImage  %449  %532 %531
-%538 = OpImageGather  %24  %537 %302 %220
+%536 = OpSampledImage  %448  %531 %530
+%537 = OpImageGather  %24  %536 %301 %219
 OpLine %3 191 21
-%539 = OpSampledImage  %449  %532 %531
-%541 = OpConvertSToF  %8  %30
-%540 = OpImageSampleExplicitLod  %24  %539 %302 Lod %541
-%542 = OpCompositeExtract  %8  %540 0
+%538 = OpSampledImage  %448  %531 %530
+%540 = OpConvertSToF  %8  %30
+%539 = OpImageSampleExplicitLod  %24  %538 %301 Lod %540
+%541 = OpCompositeExtract  %8  %539 0
 OpLine %3 189 15
-%543 = OpCompositeConstruct  %24  %536 %536 %536 %536
-%544 = OpFAdd  %24  %543 %538
-%545 = OpCompositeConstruct  %24  %542 %542 %542 %542
-%546 = OpFAdd  %24  %544 %545
-OpStore %529 %546
+%542 = OpCompositeConstruct  %24  %535 %535 %535 %535
+%543 = OpFAdd  %24  %542 %537
+%544 = OpCompositeConstruct  %24  %541 %541 %541 %541
+%545 = OpFAdd  %24  %543 %544
+OpStore %528 %545
 OpReturn
 OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/wgsl/extra.wgsl b/naga/tests/out/wgsl/extra.wgsl
index 25f894e7f5..574c35ab0b 100644
--- a/naga/tests/out/wgsl/extra.wgsl
+++ b/naga/tests/out/wgsl/extra.wgsl
@@ -1,6 +1,6 @@
 struct PushConstants {
     index: u32,
-    double: vec2<f64>,
+    double: vec2<f32>,
 }
 
 struct FragmentIn {
diff --git a/naga/tests/out/wgsl/f16-glsl.comp.wgsl b/naga/tests/out/wgsl/f16-glsl.comp.wgsl
new file mode 100644
index 0000000000..e12dcb1408
--- /dev/null
+++ b/naga/tests/out/wgsl/f16-glsl.comp.wgsl
@@ -0,0 +1,47 @@
+enable f16;
+
+struct A {
+    a_1_: f16,
+    a_vec2_: vec2<f16>,
+    a_vec3_: vec3<f16>,
+    a_vec4_: vec4<f16>,
+}
+
+struct B {
+    b_1_: f16,
+    b_vec2_: vec2<f16>,
+    b_vec3_: vec3<f16>,
+    b_vec4_: vec4<f16>,
+    b_mat2_: mat2x2<f16>,
+    b_mat2x3_: mat2x3<f16>,
+    b_mat2x4_: mat2x4<f16>,
+    b_mat3x2_: mat3x2<f16>,
+    b_mat3_: mat3x3<f16>,
+    b_mat3x4_: mat3x4<f16>,
+    b_mat4x2_: mat4x2<f16>,
+    b_mat4x3_: mat4x3<f16>,
+    b_mat4_: mat4x4<f16>,
+}
+
+@group(0) @binding(0) 
+var<uniform> global: A;
+@group(0) @binding(1) 
+var<storage, read_write> global_1: B;
+
+fn main_1() {
+    let _e16 = global.a_1_;
+    global_1.b_1_ = _e16;
+    let _e17 = global.a_vec2_;
+    global_1.b_vec2_ = _e17;
+    let _e18 = global.a_vec3_;
+    global_1.b_vec3_ = _e18;
+    let _e19 = global.a_vec4_;
+    global_1.b_vec4_ = _e19;
+    return;
+}
+
+@compute @workgroup_size(1, 1, 1) 
+fn main() {
+    main_1();
+    return;
+}
diff --git a/naga/tests/out/wgsl/f16-spv.wgsl b/naga/tests/out/wgsl/f16-spv.wgsl
new file mode 100644
index 0000000000..4c29ace8c3
--- /dev/null
+++ b/naga/tests/out/wgsl/f16-spv.wgsl
@@ -0,0 +1,46 @@
+enable f16;
+
+struct B {
+    b_1_: f16,
+    b_vec2_: vec2<f16>,
+    b_vec3_: vec3<f16>,
+    b_vec4_: vec4<f16>,
+    b_mat2_: mat2x2<f16>,
+    b_mat2x3_: mat2x3<f16>,
+    b_mat2x4_: mat2x4<f16>,
+    b_mat3x2_: mat3x2<f16>,
+    b_mat3_: mat3x3<f16>,
+    b_mat3x4_: mat3x4<f16>,
+    b_mat4x2_: mat4x2<f16>,
+    b_mat4x3_: mat4x3<f16>,
+    b_mat4_: mat4x4<f16>,
+}
+
+struct A {
+    a_1_: f16,
+    a_vec2_: vec2<f16>,
+    a_vec3_: vec3<f16>,
+    a_vec4_: vec4<f16>,
+}
+
+@group(0) @binding(1) 
+var<storage, read_write> unnamed: B;
+@group(0) @binding(0) 
+var<uniform> unnamed_1: A;
+
+fn main_1() {
+    let _e3 = unnamed_1.a_1_;
+    unnamed.b_1_ = _e3;
+    let _e6 = unnamed_1.a_vec2_;
+    unnamed.b_vec2_ = _e6;
+    let _e9 = unnamed_1.a_vec3_;
+    unnamed.b_vec3_ = _e9;
+    let _e12 = unnamed_1.a_vec4_;
+    unnamed.b_vec4_ = _e12;
+    return;
+}
+
+@compute @workgroup_size(1, 1, 1) 
+fn main() {
+    main_1();
+}
diff --git a/naga/tests/out/wgsl/f16.wgsl b/naga/tests/out/wgsl/f16.wgsl
new file mode 100644
index 0000000000..a45edce3b5
--- /dev/null
+++ b/naga/tests/out/wgsl/f16.wgsl
@@ -0,0 +1,167 @@
+enable f16;
+
+struct UniformCompatible {
+    val_u32_: u32,
+    val_i32_: i32,
+    val_f32_: f32,
+    val_f16_: f16,
+    val_f16_2_: vec2<f16>,
+    val_f16_3_: vec3<f16>,
+    val_f16_4_: vec4<f16>,
+    final_value: f16,
+    val_mat2x2_: mat2x2<f16>,
+    val_mat2x3_: mat2x3<f16>,
+    val_mat2x4_: mat2x4<f16>,
+    val_mat3x2_: mat3x2<f16>,
+    val_mat3x3_: mat3x3<f16>,
+    val_mat3x4_: mat3x4<f16>,
+    val_mat4x2_: mat4x2<f16>,
+    val_mat4x3_: mat4x3<f16>,
+    val_mat4x4_: mat4x4<f16>,
+}
+
+struct StorageCompatible {
+    val_f16_array_2_: array<f16, 2>,
+}
+
+struct LayoutTest {
+    scalar1_: f16,
+    scalar2_: f16,
+    v3_: vec3<f16>,
+    tuck_in: f16,
+    scalar4_: f16,
+    larger: u32,
+}
+
+const constant_variable: f16 = 15.203125h;
+
+var<private> private_variable: f16 = 1h;
+@group(0) @binding(0) 
+var<uniform> input_uniform: UniformCompatible;
+@group(0) @binding(1) 
+var<storage> input_storage: UniformCompatible;
+@group(0) @binding(2) 
+var<storage> input_arrays: StorageCompatible;
+@group(0) @binding(3) 
+var<storage, read_write> output: UniformCompatible;
+@group(0) @binding(4) 
+var<storage, read_write> output_arrays: StorageCompatible;
+
+fn f16_function(x: f16) -> f16 {
+    var val: f16 = 15.203125h;
+
+    let _e4 = val;
+    val = (_e4 + -33344h);
+    let _e6 = val;
+    let _e9 = val;
+    val = (_e9 + (_e6 + 5h));
+    let _e13 = input_uniform.val_f32_;
+    let _e14 = val;
+    let _e18 = val;
+    val = (_e18 + f16((_e13 + f32(_e14))));
+    let _e22 = input_uniform.val_f16_;
+    let _e25 = val;
+    val = (_e25 + vec3(_e22).z);
+    let _e31 = input_uniform.val_f16_;
+    let _e34 = input_storage.val_f16_;
+    output.val_f16_ = (_e31 + _e34);
+    let _e40 = input_uniform.val_f16_2_;
+    let _e43 = input_storage.val_f16_2_;
+    output.val_f16_2_ = (_e40 + _e43);
+    let _e49 = input_uniform.val_f16_3_;
+    let _e52 = input_storage.val_f16_3_;
+    output.val_f16_3_ = (_e49 + _e52);
+    let _e58 = input_uniform.val_f16_4_;
+    let _e61 = input_storage.val_f16_4_;
+    output.val_f16_4_ = (_e58 + _e61);
+    let _e67 = input_uniform.val_mat2x2_;
+    let _e70 = input_storage.val_mat2x2_;
+    output.val_mat2x2_ = (_e67 + _e70);
+    let _e76 = input_uniform.val_mat2x3_;
+    let _e79 = input_storage.val_mat2x3_;
+    output.val_mat2x3_ = (_e76 + _e79);
+    let _e85 = input_uniform.val_mat2x4_;
+    let _e88 = input_storage.val_mat2x4_;
+    output.val_mat2x4_ = (_e85 + _e88);
+    let _e94 = input_uniform.val_mat3x2_;
+    let _e97 = input_storage.val_mat3x2_;
+    output.val_mat3x2_ = (_e94 + _e97);
+    let _e103 = input_uniform.val_mat3x3_;
+    let _e106 = input_storage.val_mat3x3_;
+    output.val_mat3x3_ = (_e103 + _e106);
+    let _e112 = input_uniform.val_mat3x4_;
+    let _e115 = input_storage.val_mat3x4_;
+    output.val_mat3x4_ = (_e112 + _e115);
+    let _e121 = input_uniform.val_mat4x2_;
+    let _e124 = input_storage.val_mat4x2_;
+    output.val_mat4x2_ = (_e121 + _e124);
+    let _e130 = input_uniform.val_mat4x3_;
+    let _e133 = input_storage.val_mat4x3_;
+    output.val_mat4x3_ = (_e130 + _e133);
+    let _e139 = input_uniform.val_mat4x4_;
+    let _e142 = input_storage.val_mat4x4_;
+    output.val_mat4x4_ = (_e139 + _e142);
+    let _e148 = input_arrays.val_f16_array_2_;
+    output_arrays.val_f16_array_2_ = _e148;
+    let _e149 = val;
+    let _e151 = val;
+    val = (_e151 + abs(_e149));
+    let _e153 = val;
+    let _e154 = val;
+    let _e155 = val;
+    let _e157 = val;
+    val = (_e157 + clamp(_e153, _e154, _e155));
+    let _e159 = val;
+    let _e161 = val;
+    let _e164 = val;
+    val = (_e164 + dot(vec2(_e159), vec2(_e161)));
+    let _e166 = val;
+    let _e167 = val;
+    let _e169 = val;
+    val = (_e169 + max(_e166, _e167));
+    let _e171 = val;
+    let _e172 = val;
+    let _e174 = val;
+    val = (_e174 + min(_e171, _e172));
+    let _e176 = val;
+    let _e178 = val;
+    val = (_e178 + sign(_e176));
+    let _e181 = val;
+    val = (_e181 + 1h);
+    let _e185 = input_uniform.val_f16_2_;
+    let float_vec2_ = vec2<f32>(_e185);
+    output.val_f16_2_ = vec2<f16>(float_vec2_);
+    let _e192 = input_uniform.val_f16_3_;
+    let float_vec3_ = vec3<f32>(_e192);
+    output.val_f16_3_ = vec3<f16>(float_vec3_);
+    let _e199 = input_uniform.val_f16_4_;
+    let float_vec4_ = vec4<f32>(_e199);
+    output.val_f16_4_ = vec4<f16>(float_vec4_);
+    let _e208 = input_uniform.val_mat2x2_;
+    output.val_mat2x2_ = mat2x2<f16>(mat2x2<f32>(_e208));
+    let _e215 = input_uniform.val_mat2x3_;
+    output.val_mat2x3_ = mat2x3<f16>(mat2x3<f32>(_e215));
+    let _e222 = input_uniform.val_mat2x4_;
+    output.val_mat2x4_ = mat2x4<f16>(mat2x4<f32>(_e222));
+    let _e229 = input_uniform.val_mat3x2_;
+    output.val_mat3x2_ = mat3x2<f16>(mat3x2<f32>(_e229));
+    let _e236 = input_uniform.val_mat3x3_;
+    output.val_mat3x3_ = mat3x3<f16>(mat3x3<f32>(_e236));
+    let _e243 = input_uniform.val_mat3x4_;
+    output.val_mat3x4_ = mat3x4<f16>(mat3x4<f32>(_e243));
+    let _e250 = input_uniform.val_mat4x2_;
+    output.val_mat4x2_ = mat4x2<f16>(mat4x2<f32>(_e250));
+    let _e257 = input_uniform.val_mat4x3_;
+    output.val_mat4x3_ = mat4x3<f16>(mat4x3<f32>(_e257));
+    let _e264 = input_uniform.val_mat4x4_;
+    output.val_mat4x4_ = mat4x4<f16>(mat4x4<f32>(_e264));
+    let _e267 = val;
+    return _e267;
+}
+
+@compute @workgroup_size(1, 1, 1) 
+fn main() {
+    let _e3 = f16_function(2h);
+    output.final_value = _e3;
+    return;
+}
diff --git a/naga/tests/spirv_capabilities.rs b/naga/tests/spirv_capabilities.rs
index f221c7896e..2d46e37f72 100644
--- a/naga/tests/spirv_capabilities.rs
+++ b/naga/tests/spirv_capabilities.rs
@@ -208,3 +208,8 @@ fn int64() {
         "#,
     );
 }
+
+#[test]
+fn float16() {
+    require(&[Ca::Float16], "enable f16; fn f(x: f16) { }");
+}
diff --git a/naga/tests/wgsl_errors.rs b/naga/tests/wgsl_errors.rs
index 2dcd0588ca..34e1e18ece 100644
--- a/naga/tests/wgsl_errors.rs
+++ b/naga/tests/wgsl_errors.rs
@@ -3,9 +3,11 @@ Tests for the WGSL front end.
 */
 #![cfg(feature = "wgsl-in")]
 
+#[track_caller]
 fn check(input: &str, snapshot: &str) {
     let output = naga::front::wgsl::parse_str(input)
-        .expect_err("expected parser error")
+        .map(|_| panic!("expected parser error, but parsing succeeded!"))
+        .unwrap_err()
         .emit_to_string(input);
     if output != snapshot {
         for diff in diff::lines(snapshot, &output) {
@@ -19,6 +21,19 @@ fn check(input: &str, snapshot: &str) {
     }
 }
 
+#[track_caller]
+fn check_success(input: &str) {
+    match naga::front::wgsl::parse_str(input) {
+        Ok(_) => {}
+        Err(err) => {
+            panic!(
+                "expected success, but parsing failed with:\n{}",
+                err.emit_to_string(input)
+            );
+        }
+    }
+}
+
 #[test]
 fn very_negative_integers() {
     // wgpu#4492
@@ -824,6 +839,50 @@ fn matrix_constructor_inferred() {
     );
 }
 
+#[test]
+fn float16_requires_enable() {
+    check(
+        r#"
+            const a: f16 = 1.0;
+        "#,
+        r#"error: the `f16` language extension is not enabled
+  ┌─ wgsl:2:22
+  │
+2 │             const a: f16 = 1.0;
+  │                      ^^^ the `f16` language extension is needed for this functionality, but it is not currently enabled.
+  │
+  = note: You can enable this extension by adding `enable f16;` at the top of the shader.
+
+"#,
+    );
+
+    check(
+        r#"
+            const a = 1.0h;
+        "#,
+        r#"error: the `f16` language extension is not enabled
+  ┌─ wgsl:2:23
+  │
+2 │             const a = 1.0h;
+  │                       ^^^^ the `f16` language extension is needed for this functionality, but it is not currently enabled.
+  │
+  = note: You can enable this extension by adding `enable f16;` at the top of the shader.
+
+"#,
+    );
+}
+
+#[test]
+fn multiple_enables_valid() {
+    check_success(
+        r#"
+            enable f16;
+            enable f16;
+            const a: f16 = 1.0h;
+        "#,
+    );
+}
+
 /// Check the result of validating a WGSL program against a pattern.
 ///
 /// Unless you are generating code programmatically, the
@@ -900,6 +959,7 @@ macro_rules! check_validation {
     }
 }
 
+#[track_caller]
 fn validation_error(
     source: &str,
     caps: naga::valid::Capabilities,
@@ -928,6 +988,53 @@ fn int64_capability() {
     }
 }
 
+#[test]
+fn float16_capability() {
+    check_validation! {
+        "enable f16; var input: f16;",
+        "enable f16; var input: vec2<f16>;":
+        Err(naga::valid::ValidationError::Type {
+            source: naga::valid::TypeError::WidthError(naga::valid::WidthError::MissingCapability {flag: "FLOAT16",..}),
+            ..
+        })
+    }
+}
+
+#[test]
+fn float16_in_push_constant() {
+    check_validation! {
+        "enable f16; var<push_constant> input: f16;",
+        "enable f16; var<push_constant> input: vec2<f16>;",
+        "enable f16; var<push_constant> input: mat4x4<f16>;",
+        "enable f16; struct S { a: f16 }; var<push_constant> input: S;",
+        "enable f16; struct S1 { a: f16 }; struct S2 { a : S1 } var<push_constant> input: S2;":
+        Err(naga::valid::ValidationError::GlobalVariable {
+            source: naga::valid::GlobalVariableError::InvalidPushConstantType(
+                naga::valid::PushConstantError::InvalidScalar(
+                    naga::Scalar::F16
+                )
+            ),
+            ..
+        }),
+        naga::valid::Capabilities::SHADER_FLOAT16 | naga::valid::Capabilities::PUSH_CONSTANT
+    }
+}
+
+#[test]
+fn float16_in_atomic() {
+    check_validation! {
+        "enable f16; var<storage> a: atomic<f16>;":
+        Err(naga::valid::ValidationError::Type {
+            source: naga::valid::TypeError::InvalidAtomicWidth(
+                naga::ScalarKind::Float,
+                2
+            ),
+            ..
+        }),
+        naga::valid::Capabilities::SHADER_FLOAT16
+    }
+}
+
 #[test]
 fn invalid_arrays() {
     check_validation! {
diff --git a/tests/Cargo.toml b/tests/Cargo.toml
index 5ee691ba05..bc2659da8a 100644
--- a/tests/Cargo.toml
+++ b/tests/Cargo.toml
@@ -43,6 +43,7 @@ cfg-if.workspace = true
 ctor.workspace = true
 futures-lite.workspace = true
 glam.workspace = true
+half = { workspace = true, features = ["bytemuck"] }
 itertools.workspace = true
 image.workspace = true
 libtest-mimic.workspace = true
diff --git a/tests/tests/shader/mod.rs b/tests/tests/shader/mod.rs
index 9a3bae0d40..15d4c5e5cf 100644
--- a/tests/tests/shader/mod.rs
+++ b/tests/tests/shader/mod.rs
@@ -143,8 +143,8 @@ impl ShaderTest {
             body,
             input_type: String::from("CustomStruct"),
             output_type: String::from("array<u32>"),
-            input_values: bytemuck::cast_slice(input_values).to_vec(),
-            output_values: vec![bytemuck::cast_slice(output_values).to_vec()],
+            input_values: bytemuck::pod_collect_to_vec(input_values),
+            output_values: vec![bytemuck::pod_collect_to_vec(output_values)],
             output_comparison_fn: Self::default_comparison_function::<O>,
             output_initialization: u32::MAX,
             failures: Backends::empty(),
diff --git a/tests/tests/shader/struct_layout.rs b/tests/tests/shader/struct_layout.rs
index 38a040fcad..25bd32bf77 100644
--- a/tests/tests/shader/struct_layout.rs
+++ b/tests/tests/shader/struct_layout.rs
@@ -5,6 +5,60 @@ use wgpu::{Backends, DownlevelFlags, Features, Limits};
 use crate::shader::{shader_input_output_test, InputStorageType, ShaderTest, MAX_BUFFER_SIZE};
 use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters};
 
+#[gpu_test]
+static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            // Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/wgpu/issues/4371
+            .expect_fail(
+                FailureCase::backend(wgpu::Backends::VULKAN)
+                    .validation_error("a matrix with stride 8 not satisfying alignment to 16"),
+            )
+            .limits(Limits::downlevel_defaults()),
+    )
+    .run_async(|ctx| {
+        shader_input_output_test(
+            ctx,
+            InputStorageType::Uniform,
+            create_struct_layout_tests(InputStorageType::Uniform),
+        )
+    });
+
+#[gpu_test]
+static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            .limits(Limits::downlevel_defaults()),
+    )
+    .run_async(|ctx| {
+        shader_input_output_test(
+            ctx,
+            InputStorageType::Storage,
+            create_struct_layout_tests(InputStorageType::Storage),
+        )
+    });
+
+#[gpu_test]
+static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .features(Features::PUSH_CONSTANTS)
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            .limits(Limits {
+                max_push_constant_size: MAX_BUFFER_SIZE as u32,
+                ..Limits::downlevel_defaults()
+            }),
+    )
+    .run_async(|ctx| {
+        shader_input_output_test(
+            ctx,
+            InputStorageType::PushConstant,
+            create_struct_layout_tests(InputStorageType::PushConstant),
+        )
+    });
+
 fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec<ShaderTest> {
     let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect();
 
@@ -253,6 +307,57 @@ fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec<ShaderTest>
     tests
 }
 
+#[gpu_test]
+static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .features(Features::SHADER_INT64)
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            .limits(Limits::downlevel_defaults()),
+    )
+    .run_async(|ctx| {
+        shader_input_output_test(
+            ctx,
+            InputStorageType::Storage,
+            create_64bit_struct_layout_tests(),
+        )
+    });
+
+#[gpu_test]
+static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .features(Features::SHADER_INT64)
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            .limits(Limits::downlevel_defaults()),
+    )
+    .run_async(|ctx| {
+        shader_input_output_test(
+            ctx,
+            InputStorageType::Storage,
+            create_64bit_struct_layout_tests(),
+        )
+    });
+
+#[gpu_test]
+static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS)
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            .limits(Limits {
+                max_push_constant_size: MAX_BUFFER_SIZE as u32,
+                ..Limits::downlevel_defaults()
+            }),
+    )
+    .run_async(|ctx| {
+        shader_input_output_test(
+            ctx,
+            InputStorageType::PushConstant,
+            create_64bit_struct_layout_tests(),
+        )
+    });
+
 fn create_64bit_struct_layout_tests() -> Vec<ShaderTest> {
     let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect();
 
@@ -356,29 +461,26 @@ fn create_64bit_struct_layout_tests() -> Vec<ShaderTest> {
 }
 
 #[gpu_test]
-static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
+static UNIFORM_INPUT_F16: GpuTestConfiguration = GpuTestConfiguration::new()
     .parameters(
         TestParameters::default()
+            .features(Features::SHADER_F16)
             .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
-            // Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/wgpu/issues/4371
-            .expect_fail(
-                FailureCase::backend(wgpu::Backends::VULKAN)
-                    .validation_error("a matrix with stride 8 not satisfying alignment to 16"),
-            )
             .limits(Limits::downlevel_defaults()),
     )
     .run_async(|ctx| {
         shader_input_output_test(
             ctx,
-            InputStorageType::Uniform,
-            create_struct_layout_tests(InputStorageType::Uniform),
+            InputStorageType::Storage,
+            create_16bit_struct_layout_test(),
         )
     });
 
 #[gpu_test]
-static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
+static STORAGE_INPUT_F16: GpuTestConfiguration = GpuTestConfiguration::new()
     .parameters(
         TestParameters::default()
+            .features(Features::SHADER_F16)
             .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
             .limits(Limits::downlevel_defaults()),
     )
@@ -386,76 +488,205 @@ static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
         shader_input_output_test(
             ctx,
             InputStorageType::Storage,
-            create_struct_layout_tests(InputStorageType::Storage),
+            create_16bit_struct_layout_test(),
         )
     });
 
-#[gpu_test]
-static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
-    .parameters(
-        TestParameters::default()
-            .features(Features::PUSH_CONSTANTS)
-            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
-            .limits(Limits {
-                max_push_constant_size: MAX_BUFFER_SIZE as u32,
-                ..Limits::downlevel_defaults()
-            }),
-    )
-    .run_async(|ctx| {
-        shader_input_output_test(
-            ctx,
-            InputStorageType::PushConstant,
-            create_struct_layout_tests(InputStorageType::PushConstant),
-        )
-    });
+fn create_16bit_struct_layout_test() -> Vec<ShaderTest> {
+    let mut tests = Vec::new();
 
-#[gpu_test]
-static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
-    .parameters(
-        TestParameters::default()
-            .features(Features::SHADER_INT64)
-            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
-            .limits(Limits::downlevel_defaults()),
-    )
-    .run_async(|ctx| {
-        shader_input_output_test(
-            ctx,
-            InputStorageType::Storage,
-            create_64bit_struct_layout_tests(),
-        )
-    });
+    fn f16asu16(f32: f32) -> u16 {
+        half::f16::from_f32(f32).to_bits()
+    }
 
-#[gpu_test]
-static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
-    .parameters(
-        TestParameters::default()
-            .features(Features::SHADER_INT64)
-            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
-            .limits(Limits::downlevel_defaults()),
-    )
-    .run_async(|ctx| {
-        shader_input_output_test(
-            ctx,
-            InputStorageType::Storage,
-            create_64bit_struct_layout_tests(),
-        )
-    });
+    // 16 bit alignment tests
+    {
+        let members =
+            "scalar1: f16, scalar2: f16, v3: vec3<f16>, tuck_in: f16, scalar4: f16, larger: u32";
+        let direct = String::from(
+            "\
+            output[0] = u32(input.scalar1);
+            output[1] = u32(input.scalar2);
+            output[2] = u32(input.v3.x);
+            output[3] = u32(input.v3.y);
+            output[4] = u32(input.v3.z);
+            output[5] = u32(input.tuck_in);
+            output[6] = u32(input.scalar4);
+            output[7] = u32(extractBits(input.larger, 0u, 16u));
+            output[8] = u32(extractBits(input.larger, 16u, 16u));
+        ",
+        );
 
-#[gpu_test]
-static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
-    .parameters(
-        TestParameters::default()
-            .features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS)
-            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
-            .limits(Limits {
-                max_push_constant_size: MAX_BUFFER_SIZE as u32,
-                ..Limits::downlevel_defaults()
-            }),
-    )
-    .run_async(|ctx| {
-        shader_input_output_test(
-            ctx,
-            InputStorageType::PushConstant,
-            create_64bit_struct_layout_tests(),
-        )
-    });
+        tests.push(ShaderTest::new(
+            "f16 alignment".into(),
+            members.into(),
+            direct,
+            &[
+                f16asu16(0.0),
+                f16asu16(1.0),
+                f16asu16(2.0),
+                f16asu16(3.0),
+                f16asu16(4.0),
+                f16asu16(5.0),
+                f16asu16(6.0),
+                f16asu16(7.0),
+                f16asu16(8.0),
+                f16asu16(9.0),
+                10_u16,
+                11_u16,
+                // Some extra values to help debug if the test fails.
+                12_u16,
+                13_u16,
+                14_u16,
+                15_u16,
+                16_u16,
+                17_u16,
+                18_u16,
+                19_u16,
+                20_u16,
+            ],
+            &[
+                0, // scalar1
+                1, // scalar2
+                4, 5, 6,  // v3
+                7,  // tuck_in
+                8,  // scalar4
+                10, // larger[0..16]
+                11, // larger[16..32]
+            ],
+        ));
+    }
+
+    // Matrix tests
+    {
+        let members = "m2: mat2x2h, m3: mat3x3h, m4: mat4x4h";
+        let direct = String::from(
+            "\
+            output[0] = u32(input.m2[0].x);
+            output[1] = u32(input.m2[0].y);
+            output[2] = u32(input.m2[1].x);
+            output[3] = u32(input.m2[1].y);
+
+            output[4] = u32(input.m3[0].x);
+            output[5] = u32(input.m3[0].y);
+            output[6] = u32(input.m3[0].z);
+            output[7] = u32(input.m3[1].x);
+            output[8] = u32(input.m3[1].y);
+            output[9] = u32(input.m3[1].z);
+            output[10] = u32(input.m3[2].x);
+            output[11] = u32(input.m3[2].y);
+            output[12] = u32(input.m3[2].z);
+
+            output[13] = u32(input.m4[0].x);
+            output[14] = u32(input.m4[0].y);
+            output[15] = u32(input.m4[0].z);
+            output[16] = u32(input.m4[0].w);
+            output[17] = u32(input.m4[1].x);
+            output[18] = u32(input.m4[1].y);
+            output[19] = u32(input.m4[1].z);
+            output[20] = u32(input.m4[1].w);
+            output[21] = u32(input.m4[2].x);
+            output[22] = u32(input.m4[2].y);
+            output[23] = u32(input.m4[2].z);
+            output[24] = u32(input.m4[2].w);
+            output[25] = u32(input.m4[3].x);
+            output[26] = u32(input.m4[3].y);
+            output[27] = u32(input.m4[3].z);
+            output[28] = u32(input.m4[3].w);
+        ",
+        );
+
+        tests.push(ShaderTest::new(
+            "f16 matrix alignment".into(),
+            members.into(),
+            direct,
+            &(0..32).map(|x| f16asu16(x as f32)).collect::<Vec<_>>(),
+            &[
+                0, 1, // m2[0]
+                2, 3, // m2[1]
+                //
+                4, 5, 6, // m3[0]
+                8, 9, 10, // m3[1]
+                12, 13, 14, // m3[2]
+                //
+                16, 17, 18, 19, // m4[0]
+                20, 21, 22, 23, // m4[1]
+                24, 25, 26, 27, // m4[2]
+                28, 29, 30, 31, // m4[3]
+            ],
+        ));
+    }
+
+    // // Nested struct and array test.
+    // //
+    // // This tries to exploit all the weird edge cases of the struct layout algorithm.
+    // // We dont go as all-out as the other nested struct test because
+    // // all our primitives are twice as wide and we have only so much buffer to spare.
+    // {
+    //     let header = String::from(
+    //         "struct Inner { scalar: u64, scalar32: u32, member: array<vec3<u64>, 2> }",
+    //     );
+    //     let members = String::from("inner: Inner");
+    //     let direct = String::from(
+    //         "\
+
+    //     ",
+    //     );
+
+    //     tests.push(
+    //         ShaderTest::new(
+    //             String::from("nested struct and array"),
+    //             members,
+    //             direct,
+    //             &input_values,
+    //             &[
+    //                 0, 1, // inner.scalar
+    //                 2, // inner.scalar32
+    //                 8, 9, 10, 11, 12, 13, // inner.member[0]
+    //                 16, 17, 18, 19, 20, 21, // inner.member[1]
+    //             ],
+    //         )
+    //         .header(header),
+    //     );
+    // }
+    // {
+    //     let header = String::from("struct Inner { scalar32: u32, scalar: u64, scalar32_2: u32 }");
+    //     let members = String::from("inner: Inner, vector: vec3<i64>");
+    //     let direct = String::from(
+    //         "\
+    //         output[0] = bitcast<u32>(input.inner.scalar32);
+    //         output[1] = u32(bitcast<u64>(input.inner.scalar) & 0xFFFFFFFF);
+    //         output[2] = u32((bitcast<u64>(input.inner.scalar) >> 32) & 0xFFFFFFFF);
+    //         output[3] = bitcast<u32>(input.inner.scalar32_2);
+    //         output[4] = u32(bitcast<u64>(input.vector.x) & 0xFFFFFFFF);
+    //         output[5] = u32((bitcast<u64>(input.vector.x) >> 32) & 0xFFFFFFFF);
+    //         output[6] = u32(bitcast<u64>(input.vector.y) & 0xFFFFFFFF);
+    //         output[7] = u32((bitcast<u64>(input.vector.y) >> 32) & 0xFFFFFFFF);
+    //         output[8] = u32(bitcast<u64>(input.vector.z) & 0xFFFFFFFF);
+    //         output[9] = u32((bitcast<u64>(input.vector.z) >> 32) & 0xFFFFFFFF);
+    //     ",
+    //     );
+
+    //     tests.push(
+    //         ShaderTest::new(
+    //             String::from("nested struct and array"),
+    //             members,
+    //             direct,
+    //             &input_values,
+    //             &[
+    //                 0, // inner.scalar32
+    //                 2, 3, // inner.scalar
+    //                 4, // inner.scalar32_2
+    //                 8, 9, 10, 11, 12, 13, // vector
+    //             ],
+    //         )
+    //         .header(header),
+    //     );
+    // }
+
+    // Insert `enable f16;` header
+    tests
+        .into_iter()
+        .map(|test| test.header("enable f16;".into()))
+        .collect()
+}
diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index 5bb1aad6f2..78fb02566c 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -383,6 +383,10 @@ pub fn create_validator(
         features.contains(wgt::Features::PUSH_CONSTANTS),
     );
     caps.set(Caps::FLOAT64, features.contains(wgt::Features::SHADER_F64));
+    caps.set(
+        Caps::SHADER_FLOAT16,
+        features.contains(wgt::Features::SHADER_F16),
+    );
     caps.set(
         Caps::PRIMITIVE_INDEX,
         features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX),
diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs
index bc5c943d88..34438c6eb9 100644
--- a/wgpu-hal/src/dx12/adapter.rs
+++ b/wgpu-hal/src/dx12/adapter.rs
@@ -1,5 +1,5 @@
 use std::{
-    mem::{size_of, size_of_val},
+    mem::{self, size_of, size_of_val},
     ptr,
     string::String,
     sync::Arc,
@@ -432,6 +432,24 @@ impl super::Adapter {
                 && features1.Int64ShaderOps.as_bool(),
         );
 
+        let float16_supported = {
+            let mut features4: Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS4 =
+                unsafe { mem::zeroed() };
+            let hr = unsafe {
+                device.CheckFeatureSupport(
+                    Direct3D12::D3D12_FEATURE_D3D12_OPTIONS4, // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_feature#syntax
+                    ptr::from_mut(&mut features4).cast(),
+                    size_of::<Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS4>() as _,
+                )
+            };
+            hr.is_ok() && features4.Native16BitShaderOpsSupported.as_bool()
+        };
+
+        features.set(
+            wgt::Features::SHADER_F16,
+            shader_model >= naga::back::hlsl::ShaderModel::V6_2 && float16_supported,
+        );
+
         features.set(
             wgt::Features::TEXTURE_INT64_ATOMIC,
             shader_model >= naga::back::hlsl::ShaderModel::V6_6
@@ -619,7 +637,7 @@ impl crate::Adapter for super::Adapter {
 
     unsafe fn open(
         &self,
-        _features: wgt::Features,
+        features: wgt::Features,
         limits: &wgt::Limits,
         memory_hints: &wgt::MemoryHints,
     ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> {
@@ -640,6 +658,7 @@ impl crate::Adapter for super::Adapter {
         let device = super::Device::new(
             self.device.clone(),
             queue.clone(),
+            features,
             limits,
             memory_hints,
             self.private_caps,
diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index a5fbe283db..9aa8573ad0 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -34,9 +34,11 @@ use crate::{
 const NAGA_LOCATION_SEMANTIC: &[u8] = b"LOC\0";
 
 impl super::Device {
+    #[allow(clippy::too_many_arguments)]
     pub(super) fn new(
         raw: Direct3D12::ID3D12Device,
         present_queue: Direct3D12::ID3D12CommandQueue,
+        features: wgt::Features,
         limits: &wgt::Limits,
         memory_hints: &wgt::MemoryHints,
         private_caps: super::PrivateCapabilities,
@@ -179,6 +181,7 @@ impl super::Device {
                 event: Event::create(false, false)?,
             },
             private_caps,
+            features,
             shared: Arc::new(shared),
             rtv_pool: Mutex::new(rtv_pool),
             dsv_pool: Mutex::new(descriptor::CpuPool::new(
diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index 99e29e87c9..151f109a51 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -629,6 +629,7 @@ pub struct Device {
     present_queue: Direct3D12::ID3D12CommandQueue,
     idler: Idler,
     private_caps: PrivateCapabilities,
+    features: wgt::Features,
     shared: Arc<DeviceShared>,
     // CPU only pools
     rtv_pool: Mutex<descriptor::CpuPool>,
diff --git a/wgpu-hal/src/dx12/shader_compilation.rs b/wgpu-hal/src/dx12/shader_compilation.rs
index f9f3a6b740..e12784605d 100644
--- a/wgpu-hal/src/dx12/shader_compilation.rs
+++ b/wgpu-hal/src/dx12/shader_compilation.rs
@@ -268,7 +268,7 @@ pub(super) fn compile_dxc(
     let raw_ep = OPCWSTR::new(raw_ep);
     let full_stage = OPCWSTR::new(full_stage);
 
-    let mut compile_args = arrayvec::ArrayVec::<PCWSTR, 12>::new_const();
+    let mut compile_args = arrayvec::ArrayVec::<PCWSTR, 13>::new_const();
 
     if let Some(source_name) = source_name.as_ref() {
         compile_args.push(source_name.ptr())
@@ -298,6 +298,10 @@ pub(super) fn compile_dxc(
         compile_args.push(Dxc::DXC_ARG_SKIP_OPTIMIZATIONS);
     }
 
+    if device.features.contains(wgt::Features::SHADER_F16) {
+        compile_args.push(windows::core::w!("-enable-16bit-types"));
+    }
+
     let buffer = Dxc::DxcBuffer {
         Ptr: source.as_ptr().cast(),
         Size: source.len(),
diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index 23b7cdd94b..08cba45ce0 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -378,6 +378,7 @@ impl PhysicalDeviceFeatures {
                     vk::PhysicalDeviceShaderFloat16Int8Features::default().shader_float16(true),
                     vk::PhysicalDevice16BitStorageFeatures::default()
                         .storage_buffer16_bit_access(true)
+                        .storage_input_output16(true)
                         .uniform_and_storage_buffer16_bit_access(true),
                 ))
             } else {
@@ -668,7 +669,8 @@ impl PhysicalDeviceFeatures {
                 F::SHADER_F16,
                 f16_i8.shader_float16 != 0
                     && bit16.storage_buffer16_bit_access != 0
-                    && bit16.uniform_and_storage_buffer16_bit_access != 0,
+                    && bit16.uniform_and_storage_buffer16_bit_access != 0
+                    && bit16.storage_input_output16 != 0,
             );
         }
 
@@ -1846,6 +1848,10 @@ impl super::Adapter {
                 capabilities.push(spv::Capability::Int64);
             }
 
+            if features.contains(wgt::Features::SHADER_F16) {
+                capabilities.push(spv::Capability::Float16);
+            }
+
             if features.intersects(
                 wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS
                     | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX