From 9c0628039c615953d362f45622eaf5428dd71793 Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Wed, 19 Feb 2025 21:50:37 -0500 Subject: [PATCH] Implement F16 Support Co-authored-by: FL33TW00D Co-authored-by: ErichDonGubler --- Cargo.lock | 24 + Cargo.toml | 2 + naga/Cargo.toml | 11 +- naga/src/back/glsl/mod.rs | 3 + naga/src/back/hlsl/writer.rs | 1 + naga/src/back/msl/writer.rs | 33 +- naga/src/back/spv/block.rs | 386 +++++--- naga/src/back/spv/instructions.rs | 4 + naga/src/back/spv/writer.rs | 26 + naga/src/back/wgsl/writer.rs | 45 + naga/src/front/glsl/error.rs | 12 +- naga/src/front/glsl/offset.rs | 24 +- naga/src/front/glsl/types.rs | 5 + naga/src/front/spv/mod.rs | 5 + naga/src/front/wgsl/error.rs | 16 +- naga/src/front/wgsl/lower/mod.rs | 1 + naga/src/front/wgsl/parse/conv.rs | 27 +- .../wgsl/parse/directive/enable_extension.rs | 45 +- naga/src/front/wgsl/parse/lexer.rs | 40 +- naga/src/front/wgsl/parse/mod.rs | 189 +++- naga/src/front/wgsl/parse/number.rs | 23 +- naga/src/lib.rs | 2 + naga/src/proc/constant_evaluator.rs | 91 +- naga/src/proc/mod.rs | 4 + naga/src/proc/type_methods.rs | 4 + naga/src/valid/expression.rs | 2 +- naga/src/valid/interface.rs | 7 +- naga/src/valid/mod.rs | 4 +- naga/src/valid/type.rs | 71 +- naga/tests/in/extra.wgsl | 2 +- naga/tests/in/f16.param.ron | 24 + naga/tests/in/f16.wgsl | 127 +++ naga/tests/in/glsl/f16-glsl.comp | 57 ++ naga/tests/in/glsl/f16-glsl.param.ron | 3 + naga/tests/in/spv/f16-spv.comp | 57 ++ naga/tests/in/spv/f16-spv.param.ron | 3 + naga/tests/in/spv/f16-spv.spv | Bin 0 -> 2268 bytes naga/tests/in/spv/f16-spv.spvasm | 130 +++ naga/tests/out/hlsl/f16.hlsl | 351 +++++++ naga/tests/out/hlsl/f16.ron | 12 + naga/tests/out/msl/extra.msl | 2 +- naga/tests/out/msl/f16.msl | 177 ++++ .../spv/atomicCompareExchange-int64.spvasm | 164 ++-- naga/tests/out/spv/extra.spvasm | 118 ++- naga/tests/out/spv/f16.spvasm | 633 +++++++++++++ naga/tests/out/spv/image.spvasm | 896 +++++++++--------- naga/tests/out/wgsl/extra.wgsl | 2 +- naga/tests/out/wgsl/f16-glsl.comp.wgsl | 47 + naga/tests/out/wgsl/f16-spv.wgsl | 46 + naga/tests/out/wgsl/f16.wgsl | 167 ++++ naga/tests/spirv_capabilities.rs | 5 + naga/tests/wgsl_errors.rs | 109 ++- tests/Cargo.toml | 1 + tests/tests/shader/mod.rs | 4 +- tests/tests/shader/struct_layout.rs | 383 ++++++-- wgpu-core/src/device/mod.rs | 4 + wgpu-hal/src/dx12/adapter.rs | 23 +- wgpu-hal/src/dx12/device.rs | 3 + wgpu-hal/src/dx12/mod.rs | 1 + wgpu-hal/src/dx12/shader_compilation.rs | 6 +- wgpu-hal/src/vulkan/adapter.rs | 8 +- 61 files changed, 3746 insertions(+), 926 deletions(-) create mode 100644 naga/tests/in/f16.param.ron create mode 100644 naga/tests/in/f16.wgsl create mode 100644 naga/tests/in/glsl/f16-glsl.comp create mode 100644 naga/tests/in/glsl/f16-glsl.param.ron create mode 100644 naga/tests/in/spv/f16-spv.comp create mode 100644 naga/tests/in/spv/f16-spv.param.ron create mode 100644 naga/tests/in/spv/f16-spv.spv create mode 100644 naga/tests/in/spv/f16-spv.spvasm create mode 100644 naga/tests/out/hlsl/f16.hlsl create mode 100644 naga/tests/out/hlsl/f16.ron create mode 100644 naga/tests/out/msl/f16.msl create mode 100644 naga/tests/out/spv/f16.spvasm create mode 100644 naga/tests/out/wgsl/f16-glsl.comp.wgsl create mode 100644 naga/tests/out/wgsl/f16-spv.wgsl create mode 100644 naga/tests/out/wgsl/f16.wgsl diff --git a/Cargo.lock b/Cargo.lock index 5e2422cc54..84c28f3c33 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1757,6 +1757,20 @@ dependencies = [ "crunchy", ] +[[package]] +name = "half-2" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c36518ae13d88b7cbdc61401256df5d9fc27921ea66353a16660869b47af8864" +dependencies = [ + "arbitrary", + "bytemuck", + "cfg-if", + "crunchy", + "num-traits", + "serde", +] + [[package]] name = "hashbrown" version = "0.14.5" @@ -2137,6 +2151,12 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + [[package]] name = "libredox" version = "0.1.3" @@ -2337,12 +2357,14 @@ dependencies = [ "codespan-reporting", "diff", "env_logger", + "half-2", "hashbrown", "hexf-parse", "hlsl-snapshots", "indexmap", "itertools 0.13.0", "log", + "num-traits", "petgraph 0.7.1", "pp-rs", "ron", @@ -2517,6 +2539,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -4704,6 +4727,7 @@ dependencies = [ "env_logger", "futures-lite", "glam", + "half-2", "image", "itertools 0.13.0", "js-sys", diff --git a/Cargo.toml b/Cargo.toml index 61d9c74a3a..c86bef44af 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -100,6 +100,8 @@ flume = "0.11" futures-lite = "2" getrandom = "0.2" glam = "0.29" +# TODO: Use `half` directly again after is released upstream. +half = { package = "half-2", version = "2.4.1" } hashbrown = { version = "0.14.5", default-features = false, features = [ "ahash", "inline-more", diff --git a/naga/Cargo.toml b/naga/Cargo.toml index e0f609e787..a84ab9d2ba 100644 --- a/naga/Cargo.toml +++ b/naga/Cargo.toml @@ -41,10 +41,17 @@ msl-out = [] ## If you want to enable MSL output it regardless of the target platform, use `naga/msl-out`. msl-out-if-target-apple = [] -serialize = ["dep:serde", "bitflags/serde", "hashbrown/serde", "indexmap/serde"] +serialize = [ + "dep:serde", + "bitflags/serde", + "half/serde", + "hashbrown/serde", + "indexmap/serde", +] deserialize = [ "dep:serde", "bitflags/serde", + "half/serde", "hashbrown/serde", "indexmap/serde", ] @@ -84,9 +91,11 @@ termcolor = { version = "1.4.1" } # https://github.com/brendanzab/codespan/commit/e99c867339a877731437e7ee6a903a3d03b5439e codespan-reporting = { version = "0.11.0" } hashbrown.workspace = true +half = { workspace = true, features = ["arbitrary", "num-traits"] } rustc-hash.workspace = true indexmap.workspace = true log = "0.4" +num-traits = "0.2" strum = { workspace = true, optional = true } spirv = { version = "0.3", optional = true } thiserror.workspace = true diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs index 0798fac82d..3c93084a2f 100644 --- a/naga/src/back/glsl/mod.rs +++ b/naga/src/back/glsl/mod.rs @@ -2669,6 +2669,9 @@ impl<'a, W: Write> Writer<'a, W> { // decimal part even it's zero which is needed for a valid glsl float constant crate::Literal::F64(value) => write!(self.out, "{value:?}LF")?, crate::Literal::F32(value) => write!(self.out, "{value:?}")?, + crate::Literal::F16(_) => { + return Err(Error::Custom("GLSL has no 16-bit float type".into())); + } // Unsigned integers need a `u` at the end // // While `core` doesn't necessarily need it, it's allowed and since `es` needs it we diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index a8283388ce..eac1cae574 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -2634,6 +2634,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { // decimal part even it's zero crate::Literal::F64(value) => write!(self.out, "{value:?}L")?, crate::Literal::F32(value) => write!(self.out, "{value:?}")?, + crate::Literal::F16(value) => write!(self.out, "{value:?}h")?, crate::Literal::U32(value) => write!(self.out, "{value}u")?, // HLSL has no suffix for explicit i32 literals, but not using any suffix // makes the type ambiguous which prevents overload resolution from diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index bc77a7f6d0..0fe26270f8 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -5,6 +5,8 @@ use crate::{ proc::{self, index, ExpressionKindTracker, NameKey, TypeResolution}, valid, FastHashMap, FastHashSet, }; +use half::f16; +use num_traits::real::Real; #[cfg(test)] use std::ptr; use std::{ @@ -174,9 +176,11 @@ impl Display for TypeContext<'_> { write!(out, "{}::atomic_{}", NAMESPACE, scalar.to_msl_name()) } crate::TypeInner::Vector { size, scalar } => put_numeric_type(out, scalar, &[size]), - crate::TypeInner::Matrix { columns, rows, .. } => { - put_numeric_type(out, crate::Scalar::F32, &[rows, columns]) - } + crate::TypeInner::Matrix { + columns, + rows, + scalar, + } => put_numeric_type(out, scalar, &[rows, columns]), crate::TypeInner::Pointer { base, space } => { let sub = Self { handle: base, @@ -413,8 +417,12 @@ impl crate::Scalar { match self { Self { kind: Sk::Float, - width: _, + width: 4, } => "float", + Self { + kind: Sk::Float, + width: 2, + } => "half", Self { kind: Sk::Sint, width: 4, @@ -471,7 +479,7 @@ fn should_pack_struct_member( match *ty_inner { crate::TypeInner::Vector { size: crate::VectorSize::Tri, - scalar: scalar @ crate::Scalar { width: 4, .. }, + scalar: scalar @ crate::Scalar { width: 4 | 2, .. }, } if is_tight => Some(scalar), _ => None, } @@ -1446,6 +1454,21 @@ impl Writer { crate::Literal::F64(_) => { return Err(Error::CapabilityNotSupported(valid::Capabilities::FLOAT64)) } + crate::Literal::F16(value) => { + if value.is_infinite() { + let sign = if value.is_sign_negative() { "-" } else { "" }; + write!(self.out, "{sign}INFINITY")?; + } else if value.is_nan() { + write!(self.out, "NAN")?; + } else { + let suffix = if value.fract() == f16::from_f32(0.0) { + ".0h" + } else { + "h" + }; + write!(self.out, "{value}{suffix}")?; + } + } crate::Literal::F32(value) => { if value.is_infinite() { let sign = if value.is_sign_negative() { "-" } else { "" }; diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 6b26c1c2aa..9c85608b70 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -8,6 +8,7 @@ use super::{ WriterFlags, }; use crate::{arena::Handle, proc::index::GuardedIndex, Statement}; +use arrayvec::ArrayVec; use spirv::Word; fn get_dimension(type_inner: &crate::TypeInner) -> Dimension { @@ -1476,159 +1477,7 @@ impl BlockContext<'_> { expr, kind, convert, - } => { - use crate::ScalarKind as Sk; - - let expr_id = self.cached[expr]; - let (src_scalar, src_size, is_matrix) = - match *self.fun_info[expr].ty.inner_with(&self.ir_module.types) { - crate::TypeInner::Scalar(scalar) => (scalar, None, false), - crate::TypeInner::Vector { scalar, size } => (scalar, Some(size), false), - crate::TypeInner::Matrix { scalar, .. } => (scalar, None, true), - ref other => { - log::error!("As source {:?}", other); - return Err(Error::Validation("Unexpected Expression::As source")); - } - }; - - enum Cast { - Identity, - Unary(spirv::Op), - Binary(spirv::Op, Word), - Ternary(spirv::Op, Word, Word), - } - - let cast = if is_matrix { - // we only support identity casts for matrices - Cast::Unary(spirv::Op::CopyObject) - } else { - match (src_scalar.kind, kind, convert) { - // Filter out identity casts. Some Adreno drivers are - // confused by no-op OpBitCast instructions. - (src_kind, kind, convert) - if src_kind == kind - && convert.filter(|&width| width != src_scalar.width).is_none() => - { - Cast::Identity - } - (Sk::Bool, Sk::Bool, _) => Cast::Unary(spirv::Op::CopyObject), - (_, _, None) => Cast::Unary(spirv::Op::Bitcast), - // casting to a bool - generate `OpXxxNotEqual` - (_, Sk::Bool, Some(_)) => { - let op = match src_scalar.kind { - Sk::Sint | Sk::Uint => spirv::Op::INotEqual, - Sk::Float => spirv::Op::FUnordNotEqual, - Sk::Bool | Sk::AbstractInt | Sk::AbstractFloat => unreachable!(), - }; - let zero_scalar_id = - self.writer.get_constant_scalar_with(0, src_scalar)?; - let zero_id = match src_size { - Some(size) => { - let ty = LocalType::Numeric(NumericType::Vector { - size, - scalar: src_scalar, - }) - .into(); - - self.temp_list.clear(); - self.temp_list.resize(size as _, zero_scalar_id); - - self.writer.get_constant_composite(ty, &self.temp_list) - } - None => zero_scalar_id, - }; - - Cast::Binary(op, zero_id) - } - // casting from a bool - generate `OpSelect` - (Sk::Bool, _, Some(dst_width)) => { - let dst_scalar = crate::Scalar { - kind, - width: dst_width, - }; - let zero_scalar_id = - self.writer.get_constant_scalar_with(0, dst_scalar)?; - let one_scalar_id = - self.writer.get_constant_scalar_with(1, dst_scalar)?; - let (accept_id, reject_id) = match src_size { - Some(size) => { - let ty = LocalType::Numeric(NumericType::Vector { - size, - scalar: dst_scalar, - }) - .into(); - - self.temp_list.clear(); - self.temp_list.resize(size as _, zero_scalar_id); - - let vec0_id = - self.writer.get_constant_composite(ty, &self.temp_list); - - self.temp_list.fill(one_scalar_id); - - let vec1_id = - self.writer.get_constant_composite(ty, &self.temp_list); - - (vec1_id, vec0_id) - } - None => (one_scalar_id, zero_scalar_id), - }; - - Cast::Ternary(spirv::Op::Select, accept_id, reject_id) - } - (Sk::Float, Sk::Uint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToU), - (Sk::Float, Sk::Sint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToS), - (Sk::Float, Sk::Float, Some(dst_width)) - if src_scalar.width != dst_width => - { - Cast::Unary(spirv::Op::FConvert) - } - (Sk::Sint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertSToF), - (Sk::Sint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => { - Cast::Unary(spirv::Op::SConvert) - } - (Sk::Uint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertUToF), - (Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => { - Cast::Unary(spirv::Op::UConvert) - } - (Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => { - Cast::Unary(spirv::Op::SConvert) - } - (Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => { - Cast::Unary(spirv::Op::UConvert) - } - // We assume it's either an identity cast, or int-uint. - _ => Cast::Unary(spirv::Op::Bitcast), - } - }; - - let id = self.gen_id(); - let instruction = match cast { - Cast::Identity => None, - Cast::Unary(op) => Some(Instruction::unary(op, result_type_id, id, expr_id)), - Cast::Binary(op, operand) => Some(Instruction::binary( - op, - result_type_id, - id, - expr_id, - operand, - )), - Cast::Ternary(op, op1, op2) => Some(Instruction::ternary( - op, - result_type_id, - id, - expr_id, - op1, - op2, - )), - }; - if let Some(instruction) = instruction { - block.body.push(instruction); - id - } else { - expr_id - } - } + } => self.write_as_expression(expr, convert, kind, block, result_type_id)?, crate::Expression::ImageLoad { image, coordinate, @@ -1784,6 +1633,237 @@ impl BlockContext<'_> { Ok(()) } + /// Helper which focuses on generating the `As` expressions and the various conversions + /// that need to happen because of that. + fn write_as_expression( + &mut self, + expr: Handle, + convert: Option, + kind: crate::ScalarKind, + + block: &mut Block, + result_type_id: u32, + ) -> Result { + use crate::ScalarKind as Sk; + let expr_id = self.cached[expr]; + let ty = self.fun_info[expr].ty.inner_with(&self.ir_module.types); + + // Matrix casts needs special treatment in SPIR-V, as the cast functions + // can take vectors or scalars, but not matrices. In order to cast a matrix + // we need to cast each column of the matrix individually and construct a new + // matrix from the converted columns. + if let crate::TypeInner::Matrix { + columns, + rows, + scalar, + } = *ty + { + let Some(convert) = convert else { + // No conversion needs to be done, passes through. + return Ok(expr_id); + }; + + if convert == scalar.width { + // No conversion needs to be done, passes through. + return Ok(expr_id); + } + + if kind != Sk::Float { + // Only float conversions are supported for matrices. + return Err(Error::Validation("Matrices must be floats")); + } + + // Type of each extracted column + let column_src_ty = + self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector { + size: rows, + scalar, + }))); + + // Type of the column after conversion + let column_dst_ty = + self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector { + size: rows, + scalar: crate::Scalar { + kind, + width: convert, + }, + }))); + + let mut components = ArrayVec::::new(); + + for column in 0..columns as usize { + let column_id = self.gen_id(); + block.body.push(Instruction::composite_extract( + column_src_ty, + column_id, + expr_id, + &[column as u32], + )); + + let column_conv_id = self.gen_id(); + block.body.push(Instruction::unary( + spirv::Op::FConvert, + column_dst_ty, + column_conv_id, + column_id, + )); + + components.push(column_conv_id); + } + + let construct_id = self.gen_id(); + + block.body.push(Instruction::composite_construct( + result_type_id, + construct_id, + &components, + )); + + return Ok(construct_id); + } + + let (src_scalar, src_size) = match *ty { + crate::TypeInner::Scalar(scalar) => (scalar, None), + crate::TypeInner::Vector { scalar, size } => (scalar, Some(size)), + ref other => { + log::error!("As source {:?}", other); + return Err(Error::Validation("Unexpected Expression::As source")); + } + }; + + enum Cast { + Identity, + Unary(spirv::Op), + Binary(spirv::Op, Word), + Ternary(spirv::Op, Word, Word), + } + let cast = match (src_scalar.kind, kind, convert) { + // Filter out identity casts. Some Adreno drivers are + // confused by no-op OpBitCast instructions. + (src_kind, kind, convert) + if src_kind == kind + && convert.filter(|&width| width != src_scalar.width).is_none() => + { + Cast::Identity + } + (Sk::Bool, Sk::Bool, _) => Cast::Unary(spirv::Op::CopyObject), + (_, _, None) => Cast::Unary(spirv::Op::Bitcast), + // casting to a bool - generate `OpXxxNotEqual` + (_, Sk::Bool, Some(_)) => { + let op = match src_scalar.kind { + Sk::Sint | Sk::Uint => spirv::Op::INotEqual, + Sk::Float => spirv::Op::FUnordNotEqual, + Sk::Bool | Sk::AbstractInt | Sk::AbstractFloat => unreachable!(), + }; + let zero_scalar_id = self.writer.get_constant_scalar_with(0, src_scalar)?; + let zero_id = match src_size { + Some(size) => { + let ty = LocalType::Numeric(NumericType::Vector { + size, + scalar: src_scalar, + }) + .into(); + + self.temp_list.clear(); + self.temp_list.resize(size as _, zero_scalar_id); + + self.writer.get_constant_composite(ty, &self.temp_list) + } + None => zero_scalar_id, + }; + + Cast::Binary(op, zero_id) + } + // casting from a bool - generate `OpSelect` + (Sk::Bool, _, Some(dst_width)) => { + let dst_scalar = crate::Scalar { + kind, + width: dst_width, + }; + let zero_scalar_id = self.writer.get_constant_scalar_with(0, dst_scalar)?; + let one_scalar_id = self.writer.get_constant_scalar_with(1, dst_scalar)?; + let (accept_id, reject_id) = match src_size { + Some(size) => { + let ty = LocalType::Numeric(NumericType::Vector { + size, + scalar: dst_scalar, + }) + .into(); + + self.temp_list.clear(); + self.temp_list.resize(size as _, zero_scalar_id); + + let vec0_id = self.writer.get_constant_composite(ty, &self.temp_list); + + self.temp_list.fill(one_scalar_id); + + let vec1_id = self.writer.get_constant_composite(ty, &self.temp_list); + + (vec1_id, vec0_id) + } + None => (one_scalar_id, zero_scalar_id), + }; + + Cast::Ternary(spirv::Op::Select, accept_id, reject_id) + } + (Sk::Float, Sk::Uint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToU), + (Sk::Float, Sk::Sint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToS), + (Sk::Float, Sk::Float, Some(dst_width)) if src_scalar.width != dst_width => { + Cast::Unary(spirv::Op::FConvert) + } + (Sk::Sint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertSToF), + (Sk::Sint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => { + Cast::Unary(spirv::Op::SConvert) + } + (Sk::Uint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertUToF), + (Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => { + Cast::Unary(spirv::Op::UConvert) + } + (Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => { + Cast::Unary(spirv::Op::SConvert) + } + (Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => { + Cast::Unary(spirv::Op::UConvert) + } + // We assume it's either an identity cast, or int-uint. + _ => Cast::Unary(spirv::Op::Bitcast), + }; + Ok(match cast { + Cast::Identity => expr_id, + Cast::Unary(op) => { + let id = self.gen_id(); + block + .body + .push(Instruction::unary(op, result_type_id, id, expr_id)); + id + } + Cast::Binary(op, operand) => { + let id = self.gen_id(); + block.body.push(Instruction::binary( + op, + result_type_id, + id, + expr_id, + operand, + )); + id + } + Cast::Ternary(op, op1, op2) => { + let id = self.gen_id(); + block.body.push(Instruction::ternary( + op, + result_type_id, + id, + expr_id, + op1, + op2, + )); + id + } + }) + } + /// Build an `OpAccessChain` instruction. /// /// Emit any needed bounds-checking expressions to `block`. diff --git a/naga/src/back/spv/instructions.rs b/naga/src/back/spv/instructions.rs index 57bfb4e9f3..bb4b4f98c1 100644 --- a/naga/src/back/spv/instructions.rs +++ b/naga/src/back/spv/instructions.rs @@ -406,6 +406,10 @@ impl super::Instruction { instruction } + pub(super) fn constant_16bit(result_type_id: Word, id: Word, low: Word) -> Self { + Self::constant(result_type_id, id, &[low]) + } + pub(super) fn constant_32bit(result_type_id: Word, id: Word, value: Word) -> Self { Self::constant(result_type_id, id, &[value]) } diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index cc0c227bec..56f9985790 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -1049,6 +1049,15 @@ impl Writer { if bits == 64 { self.capabilities_used.insert(spirv::Capability::Float64); } + if bits == 16 { + self.capabilities_used.insert(spirv::Capability::Float16); + self.capabilities_used + .insert(spirv::Capability::StorageBuffer16BitAccess); + self.capabilities_used + .insert(spirv::Capability::UniformAndStorageBuffer16BitAccess); + self.capabilities_used + .insert(spirv::Capability::StorageInputOutput16); + } Instruction::type_float(id, bits) } Sk::Bool => Instruction::type_bool(id), @@ -1117,6 +1126,19 @@ impl Writer { )?; self.use_extension("SPV_EXT_shader_atomic_float_add"); } + // 16 bit floating-point support requires Float16 capability + crate::TypeInner::Matrix { + scalar: crate::Scalar::F16, + .. + } + | crate::TypeInner::Vector { + scalar: crate::Scalar::F16, + .. + } + | crate::TypeInner::Scalar(crate::Scalar::F16) => { + self.require_any("16 bit floating-point", &[spirv::Capability::Float16])?; + self.use_extension("SPV_KHR_16bit_storage"); + } _ => {} } Ok(()) @@ -1410,6 +1432,10 @@ impl Writer { Instruction::constant_64bit(type_id, id, bits as u32, (bits >> 32) as u32) } crate::Literal::F32(value) => Instruction::constant_32bit(type_id, id, value.to_bits()), + crate::Literal::F16(value) => { + let low = value.to_bits(); + Instruction::constant_16bit(type_id, id, low as u32) + } crate::Literal::U32(value) => Instruction::constant_32bit(type_id, id, value), crate::Literal::I32(value) => Instruction::constant_32bit(type_id, id, value as u32), crate::Literal::U64(value) => { diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs index a7cd8f95c9..cbcf107985 100644 --- a/naga/src/back/wgsl/writer.rs +++ b/naga/src/back/wgsl/writer.rs @@ -1,3 +1,5 @@ +use hashbrown::HashSet; + use super::Error; use crate::back::wgsl::polyfill::InversePolyfill; use crate::{ @@ -125,6 +127,9 @@ impl Writer { } } + // Write all `enable` declarations + self.write_enable_declarations(module)?; + // Write all structs for (handle, ty) in module.types.iter() { if let TypeInner::Struct { ref members, .. } = ty.inner { @@ -217,6 +222,41 @@ impl Writer { Ok(()) } + /// Helper method which writes all the `enable` declarations + /// needed for a module. + fn write_enable_declarations(&mut self, module: &Module) -> BackendResult { + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + enum WrittenDeclarations { + F16, + } + + let mut written_declarations = HashSet::new(); + + // Write all the `enable` declarations + for (_, ty) in module.types.iter() { + match ty.inner { + TypeInner::Scalar(scalar) + | TypeInner::Vector { scalar, .. } + | TypeInner::Matrix { scalar, .. } => { + if scalar == crate::Scalar::F16 + && !written_declarations.contains(&WrittenDeclarations::F16) + { + writeln!(self.out, "enable f16;")?; + written_declarations.insert(WrittenDeclarations::F16); + } + } + _ => {} + } + } + + if !written_declarations.is_empty() { + // Empty line for readability + writeln!(self.out)?; + } + + Ok(()) + } + /// Helper method used to write struct name /// /// # Notes @@ -1262,6 +1302,7 @@ impl Writer { match expressions[expr] { Expression::Literal(literal) => match literal { + crate::Literal::F16(value) => write!(self.out, "{value}h")?, crate::Literal::F32(value) => write!(self.out, "{value}f")?, crate::Literal::U32(value) => write!(self.out, "{value}u")?, crate::Literal::I32(value) => { @@ -2026,6 +2067,10 @@ const fn scalar_kind_str(scalar: crate::Scalar) -> &'static str { kind: Sk::Float, width: 4, } => "f32", + Scalar { + kind: Sk::Float, + width: 2, + } => "f16", Scalar { kind: Sk::Sint, width: 4, diff --git a/naga/src/front/glsl/error.rs b/naga/src/front/glsl/error.rs index 92962db00d..201ad5333d 100644 --- a/naga/src/front/glsl/error.rs +++ b/naga/src/front/glsl/error.rs @@ -102,9 +102,15 @@ pub enum ErrorKind { /// Unsupported matrix of the form matCx2 /// /// Our IR expects matrices of the form matCx2 to have a stride of 8 however - /// matrices in the std140 layout have a stride of at least 16 - #[error("unsupported matrix of the form matCx2 in std140 block layout")] - UnsupportedMatrixTypeInStd140, + /// matrices in the std140 layout have a stride of at least 16. + #[error("unsupported matrix of the form matCx2 (in this case mat{columns}x2) in std140 block layout. See https://github.com/gfx-rs/wgpu/issues/4375")] + UnsupportedMatrixWithTwoRowsInStd140 { columns: u8 }, + /// Unsupported matrix of the form f16matCxR + /// + /// Our IR expects matrices of the form f16matCxR to have a stride of 4/8/8 depending on row-count, + /// however matrices in the std140 layout have a stride of at least 16. + #[error("unsupported matrix of the form f16matCxR (in this case f16mat{columns}x{rows}) in std140 block layout. See https://github.com/gfx-rs/wgpu/issues/4375")] + UnsupportedF16MatrixInStd140 { columns: u8, rows: u8 }, /// A variable with the same name already exists in the current scope. #[error("Variable already declared: {0}")] VariableAlreadyDeclared(String), diff --git a/naga/src/front/glsl/offset.rs b/naga/src/front/glsl/offset.rs index 6e8d5ada10..d3d29ca428 100644 --- a/naga/src/front/glsl/offset.rs +++ b/naga/src/front/glsl/offset.rs @@ -120,11 +120,25 @@ pub fn calculate_offset( } // See comment on the error kind - if StructLayout::Std140 == layout && rows == crate::VectorSize::Bi { - errors.push(Error { - kind: ErrorKind::UnsupportedMatrixTypeInStd140, - meta, - }); + if StructLayout::Std140 == layout { + // Do the f16 test first, as it's more specific + if scalar == Scalar::F16 { + errors.push(Error { + kind: ErrorKind::UnsupportedF16MatrixInStd140 { + columns: columns as u8, + rows: rows as u8, + }, + meta, + }); + } + if rows == crate::VectorSize::Bi { + errors.push(Error { + kind: ErrorKind::UnsupportedMatrixWithTwoRowsInStd140 { + columns: columns as u8, + }, + meta, + }); + } } (align, align * columns as u32) diff --git a/naga/src/front/glsl/types.rs b/naga/src/front/glsl/types.rs index ad5e188fd9..d4bd5a69fa 100644 --- a/naga/src/front/glsl/types.rs +++ b/naga/src/front/glsl/types.rs @@ -10,6 +10,10 @@ pub fn parse_type(type_name: &str) -> Option { name: None, inner: TypeInner::Scalar(Scalar::BOOL), }), + "float16_t" => Some(Type { + name: None, + inner: TypeInner::Scalar(Scalar::F16), + }), "float" => Some(Type { name: None, inner: TypeInner::Scalar(Scalar::F32), @@ -40,6 +44,7 @@ pub fn parse_type(type_name: &str) -> Option { "i" => Scalar::I32, "u" => Scalar::U32, "d" => Scalar::F64, + "f16" => Scalar::F16, _ => return None, }) } diff --git a/naga/src/front/spv/mod.rs b/naga/src/front/spv/mod.rs index b8087fc8b0..473011dbf2 100644 --- a/naga/src/front/spv/mod.rs +++ b/naga/src/front/spv/mod.rs @@ -36,6 +36,7 @@ mod null; use convert::*; pub use error::Error; use function::*; +use half::f16; use crate::{ arena::{Arena, Handle, UniqueArena}, @@ -80,6 +81,7 @@ pub const SUPPORTED_EXTENSIONS: &[&str] = &[ "SPV_KHR_vulkan_memory_model", "SPV_KHR_multiview", "SPV_EXT_shader_atomic_float_add", + "SPV_KHR_16bit_storage", ]; pub const SUPPORTED_EXT_SETS: &[&str] = &["GLSL.std.450"]; @@ -5598,6 +5600,9 @@ impl> Frontend { }) => { let low = self.next()?; match width { + // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#Literal + // If a numeric type’s bit width is less than 32-bits, the value appears in the low-order bits of the word. + 2 => crate::Literal::F16(f16::from_bits(low as u16)), 4 => crate::Literal::F32(f32::from_bits(low)), 8 => { inst.expect(5)?; diff --git a/naga/src/front/wgsl/error.rs b/naga/src/front/wgsl/error.rs index 7bdbf12d2c..504c0a1f90 100644 --- a/naga/src/front/wgsl/error.rs +++ b/naga/src/front/wgsl/error.rs @@ -146,8 +146,6 @@ pub enum NumberError { Invalid, #[error("numeric literal not representable by target type")] NotRepresentable, - #[error("unimplemented f16 type")] - UnimplementedF16, } #[derive(Copy, Clone, Debug, PartialEq)] @@ -1008,19 +1006,18 @@ impl<'a> Error<'a> { )], }, Error::EnableExtensionNotEnabled { kind, span } => ParseError { - message: format!("`{}` enable-extension is not enabled", kind.to_ident()), + message: format!("the `{}` language extension is not enabled", kind.to_ident()), labels: vec![( span, format!( concat!( - "the `{}` enable-extension is needed for this functionality, ", - "but it is not currently enabled" + "the `{0}` language extension is needed for this functionality, ", + "but it is not currently enabled." ), kind.to_ident() ) .into(), )], - #[allow(irrefutable_let_patterns)] notes: if let EnableExtension::Unimplemented(kind) = kind { vec![format!( concat!( @@ -1032,7 +1029,12 @@ impl<'a> Error<'a> { kind.tracking_issue_num() )] } else { - vec![] + vec![ + format!( + "You can enable this extension by adding `enable {};` at the top of the shader.", + kind.to_ident() + ), + ] }, }, Error::LanguageExtensionNotYetImplemented { kind, span } => ParseError { diff --git a/naga/src/front/wgsl/lower/mod.rs b/naga/src/front/wgsl/lower/mod.rs index 7cd3ef90f8..14b969a685 100644 --- a/naga/src/front/wgsl/lower/mod.rs +++ b/naga/src/front/wgsl/lower/mod.rs @@ -1886,6 +1886,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { let expr: Typed = match *expr { ast::Expression::Literal(literal) => { let literal = match literal { + ast::Literal::Number(Number::F16(f)) => crate::Literal::F16(f), ast::Literal::Number(Number::F32(f)) => crate::Literal::F32(f), ast::Literal::Number(Number::I32(i)) => crate::Literal::I32(i), ast::Literal::Number(Number::U32(u)) => crate::Literal::U32(u), diff --git a/naga/src/front/wgsl/parse/conv.rs b/naga/src/front/wgsl/parse/conv.rs index 00c19d877d..1e07ff39e6 100644 --- a/naga/src/front/wgsl/parse/conv.rs +++ b/naga/src/front/wgsl/parse/conv.rs @@ -1,4 +1,7 @@ use super::Error; +use crate::front::wgsl::parse::directive::enable_extension::{ + EnableExtensions, ImplementedEnableExtension, +}; use crate::front::wgsl::Scalar; use crate::Span; @@ -112,10 +115,17 @@ pub fn map_storage_format(word: &str, span: Span) -> Result Option { +pub fn get_scalar_type( + enable_extensions: &EnableExtensions, + span: Span, + word: &str, +) -> Result, Error<'static>> { use crate::ScalarKind as Sk; - match word { - // "f16" => Some(Scalar { kind: Sk::Float, width: 2 }), + let scalar = match word { + "f16" => Some(Scalar { + kind: Sk::Float, + width: 2, + }), "f32" => Some(Scalar { kind: Sk::Float, width: 4, @@ -145,7 +155,18 @@ pub fn get_scalar_type(word: &str) -> Option { width: crate::BOOL_WIDTH, }), _ => None, + }; + + if matches!(scalar, Some(Scalar::F16)) + && !enable_extensions.contains(ImplementedEnableExtension::F16) + { + return Err(Error::EnableExtensionNotEnabled { + span, + kind: ImplementedEnableExtension::F16.into(), + }); } + + Ok(scalar) } pub fn map_derivative(word: &str) -> Option<(crate::DerivativeAxis, crate::DerivativeControl)> { diff --git a/naga/src/front/wgsl/parse/directive/enable_extension.rs b/naga/src/front/wgsl/parse/directive/enable_extension.rs index 147ec0b5e0..3efa3b5283 100644 --- a/naga/src/front/wgsl/parse/directive/enable_extension.rs +++ b/naga/src/front/wgsl/parse/directive/enable_extension.rs @@ -5,24 +5,29 @@ use crate::{front::wgsl::error::Error, Span}; /// Tracks the status of every enable-extension known to Naga. #[derive(Clone, Debug, Eq, PartialEq)] -pub struct EnableExtensions {} +pub struct EnableExtensions { + /// Whether `enable f16;` was written earlier in the shader module. + f16: bool, +} impl EnableExtensions { pub(crate) const fn empty() -> Self { - Self {} + Self { f16: false } } /// Add an enable-extension to the set requested by a module. - #[allow(unreachable_code)] pub(crate) fn add(&mut self, ext: ImplementedEnableExtension) { - let _field: &mut bool = match ext {}; - *_field = true; + let field = match ext { + ImplementedEnableExtension::F16 => &mut self.f16, + }; + *field = true; } /// Query whether an enable-extension tracked here has been requested. - #[allow(unused)] pub(crate) const fn contains(&self, ext: ImplementedEnableExtension) -> bool { - match ext {} + match ext { + ImplementedEnableExtension::F16 => self.f16, + } } } @@ -37,7 +42,6 @@ impl Default for EnableExtensions { /// WGSL spec.: #[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] pub enum EnableExtension { - #[allow(unused)] Implemented(ImplementedEnableExtension), Unimplemented(UnimplementedEnableExtension), } @@ -50,7 +54,7 @@ impl EnableExtension { /// Convert from a sentinel word in WGSL into its associated [`EnableExtension`], if possible. pub(crate) fn from_ident(word: &str, span: Span) -> Result> { Ok(match word { - Self::F16 => Self::Unimplemented(UnimplementedEnableExtension::F16), + Self::F16 => Self::Implemented(ImplementedEnableExtension::F16), Self::CLIP_DISTANCES => { Self::Unimplemented(UnimplementedEnableExtension::ClipDistances) } @@ -64,9 +68,10 @@ impl EnableExtension { /// Maps this [`EnableExtension`] into the sentinel word associated with it in WGSL. pub const fn to_ident(self) -> &'static str { match self { - Self::Implemented(kind) => match kind {}, + Self::Implemented(kind) => match kind { + ImplementedEnableExtension::F16 => Self::F16, + }, Self::Unimplemented(kind) => match kind { - UnimplementedEnableExtension::F16 => Self::F16, UnimplementedEnableExtension::ClipDistances => Self::CLIP_DISTANCES, UnimplementedEnableExtension::DualSourceBlending => Self::DUAL_SOURCE_BLENDING, }, @@ -76,17 +81,24 @@ impl EnableExtension { /// A variant of [`EnableExtension::Implemented`]. #[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] -pub enum ImplementedEnableExtension {} - -/// A variant of [`EnableExtension::Unimplemented`]. -#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] -pub enum UnimplementedEnableExtension { +pub enum ImplementedEnableExtension { /// Enables `f16`/`half` primitive support in all shader languages. /// /// In the WGSL standard, this corresponds to [`enable f16;`]. /// /// [`enable f16;`]: https://www.w3.org/TR/WGSL/#extension-f16 F16, +} + +impl From for EnableExtension { + fn from(value: ImplementedEnableExtension) -> Self { + Self::Implemented(value) + } +} + +/// A variant of [`EnableExtension::Unimplemented`]. +#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] +pub enum UnimplementedEnableExtension { /// Enables the `clip_distances` variable in WGSL. /// /// In the WGSL standard, this corresponds to [`enable clip_distances;`]. @@ -104,7 +116,6 @@ pub enum UnimplementedEnableExtension { impl UnimplementedEnableExtension { pub(crate) const fn tracking_issue_num(self) -> u16 { match self { - Self::F16 => 4384, Self::ClipDistances => 6236, Self::DualSourceBlending => 6402, } diff --git a/naga/src/front/wgsl/parse/lexer.rs b/naga/src/front/wgsl/parse/lexer.rs index d55720972e..8d64f52ac6 100644 --- a/naga/src/front/wgsl/parse/lexer.rs +++ b/naga/src/front/wgsl/parse/lexer.rs @@ -408,14 +408,17 @@ impl<'a> Lexer<'a> { /// Parses a generic scalar type, for example ``. pub(in crate::front::wgsl) fn next_scalar_generic(&mut self) -> Result> { self.expect_generic_paren('<')?; - let pair = match self.next() { + let (scalar, _span) = match self.next() { (Token::Word(word), span) => { - conv::get_scalar_type(word).ok_or(Error::UnknownScalarType(span)) + conv::get_scalar_type(&self.enable_extensions, span, word)? + .map(|scalar| (scalar, span)) + .ok_or(Error::UnknownScalarType(span))? } - (_, span) => Err(Error::UnknownScalarType(span)), - }?; + (_, span) => return Err(Error::UnknownScalarType(span)), + }; + self.expect_generic_paren('>')?; - Ok(pair) + Ok(scalar) } /// Parses a generic scalar type, for example ``. @@ -425,14 +428,18 @@ impl<'a> Lexer<'a> { &mut self, ) -> Result<(Scalar, Span), Error<'a>> { self.expect_generic_paren('<')?; - let pair = match self.next() { - (Token::Word(word), span) => conv::get_scalar_type(word) - .map(|scalar| (scalar, span)) - .ok_or(Error::UnknownScalarType(span)), - (_, span) => Err(Error::UnknownScalarType(span)), - }?; + + let (scalar, span) = match self.next() { + (Token::Word(word), span) => { + conv::get_scalar_type(&self.enable_extensions, span, word)? + .map(|scalar| (scalar, span)) + .ok_or(Error::UnknownScalarType(span))? + } + (_, span) => return Err(Error::UnknownScalarType(span)), + }; + self.expect_generic_paren('>')?; - Ok(pair) + Ok((scalar, span)) } pub(in crate::front::wgsl) fn next_storage_access( @@ -493,6 +500,7 @@ fn sub_test(source: &str, expected_tokens: &[Token]) { #[test] fn test_numbers() { + use half::f16; // WGSL spec examples // // decimal integer @@ -517,14 +525,16 @@ fn test_numbers() { Token::Number(Ok(Number::AbstractFloat(0.01))), Token::Number(Ok(Number::AbstractFloat(12.34))), Token::Number(Ok(Number::F32(0.))), - Token::Number(Err(NumberError::UnimplementedF16)), + Token::Number(Ok(Number::F16(f16::from_f32(0.)))), Token::Number(Ok(Number::AbstractFloat(0.001))), Token::Number(Ok(Number::AbstractFloat(43.75))), Token::Number(Ok(Number::F32(16.))), Token::Number(Ok(Number::AbstractFloat(0.1875))), - Token::Number(Err(NumberError::UnimplementedF16)), + // https://github.com/gfx-rs/wgpu/issues/7046 + Token::Number(Err(NumberError::NotRepresentable)), // Should be 0.75 Token::Number(Ok(Number::AbstractFloat(0.12109375))), - Token::Number(Err(NumberError::UnimplementedF16)), + // https://github.com/gfx-rs/wgpu/issues/7046 + Token::Number(Err(NumberError::NotRepresentable)), // Should be 12.5 ], ); diff --git a/naga/src/front/wgsl/parse/mod.rs b/naga/src/front/wgsl/parse/mod.rs index 23b2984e75..f4115014b8 100644 --- a/naga/src/front/wgsl/parse/mod.rs +++ b/naga/src/front/wgsl/parse/mod.rs @@ -3,9 +3,7 @@ use crate::diagnostic_filter::{ ShouldConflictOnFullDuplicate, StandardFilterableTriggeringRule, }; use crate::front::wgsl::error::{DiagnosticAttributeNotSupportedPosition, Error, ExpectedToken}; -use crate::front::wgsl::parse::directive::enable_extension::{ - EnableExtension, EnableExtensions, UnimplementedEnableExtension, -}; +use crate::front::wgsl::parse::directive::enable_extension::{EnableExtension, EnableExtensions}; use crate::front::wgsl::parse::directive::language_extension::LanguageExtension; use crate::front::wgsl::parse::directive::DirectiveKind; use crate::front::wgsl::parse::lexer::{Lexer, Token}; @@ -346,7 +344,7 @@ impl Parser { span: Span, ctx: &mut ExpressionContext<'a, '_, '_>, ) -> Result>, Error<'a>> { - if let Some(scalar) = conv::get_scalar_type(word) { + if let Some(scalar) = conv::get_scalar_type(&lexer.enable_extensions, span, word)? { return Ok(Some(ast::ConstructorType::Scalar(scalar))); } @@ -375,6 +373,13 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "vec2h" => { + return Ok(Some(ast::ConstructorType::Vector { + size: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "vec3" => ast::ConstructorType::PartialVector { size: crate::VectorSize::Tri, }, @@ -399,6 +404,13 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "vec3h" => { + return Ok(Some(ast::ConstructorType::Vector { + size: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "vec4" => ast::ConstructorType::PartialVector { size: crate::VectorSize::Quad, }, @@ -423,6 +435,13 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "vec4h" => { + return Ok(Some(ast::ConstructorType::Vector { + size: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat2x2" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Bi, rows: crate::VectorSize::Bi, @@ -435,6 +454,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat2x2h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Bi, + rows: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat2x3" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Bi, rows: crate::VectorSize::Tri, @@ -447,6 +474,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat2x3h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Bi, + rows: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat2x4" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Bi, rows: crate::VectorSize::Quad, @@ -459,6 +494,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat2x4h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Bi, + rows: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat3x2" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Tri, rows: crate::VectorSize::Bi, @@ -471,6 +514,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat3x2h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Tri, + rows: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat3x3" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Tri, rows: crate::VectorSize::Tri, @@ -483,6 +534,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat3x3h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Tri, + rows: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat3x4" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Tri, rows: crate::VectorSize::Quad, @@ -495,6 +554,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat3x4h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Tri, + rows: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat4x2" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Quad, rows: crate::VectorSize::Bi, @@ -507,6 +574,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat4x2h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat4x3" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Quad, rows: crate::VectorSize::Tri, @@ -519,6 +594,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat4x3h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat4x4" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Quad, rows: crate::VectorSize::Quad, @@ -531,6 +614,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat4x4h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "array" => ast::ConstructorType::PartialArray, "atomic" | "binding_array" @@ -725,15 +816,17 @@ impl Parser { } (Token::Number(res), span) => { let _ = lexer.next(); - let num = res.map_err(|err| match err { - super::error::NumberError::UnimplementedF16 => { - Error::EnableExtensionNotEnabled { - kind: EnableExtension::Unimplemented(UnimplementedEnableExtension::F16), + let num = res.map_err(|err| Error::BadNumber(span, err))?; + + if let Some(enable_extension) = num.required_enable_extension() { + if !lexer.enable_extensions.contains(enable_extension) { + return Err(Error::EnableExtensionNotEnabled { + kind: enable_extension.into(), span, - } + }); } - err => Error::BadNumber(span, err), - })?; + } + ast::Expression::Literal(ast::Literal::Number(num)) } (Token::Word("RAY_FLAG_NONE"), _) => { @@ -1268,9 +1361,10 @@ impl Parser { &mut self, lexer: &mut Lexer<'a>, word: &'a str, + span: Span, ctx: &mut ExpressionContext<'a, '_, '_>, ) -> Result>, Error<'a>> { - if let Some(scalar) = conv::get_scalar_type(word) { + if let Some(scalar) = conv::get_scalar_type(&lexer.enable_extensions, span, word)? { return Ok(Some(ast::Type::Scalar(scalar))); } @@ -1298,6 +1392,11 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "vec2h" => ast::Type::Vector { + size: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "vec3" => { let (ty, ty_span) = self.singular_generic(lexer, ctx)?; ast::Type::Vector { @@ -1321,6 +1420,11 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "vec3h" => ast::Type::Vector { + size: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "vec4" => { let (ty, ty_span) = self.singular_generic(lexer, ctx)?; ast::Type::Vector { @@ -1344,6 +1448,11 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "vec4h" => ast::Type::Vector { + size: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat2x2" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Bi)? } @@ -1353,6 +1462,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat2x2h" => ast::Type::Matrix { + columns: crate::VectorSize::Bi, + rows: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat2x3" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Tri)? } @@ -1362,6 +1477,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat2x3h" => ast::Type::Matrix { + columns: crate::VectorSize::Bi, + rows: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat2x4" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Quad)? } @@ -1371,6 +1492,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat2x4h" => ast::Type::Matrix { + columns: crate::VectorSize::Bi, + rows: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat3x2" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Bi)? } @@ -1380,6 +1507,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat3x2h" => ast::Type::Matrix { + columns: crate::VectorSize::Tri, + rows: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat3x3" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Tri)? } @@ -1389,6 +1522,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat3x3h" => ast::Type::Matrix { + columns: crate::VectorSize::Tri, + rows: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat3x4" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Quad)? } @@ -1398,6 +1537,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat3x4h" => ast::Type::Matrix { + columns: crate::VectorSize::Tri, + rows: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat4x2" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Bi)? } @@ -1407,6 +1552,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat4x2h" => ast::Type::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat4x3" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Tri)? } @@ -1416,6 +1567,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat4x3h" => ast::Type::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat4x4" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Quad)? } @@ -1425,6 +1582,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat4x4h" => ast::Type::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "atomic" => { let scalar = lexer.next_scalar_generic()?; ast::Type::Atomic(scalar) @@ -1681,7 +1844,7 @@ impl Parser { let (name, span) = lexer.next_ident_with_span()?; - let ty = match this.type_decl_impl(lexer, name, ctx)? { + let ty = match this.type_decl_impl(lexer, name, span, ctx)? { Some(ty) => ty, None => { ctx.unresolved.insert(ast::Dependency { diff --git a/naga/src/front/wgsl/parse/number.rs b/naga/src/front/wgsl/parse/number.rs index 72795de6b4..87dfb0a02c 100644 --- a/naga/src/front/wgsl/parse/number.rs +++ b/naga/src/front/wgsl/parse/number.rs @@ -1,5 +1,7 @@ use crate::front::wgsl::error::NumberError; +use crate::front::wgsl::parse::directive::enable_extension::ImplementedEnableExtension; use crate::front::wgsl::parse::lexer::Token; +use half::f16; /// When using this type assume no Abstract Int/Float for now #[derive(Copy, Clone, Debug, PartialEq)] @@ -16,12 +18,23 @@ pub enum Number { I64(i64), /// Concrete u64 U64(u64), + /// Concrete f16 + F16(f16), /// Concrete f32 F32(f32), /// Concrete f64 F64(f64), } +impl Number { + pub(super) const fn required_enable_extension(&self) -> Option { + match *self { + Number::F16(_) => Some(ImplementedEnableExtension::F16), + _ => None, + } + } +} + pub(in crate::front::wgsl) fn consume_number(input: &str) -> (Token<'_>, &str) { let (result, rest) = parse(input); (Token::Number(result), rest) @@ -367,7 +380,8 @@ fn parse_hex_float(input: &str, kind: Option) -> Result Err(NumberError::NotRepresentable), }, - Some(FloatKind::F16) => Err(NumberError::UnimplementedF16), + // TODO: f16 is not supported by hexf_parse + Some(FloatKind::F16) => Err(NumberError::NotRepresentable), Some(FloatKind::F32) => match hexf_parse::parse_hexf32(input, false) { Ok(num) => Ok(Number::F32(num)), // can only be ParseHexfErrorKind::Inexact but we can't check since it's private @@ -403,7 +417,12 @@ fn parse_dec_float(input: &str, kind: Option) -> Result Err(NumberError::UnimplementedF16), + Some(FloatKind::F16) => { + let num = input.parse::().unwrap(); // will never fail + num.is_finite() + .then_some(Number::F16(num)) + .ok_or(NumberError::NotRepresentable) + } } } diff --git a/naga/src/lib.rs b/naga/src/lib.rs index 2e917d34e0..d78bdfb7af 100644 --- a/naga/src/lib.rs +++ b/naga/src/lib.rs @@ -271,6 +271,7 @@ pub use crate::span::{SourceLocation, Span, SpanContext, WithSpan}; #[cfg(feature = "arbitrary")] use arbitrary::Arbitrary; use diagnostic_filter::DiagnosticFilterNode; +use half::f16; #[cfg(feature = "deserialize")] use serde::Deserialize; #[cfg(feature = "serialize")] @@ -895,6 +896,7 @@ pub enum Literal { F64(f64), /// May not be NaN or infinity. F32(f32), + F16(f16), U32(u32), I32(i32), U64(u64), diff --git a/naga/src/proc/constant_evaluator.rs b/naga/src/proc/constant_evaluator.rs index be7c503a01..ed713bf921 100644 --- a/naga/src/proc/constant_evaluator.rs +++ b/naga/src/proc/constant_evaluator.rs @@ -1,6 +1,8 @@ use std::iter; use arrayvec::ArrayVec; +use half::f16; +use num_traits::{real::Real, FromPrimitive, One, ToPrimitive, Zero}; use crate::{ arena::{Arena, Handle, HandleVec, UniqueArena}, @@ -199,6 +201,7 @@ gen_component_wise_extractor! { literals: [ AbstractFloat => AbstractFloat: f64, F32 => F32: f32, + F16 => F16: f16, AbstractInt => AbstractInt: i64, U32 => U32: u32, I32 => I32: i32, @@ -219,6 +222,7 @@ gen_component_wise_extractor! { literals: [ AbstractFloat => Abstract: f64, F32 => F32: f32, + F16 => F16: f16, ], scalar_kinds: [ Float, @@ -244,6 +248,7 @@ gen_component_wise_extractor! { AbstractFloat => AbstractFloat: f64, AbstractInt => AbstractInt: i64, F32 => F32: f32, + F16 => F16: f16, I32 => I32: i32, ], scalar_kinds: [ @@ -1103,6 +1108,7 @@ impl<'a> ConstantEvaluator<'a> { component_wise_scalar(self, span, [arg], |args| match args { Scalar::AbstractFloat([e]) => Ok(Scalar::AbstractFloat([e.abs()])), Scalar::F32([e]) => Ok(Scalar::F32([e.abs()])), + Scalar::F16([e]) => Ok(Scalar::F16([e.abs()])), Scalar::AbstractInt([e]) => Ok(Scalar::AbstractInt([e.abs()])), Scalar::I32([e]) => Ok(Scalar::I32([e.wrapping_abs()])), Scalar::U32([e]) => Ok(Scalar::U32([e])), // TODO: just re-use the expression, ezpz @@ -1134,9 +1140,13 @@ impl<'a> ConstantEvaluator<'a> { } ) } - crate::MathFunction::Saturate => { - component_wise_float!(self, span, [arg], |e| { Ok([e.clamp(0., 1.)]) }) - } + crate::MathFunction::Saturate => component_wise_float(self, span, [arg], |e| match e { + Float::F16([e]) => Ok(Float::F16( + [e.clamp(f16::from_f32(0.0), f16::from_f32(1.0))], + )), + Float::F32([e]) => Ok(Float::F32([e.clamp(0., 1.)])), + Float::Abstract([e]) => Ok(Float::Abstract([e.clamp(0., 1.)])), + }), // trigonometry crate::MathFunction::Cos => { @@ -1213,6 +1223,9 @@ impl<'a> ConstantEvaluator<'a> { component_wise_float(self, span, [arg], |e| match e { Float::Abstract([e]) => Ok(Float::Abstract([round_ties_even(e)])), Float::F32([e]) => Ok(Float::F32([(round_ties_even(e as f64) as f32)])), + Float::F16([e]) => { + Ok(Float::F16([(f16::from_f64(round_ties_even(f64::from(e))))])) + } }) } crate::MathFunction::Fract => { @@ -1258,15 +1271,27 @@ impl<'a> ConstantEvaluator<'a> { ) } crate::MathFunction::Step => { - component_wise_float!(self, span, [arg, arg1.unwrap()], |edge, x| { - Ok([if edge <= x { 1.0 } else { 0.0 }]) + component_wise_float(self, span, [arg, arg1.unwrap()], |x| match x { + Float::Abstract([edge, x]) => { + Ok(Float::Abstract([if edge <= x { 1.0 } else { 0.0 }])) + } + Float::F32([edge, x]) => Ok(Float::F32([if edge <= x { 1.0 } else { 0.0 }])), + Float::F16([edge, x]) => Ok(Float::F16([if edge <= x { + f16::one() + } else { + f16::zero() + }])), }) } crate::MathFunction::Sqrt => { component_wise_float!(self, span, [arg], |e| { Ok([e.sqrt()]) }) } crate::MathFunction::InverseSqrt => { - component_wise_float!(self, span, [arg], |e| { Ok([1. / e.sqrt()]) }) + component_wise_float(self, span, [arg], |e| match e { + Float::Abstract([e]) => Ok(Float::Abstract([1. / e.sqrt()])), + Float::F32([e]) => Ok(Float::F32([1. / e.sqrt()])), + Float::F16([e]) => Ok(Float::F16([f16::from_f32(1. / f32::from(e).sqrt())])), + }) } // bits @@ -1561,6 +1586,7 @@ impl<'a> ConstantEvaluator<'a> { Literal::I32(v) => v, Literal::U32(v) => v as i32, Literal::F32(v) => v as i32, + Literal::F16(v) => f16::to_i32(&v).unwrap(), //Only None on NaN or Inf Literal::Bool(v) => v as i32, Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => { return make_error(); @@ -1572,6 +1598,7 @@ impl<'a> ConstantEvaluator<'a> { Literal::I32(v) => v as u32, Literal::U32(v) => v, Literal::F32(v) => v as u32, + Literal::F16(v) => f16::to_u32(&v).unwrap(), //Only None on NaN or Inf Literal::Bool(v) => v as u32, Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => { return make_error(); @@ -1587,6 +1614,7 @@ impl<'a> ConstantEvaluator<'a> { Literal::F64(v) => v as i64, Literal::I64(v) => v, Literal::U64(v) => v as i64, + Literal::F16(v) => f16::to_i64(&v).unwrap(), //Only None on NaN or Inf Literal::AbstractInt(v) => i64::try_from_abstract(v)?, Literal::AbstractFloat(v) => i64::try_from_abstract(v)?, }), @@ -1598,9 +1626,22 @@ impl<'a> ConstantEvaluator<'a> { Literal::F64(v) => v as u64, Literal::I64(v) => v as u64, Literal::U64(v) => v, + Literal::F16(v) => f16::to_u64(&v).unwrap(), //Only None on NaN or Inf Literal::AbstractInt(v) => u64::try_from_abstract(v)?, Literal::AbstractFloat(v) => u64::try_from_abstract(v)?, }), + Sc::F16 => Literal::F16(match literal { + Literal::F16(v) => v, + Literal::F32(v) => f16::from_f32(v), + Literal::F64(v) => f16::from_f64(v), + Literal::Bool(v) => f16::from_u32(v as u32).unwrap(), + Literal::I64(v) => f16::from_i64(v).unwrap(), + Literal::U64(v) => f16::from_u64(v).unwrap(), + Literal::I32(v) => f16::from_i32(v).unwrap(), + Literal::U32(v) => f16::from_u32(v).unwrap(), + Literal::AbstractFloat(v) => f16::try_from_abstract(v)?, + Literal::AbstractInt(v) => f16::try_from_abstract(v)?, + }), Sc::F32 => Literal::F32(match literal { Literal::I32(v) => v as f32, Literal::U32(v) => v as f32, @@ -1609,12 +1650,14 @@ impl<'a> ConstantEvaluator<'a> { Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => { return make_error(); } + Literal::F16(v) => f16::to_f32(v), Literal::AbstractInt(v) => f32::try_from_abstract(v)?, Literal::AbstractFloat(v) => f32::try_from_abstract(v)?, }), Sc::F64 => Literal::F64(match literal { Literal::I32(v) => v as f64, Literal::U32(v) => v as f64, + Literal::F16(v) => f16::to_f64(v), Literal::F32(v) => v as f64, Literal::F64(v) => v, Literal::Bool(v) => v as u32 as f64, @@ -1626,6 +1669,7 @@ impl<'a> ConstantEvaluator<'a> { Literal::I32(v) => v != 0, Literal::U32(v) => v != 0, Literal::F32(v) => v != 0.0, + Literal::F16(v) => v != f16::zero(), Literal::Bool(v) => v, Literal::F64(_) | Literal::I64(_) @@ -1780,6 +1824,7 @@ impl<'a> ConstantEvaluator<'a> { Literal::I32(v) => Literal::I32(v.wrapping_neg()), Literal::I64(v) => Literal::I64(v.wrapping_neg()), Literal::F32(v) => Literal::F32(-v), + Literal::F16(v) => Literal::F16(-v), Literal::AbstractInt(v) => Literal::AbstractInt(v.wrapping_neg()), Literal::AbstractFloat(v) => Literal::AbstractFloat(-v), _ => return Err(ConstantEvaluatorError::InvalidUnaryOpArg), @@ -1928,6 +1973,14 @@ impl<'a> ConstantEvaluator<'a> { _ => return Err(ConstantEvaluatorError::InvalidBinaryOpArgs), }) } + (Literal::F16(a), Literal::F16(b)) => Literal::F16(match op { + BinaryOperator::Add => a + b, + BinaryOperator::Subtract => a - b, + BinaryOperator::Multiply => a * b, + BinaryOperator::Divide => a / b, + BinaryOperator::Modulo => a % b, + _ => return Err(ConstantEvaluatorError::InvalidBinaryOpArgs), + }), (Literal::AbstractInt(a), Literal::AbstractInt(b)) => { Literal::AbstractInt(match op { BinaryOperator::Add => a.checked_add(b).ok_or_else(|| { @@ -2532,6 +2585,32 @@ impl TryFromAbstract for u64 { } } +impl TryFromAbstract for f16 { + fn try_from_abstract(value: f64) -> Result { + let f = f16::from_f64(value); + if f.is_infinite() { + return Err(ConstantEvaluatorError::AutomaticConversionLossy { + value: format!("{value:?}"), + to_type: "f16", + }); + } + Ok(f) + } +} + +impl TryFromAbstract for f16 { + fn try_from_abstract(value: i64) -> Result { + let f = f16::from_i64(value); + if f.is_none() { + return Err(ConstantEvaluatorError::AutomaticConversionLossy { + value: format!("{value:?}"), + to_type: "f16", + }); + } + Ok(f.unwrap()) + } +} + #[cfg(test)] mod tests { use std::vec; diff --git a/naga/src/proc/mod.rs b/naga/src/proc/mod.rs index a6a19f70ed..ef095cff39 100644 --- a/naga/src/proc/mod.rs +++ b/naga/src/proc/mod.rs @@ -79,6 +79,7 @@ impl From for super::Scalar { pub enum HashableLiteral { F64(u64), F32(u32), + F16(u16), U32(u32), I32(i32), U64(u64), @@ -93,6 +94,7 @@ impl From for HashableLiteral { match l { crate::Literal::F64(v) => Self::F64(v.to_bits()), crate::Literal::F32(v) => Self::F32(v.to_bits()), + crate::Literal::F16(v) => Self::F16(v.to_bits()), crate::Literal::U32(v) => Self::U32(v), crate::Literal::I32(v) => Self::I32(v), crate::Literal::U64(v) => Self::U64(v), @@ -131,6 +133,7 @@ impl crate::Literal { match *self { Self::F64(_) | Self::I64(_) | Self::U64(_) => 8, Self::F32(_) | Self::U32(_) | Self::I32(_) => 4, + Self::F16(_) => 2, Self::Bool(_) => crate::BOOL_WIDTH, Self::AbstractInt(_) | Self::AbstractFloat(_) => crate::ABSTRACT_WIDTH, } @@ -139,6 +142,7 @@ impl crate::Literal { match *self { Self::F64(_) => crate::Scalar::F64, Self::F32(_) => crate::Scalar::F32, + Self::F16(_) => crate::Scalar::F16, Self::U32(_) => crate::Scalar::U32, Self::I32(_) => crate::Scalar::I32, Self::U64(_) => crate::Scalar::U64, diff --git a/naga/src/proc/type_methods.rs b/naga/src/proc/type_methods.rs index 351f4c5368..9b44f05db5 100644 --- a/naga/src/proc/type_methods.rs +++ b/naga/src/proc/type_methods.rs @@ -20,6 +20,10 @@ impl crate::ScalarKind { } impl crate::Scalar { + pub const F16: Self = Self { + kind: crate::ScalarKind::Float, + width: 2, + }; pub const I32: Self = Self { kind: crate::ScalarKind::Sint, width: 4, diff --git a/naga/src/valid/expression.rs b/naga/src/valid/expression.rs index b0c54a3df4..15e01b661c 100644 --- a/naga/src/valid/expression.rs +++ b/naga/src/valid/expression.rs @@ -1767,7 +1767,7 @@ impl super::Validator { } pub fn validate_literal(&self, literal: crate::Literal) -> Result<(), LiteralError> { - self.check_width(literal.scalar())?; + let _ = self.check_width(literal.scalar())?; check_literal_value(literal)?; Ok(()) diff --git a/naga/src/valid/interface.rs b/naga/src/valid/interface.rs index a6fff61d82..3ae16b232b 100644 --- a/naga/src/valid/interface.rs +++ b/naga/src/valid/interface.rs @@ -1,6 +1,6 @@ use super::{ analyzer::{FunctionInfo, GlobalUse}, - Capabilities, Disalignment, FunctionError, ModuleInfo, + Capabilities, Disalignment, FunctionError, ModuleInfo, PushConstantError, }; use crate::arena::{Handle, UniqueArena}; @@ -39,6 +39,8 @@ pub enum GlobalVariableError { InitializerNotAllowed(crate::AddressSpace), #[error("Storage address space doesn't support write-only access")] StorageAddressSpaceWriteOnlyNotSupported, + #[error("Type is not valid for use as a push constant")] + InvalidPushConstantType(#[source] PushConstantError), } #[derive(Clone, Debug, thiserror::Error)] @@ -569,6 +571,9 @@ impl super::Validator { Capabilities::PUSH_CONSTANT, )); } + if let Err(ref err) = type_info.push_constant_compatibility { + return Err(GlobalVariableError::InvalidPushConstantType(err.clone())); + } ( TypeFlags::DATA | TypeFlags::COPY diff --git a/naga/src/valid/mod.rs b/naga/src/valid/mod.rs index 8560404703..3ffed6a8e4 100644 --- a/naga/src/valid/mod.rs +++ b/naga/src/valid/mod.rs @@ -28,7 +28,7 @@ pub use expression::{check_literal_value, LiteralError}; pub use expression::{ConstExpressionError, ExpressionError}; pub use function::{CallError, FunctionError, LocalVariableError}; pub use interface::{EntryPointError, GlobalVariableError, VaryingError}; -pub use r#type::{Disalignment, TypeError, TypeFlags, WidthError}; +pub use r#type::{Disalignment, PushConstantError, TypeError, TypeFlags, WidthError}; use self::handles::InvalidHandleError; @@ -158,6 +158,8 @@ bitflags::bitflags! { const TEXTURE_ATOMIC = 1 << 23; /// Support for atomic operations on 64-bit images. const TEXTURE_INT64_ATOMIC = 1 << 24; + /// Support for 16-bit floating-point types. + const SHADER_FLOAT16 = 1 << 25; } } diff --git a/naga/src/valid/type.rs b/naga/src/valid/type.rs index 8c6825b842..837e00e239 100644 --- a/naga/src/valid/type.rs +++ b/naga/src/valid/type.rs @@ -167,8 +167,16 @@ pub enum WidthError { Abstract, } +#[derive(Clone, Debug, thiserror::Error)] +#[cfg_attr(test, derive(PartialEq))] +pub enum PushConstantError { + #[error("The scalar type {0:?} is not supported in push constants")] + InvalidScalar(crate::Scalar), +} + // Only makes sense if `flags.contains(HOST_SHAREABLE)` type LayoutCompatibility = Result, Disalignment)>; +type PushConstantCompatibility = Result<(), PushConstantError>; fn check_member_layout( accum: &mut LayoutCompatibility, @@ -219,6 +227,7 @@ pub(super) struct TypeInfo { pub flags: TypeFlags, pub uniform_layout: LayoutCompatibility, pub storage_layout: LayoutCompatibility, + pub push_constant_compatibility: PushConstantCompatibility, } impl TypeInfo { @@ -227,6 +236,7 @@ impl TypeInfo { flags: TypeFlags::empty(), uniform_layout: Ok(Alignment::ONE), storage_layout: Ok(Alignment::ONE), + push_constant_compatibility: Ok(()), } } @@ -235,6 +245,7 @@ impl TypeInfo { flags, uniform_layout: Ok(alignment), storage_layout: Ok(alignment), + push_constant_compatibility: Ok(()), } } } @@ -248,11 +259,15 @@ impl super::Validator { } } - pub(super) const fn check_width(&self, scalar: crate::Scalar) -> Result<(), WidthError> { + pub(super) const fn check_width( + &self, + scalar: crate::Scalar, + ) -> Result { + let mut push_constant_compatibility = Ok(()); let good = match scalar.kind { crate::ScalarKind::Bool => scalar.width == crate::BOOL_WIDTH, - crate::ScalarKind::Float => { - if scalar.width == 8 { + crate::ScalarKind::Float => match scalar.width { + 8 => { if !self.capabilities.contains(Capabilities::FLOAT64) { return Err(WidthError::MissingCapability { name: "f64", @@ -260,10 +275,21 @@ impl super::Validator { }); } true - } else { - scalar.width == 4 } - } + 2 => { + if !self.capabilities.contains(Capabilities::SHADER_FLOAT16) { + return Err(WidthError::MissingCapability { + name: "f16", + flag: "FLOAT16", + }); + } + + push_constant_compatibility = Err(PushConstantError::InvalidScalar(scalar)); + + true + } + _ => scalar.width == 4, + }, crate::ScalarKind::Sint => { if scalar.width == 8 { if !self.capabilities.contains(Capabilities::SHADER_INT64) { @@ -295,7 +321,7 @@ impl super::Validator { } }; if good { - Ok(()) + Ok(push_constant_compatibility) } else { Err(WidthError::Invalid(scalar.kind, scalar.width)) } @@ -315,13 +341,13 @@ impl super::Validator { use crate::TypeInner as Ti; Ok(match gctx.types[handle].inner { Ti::Scalar(scalar) => { - self.check_width(scalar)?; + let push_constant_compatibility = self.check_width(scalar)?; let shareable = if scalar.kind.is_numeric() { TypeFlags::IO_SHAREABLE | TypeFlags::HOST_SHAREABLE } else { TypeFlags::empty() }; - TypeInfo::new( + let mut type_info = TypeInfo::new( TypeFlags::DATA | TypeFlags::SIZED | TypeFlags::COPY @@ -330,16 +356,18 @@ impl super::Validator { | TypeFlags::CREATION_RESOLVED | shareable, Alignment::from_width(scalar.width), - ) + ); + type_info.push_constant_compatibility = push_constant_compatibility; + type_info } Ti::Vector { size, scalar } => { - self.check_width(scalar)?; + let push_constant_compatibility = self.check_width(scalar)?; let shareable = if scalar.kind.is_numeric() { TypeFlags::IO_SHAREABLE | TypeFlags::HOST_SHAREABLE } else { TypeFlags::empty() }; - TypeInfo::new( + let mut type_info = TypeInfo::new( TypeFlags::DATA | TypeFlags::SIZED | TypeFlags::COPY @@ -348,7 +376,9 @@ impl super::Validator { | TypeFlags::CREATION_RESOLVED | shareable, Alignment::from(size) * Alignment::from_width(scalar.width), - ) + ); + type_info.push_constant_compatibility = push_constant_compatibility; + type_info } Ti::Matrix { columns: _, @@ -358,8 +388,8 @@ impl super::Validator { if scalar.kind != crate::ScalarKind::Float { return Err(TypeError::MatrixElementNotFloat); } - self.check_width(scalar)?; - TypeInfo::new( + let push_constant_compatibility = self.check_width(scalar)?; + let mut type_info = TypeInfo::new( TypeFlags::DATA | TypeFlags::SIZED | TypeFlags::COPY @@ -368,7 +398,9 @@ impl super::Validator { | TypeFlags::CONSTRUCTIBLE | TypeFlags::CREATION_RESOLVED, Alignment::from(rows) * Alignment::from_width(scalar.width), - ) + ); + type_info.push_constant_compatibility = push_constant_compatibility; + type_info } Ti::Atomic(scalar) => { match scalar { @@ -463,7 +495,7 @@ impl super::Validator { // However, some cases are trivial: All our implicit base types // are DATA and SIZED, so we can never return // `InvalidPointerBase` or `InvalidPointerToUnsized`. - self.check_width(scalar)?; + let _ = self.check_width(scalar)?; // `Validator::validate_function` actually checks the address // space of pointer arguments explicitly before checking the @@ -549,6 +581,7 @@ impl super::Validator { flags: base_info.flags & type_info_mask, uniform_layout, storage_layout, + push_constant_compatibility: base_info.push_constant_compatibility.clone(), } } Ti::Struct { ref members, span } => { @@ -629,6 +662,10 @@ impl super::Validator { base_info.storage_layout, handle, ); + if base_info.push_constant_compatibility.is_err() { + ti.push_constant_compatibility = + base_info.push_constant_compatibility.clone(); + } // Validate rule: If a structure member itself has a structure type S, // then the number of bytes between the start of that member and diff --git a/naga/tests/in/extra.wgsl b/naga/tests/in/extra.wgsl index ef68f4aa80..1a7ab91ff5 100644 --- a/naga/tests/in/extra.wgsl +++ b/naga/tests/in/extra.wgsl @@ -1,6 +1,6 @@ struct PushConstants { index: u32, - double: vec2, + double: vec2, } var pc: PushConstants; diff --git a/naga/tests/in/f16.param.ron b/naga/tests/in/f16.param.ron new file mode 100644 index 0000000000..4a6f36e279 --- /dev/null +++ b/naga/tests/in/f16.param.ron @@ -0,0 +1,24 @@ +( + // No GLSL support for f16 + targets: "SPIRV | METAL | HLSL | WGSL", + god_mode: true, + spv: ( + version: (1, 0), + ), + hlsl: ( + shader_model: V6_2, + binding_map: {}, + fake_missing_bindings: true, + special_constants_binding: Some((space: 1, register: 0)), + push_constants_target: Some((space: 0, register: 0)), + zero_initialize_workgroup_memory: true, + ), + msl: ( + lang_version: (1, 0), + per_entry_point_map: {}, + inline_samplers: [], + spirv_cross_compatibility: false, + fake_missing_bindings: true, + zero_initialize_workgroup_memory: true, + ), +) diff --git a/naga/tests/in/f16.wgsl b/naga/tests/in/f16.wgsl new file mode 100644 index 0000000000..dbef9aedfc --- /dev/null +++ b/naga/tests/in/f16.wgsl @@ -0,0 +1,127 @@ +enable f16; + +var private_variable: f16 = 1h; +const constant_variable: f16 = f16(15.2); + +struct UniformCompatible { + // Other types + val_u32: u32, + val_i32: i32, + val_f32: f32, + + // f16 + val_f16: f16, + val_f16_2: vec2, + val_f16_3: vec3, + val_f16_4: vec4, + final_value: f16, + + val_mat2x2: mat2x2, + val_mat2x3: mat2x3, + val_mat2x4: mat2x4, + val_mat3x2: mat3x2, + val_mat3x3: mat3x3, + val_mat3x4: mat3x4, + val_mat4x2: mat4x2, + val_mat4x3: mat4x3, + val_mat4x4: mat4x4, +} + +struct StorageCompatible { + val_f16_array_2: array, +} + +struct LayoutTest { + scalar1: f16, scalar2: f16, v3: vec3, tuck_in: f16, scalar4: f16, larger: u32 +} + +@group(0) @binding(0) +var input_uniform: UniformCompatible; + +@group(0) @binding(1) +var input_storage: UniformCompatible; + +@group(0) @binding(2) +var input_arrays: StorageCompatible; + +@group(0) @binding(3) +var output: UniformCompatible; + +@group(0) @binding(4) +var output_arrays: StorageCompatible; + +fn f16_function(x: f16) -> f16 { + var val: f16 = f16(constant_variable); + // A number too big for f16 + val += 1h - 33333h; + // Constructing an f16 from an AbstractInt + val += val + f16(5.); + // Constructing a f16 from other types and other types from f16. + val += f16(input_uniform.val_f32 + f32(val)); + // Constructing a vec3 from a i64 + val += vec3(input_uniform.val_f16).z; + + // Reading/writing to a uniform/storage buffer + output.val_f16 = input_uniform.val_f16 + input_storage.val_f16; + output.val_f16_2 = input_uniform.val_f16_2 + input_storage.val_f16_2; + output.val_f16_3 = input_uniform.val_f16_3 + input_storage.val_f16_3; + output.val_f16_4 = input_uniform.val_f16_4 + input_storage.val_f16_4; + + output.val_mat2x2 = input_uniform.val_mat2x2 + input_storage.val_mat2x2; + output.val_mat2x3 = input_uniform.val_mat2x3 + input_storage.val_mat2x3; + output.val_mat2x4 = input_uniform.val_mat2x4 + input_storage.val_mat2x4; + output.val_mat3x2 = input_uniform.val_mat3x2 + input_storage.val_mat3x2; + output.val_mat3x3 = input_uniform.val_mat3x3 + input_storage.val_mat3x3; + output.val_mat3x4 = input_uniform.val_mat3x4 + input_storage.val_mat3x4; + output.val_mat4x2 = input_uniform.val_mat4x2 + input_storage.val_mat4x2; + output.val_mat4x3 = input_uniform.val_mat4x3 + input_storage.val_mat4x3; + output.val_mat4x4 = input_uniform.val_mat4x4 + input_storage.val_mat4x4; + + output_arrays.val_f16_array_2 = input_arrays.val_f16_array_2; + + // We make sure not to use 32 in these arguments, so it's clear in the results which are builtin + // constants based on the size of the type, and which are arguments. + + // Numeric functions + val += abs(val); + val += clamp(val, val, val); + val += dot(vec2(val), vec2(val)); + val += max(val, val); + val += min(val, val); + val += sign(val); + + val += f16(1.0); + + // We use the shorthand aliases here to ensure the aliases + // work correctly. + + // Cast vectors to/from f32 + let float_vec2 = vec2f(input_uniform.val_f16_2); + output.val_f16_2 = vec2h(float_vec2); + + let float_vec3 = vec3f(input_uniform.val_f16_3); + output.val_f16_3 = vec3h(float_vec3); + + let float_vec4 = vec4f(input_uniform.val_f16_4); + output.val_f16_4 = vec4h(float_vec4); + + // Cast matrices to/from f32 + output.val_mat2x2 = mat2x2h(mat2x2f(input_uniform.val_mat2x2)); + output.val_mat2x3 = mat2x3h(mat2x3f(input_uniform.val_mat2x3)); + output.val_mat2x4 = mat2x4h(mat2x4f(input_uniform.val_mat2x4)); + output.val_mat3x2 = mat3x2h(mat3x2f(input_uniform.val_mat3x2)); + output.val_mat3x3 = mat3x3h(mat3x3f(input_uniform.val_mat3x3)); + output.val_mat3x4 = mat3x4h(mat3x4f(input_uniform.val_mat3x4)); + output.val_mat4x2 = mat4x2h(mat4x2f(input_uniform.val_mat4x2)); + output.val_mat4x3 = mat4x3h(mat4x3f(input_uniform.val_mat4x3)); + output.val_mat4x4 = mat4x4h(mat4x4f(input_uniform.val_mat4x4)); + + // Make sure all the variables are used. + return val; +} + +@compute @workgroup_size(1) +fn main() { + output.final_value = f16_function(2h); +} + diff --git a/naga/tests/in/glsl/f16-glsl.comp b/naga/tests/in/glsl/f16-glsl.comp new file mode 100644 index 0000000000..af8c89cb8a --- /dev/null +++ b/naga/tests/in/glsl/f16-glsl.comp @@ -0,0 +1,57 @@ +#version 460 + +#extension GL_AMD_gpu_shader_half_float: enable + +layout(set = 0, binding = 0) uniform A { + float16_t a_1; + f16vec2 a_vec2; + f16vec3 a_vec3; + f16vec4 a_vec4; + // So the rules here are particularly nasty for any f16 matries in uniform buffers + // as the stride is always rounded up to 16, meaning that _every_ f16 matrix in a uniform + // buffer is over-aligned to what naga-ir wants. + // + // This is https://github.com/gfx-rs/wgpu/issues/4375. + + // f16mat2 a_mat2; + // f16mat2x3 a_mat2x3; + // f16mat2x4 a_mat2x4; + // f16mat3x2 a_mat3x2; + // f16mat3 a_mat3; + // f16mat3x4 a_mat3x4; + // f16mat4x2 a_mat4x2; + // f16mat4x3 a_mat4x3; + // f16mat4 a_mat4; +}; + +layout(set = 0, binding = 1) buffer B { + float16_t b_1; + f16vec2 b_vec2; + f16vec3 b_vec3; + f16vec4 b_vec4; + f16mat2 b_mat2; + f16mat2x3 b_mat2x3; + f16mat2x4 b_mat2x4; + f16mat3x2 b_mat3x2; + f16mat3 b_mat3; + f16mat3x4 b_mat3x4; + f16mat4x2 b_mat4x2; + f16mat4x3 b_mat4x3; + f16mat4 b_mat4; +}; + +void main() { + b_1 = a_1; + b_vec2 = a_vec2; + b_vec3 = a_vec3; + b_vec4 = a_vec4; + // b_mat2 = a_mat2; + // b_mat2x3 = a_mat2x3; + // b_mat2x4 = a_mat2x4; + // b_mat3x2 = a_mat3x2; + // b_mat3 = a_mat3; + // b_mat3x4 = a_mat3x4; + // b_mat4x2 = a_mat4x2; + // b_mat4x3 = a_mat4x3; + // b_mat4 = a_mat4; +} diff --git a/naga/tests/in/glsl/f16-glsl.param.ron b/naga/tests/in/glsl/f16-glsl.param.ron new file mode 100644 index 0000000000..c70ada9939 --- /dev/null +++ b/naga/tests/in/glsl/f16-glsl.param.ron @@ -0,0 +1,3 @@ +( + god_mode: true, +) \ No newline at end of file diff --git a/naga/tests/in/spv/f16-spv.comp b/naga/tests/in/spv/f16-spv.comp new file mode 100644 index 0000000000..af8c89cb8a --- /dev/null +++ b/naga/tests/in/spv/f16-spv.comp @@ -0,0 +1,57 @@ +#version 460 + +#extension GL_AMD_gpu_shader_half_float: enable + +layout(set = 0, binding = 0) uniform A { + float16_t a_1; + f16vec2 a_vec2; + f16vec3 a_vec3; + f16vec4 a_vec4; + // So the rules here are particularly nasty for any f16 matries in uniform buffers + // as the stride is always rounded up to 16, meaning that _every_ f16 matrix in a uniform + // buffer is over-aligned to what naga-ir wants. + // + // This is https://github.com/gfx-rs/wgpu/issues/4375. + + // f16mat2 a_mat2; + // f16mat2x3 a_mat2x3; + // f16mat2x4 a_mat2x4; + // f16mat3x2 a_mat3x2; + // f16mat3 a_mat3; + // f16mat3x4 a_mat3x4; + // f16mat4x2 a_mat4x2; + // f16mat4x3 a_mat4x3; + // f16mat4 a_mat4; +}; + +layout(set = 0, binding = 1) buffer B { + float16_t b_1; + f16vec2 b_vec2; + f16vec3 b_vec3; + f16vec4 b_vec4; + f16mat2 b_mat2; + f16mat2x3 b_mat2x3; + f16mat2x4 b_mat2x4; + f16mat3x2 b_mat3x2; + f16mat3 b_mat3; + f16mat3x4 b_mat3x4; + f16mat4x2 b_mat4x2; + f16mat4x3 b_mat4x3; + f16mat4 b_mat4; +}; + +void main() { + b_1 = a_1; + b_vec2 = a_vec2; + b_vec3 = a_vec3; + b_vec4 = a_vec4; + // b_mat2 = a_mat2; + // b_mat2x3 = a_mat2x3; + // b_mat2x4 = a_mat2x4; + // b_mat3x2 = a_mat3x2; + // b_mat3 = a_mat3; + // b_mat3x4 = a_mat3x4; + // b_mat4x2 = a_mat4x2; + // b_mat4x3 = a_mat4x3; + // b_mat4 = a_mat4; +} diff --git a/naga/tests/in/spv/f16-spv.param.ron b/naga/tests/in/spv/f16-spv.param.ron new file mode 100644 index 0000000000..c70ada9939 --- /dev/null +++ b/naga/tests/in/spv/f16-spv.param.ron @@ -0,0 +1,3 @@ +( + god_mode: true, +) \ No newline at end of file diff --git a/naga/tests/in/spv/f16-spv.spv b/naga/tests/in/spv/f16-spv.spv new file mode 100644 index 0000000000000000000000000000000000000000..7e7f1763ef881940b76fbd21675bd01ed56055d7 GIT binary patch literal 2268 zcmZ9M>rPWa6op4hxynuC;tdM*jy57lOpHd1rX<#Ykob3UdI6f0OQ4YY&qwf4d=MYV zKPIm4oU=LOnPzwPUbEJoJ$t4t^pyvlyXHpSn5(#COu0TG&M|+TO6Fb7BW}>`?R@i} zw|4#NM!ntj=iQUD=%6L;s2hki)!JUIa^7uL)?d9K+qf$*#cLC;sE>2~vZJVd3_d9x zvC_vJ_QeYN^lARBpx+IF_G_d6$1S2S@(;8eG_tF=$Q zaeAuSzIR%_(>`wbX8Www=(aCaXulf@cIrqk z@WMLr(e21g$NLF&#CaVUxm|)_Ig_p5 zPchaXq&|JI^(XS_A?r`mb$YR(H%Bh|#enli2yc8--_&wfMtz*^t(eqxPX=zB1^M&_ z&O2oxhQoSE^OB4loP!+HzAOVbPFX%V;E;nD4(puhp^O}yl^mS!kqq28kL8mC4mpV7 hu+F(xWaOY9>v8>igW+~ literal 0 HcmV?d00001 diff --git a/naga/tests/in/spv/f16-spv.spvasm b/naga/tests/in/spv/f16-spv.spvasm new file mode 100644 index 0000000000..806de32754 --- /dev/null +++ b/naga/tests/in/spv/f16-spv.spvasm @@ -0,0 +1,130 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 11 +; Bound: 46 +; Schema: 0 + OpCapability Shader + OpCapability StorageBuffer16BitAccess + OpCapability UniformAndStorageBuffer16BitAccess + OpExtension "SPV_KHR_16bit_storage" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 460 + OpSourceExtension "GL_AMD_gpu_shader_half_float" + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %B "B" + OpMemberName %B 0 "b_1" + OpMemberName %B 1 "b_vec2" + OpMemberName %B 2 "b_vec3" + OpMemberName %B 3 "b_vec4" + OpMemberName %B 4 "b_mat2" + OpMemberName %B 5 "b_mat2x3" + OpMemberName %B 6 "b_mat2x4" + OpMemberName %B 7 "b_mat3x2" + OpMemberName %B 8 "b_mat3" + OpMemberName %B 9 "b_mat3x4" + OpMemberName %B 10 "b_mat4x2" + OpMemberName %B 11 "b_mat4x3" + OpMemberName %B 12 "b_mat4" + OpName %_ "" + OpName %A "A" + OpMemberName %A 0 "a_1" + OpMemberName %A 1 "a_vec2" + OpMemberName %A 2 "a_vec3" + OpMemberName %A 3 "a_vec4" + OpName %__0 "" + OpDecorate %B BufferBlock + OpMemberDecorate %B 0 Offset 0 + OpMemberDecorate %B 1 Offset 4 + OpMemberDecorate %B 2 Offset 8 + OpMemberDecorate %B 3 Offset 16 + OpMemberDecorate %B 4 ColMajor + OpMemberDecorate %B 4 MatrixStride 4 + OpMemberDecorate %B 4 Offset 24 + OpMemberDecorate %B 5 ColMajor + OpMemberDecorate %B 5 MatrixStride 8 + OpMemberDecorate %B 5 Offset 32 + OpMemberDecorate %B 6 ColMajor + OpMemberDecorate %B 6 MatrixStride 8 + OpMemberDecorate %B 6 Offset 48 + OpMemberDecorate %B 7 ColMajor + OpMemberDecorate %B 7 MatrixStride 4 + OpMemberDecorate %B 7 Offset 64 + OpMemberDecorate %B 8 ColMajor + OpMemberDecorate %B 8 MatrixStride 8 + OpMemberDecorate %B 8 Offset 80 + OpMemberDecorate %B 9 ColMajor + OpMemberDecorate %B 9 MatrixStride 8 + OpMemberDecorate %B 9 Offset 104 + OpMemberDecorate %B 10 ColMajor + OpMemberDecorate %B 10 MatrixStride 4 + OpMemberDecorate %B 10 Offset 128 + OpMemberDecorate %B 11 ColMajor + OpMemberDecorate %B 11 MatrixStride 8 + OpMemberDecorate %B 11 Offset 144 + OpMemberDecorate %B 12 ColMajor + OpMemberDecorate %B 12 MatrixStride 8 + OpMemberDecorate %B 12 Offset 176 + OpDecorate %_ Binding 1 + OpDecorate %_ DescriptorSet 0 + OpDecorate %A Block + OpMemberDecorate %A 0 Offset 0 + OpMemberDecorate %A 1 Offset 4 + OpMemberDecorate %A 2 Offset 8 + OpMemberDecorate %A 3 Offset 16 + OpDecorate %__0 Binding 0 + OpDecorate %__0 DescriptorSet 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %half = OpTypeFloat 16 + %v2half = OpTypeVector %half 2 + %v3half = OpTypeVector %half 3 + %v4half = OpTypeVector %half 4 + %mat2v2half = OpTypeMatrix %v2half 2 + %mat2v3half = OpTypeMatrix %v3half 2 + %mat2v4half = OpTypeMatrix %v4half 2 + %mat3v2half = OpTypeMatrix %v2half 3 + %mat3v3half = OpTypeMatrix %v3half 3 + %mat3v4half = OpTypeMatrix %v4half 3 + %mat4v2half = OpTypeMatrix %v2half 4 + %mat4v3half = OpTypeMatrix %v3half 4 + %mat4v4half = OpTypeMatrix %v4half 4 + %B = OpTypeStruct %half %v2half %v3half %v4half %mat2v2half %mat2v3half %mat2v4half %mat3v2half %mat3v3half %mat3v4half %mat4v2half %mat4v3half %mat4v4half +%_ptr_Uniform_B = OpTypePointer Uniform %B + %_ = OpVariable %_ptr_Uniform_B Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %A = OpTypeStruct %half %v2half %v3half %v4half +%_ptr_Uniform_A = OpTypePointer Uniform %A + %__0 = OpVariable %_ptr_Uniform_A Uniform +%_ptr_Uniform_half = OpTypePointer Uniform %half + %int_1 = OpConstant %int 1 +%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half + %int_2 = OpConstant %int 2 +%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half + %int_3 = OpConstant %int 3 +%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half + %main = OpFunction %void None %3 + %5 = OpLabel + %28 = OpAccessChain %_ptr_Uniform_half %__0 %int_0 + %29 = OpLoad %half %28 + %30 = OpAccessChain %_ptr_Uniform_half %_ %int_0 + OpStore %30 %29 + %33 = OpAccessChain %_ptr_Uniform_v2half %__0 %int_1 + %34 = OpLoad %v2half %33 + %35 = OpAccessChain %_ptr_Uniform_v2half %_ %int_1 + OpStore %35 %34 + %38 = OpAccessChain %_ptr_Uniform_v3half %__0 %int_2 + %39 = OpLoad %v3half %38 + %40 = OpAccessChain %_ptr_Uniform_v3half %_ %int_2 + OpStore %40 %39 + %43 = OpAccessChain %_ptr_Uniform_v4half %__0 %int_3 + %44 = OpLoad %v4half %43 + %45 = OpAccessChain %_ptr_Uniform_v4half %_ %int_3 + OpStore %45 %44 + OpReturn + OpFunctionEnd diff --git a/naga/tests/out/hlsl/f16.hlsl b/naga/tests/out/hlsl/f16.hlsl new file mode 100644 index 0000000000..05e2387212 --- /dev/null +++ b/naga/tests/out/hlsl/f16.hlsl @@ -0,0 +1,351 @@ +struct NagaConstants { + int first_vertex; + int first_instance; + uint other; +}; +ConstantBuffer _NagaConstants: register(b0, space1); + +struct UniformCompatible { + uint val_u32_; + int val_i32_; + float val_f32_; + half val_f16_; + half2 val_f16_2_; + int _pad5_0; + half3 val_f16_3_; + half4 val_f16_4_; + half final_value; + half2 val_mat2x2__0; half2 val_mat2x2__1; + int _pad9_0; + row_major half2x3 val_mat2x3_; + row_major half2x4 val_mat2x4_; + half2 val_mat3x2__0; half2 val_mat3x2__1; half2 val_mat3x2__2; + int _pad12_0; + row_major half3x3 val_mat3x3_; + row_major half3x4 val_mat3x4_; + half2 val_mat4x2__0; half2 val_mat4x2__1; half2 val_mat4x2__2; half2 val_mat4x2__3; + row_major half4x3 val_mat4x3_; + row_major half4x4 val_mat4x4_; +}; + +struct StorageCompatible { + half val_f16_array_2_[2]; +}; + +struct LayoutTest { + half scalar1_; + half scalar2_; + int _pad2_0; + half3 v3_; + half tuck_in; + half scalar4_; + uint larger; +}; + +static const half constant_variable = 15.203125h; + +static half private_variable = 1.0h; +cbuffer input_uniform : register(b0) { UniformCompatible input_uniform; } +ByteAddressBuffer input_storage : register(t1); +ByteAddressBuffer input_arrays : register(t2); +RWByteAddressBuffer output : register(u3); +RWByteAddressBuffer output_arrays : register(u4); + +half2x2 GetMatval_mat2x2_OnUniformCompatible(UniformCompatible obj) { + return half2x2(obj.val_mat2x2__0, obj.val_mat2x2__1); +} + +void SetMatval_mat2x2_OnUniformCompatible(UniformCompatible obj, half2x2 mat) { + obj.val_mat2x2__0 = mat[0]; + obj.val_mat2x2__1 = mat[1]; +} + +void SetMatVecval_mat2x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) { + switch(mat_idx) { + case 0: { obj.val_mat2x2__0 = vec; break; } + case 1: { obj.val_mat2x2__1 = vec; break; } + } +} + +void SetMatScalarval_mat2x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) { + switch(mat_idx) { + case 0: { obj.val_mat2x2__0[vec_idx] = scalar; break; } + case 1: { obj.val_mat2x2__1[vec_idx] = scalar; break; } + } +} + +half3x2 GetMatval_mat3x2_OnUniformCompatible(UniformCompatible obj) { + return half3x2(obj.val_mat3x2__0, obj.val_mat3x2__1, obj.val_mat3x2__2); +} + +void SetMatval_mat3x2_OnUniformCompatible(UniformCompatible obj, half3x2 mat) { + obj.val_mat3x2__0 = mat[0]; + obj.val_mat3x2__1 = mat[1]; + obj.val_mat3x2__2 = mat[2]; +} + +void SetMatVecval_mat3x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) { + switch(mat_idx) { + case 0: { obj.val_mat3x2__0 = vec; break; } + case 1: { obj.val_mat3x2__1 = vec; break; } + case 2: { obj.val_mat3x2__2 = vec; break; } + } +} + +void SetMatScalarval_mat3x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) { + switch(mat_idx) { + case 0: { obj.val_mat3x2__0[vec_idx] = scalar; break; } + case 1: { obj.val_mat3x2__1[vec_idx] = scalar; break; } + case 2: { obj.val_mat3x2__2[vec_idx] = scalar; break; } + } +} + +half4x2 GetMatval_mat4x2_OnUniformCompatible(UniformCompatible obj) { + return half4x2(obj.val_mat4x2__0, obj.val_mat4x2__1, obj.val_mat4x2__2, obj.val_mat4x2__3); +} + +void SetMatval_mat4x2_OnUniformCompatible(UniformCompatible obj, half4x2 mat) { + obj.val_mat4x2__0 = mat[0]; + obj.val_mat4x2__1 = mat[1]; + obj.val_mat4x2__2 = mat[2]; + obj.val_mat4x2__3 = mat[3]; +} + +void SetMatVecval_mat4x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) { + switch(mat_idx) { + case 0: { obj.val_mat4x2__0 = vec; break; } + case 1: { obj.val_mat4x2__1 = vec; break; } + case 2: { obj.val_mat4x2__2 = vec; break; } + case 3: { obj.val_mat4x2__3 = vec; break; } + } +} + +void SetMatScalarval_mat4x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) { + switch(mat_idx) { + case 0: { obj.val_mat4x2__0[vec_idx] = scalar; break; } + case 1: { obj.val_mat4x2__1[vec_idx] = scalar; break; } + case 2: { obj.val_mat4x2__2[vec_idx] = scalar; break; } + case 3: { obj.val_mat4x2__3[vec_idx] = scalar; break; } + } +} + +typedef half ret_Constructarray2_half_[2]; +ret_Constructarray2_half_ Constructarray2_half_(half arg0, half arg1) { + half ret[2] = { arg0, arg1 }; + return ret; +} + +half f16_function(half x) +{ + half val = 15.203125h; + + half _e4 = val; + val = (_e4 + -33344.0h); + half _e6 = val; + half _e9 = val; + val = (_e9 + (_e6 + 5.0h)); + float _e13 = input_uniform.val_f32_; + half _e14 = val; + half _e18 = val; + val = (_e18 + half((_e13 + float(_e14)))); + half _e22 = input_uniform.val_f16_; + half _e25 = val; + val = (_e25 + (_e22).xxx.z); + half _e31 = input_uniform.val_f16_; + half _e34 = input_storage.Load(12); + output.Store(12, (_e31 + _e34)); + half2 _e40 = input_uniform.val_f16_2_; + half2 _e43 = input_storage.Load(16); + output.Store(16, (_e40 + _e43)); + half3 _e49 = input_uniform.val_f16_3_; + half3 _e52 = input_storage.Load(24); + output.Store(24, (_e49 + _e52)); + half4 _e58 = input_uniform.val_f16_4_; + half4 _e61 = input_storage.Load(32); + output.Store(32, (_e58 + _e61)); + half2x2 _e67 = GetMatval_mat2x2_OnUniformCompatible(input_uniform); + half2x2 _e70 = half2x2(input_storage.Load(44+0), input_storage.Load(44+4)); + { + half2x2 _value2 = (_e67 + _e70); + output.Store(44+0, _value2[0]); + output.Store(44+4, _value2[1]); + } + half2x3 _e76 = input_uniform.val_mat2x3_; + half2x3 _e79 = half2x3(input_storage.Load(56+0), input_storage.Load(56+8)); + { + half2x3 _value2 = (_e76 + _e79); + output.Store(56+0, _value2[0]); + output.Store(56+8, _value2[1]); + } + half2x4 _e85 = input_uniform.val_mat2x4_; + half2x4 _e88 = half2x4(input_storage.Load(72+0), input_storage.Load(72+8)); + { + half2x4 _value2 = (_e85 + _e88); + output.Store(72+0, _value2[0]); + output.Store(72+8, _value2[1]); + } + half3x2 _e94 = GetMatval_mat3x2_OnUniformCompatible(input_uniform); + half3x2 _e97 = half3x2(input_storage.Load(88+0), input_storage.Load(88+4), input_storage.Load(88+8)); + { + half3x2 _value2 = (_e94 + _e97); + output.Store(88+0, _value2[0]); + output.Store(88+4, _value2[1]); + output.Store(88+8, _value2[2]); + } + half3x3 _e103 = input_uniform.val_mat3x3_; + half3x3 _e106 = half3x3(input_storage.Load(104+0), input_storage.Load(104+8), input_storage.Load(104+16)); + { + half3x3 _value2 = (_e103 + _e106); + output.Store(104+0, _value2[0]); + output.Store(104+8, _value2[1]); + output.Store(104+16, _value2[2]); + } + half3x4 _e112 = input_uniform.val_mat3x4_; + half3x4 _e115 = half3x4(input_storage.Load(128+0), input_storage.Load(128+8), input_storage.Load(128+16)); + { + half3x4 _value2 = (_e112 + _e115); + output.Store(128+0, _value2[0]); + output.Store(128+8, _value2[1]); + output.Store(128+16, _value2[2]); + } + half4x2 _e121 = GetMatval_mat4x2_OnUniformCompatible(input_uniform); + half4x2 _e124 = half4x2(input_storage.Load(152+0), input_storage.Load(152+4), input_storage.Load(152+8), input_storage.Load(152+12)); + { + half4x2 _value2 = (_e121 + _e124); + output.Store(152+0, _value2[0]); + output.Store(152+4, _value2[1]); + output.Store(152+8, _value2[2]); + output.Store(152+12, _value2[3]); + } + half4x3 _e130 = input_uniform.val_mat4x3_; + half4x3 _e133 = half4x3(input_storage.Load(168+0), input_storage.Load(168+8), input_storage.Load(168+16), input_storage.Load(168+24)); + { + half4x3 _value2 = (_e130 + _e133); + output.Store(168+0, _value2[0]); + output.Store(168+8, _value2[1]); + output.Store(168+16, _value2[2]); + output.Store(168+24, _value2[3]); + } + half4x4 _e139 = input_uniform.val_mat4x4_; + half4x4 _e142 = half4x4(input_storage.Load(200+0), input_storage.Load(200+8), input_storage.Load(200+16), input_storage.Load(200+24)); + { + half4x4 _value2 = (_e139 + _e142); + output.Store(200+0, _value2[0]); + output.Store(200+8, _value2[1]); + output.Store(200+16, _value2[2]); + output.Store(200+24, _value2[3]); + } + half _e148[2] = Constructarray2_half_(input_arrays.Load(0+0), input_arrays.Load(0+2)); + { + half _value2[2] = _e148; + output_arrays.Store(0+0, _value2[0]); + output_arrays.Store(0+2, _value2[1]); + } + half _e149 = val; + half _e151 = val; + val = (_e151 + abs(_e149)); + half _e153 = val; + half _e154 = val; + half _e155 = val; + half _e157 = val; + val = (_e157 + clamp(_e153, _e154, _e155)); + half _e159 = val; + half _e161 = val; + half _e164 = val; + val = (_e164 + dot((_e159).xx, (_e161).xx)); + half _e166 = val; + half _e167 = val; + half _e169 = val; + val = (_e169 + max(_e166, _e167)); + half _e171 = val; + half _e172 = val; + half _e174 = val; + val = (_e174 + min(_e171, _e172)); + half _e176 = val; + half _e178 = val; + val = (_e178 + sign(_e176)); + half _e181 = val; + val = (_e181 + 1.0h); + half2 _e185 = input_uniform.val_f16_2_; + float2 float_vec2_ = float2(_e185); + output.Store(16, half2(float_vec2_)); + half3 _e192 = input_uniform.val_f16_3_; + float3 float_vec3_ = float3(_e192); + output.Store(24, half3(float_vec3_)); + half4 _e199 = input_uniform.val_f16_4_; + float4 float_vec4_ = float4(_e199); + output.Store(32, half4(float_vec4_)); + half2x2 _e208 = GetMatval_mat2x2_OnUniformCompatible(input_uniform); + { + half2x2 _value2 = half2x2(float2x2(_e208)); + output.Store(44+0, _value2[0]); + output.Store(44+4, _value2[1]); + } + half2x3 _e215 = input_uniform.val_mat2x3_; + { + half2x3 _value2 = half2x3(float2x3(_e215)); + output.Store(56+0, _value2[0]); + output.Store(56+8, _value2[1]); + } + half2x4 _e222 = input_uniform.val_mat2x4_; + { + half2x4 _value2 = half2x4(float2x4(_e222)); + output.Store(72+0, _value2[0]); + output.Store(72+8, _value2[1]); + } + half3x2 _e229 = GetMatval_mat3x2_OnUniformCompatible(input_uniform); + { + half3x2 _value2 = half3x2(float3x2(_e229)); + output.Store(88+0, _value2[0]); + output.Store(88+4, _value2[1]); + output.Store(88+8, _value2[2]); + } + half3x3 _e236 = input_uniform.val_mat3x3_; + { + half3x3 _value2 = half3x3(float3x3(_e236)); + output.Store(104+0, _value2[0]); + output.Store(104+8, _value2[1]); + output.Store(104+16, _value2[2]); + } + half3x4 _e243 = input_uniform.val_mat3x4_; + { + half3x4 _value2 = half3x4(float3x4(_e243)); + output.Store(128+0, _value2[0]); + output.Store(128+8, _value2[1]); + output.Store(128+16, _value2[2]); + } + half4x2 _e250 = GetMatval_mat4x2_OnUniformCompatible(input_uniform); + { + half4x2 _value2 = half4x2(float4x2(_e250)); + output.Store(152+0, _value2[0]); + output.Store(152+4, _value2[1]); + output.Store(152+8, _value2[2]); + output.Store(152+12, _value2[3]); + } + half4x3 _e257 = input_uniform.val_mat4x3_; + { + half4x3 _value2 = half4x3(float4x3(_e257)); + output.Store(168+0, _value2[0]); + output.Store(168+8, _value2[1]); + output.Store(168+16, _value2[2]); + output.Store(168+24, _value2[3]); + } + half4x4 _e264 = input_uniform.val_mat4x4_; + { + half4x4 _value2 = half4x4(float4x4(_e264)); + output.Store(200+0, _value2[0]); + output.Store(200+8, _value2[1]); + output.Store(200+16, _value2[2]); + output.Store(200+24, _value2[3]); + } + half _e267 = val; + return _e267; +} + +[numthreads(1, 1, 1)] +void main() +{ + const half _e3 = f16_function(2.0h); + output.Store(40, _e3); + return; +} diff --git a/naga/tests/out/hlsl/f16.ron b/naga/tests/out/hlsl/f16.ron new file mode 100644 index 0000000000..b396a4626e --- /dev/null +++ b/naga/tests/out/hlsl/f16.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"main", + target_profile:"cs_6_2", + ), + ], +) diff --git a/naga/tests/out/msl/extra.msl b/naga/tests/out/msl/extra.msl index 8288dfad92..4d6bb568f3 100644 --- a/naga/tests/out/msl/extra.msl +++ b/naga/tests/out/msl/extra.msl @@ -6,7 +6,7 @@ using metal::uint; struct PushConstants { uint index; - char _pad1[12]; + char _pad1[4]; metal::float2 double_; }; struct FragmentIn { diff --git a/naga/tests/out/msl/f16.msl b/naga/tests/out/msl/f16.msl new file mode 100644 index 0000000000..40be58e901 --- /dev/null +++ b/naga/tests/out/msl/f16.msl @@ -0,0 +1,177 @@ +// language: metal1.0 +#include +#include + +using metal::uint; + +struct UniformCompatible { + uint val_u32_; + int val_i32_; + float val_f32_; + half val_f16_; + char _pad4[2]; + metal::half2 val_f16_2_; + char _pad5[4]; + metal::half3 val_f16_3_; + metal::half4 val_f16_4_; + half final_value; + char _pad8[2]; + metal::half2x2 val_mat2x2_; + char _pad9[4]; + metal::half2x3 val_mat2x3_; + metal::half2x4 val_mat2x4_; + metal::half3x2 val_mat3x2_; + char _pad12[4]; + metal::half3x3 val_mat3x3_; + metal::half3x4 val_mat3x4_; + metal::half4x2 val_mat4x2_; + metal::half4x3 val_mat4x3_; + metal::half4x4 val_mat4x4_; +}; +struct type_16 { + half inner[2]; +}; +struct StorageCompatible { + type_16 val_f16_array_2_; +}; +struct LayoutTest { + half scalar1_; + half scalar2_; + char _pad2[4]; + metal::packed_half3 v3_; + half tuck_in; + half scalar4_; + char _pad5[2]; + uint larger; +}; +constant half constant_variable = 15.203125h; + +half f16_function( + half x, + constant UniformCompatible& input_uniform, + device UniformCompatible const& input_storage, + device StorageCompatible const& input_arrays, + device UniformCompatible& output, + device StorageCompatible& output_arrays +) { + half val = 15.203125h; + half _e4 = val; + val = _e4 + -33344.0h; + half _e6 = val; + half _e9 = val; + val = _e9 + (_e6 + 5.0h); + float _e13 = input_uniform.val_f32_; + half _e14 = val; + half _e18 = val; + val = _e18 + static_cast(_e13 + static_cast(_e14)); + half _e22 = input_uniform.val_f16_; + half _e25 = val; + val = _e25 + metal::half3(_e22).z; + half _e31 = input_uniform.val_f16_; + half _e34 = input_storage.val_f16_; + output.val_f16_ = _e31 + _e34; + metal::half2 _e40 = input_uniform.val_f16_2_; + metal::half2 _e43 = input_storage.val_f16_2_; + output.val_f16_2_ = _e40 + _e43; + metal::half3 _e49 = input_uniform.val_f16_3_; + metal::half3 _e52 = input_storage.val_f16_3_; + output.val_f16_3_ = _e49 + _e52; + metal::half4 _e58 = input_uniform.val_f16_4_; + metal::half4 _e61 = input_storage.val_f16_4_; + output.val_f16_4_ = _e58 + _e61; + metal::half2x2 _e67 = input_uniform.val_mat2x2_; + metal::half2x2 _e70 = input_storage.val_mat2x2_; + output.val_mat2x2_ = _e67 + _e70; + metal::half2x3 _e76 = input_uniform.val_mat2x3_; + metal::half2x3 _e79 = input_storage.val_mat2x3_; + output.val_mat2x3_ = _e76 + _e79; + metal::half2x4 _e85 = input_uniform.val_mat2x4_; + metal::half2x4 _e88 = input_storage.val_mat2x4_; + output.val_mat2x4_ = _e85 + _e88; + metal::half3x2 _e94 = input_uniform.val_mat3x2_; + metal::half3x2 _e97 = input_storage.val_mat3x2_; + output.val_mat3x2_ = _e94 + _e97; + metal::half3x3 _e103 = input_uniform.val_mat3x3_; + metal::half3x3 _e106 = input_storage.val_mat3x3_; + output.val_mat3x3_ = _e103 + _e106; + metal::half3x4 _e112 = input_uniform.val_mat3x4_; + metal::half3x4 _e115 = input_storage.val_mat3x4_; + output.val_mat3x4_ = _e112 + _e115; + metal::half4x2 _e121 = input_uniform.val_mat4x2_; + metal::half4x2 _e124 = input_storage.val_mat4x2_; + output.val_mat4x2_ = _e121 + _e124; + metal::half4x3 _e130 = input_uniform.val_mat4x3_; + metal::half4x3 _e133 = input_storage.val_mat4x3_; + output.val_mat4x3_ = _e130 + _e133; + metal::half4x4 _e139 = input_uniform.val_mat4x4_; + metal::half4x4 _e142 = input_storage.val_mat4x4_; + output.val_mat4x4_ = _e139 + _e142; + type_16 _e148 = input_arrays.val_f16_array_2_; + output_arrays.val_f16_array_2_ = _e148; + half _e149 = val; + half _e151 = val; + val = _e151 + metal::abs(_e149); + half _e153 = val; + half _e154 = val; + half _e155 = val; + half _e157 = val; + val = _e157 + metal::clamp(_e153, _e154, _e155); + half _e159 = val; + half _e161 = val; + half _e164 = val; + val = _e164 + metal::dot(metal::half2(_e159), metal::half2(_e161)); + half _e166 = val; + half _e167 = val; + half _e169 = val; + val = _e169 + metal::max(_e166, _e167); + half _e171 = val; + half _e172 = val; + half _e174 = val; + val = _e174 + metal::min(_e171, _e172); + half _e176 = val; + half _e178 = val; + val = _e178 + metal::sign(_e176); + half _e181 = val; + val = _e181 + 1.0h; + metal::half2 _e185 = input_uniform.val_f16_2_; + metal::float2 float_vec2_ = static_cast(_e185); + output.val_f16_2_ = static_cast(float_vec2_); + metal::half3 _e192 = input_uniform.val_f16_3_; + metal::float3 float_vec3_ = static_cast(_e192); + output.val_f16_3_ = static_cast(float_vec3_); + metal::half4 _e199 = input_uniform.val_f16_4_; + metal::float4 float_vec4_ = static_cast(_e199); + output.val_f16_4_ = static_cast(float_vec4_); + metal::half2x2 _e208 = input_uniform.val_mat2x2_; + output.val_mat2x2_ = metal::half2x2(metal::float2x2(_e208)); + metal::half2x3 _e215 = input_uniform.val_mat2x3_; + output.val_mat2x3_ = metal::half2x3(metal::float2x3(_e215)); + metal::half2x4 _e222 = input_uniform.val_mat2x4_; + output.val_mat2x4_ = metal::half2x4(metal::float2x4(_e222)); + metal::half3x2 _e229 = input_uniform.val_mat3x2_; + output.val_mat3x2_ = metal::half3x2(metal::float3x2(_e229)); + metal::half3x3 _e236 = input_uniform.val_mat3x3_; + output.val_mat3x3_ = metal::half3x3(metal::float3x3(_e236)); + metal::half3x4 _e243 = input_uniform.val_mat3x4_; + output.val_mat3x4_ = metal::half3x4(metal::float3x4(_e243)); + metal::half4x2 _e250 = input_uniform.val_mat4x2_; + output.val_mat4x2_ = metal::half4x2(metal::float4x2(_e250)); + metal::half4x3 _e257 = input_uniform.val_mat4x3_; + output.val_mat4x3_ = metal::half4x3(metal::float4x3(_e257)); + metal::half4x4 _e264 = input_uniform.val_mat4x4_; + output.val_mat4x4_ = metal::half4x4(metal::float4x4(_e264)); + half _e267 = val; + return _e267; +} + +kernel void main_( + constant UniformCompatible& input_uniform [[user(fake0)]] +, device UniformCompatible const& input_storage [[user(fake0)]] +, device StorageCompatible const& input_arrays [[user(fake0)]] +, device UniformCompatible& output [[user(fake0)]] +, device StorageCompatible& output_arrays [[user(fake0)]] +) { + half _e3 = f16_function(2.0h, input_uniform, input_storage, input_arrays, output, output_arrays); + output.final_value = _e3; + return; +} diff --git a/naga/tests/out/spv/atomicCompareExchange-int64.spvasm b/naga/tests/out/spv/atomicCompareExchange-int64.spvasm index f174ad3b38..7fa0c3f0de 100644 --- a/naga/tests/out/spv/atomicCompareExchange-int64.spvasm +++ b/naga/tests/out/spv/atomicCompareExchange-int64.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.0 ; Generator: rspirv -; Bound: 125 +; Bound: 123 OpCapability Shader OpCapability Int64Atomics OpCapability Int64 @@ -9,9 +9,9 @@ OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %19 "test_atomic_compare_exchange_i64" -OpEntryPoint GLCompute %77 "test_atomic_compare_exchange_u64" +OpEntryPoint GLCompute %76 "test_atomic_compare_exchange_u64" OpExecutionMode %19 LocalSize 1 1 1 -OpExecutionMode %77 LocalSize 1 1 1 +OpExecutionMode %76 LocalSize 1 1 1 OpDecorate %5 ArrayStride 8 OpDecorate %8 ArrayStride 8 OpMemberDecorate %10 0 Offset 0 @@ -57,12 +57,12 @@ OpMemberDecorate %16 0 Offset 0 %51 = OpTypeInt 32 1 %50 = OpConstant %51 1 %52 = OpConstant %3 64 -%78 = OpTypePointer StorageBuffer %8 -%80 = OpConstant %7 10 -%83 = OpTypePointer Function %7 -%84 = OpConstantNull %7 -%86 = OpConstantNull %9 -%99 = OpTypePointer StorageBuffer %7 +%77 = OpTypePointer StorageBuffer %8 +%79 = OpConstant %7 10 +%82 = OpTypePointer Function %7 +%83 = OpConstantNull %7 +%85 = OpConstantNull %9 +%98 = OpTypePointer StorageBuffer %7 %19 = OpFunction %2 None %20 %18 = OpLabel %27 = OpVariable %28 Function %22 @@ -106,16 +106,16 @@ OpBranch %61 %61 = OpLabel %63 = OpLoad %4 %29 %64 = OpIAdd %4 %63 %25 -%66 = OpLoad %3 %27 -%67 = OpLoad %4 %29 -%69 = OpAccessChain %47 %23 %66 -%70 = OpAtomicCompareExchange %4 %69 %50 %52 %52 %64 %67 -%71 = OpIEqual %9 %70 %67 -%68 = OpCompositeConstruct %10 %70 %71 -%72 = OpCompositeExtract %4 %68 0 -OpStore %29 %72 -%73 = OpCompositeExtract %9 %68 1 -OpStore %32 %73 +%65 = OpLoad %3 %27 +%66 = OpLoad %4 %29 +%68 = OpAccessChain %47 %23 %65 +%69 = OpAtomicCompareExchange %4 %68 %50 %52 %52 %64 %66 +%70 = OpIEqual %9 %69 %66 +%67 = OpCompositeConstruct %10 %69 %70 +%71 = OpCompositeExtract %4 %67 0 +OpStore %29 %71 +%72 = OpCompositeExtract %9 %67 1 +OpStore %32 %72 OpBranch %62 %62 = OpLabel OpBranch %56 @@ -126,80 +126,80 @@ OpBranch %45 %45 = OpLabel OpBranch %39 %39 = OpLabel -%74 = OpLoad %3 %27 -%75 = OpIAdd %3 %74 %26 -OpStore %27 %75 +%73 = OpLoad %3 %27 +%74 = OpIAdd %3 %73 %26 +OpStore %27 %74 OpBranch %36 %37 = OpLabel OpReturn OpFunctionEnd -%77 = OpFunction %2 None %20 -%76 = OpLabel -%81 = OpVariable %28 Function %22 -%82 = OpVariable %83 Function %84 -%85 = OpVariable %33 Function %86 -%79 = OpAccessChain %78 %15 %22 +%76 = OpFunction %2 None %20 +%75 = OpLabel +%80 = OpVariable %28 Function %22 +%81 = OpVariable %82 Function %83 +%84 = OpVariable %33 Function %85 +%78 = OpAccessChain %77 %15 %22 +OpBranch %86 +%86 = OpLabel OpBranch %87 %87 = OpLabel -OpBranch %88 -%88 = OpLabel -OpLoopMerge %89 %91 None -OpBranch %90 -%90 = OpLabel -%92 = OpLoad %3 %81 -%93 = OpULessThan %9 %92 %6 -OpSelectionMerge %94 None -OpBranchConditional %93 %94 %95 -%95 = OpLabel +OpLoopMerge %88 %90 None OpBranch %89 +%89 = OpLabel +%91 = OpLoad %3 %80 +%92 = OpULessThan %9 %91 %6 +OpSelectionMerge %93 None +OpBranchConditional %92 %93 %94 %94 = OpLabel -OpBranch %96 -%96 = OpLabel -%98 = OpLoad %3 %81 -%100 = OpAccessChain %99 %79 %98 -%101 = OpAtomicLoad %7 %100 %50 %52 -OpStore %82 %101 -OpStore %85 %24 -OpBranch %102 -%102 = OpLabel -OpLoopMerge %103 %105 None -OpBranch %104 -%104 = OpLabel -%106 = OpLoad %9 %85 -%107 = OpLogicalNot %9 %106 -OpSelectionMerge %108 None -OpBranchConditional %107 %108 %109 -%109 = OpLabel +OpBranch %88 +%93 = OpLabel +OpBranch %95 +%95 = OpLabel +%97 = OpLoad %3 %80 +%99 = OpAccessChain %98 %78 %97 +%100 = OpAtomicLoad %7 %99 %50 %52 +OpStore %81 %100 +OpStore %84 %24 +OpBranch %101 +%101 = OpLabel +OpLoopMerge %102 %104 None OpBranch %103 +%103 = OpLabel +%105 = OpLoad %9 %84 +%106 = OpLogicalNot %9 %105 +OpSelectionMerge %107 None +OpBranchConditional %106 %107 %108 %108 = OpLabel +OpBranch %102 +%107 = OpLabel +OpBranch %109 +%109 = OpLabel +%111 = OpLoad %7 %81 +%112 = OpIAdd %7 %111 %79 +%113 = OpLoad %3 %80 +%114 = OpLoad %7 %81 +%116 = OpAccessChain %98 %78 %113 +%117 = OpAtomicCompareExchange %7 %116 %50 %52 %52 %112 %114 +%118 = OpIEqual %9 %117 %114 +%115 = OpCompositeConstruct %11 %117 %118 +%119 = OpCompositeExtract %7 %115 0 +OpStore %81 %119 +%120 = OpCompositeExtract %9 %115 1 +OpStore %84 %120 OpBranch %110 %110 = OpLabel -%112 = OpLoad %7 %82 -%113 = OpIAdd %7 %112 %80 -%115 = OpLoad %3 %81 -%116 = OpLoad %7 %82 -%118 = OpAccessChain %99 %79 %115 -%119 = OpAtomicCompareExchange %7 %118 %50 %52 %52 %113 %116 -%120 = OpIEqual %9 %119 %116 -%117 = OpCompositeConstruct %11 %119 %120 -%121 = OpCompositeExtract %7 %117 0 -OpStore %82 %121 -%122 = OpCompositeExtract %9 %117 1 -OpStore %85 %122 -OpBranch %111 -%111 = OpLabel -OpBranch %105 -%105 = OpLabel -OpBranch %102 -%103 = OpLabel -OpBranch %97 -%97 = OpLabel -OpBranch %91 -%91 = OpLabel -%123 = OpLoad %3 %81 -%124 = OpIAdd %3 %123 %26 -OpStore %81 %124 -OpBranch %88 -%89 = OpLabel +OpBranch %104 +%104 = OpLabel +OpBranch %101 +%102 = OpLabel +OpBranch %96 +%96 = OpLabel +OpBranch %90 +%90 = OpLabel +%121 = OpLoad %3 %80 +%122 = OpIAdd %3 %121 %26 +OpStore %80 %122 +OpBranch %87 +%88 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/extra.spvasm b/naga/tests/out/spv/extra.spvasm index 9c434a8ce2..0e84427bad 100644 --- a/naga/tests/out/spv/extra.spvasm +++ b/naga/tests/out/spv/extra.spvasm @@ -1,76 +1,74 @@ ; SPIR-V ; Version: 1.2 ; Generator: rspirv -; Bound: 48 +; Bound: 47 OpCapability Shader -OpCapability Float64 OpCapability Geometry %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint Fragment %23 "main" %15 %18 %21 -OpExecutionMode %23 OriginUpperLeft +OpEntryPoint Fragment %22 "main" %14 %17 %20 +OpExecutionMode %22 OriginUpperLeft OpMemberDecorate %6 0 Offset 0 -OpMemberDecorate %6 1 Offset 16 -OpMemberDecorate %9 0 Offset 0 -OpMemberDecorate %9 1 Offset 16 -OpDecorate %11 Block -OpMemberDecorate %11 0 Offset 0 -OpDecorate %15 Location 0 -OpDecorate %18 BuiltIn PrimitiveId -OpDecorate %18 Flat -OpDecorate %21 Location 0 +OpMemberDecorate %6 1 Offset 8 +OpMemberDecorate %8 0 Offset 0 +OpMemberDecorate %8 1 Offset 16 +OpDecorate %10 Block +OpMemberDecorate %10 0 Offset 0 +OpDecorate %14 Location 0 +OpDecorate %17 BuiltIn PrimitiveId +OpDecorate %17 Flat +OpDecorate %20 Location 0 %2 = OpTypeVoid %3 = OpTypeInt 32 0 -%5 = OpTypeFloat 64 +%5 = OpTypeFloat 32 %4 = OpTypeVector %5 2 %6 = OpTypeStruct %3 %4 -%8 = OpTypeFloat 32 -%7 = OpTypeVector %8 4 -%9 = OpTypeStruct %7 %3 -%11 = OpTypeStruct %6 -%12 = OpTypePointer PushConstant %11 -%10 = OpVariable %12 PushConstant -%16 = OpTypePointer Input %7 -%15 = OpVariable %16 Input -%19 = OpTypePointer Input %3 -%18 = OpVariable %19 Input -%22 = OpTypePointer Output %7 -%21 = OpVariable %22 Output -%24 = OpTypeFunction %2 -%25 = OpTypePointer PushConstant %6 -%26 = OpConstant %3 0 -%28 = OpConstant %8 1.0 -%29 = OpTypeVector %8 3 -%30 = OpConstantComposite %29 %28 %28 %28 -%33 = OpTypePointer PushConstant %3 -%36 = OpTypeBool -%23 = OpFunction %2 None %24 -%13 = OpLabel -%17 = OpLoad %7 %15 -%20 = OpLoad %3 %18 -%14 = OpCompositeConstruct %9 %17 %20 -%27 = OpAccessChain %25 %10 %26 -OpBranch %31 -%31 = OpLabel -%32 = OpCompositeExtract %3 %14 1 -%34 = OpAccessChain %33 %27 %26 -%35 = OpLoad %3 %34 -%37 = OpIEqual %36 %32 %35 -OpSelectionMerge %38 None -OpBranchConditional %37 %39 %40 -%39 = OpLabel -%41 = OpCompositeExtract %7 %14 0 -OpStore %21 %41 +%7 = OpTypeVector %5 4 +%8 = OpTypeStruct %7 %3 +%10 = OpTypeStruct %6 +%11 = OpTypePointer PushConstant %10 +%9 = OpVariable %11 PushConstant +%15 = OpTypePointer Input %7 +%14 = OpVariable %15 Input +%18 = OpTypePointer Input %3 +%17 = OpVariable %18 Input +%21 = OpTypePointer Output %7 +%20 = OpVariable %21 Output +%23 = OpTypeFunction %2 +%24 = OpTypePointer PushConstant %6 +%25 = OpConstant %3 0 +%27 = OpConstant %5 1.0 +%28 = OpTypeVector %5 3 +%29 = OpConstantComposite %28 %27 %27 %27 +%32 = OpTypePointer PushConstant %3 +%35 = OpTypeBool +%22 = OpFunction %2 None %23 +%12 = OpLabel +%16 = OpLoad %7 %14 +%19 = OpLoad %3 %17 +%13 = OpCompositeConstruct %8 %16 %19 +%26 = OpAccessChain %24 %9 %25 +OpBranch %30 +%30 = OpLabel +%31 = OpCompositeExtract %3 %13 1 +%33 = OpAccessChain %32 %26 %25 +%34 = OpLoad %3 %33 +%36 = OpIEqual %35 %31 %34 +OpSelectionMerge %37 None +OpBranchConditional %36 %38 %39 +%38 = OpLabel +%40 = OpCompositeExtract %7 %13 0 +OpStore %20 %40 OpReturn -%40 = OpLabel -%42 = OpCompositeExtract %7 %14 0 -%43 = OpVectorShuffle %29 %42 %42 0 1 2 -%44 = OpFSub %29 %30 %43 -%45 = OpCompositeExtract %7 %14 0 -%46 = OpCompositeExtract %8 %45 3 -%47 = OpCompositeConstruct %7 %44 %46 -OpStore %21 %47 +%39 = OpLabel +%41 = OpCompositeExtract %7 %13 0 +%42 = OpVectorShuffle %28 %41 %41 0 1 2 +%43 = OpFSub %28 %29 %42 +%44 = OpCompositeExtract %7 %13 0 +%45 = OpCompositeExtract %5 %44 3 +%46 = OpCompositeConstruct %7 %43 %45 +OpStore %20 %46 OpReturn -%38 = OpLabel +%37 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/f16.spvasm b/naga/tests/out/spv/f16.spvasm new file mode 100644 index 0000000000..3d8fcea7a7 --- /dev/null +++ b/naga/tests/out/spv/f16.spvasm @@ -0,0 +1,633 @@ +; SPIR-V +; Version: 1.0 +; Generator: rspirv +; Bound: 511 +OpCapability Shader +OpCapability Float16 +OpCapability StorageBuffer16BitAccess +OpCapability UniformAndStorageBuffer16BitAccess +OpCapability StorageInputOutput16 +OpExtension "SPV_KHR_storage_buffer_storage_class" +OpExtension "SPV_KHR_16bit_storage" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %499 "main" +OpExecutionMode %499 LocalSize 1 1 1 +OpMemberDecorate %19 0 Offset 0 +OpMemberDecorate %19 1 Offset 4 +OpMemberDecorate %19 2 Offset 8 +OpMemberDecorate %19 3 Offset 12 +OpMemberDecorate %19 4 Offset 16 +OpMemberDecorate %19 5 Offset 24 +OpMemberDecorate %19 6 Offset 32 +OpMemberDecorate %19 7 Offset 40 +OpMemberDecorate %19 8 Offset 44 +OpMemberDecorate %19 8 ColMajor +OpMemberDecorate %19 8 MatrixStride 4 +OpMemberDecorate %19 9 Offset 56 +OpMemberDecorate %19 9 ColMajor +OpMemberDecorate %19 9 MatrixStride 8 +OpMemberDecorate %19 10 Offset 72 +OpMemberDecorate %19 10 ColMajor +OpMemberDecorate %19 10 MatrixStride 8 +OpMemberDecorate %19 11 Offset 88 +OpMemberDecorate %19 11 ColMajor +OpMemberDecorate %19 11 MatrixStride 4 +OpMemberDecorate %19 12 Offset 104 +OpMemberDecorate %19 12 ColMajor +OpMemberDecorate %19 12 MatrixStride 8 +OpMemberDecorate %19 13 Offset 128 +OpMemberDecorate %19 13 ColMajor +OpMemberDecorate %19 13 MatrixStride 8 +OpMemberDecorate %19 14 Offset 152 +OpMemberDecorate %19 14 ColMajor +OpMemberDecorate %19 14 MatrixStride 4 +OpMemberDecorate %19 15 Offset 168 +OpMemberDecorate %19 15 ColMajor +OpMemberDecorate %19 15 MatrixStride 8 +OpMemberDecorate %19 16 Offset 200 +OpMemberDecorate %19 16 ColMajor +OpMemberDecorate %19 16 MatrixStride 8 +OpDecorate %20 ArrayStride 2 +OpMemberDecorate %22 0 Offset 0 +OpMemberDecorate %23 0 Offset 0 +OpMemberDecorate %23 1 Offset 2 +OpMemberDecorate %23 2 Offset 8 +OpMemberDecorate %23 3 Offset 14 +OpMemberDecorate %23 4 Offset 16 +OpMemberDecorate %23 5 Offset 20 +OpDecorate %28 DescriptorSet 0 +OpDecorate %28 Binding 0 +OpDecorate %29 Block +OpMemberDecorate %29 0 Offset 0 +OpDecorate %31 NonWritable +OpDecorate %31 DescriptorSet 0 +OpDecorate %31 Binding 1 +OpDecorate %32 Block +OpMemberDecorate %32 0 Offset 0 +OpDecorate %34 NonWritable +OpDecorate %34 DescriptorSet 0 +OpDecorate %34 Binding 2 +OpDecorate %35 Block +OpMemberDecorate %35 0 Offset 0 +OpDecorate %37 DescriptorSet 0 +OpDecorate %37 Binding 3 +OpDecorate %38 Block +OpMemberDecorate %38 0 Offset 0 +OpDecorate %40 DescriptorSet 0 +OpDecorate %40 Binding 4 +OpDecorate %41 Block +OpMemberDecorate %41 0 Offset 0 +%2 = OpTypeVoid +%3 = OpTypeFloat 16 +%4 = OpTypeInt 32 0 +%5 = OpTypeInt 32 1 +%6 = OpTypeFloat 32 +%7 = OpTypeVector %3 2 +%8 = OpTypeVector %3 3 +%9 = OpTypeVector %3 4 +%10 = OpTypeMatrix %7 2 +%11 = OpTypeMatrix %8 2 +%12 = OpTypeMatrix %9 2 +%13 = OpTypeMatrix %7 3 +%14 = OpTypeMatrix %8 3 +%15 = OpTypeMatrix %9 3 +%16 = OpTypeMatrix %7 4 +%17 = OpTypeMatrix %8 4 +%18 = OpTypeMatrix %9 4 +%19 = OpTypeStruct %4 %5 %6 %3 %7 %8 %9 %3 %10 %11 %12 %13 %14 %15 %16 %17 %18 +%21 = OpConstant %4 2 +%20 = OpTypeArray %3 %21 +%22 = OpTypeStruct %20 +%23 = OpTypeStruct %3 %3 %8 %3 %3 %4 +%24 = OpConstant %3 2.1524e-41 +%25 = OpConstant %3 2.7121e-41 +%27 = OpTypePointer Private %3 +%26 = OpVariable %27 Private %24 +%29 = OpTypeStruct %19 +%30 = OpTypePointer Uniform %29 +%28 = OpVariable %30 Uniform +%32 = OpTypeStruct %19 +%33 = OpTypePointer StorageBuffer %32 +%31 = OpVariable %33 StorageBuffer +%35 = OpTypeStruct %22 +%36 = OpTypePointer StorageBuffer %35 +%34 = OpVariable %36 StorageBuffer +%38 = OpTypeStruct %19 +%39 = OpTypePointer StorageBuffer %38 +%37 = OpVariable %39 StorageBuffer +%41 = OpTypeStruct %22 +%42 = OpTypePointer StorageBuffer %41 +%40 = OpVariable %42 StorageBuffer +%46 = OpTypeFunction %3 %3 +%47 = OpTypePointer Uniform %19 +%48 = OpConstant %4 0 +%50 = OpTypePointer StorageBuffer %19 +%52 = OpTypePointer StorageBuffer %22 +%56 = OpConstant %3 8.8991e-41 +%57 = OpConstant %3 2.4753e-41 +%59 = OpTypePointer Function %3 +%67 = OpTypePointer Uniform %6 +%76 = OpTypePointer Uniform %3 +%77 = OpConstant %4 3 +%84 = OpTypePointer StorageBuffer %3 +%91 = OpTypePointer StorageBuffer %7 +%92 = OpTypePointer Uniform %7 +%93 = OpConstant %4 4 +%100 = OpTypePointer StorageBuffer %8 +%101 = OpTypePointer Uniform %8 +%102 = OpConstant %4 5 +%109 = OpTypePointer StorageBuffer %9 +%110 = OpTypePointer Uniform %9 +%111 = OpConstant %4 6 +%118 = OpTypePointer StorageBuffer %10 +%119 = OpTypePointer Uniform %10 +%120 = OpConstant %4 8 +%133 = OpTypePointer StorageBuffer %11 +%134 = OpTypePointer Uniform %11 +%135 = OpConstant %4 9 +%148 = OpTypePointer StorageBuffer %12 +%149 = OpTypePointer Uniform %12 +%150 = OpConstant %4 10 +%163 = OpTypePointer StorageBuffer %13 +%164 = OpTypePointer Uniform %13 +%165 = OpConstant %4 11 +%181 = OpTypePointer StorageBuffer %14 +%182 = OpTypePointer Uniform %14 +%183 = OpConstant %4 12 +%199 = OpTypePointer StorageBuffer %15 +%200 = OpTypePointer Uniform %15 +%201 = OpConstant %4 13 +%217 = OpTypePointer StorageBuffer %16 +%218 = OpTypePointer Uniform %16 +%219 = OpConstant %4 14 +%238 = OpTypePointer StorageBuffer %17 +%239 = OpTypePointer Uniform %17 +%240 = OpConstant %4 15 +%259 = OpTypePointer StorageBuffer %18 +%260 = OpTypePointer Uniform %18 +%261 = OpConstant %4 16 +%280 = OpTypePointer StorageBuffer %20 +%319 = OpTypeVector %6 2 +%325 = OpTypeVector %6 3 +%331 = OpTypeVector %6 4 +%337 = OpTypeMatrix %319 2 +%351 = OpTypeMatrix %325 2 +%365 = OpTypeMatrix %331 2 +%379 = OpTypeMatrix %319 3 +%397 = OpTypeMatrix %325 3 +%415 = OpTypeMatrix %331 3 +%433 = OpTypeMatrix %319 4 +%455 = OpTypeMatrix %325 4 +%477 = OpTypeMatrix %331 4 +%500 = OpTypeFunction %2 +%506 = OpConstant %3 2.2959e-41 +%509 = OpConstant %4 7 +%45 = OpFunction %3 None %46 +%44 = OpFunctionParameter %3 +%43 = OpLabel +%58 = OpVariable %59 Function %25 +%49 = OpAccessChain %47 %28 %48 +%51 = OpAccessChain %50 %31 %48 +%53 = OpAccessChain %52 %34 %48 +%54 = OpAccessChain %50 %37 %48 +%55 = OpAccessChain %52 %40 %48 +OpBranch %60 +%60 = OpLabel +%61 = OpLoad %3 %58 +%62 = OpFAdd %3 %61 %56 +OpStore %58 %62 +%63 = OpLoad %3 %58 +%64 = OpFAdd %3 %63 %57 +%65 = OpLoad %3 %58 +%66 = OpFAdd %3 %65 %64 +OpStore %58 %66 +%68 = OpAccessChain %67 %49 %21 +%69 = OpLoad %6 %68 +%70 = OpLoad %3 %58 +%71 = OpFConvert %6 %70 +%72 = OpFAdd %6 %69 %71 +%73 = OpFConvert %3 %72 +%74 = OpLoad %3 %58 +%75 = OpFAdd %3 %74 %73 +OpStore %58 %75 +%78 = OpAccessChain %76 %49 %77 +%79 = OpLoad %3 %78 +%80 = OpCompositeConstruct %8 %79 %79 %79 +%81 = OpCompositeExtract %3 %80 2 +%82 = OpLoad %3 %58 +%83 = OpFAdd %3 %82 %81 +OpStore %58 %83 +%85 = OpAccessChain %76 %49 %77 +%86 = OpLoad %3 %85 +%87 = OpAccessChain %84 %51 %77 +%88 = OpLoad %3 %87 +%89 = OpFAdd %3 %86 %88 +%90 = OpAccessChain %84 %54 %77 +OpStore %90 %89 +%94 = OpAccessChain %92 %49 %93 +%95 = OpLoad %7 %94 +%96 = OpAccessChain %91 %51 %93 +%97 = OpLoad %7 %96 +%98 = OpFAdd %7 %95 %97 +%99 = OpAccessChain %91 %54 %93 +OpStore %99 %98 +%103 = OpAccessChain %101 %49 %102 +%104 = OpLoad %8 %103 +%105 = OpAccessChain %100 %51 %102 +%106 = OpLoad %8 %105 +%107 = OpFAdd %8 %104 %106 +%108 = OpAccessChain %100 %54 %102 +OpStore %108 %107 +%112 = OpAccessChain %110 %49 %111 +%113 = OpLoad %9 %112 +%114 = OpAccessChain %109 %51 %111 +%115 = OpLoad %9 %114 +%116 = OpFAdd %9 %113 %115 +%117 = OpAccessChain %109 %54 %111 +OpStore %117 %116 +%121 = OpAccessChain %119 %49 %120 +%122 = OpLoad %10 %121 +%123 = OpAccessChain %118 %51 %120 +%124 = OpLoad %10 %123 +%126 = OpCompositeExtract %7 %122 0 +%127 = OpCompositeExtract %7 %124 0 +%128 = OpFAdd %7 %126 %127 +%129 = OpCompositeExtract %7 %122 1 +%130 = OpCompositeExtract %7 %124 1 +%131 = OpFAdd %7 %129 %130 +%125 = OpCompositeConstruct %10 %128 %131 +%132 = OpAccessChain %118 %54 %120 +OpStore %132 %125 +%136 = OpAccessChain %134 %49 %135 +%137 = OpLoad %11 %136 +%138 = OpAccessChain %133 %51 %135 +%139 = OpLoad %11 %138 +%141 = OpCompositeExtract %8 %137 0 +%142 = OpCompositeExtract %8 %139 0 +%143 = OpFAdd %8 %141 %142 +%144 = OpCompositeExtract %8 %137 1 +%145 = OpCompositeExtract %8 %139 1 +%146 = OpFAdd %8 %144 %145 +%140 = OpCompositeConstruct %11 %143 %146 +%147 = OpAccessChain %133 %54 %135 +OpStore %147 %140 +%151 = OpAccessChain %149 %49 %150 +%152 = OpLoad %12 %151 +%153 = OpAccessChain %148 %51 %150 +%154 = OpLoad %12 %153 +%156 = OpCompositeExtract %9 %152 0 +%157 = OpCompositeExtract %9 %154 0 +%158 = OpFAdd %9 %156 %157 +%159 = OpCompositeExtract %9 %152 1 +%160 = OpCompositeExtract %9 %154 1 +%161 = OpFAdd %9 %159 %160 +%155 = OpCompositeConstruct %12 %158 %161 +%162 = OpAccessChain %148 %54 %150 +OpStore %162 %155 +%166 = OpAccessChain %164 %49 %165 +%167 = OpLoad %13 %166 +%168 = OpAccessChain %163 %51 %165 +%169 = OpLoad %13 %168 +%171 = OpCompositeExtract %7 %167 0 +%172 = OpCompositeExtract %7 %169 0 +%173 = OpFAdd %7 %171 %172 +%174 = OpCompositeExtract %7 %167 1 +%175 = OpCompositeExtract %7 %169 1 +%176 = OpFAdd %7 %174 %175 +%177 = OpCompositeExtract %7 %167 2 +%178 = OpCompositeExtract %7 %169 2 +%179 = OpFAdd %7 %177 %178 +%170 = OpCompositeConstruct %13 %173 %176 %179 +%180 = OpAccessChain %163 %54 %165 +OpStore %180 %170 +%184 = OpAccessChain %182 %49 %183 +%185 = OpLoad %14 %184 +%186 = OpAccessChain %181 %51 %183 +%187 = OpLoad %14 %186 +%189 = OpCompositeExtract %8 %185 0 +%190 = OpCompositeExtract %8 %187 0 +%191 = OpFAdd %8 %189 %190 +%192 = OpCompositeExtract %8 %185 1 +%193 = OpCompositeExtract %8 %187 1 +%194 = OpFAdd %8 %192 %193 +%195 = OpCompositeExtract %8 %185 2 +%196 = OpCompositeExtract %8 %187 2 +%197 = OpFAdd %8 %195 %196 +%188 = OpCompositeConstruct %14 %191 %194 %197 +%198 = OpAccessChain %181 %54 %183 +OpStore %198 %188 +%202 = OpAccessChain %200 %49 %201 +%203 = OpLoad %15 %202 +%204 = OpAccessChain %199 %51 %201 +%205 = OpLoad %15 %204 +%207 = OpCompositeExtract %9 %203 0 +%208 = OpCompositeExtract %9 %205 0 +%209 = OpFAdd %9 %207 %208 +%210 = OpCompositeExtract %9 %203 1 +%211 = OpCompositeExtract %9 %205 1 +%212 = OpFAdd %9 %210 %211 +%213 = OpCompositeExtract %9 %203 2 +%214 = OpCompositeExtract %9 %205 2 +%215 = OpFAdd %9 %213 %214 +%206 = OpCompositeConstruct %15 %209 %212 %215 +%216 = OpAccessChain %199 %54 %201 +OpStore %216 %206 +%220 = OpAccessChain %218 %49 %219 +%221 = OpLoad %16 %220 +%222 = OpAccessChain %217 %51 %219 +%223 = OpLoad %16 %222 +%225 = OpCompositeExtract %7 %221 0 +%226 = OpCompositeExtract %7 %223 0 +%227 = OpFAdd %7 %225 %226 +%228 = OpCompositeExtract %7 %221 1 +%229 = OpCompositeExtract %7 %223 1 +%230 = OpFAdd %7 %228 %229 +%231 = OpCompositeExtract %7 %221 2 +%232 = OpCompositeExtract %7 %223 2 +%233 = OpFAdd %7 %231 %232 +%234 = OpCompositeExtract %7 %221 3 +%235 = OpCompositeExtract %7 %223 3 +%236 = OpFAdd %7 %234 %235 +%224 = OpCompositeConstruct %16 %227 %230 %233 %236 +%237 = OpAccessChain %217 %54 %219 +OpStore %237 %224 +%241 = OpAccessChain %239 %49 %240 +%242 = OpLoad %17 %241 +%243 = OpAccessChain %238 %51 %240 +%244 = OpLoad %17 %243 +%246 = OpCompositeExtract %8 %242 0 +%247 = OpCompositeExtract %8 %244 0 +%248 = OpFAdd %8 %246 %247 +%249 = OpCompositeExtract %8 %242 1 +%250 = OpCompositeExtract %8 %244 1 +%251 = OpFAdd %8 %249 %250 +%252 = OpCompositeExtract %8 %242 2 +%253 = OpCompositeExtract %8 %244 2 +%254 = OpFAdd %8 %252 %253 +%255 = OpCompositeExtract %8 %242 3 +%256 = OpCompositeExtract %8 %244 3 +%257 = OpFAdd %8 %255 %256 +%245 = OpCompositeConstruct %17 %248 %251 %254 %257 +%258 = OpAccessChain %238 %54 %240 +OpStore %258 %245 +%262 = OpAccessChain %260 %49 %261 +%263 = OpLoad %18 %262 +%264 = OpAccessChain %259 %51 %261 +%265 = OpLoad %18 %264 +%267 = OpCompositeExtract %9 %263 0 +%268 = OpCompositeExtract %9 %265 0 +%269 = OpFAdd %9 %267 %268 +%270 = OpCompositeExtract %9 %263 1 +%271 = OpCompositeExtract %9 %265 1 +%272 = OpFAdd %9 %270 %271 +%273 = OpCompositeExtract %9 %263 2 +%274 = OpCompositeExtract %9 %265 2 +%275 = OpFAdd %9 %273 %274 +%276 = OpCompositeExtract %9 %263 3 +%277 = OpCompositeExtract %9 %265 3 +%278 = OpFAdd %9 %276 %277 +%266 = OpCompositeConstruct %18 %269 %272 %275 %278 +%279 = OpAccessChain %259 %54 %261 +OpStore %279 %266 +%281 = OpAccessChain %280 %53 %48 +%282 = OpLoad %20 %281 +%283 = OpAccessChain %280 %55 %48 +OpStore %283 %282 +%284 = OpLoad %3 %58 +%285 = OpExtInst %3 %1 FAbs %284 +%286 = OpLoad %3 %58 +%287 = OpFAdd %3 %286 %285 +OpStore %58 %287 +%288 = OpLoad %3 %58 +%289 = OpLoad %3 %58 +%290 = OpLoad %3 %58 +%291 = OpExtInst %3 %1 FClamp %288 %289 %290 +%292 = OpLoad %3 %58 +%293 = OpFAdd %3 %292 %291 +OpStore %58 %293 +%294 = OpLoad %3 %58 +%295 = OpCompositeConstruct %7 %294 %294 +%296 = OpLoad %3 %58 +%297 = OpCompositeConstruct %7 %296 %296 +%298 = OpDot %3 %295 %297 +%299 = OpLoad %3 %58 +%300 = OpFAdd %3 %299 %298 +OpStore %58 %300 +%301 = OpLoad %3 %58 +%302 = OpLoad %3 %58 +%303 = OpExtInst %3 %1 FMax %301 %302 +%304 = OpLoad %3 %58 +%305 = OpFAdd %3 %304 %303 +OpStore %58 %305 +%306 = OpLoad %3 %58 +%307 = OpLoad %3 %58 +%308 = OpExtInst %3 %1 FMin %306 %307 +%309 = OpLoad %3 %58 +%310 = OpFAdd %3 %309 %308 +OpStore %58 %310 +%311 = OpLoad %3 %58 +%312 = OpExtInst %3 %1 FSign %311 +%313 = OpLoad %3 %58 +%314 = OpFAdd %3 %313 %312 +OpStore %58 %314 +%315 = OpLoad %3 %58 +%316 = OpFAdd %3 %315 %24 +OpStore %58 %316 +%317 = OpAccessChain %92 %49 %93 +%318 = OpLoad %7 %317 +%320 = OpFConvert %319 %318 +%321 = OpFConvert %7 %320 +%322 = OpAccessChain %91 %54 %93 +OpStore %322 %321 +%323 = OpAccessChain %101 %49 %102 +%324 = OpLoad %8 %323 +%326 = OpFConvert %325 %324 +%327 = OpFConvert %8 %326 +%328 = OpAccessChain %100 %54 %102 +OpStore %328 %327 +%329 = OpAccessChain %110 %49 %111 +%330 = OpLoad %9 %329 +%332 = OpFConvert %331 %330 +%333 = OpFConvert %9 %332 +%334 = OpAccessChain %109 %54 %111 +OpStore %334 %333 +%335 = OpAccessChain %119 %49 %120 +%336 = OpLoad %10 %335 +%338 = OpCompositeExtract %7 %336 0 +%339 = OpFConvert %319 %338 +%340 = OpCompositeExtract %7 %336 1 +%341 = OpFConvert %319 %340 +%342 = OpCompositeConstruct %337 %339 %341 +%343 = OpCompositeExtract %319 %342 0 +%344 = OpFConvert %7 %343 +%345 = OpCompositeExtract %319 %342 1 +%346 = OpFConvert %7 %345 +%347 = OpCompositeConstruct %10 %344 %346 +%348 = OpAccessChain %118 %54 %120 +OpStore %348 %347 +%349 = OpAccessChain %134 %49 %135 +%350 = OpLoad %11 %349 +%352 = OpCompositeExtract %8 %350 0 +%353 = OpFConvert %325 %352 +%354 = OpCompositeExtract %8 %350 1 +%355 = OpFConvert %325 %354 +%356 = OpCompositeConstruct %351 %353 %355 +%357 = OpCompositeExtract %325 %356 0 +%358 = OpFConvert %8 %357 +%359 = OpCompositeExtract %325 %356 1 +%360 = OpFConvert %8 %359 +%361 = OpCompositeConstruct %11 %358 %360 +%362 = OpAccessChain %133 %54 %135 +OpStore %362 %361 +%363 = OpAccessChain %149 %49 %150 +%364 = OpLoad %12 %363 +%366 = OpCompositeExtract %9 %364 0 +%367 = OpFConvert %331 %366 +%368 = OpCompositeExtract %9 %364 1 +%369 = OpFConvert %331 %368 +%370 = OpCompositeConstruct %365 %367 %369 +%371 = OpCompositeExtract %331 %370 0 +%372 = OpFConvert %9 %371 +%373 = OpCompositeExtract %331 %370 1 +%374 = OpFConvert %9 %373 +%375 = OpCompositeConstruct %12 %372 %374 +%376 = OpAccessChain %148 %54 %150 +OpStore %376 %375 +%377 = OpAccessChain %164 %49 %165 +%378 = OpLoad %13 %377 +%380 = OpCompositeExtract %7 %378 0 +%381 = OpFConvert %319 %380 +%382 = OpCompositeExtract %7 %378 1 +%383 = OpFConvert %319 %382 +%384 = OpCompositeExtract %7 %378 2 +%385 = OpFConvert %319 %384 +%386 = OpCompositeConstruct %379 %381 %383 %385 +%387 = OpCompositeExtract %319 %386 0 +%388 = OpFConvert %7 %387 +%389 = OpCompositeExtract %319 %386 1 +%390 = OpFConvert %7 %389 +%391 = OpCompositeExtract %319 %386 2 +%392 = OpFConvert %7 %391 +%393 = OpCompositeConstruct %13 %388 %390 %392 +%394 = OpAccessChain %163 %54 %165 +OpStore %394 %393 +%395 = OpAccessChain %182 %49 %183 +%396 = OpLoad %14 %395 +%398 = OpCompositeExtract %8 %396 0 +%399 = OpFConvert %325 %398 +%400 = OpCompositeExtract %8 %396 1 +%401 = OpFConvert %325 %400 +%402 = OpCompositeExtract %8 %396 2 +%403 = OpFConvert %325 %402 +%404 = OpCompositeConstruct %397 %399 %401 %403 +%405 = OpCompositeExtract %325 %404 0 +%406 = OpFConvert %8 %405 +%407 = OpCompositeExtract %325 %404 1 +%408 = OpFConvert %8 %407 +%409 = OpCompositeExtract %325 %404 2 +%410 = OpFConvert %8 %409 +%411 = OpCompositeConstruct %14 %406 %408 %410 +%412 = OpAccessChain %181 %54 %183 +OpStore %412 %411 +%413 = OpAccessChain %200 %49 %201 +%414 = OpLoad %15 %413 +%416 = OpCompositeExtract %9 %414 0 +%417 = OpFConvert %331 %416 +%418 = OpCompositeExtract %9 %414 1 +%419 = OpFConvert %331 %418 +%420 = OpCompositeExtract %9 %414 2 +%421 = OpFConvert %331 %420 +%422 = OpCompositeConstruct %415 %417 %419 %421 +%423 = OpCompositeExtract %331 %422 0 +%424 = OpFConvert %9 %423 +%425 = OpCompositeExtract %331 %422 1 +%426 = OpFConvert %9 %425 +%427 = OpCompositeExtract %331 %422 2 +%428 = OpFConvert %9 %427 +%429 = OpCompositeConstruct %15 %424 %426 %428 +%430 = OpAccessChain %199 %54 %201 +OpStore %430 %429 +%431 = OpAccessChain %218 %49 %219 +%432 = OpLoad %16 %431 +%434 = OpCompositeExtract %7 %432 0 +%435 = OpFConvert %319 %434 +%436 = OpCompositeExtract %7 %432 1 +%437 = OpFConvert %319 %436 +%438 = OpCompositeExtract %7 %432 2 +%439 = OpFConvert %319 %438 +%440 = OpCompositeExtract %7 %432 3 +%441 = OpFConvert %319 %440 +%442 = OpCompositeConstruct %433 %435 %437 %439 %441 +%443 = OpCompositeExtract %319 %442 0 +%444 = OpFConvert %7 %443 +%445 = OpCompositeExtract %319 %442 1 +%446 = OpFConvert %7 %445 +%447 = OpCompositeExtract %319 %442 2 +%448 = OpFConvert %7 %447 +%449 = OpCompositeExtract %319 %442 3 +%450 = OpFConvert %7 %449 +%451 = OpCompositeConstruct %16 %444 %446 %448 %450 +%452 = OpAccessChain %217 %54 %219 +OpStore %452 %451 +%453 = OpAccessChain %239 %49 %240 +%454 = OpLoad %17 %453 +%456 = OpCompositeExtract %8 %454 0 +%457 = OpFConvert %325 %456 +%458 = OpCompositeExtract %8 %454 1 +%459 = OpFConvert %325 %458 +%460 = OpCompositeExtract %8 %454 2 +%461 = OpFConvert %325 %460 +%462 = OpCompositeExtract %8 %454 3 +%463 = OpFConvert %325 %462 +%464 = OpCompositeConstruct %455 %457 %459 %461 %463 +%465 = OpCompositeExtract %325 %464 0 +%466 = OpFConvert %8 %465 +%467 = OpCompositeExtract %325 %464 1 +%468 = OpFConvert %8 %467 +%469 = OpCompositeExtract %325 %464 2 +%470 = OpFConvert %8 %469 +%471 = OpCompositeExtract %325 %464 3 +%472 = OpFConvert %8 %471 +%473 = OpCompositeConstruct %17 %466 %468 %470 %472 +%474 = OpAccessChain %238 %54 %240 +OpStore %474 %473 +%475 = OpAccessChain %260 %49 %261 +%476 = OpLoad %18 %475 +%478 = OpCompositeExtract %9 %476 0 +%479 = OpFConvert %331 %478 +%480 = OpCompositeExtract %9 %476 1 +%481 = OpFConvert %331 %480 +%482 = OpCompositeExtract %9 %476 2 +%483 = OpFConvert %331 %482 +%484 = OpCompositeExtract %9 %476 3 +%485 = OpFConvert %331 %484 +%486 = OpCompositeConstruct %477 %479 %481 %483 %485 +%487 = OpCompositeExtract %331 %486 0 +%488 = OpFConvert %9 %487 +%489 = OpCompositeExtract %331 %486 1 +%490 = OpFConvert %9 %489 +%491 = OpCompositeExtract %331 %486 2 +%492 = OpFConvert %9 %491 +%493 = OpCompositeExtract %331 %486 3 +%494 = OpFConvert %9 %493 +%495 = OpCompositeConstruct %18 %488 %490 %492 %494 +%496 = OpAccessChain %259 %54 %261 +OpStore %496 %495 +%497 = OpLoad %3 %58 +OpReturnValue %497 +OpFunctionEnd +%499 = OpFunction %2 None %500 +%498 = OpLabel +%501 = OpAccessChain %47 %28 %48 +%502 = OpAccessChain %50 %31 %48 +%503 = OpAccessChain %52 %34 %48 +%504 = OpAccessChain %50 %37 %48 +%505 = OpAccessChain %52 %40 %48 +OpBranch %507 +%507 = OpLabel +%508 = OpFunctionCall %3 %45 %506 +%510 = OpAccessChain %84 %504 %509 +OpStore %510 %508 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/image.spvasm b/naga/tests/out/spv/image.spvasm index 6b5ef8c890..8982e2bf6e 100644 --- a/naga/tests/out/spv/image.spvasm +++ b/naga/tests/out/spv/image.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 547 +; Bound: 546 OpCapability Shader OpCapability Image1D OpCapability Sampled1D @@ -10,19 +10,19 @@ OpCapability ImageQuery %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %100 "main" %97 -OpEntryPoint GLCompute %191 "depth_load" %189 -OpEntryPoint Vertex %211 "queries" %209 -OpEntryPoint Vertex %263 "levels_queries" %262 -OpEntryPoint Fragment %294 "texture_sample" %293 -OpEntryPoint Fragment %440 "texture_sample_comparison" %438 -OpEntryPoint Fragment %496 "gather" %495 -OpEntryPoint Fragment %530 "depth_no_comparison" %529 +OpEntryPoint GLCompute %190 "depth_load" %188 +OpEntryPoint Vertex %210 "queries" %208 +OpEntryPoint Vertex %262 "levels_queries" %261 +OpEntryPoint Fragment %293 "texture_sample" %292 +OpEntryPoint Fragment %439 "texture_sample_comparison" %437 +OpEntryPoint Fragment %495 "gather" %494 +OpEntryPoint Fragment %529 "depth_no_comparison" %528 OpExecutionMode %100 LocalSize 16 1 1 -OpExecutionMode %191 LocalSize 16 1 1 -OpExecutionMode %294 OriginUpperLeft -OpExecutionMode %440 OriginUpperLeft -OpExecutionMode %496 OriginUpperLeft -OpExecutionMode %530 OriginUpperLeft +OpExecutionMode %190 LocalSize 16 1 1 +OpExecutionMode %293 OriginUpperLeft +OpExecutionMode %439 OriginUpperLeft +OpExecutionMode %495 OriginUpperLeft +OpExecutionMode %529 OriginUpperLeft %3 = OpString "image.wgsl" OpSource Unknown 0 %3 "@group(0) @binding(0) var image_mipmapped_src: texture_2d; @@ -245,16 +245,16 @@ OpName %77 "lhs" OpName %78 "rhs" OpName %97 "local_id" OpName %100 "main" -OpName %189 "local_id" -OpName %191 "depth_load" -OpName %211 "queries" -OpName %263 "levels_queries" -OpName %294 "texture_sample" -OpName %307 "a" -OpName %440 "texture_sample_comparison" -OpName %445 "a" -OpName %496 "gather" -OpName %530 "depth_no_comparison" +OpName %188 "local_id" +OpName %190 "depth_load" +OpName %210 "queries" +OpName %262 "levels_queries" +OpName %293 "texture_sample" +OpName %306 "a" +OpName %439 "texture_sample_comparison" +OpName %444 "a" +OpName %495 "gather" +OpName %529 "depth_no_comparison" OpDecorate %32 DescriptorSet 0 OpDecorate %32 Binding 0 OpDecorate %34 DescriptorSet 0 @@ -303,13 +303,13 @@ OpDecorate %71 Binding 3 OpDecorate %73 DescriptorSet 1 OpDecorate %73 Binding 4 OpDecorate %97 BuiltIn LocalInvocationId -OpDecorate %189 BuiltIn LocalInvocationId -OpDecorate %209 BuiltIn Position -OpDecorate %262 BuiltIn Position -OpDecorate %293 Location 0 -OpDecorate %438 Location 0 -OpDecorate %495 Location 0 -OpDecorate %529 Location 0 +OpDecorate %188 BuiltIn LocalInvocationId +OpDecorate %208 BuiltIn Position +OpDecorate %261 BuiltIn Position +OpDecorate %292 Location 0 +OpDecorate %437 Location 0 +OpDecorate %494 Location 0 +OpDecorate %528 Location 0 %2 = OpTypeVoid %5 = OpTypeInt 32 0 %4 = OpTypeImage %5 2D 0 0 0 1 Unknown @@ -401,40 +401,40 @@ OpDecorate %529 Location 0 %112 = OpTypeVector %5 2 %120 = OpTypeVector %5 4 %131 = OpTypeVector %15 3 -%189 = OpVariable %98 Input -%210 = OpTypePointer Output %24 -%209 = OpVariable %210 Output -%220 = OpConstant %5 0 -%262 = OpVariable %210 Output -%293 = OpVariable %210 Output -%300 = OpConstant %8 0.5 -%301 = OpTypeVector %8 2 -%302 = OpConstantComposite %301 %300 %300 -%303 = OpTypeVector %8 3 -%304 = OpConstantComposite %303 %300 %300 %300 -%305 = OpConstant %8 2.3 -%306 = OpConstant %8 2.0 -%308 = OpTypePointer Function %24 -%309 = OpConstantNull %24 -%312 = OpTypeSampledImage %16 -%317 = OpTypeSampledImage %17 -%338 = OpTypeSampledImage %19 -%399 = OpTypeSampledImage %21 -%439 = OpTypePointer Output %8 -%438 = OpVariable %439 Output -%446 = OpTypePointer Function %8 -%447 = OpConstantNull %8 -%449 = OpTypeSampledImage %26 -%454 = OpTypeSampledImage %27 -%467 = OpTypeSampledImage %28 -%474 = OpConstant %8 0.0 -%495 = OpVariable %210 Output -%506 = OpConstant %5 1 -%509 = OpConstant %5 3 -%514 = OpTypeSampledImage %4 -%517 = OpTypeVector %15 4 -%518 = OpTypeSampledImage %18 -%529 = OpVariable %210 Output +%188 = OpVariable %98 Input +%209 = OpTypePointer Output %24 +%208 = OpVariable %209 Output +%219 = OpConstant %5 0 +%261 = OpVariable %209 Output +%292 = OpVariable %209 Output +%299 = OpConstant %8 0.5 +%300 = OpTypeVector %8 2 +%301 = OpConstantComposite %300 %299 %299 +%302 = OpTypeVector %8 3 +%303 = OpConstantComposite %302 %299 %299 %299 +%304 = OpConstant %8 2.3 +%305 = OpConstant %8 2.0 +%307 = OpTypePointer Function %24 +%308 = OpConstantNull %24 +%311 = OpTypeSampledImage %16 +%316 = OpTypeSampledImage %17 +%337 = OpTypeSampledImage %19 +%398 = OpTypeSampledImage %21 +%438 = OpTypePointer Output %8 +%437 = OpVariable %438 Output +%445 = OpTypePointer Function %8 +%446 = OpConstantNull %8 +%448 = OpTypeSampledImage %26 +%453 = OpTypeSampledImage %27 +%466 = OpTypeSampledImage %28 +%473 = OpConstant %8 0.0 +%494 = OpVariable %209 Output +%505 = OpConstant %5 1 +%508 = OpConstant %5 3 +%513 = OpTypeSampledImage %4 +%516 = OpTypeVector %15 4 +%517 = OpTypeSampledImage %18 +%528 = OpVariable %209 Output %75 = OpFunction %14 None %76 %77 = OpFunctionParameter %14 %78 = OpFunctionParameter %14 @@ -536,520 +536,520 @@ OpLine %3 34 19 %171 = OpImageFetch %120 %105 %170 Lod %168 OpLine %3 35 19 %172 = OpCompositeExtract %5 %99 0 -%174 = OpCompositeExtract %5 %99 2 -%175 = OpBitcast %15 %174 -%176 = OpImageFetch %120 %106 %172 Lod %175 +%173 = OpCompositeExtract %5 %99 2 +%174 = OpBitcast %15 %173 +%175 = OpImageFetch %120 %106 %172 Lod %174 OpLine %3 37 29 -%177 = OpCompositeExtract %15 %117 0 -%178 = OpIAdd %120 %121 %124 -%179 = OpIAdd %120 %178 %125 -%180 = OpIAdd %120 %179 %133 -%181 = OpIAdd %120 %180 %140 +%176 = OpCompositeExtract %15 %117 0 +%177 = OpIAdd %120 %121 %124 +%178 = OpIAdd %120 %177 %125 +%179 = OpIAdd %120 %178 %133 +%180 = OpIAdd %120 %179 %140 OpLine %3 37 5 -OpImageWrite %107 %177 %181 +OpImageWrite %107 %176 %180 OpLine %3 39 29 -%182 = OpCompositeExtract %15 %117 0 -%183 = OpBitcast %5 %182 -%184 = OpIAdd %120 %149 %153 -%185 = OpIAdd %120 %184 %155 -%186 = OpIAdd %120 %185 %162 -%187 = OpIAdd %120 %186 %171 +%181 = OpCompositeExtract %15 %117 0 +%182 = OpBitcast %5 %181 +%183 = OpIAdd %120 %149 %153 +%184 = OpIAdd %120 %183 %155 +%185 = OpIAdd %120 %184 %162 +%186 = OpIAdd %120 %185 %171 OpLine %3 39 5 -OpImageWrite %107 %183 %187 +OpImageWrite %107 %182 %186 OpReturn OpFunctionEnd -%191 = OpFunction %2 None %101 -%188 = OpLabel -%190 = OpLoad %13 %189 -%192 = OpLoad %7 %36 -%193 = OpLoad %9 %38 -%194 = OpLoad %11 %46 -OpBranch %195 -%195 = OpLabel +%190 = OpFunction %2 None %101 +%187 = OpLabel +%189 = OpLoad %13 %188 +%191 = OpLoad %7 %36 +%192 = OpLoad %9 %38 +%193 = OpLoad %11 %46 +OpBranch %194 +%194 = OpLabel OpLine %3 44 26 -%196 = OpImageQuerySize %112 %193 +%195 = OpImageQuerySize %112 %192 OpLine %3 45 27 -%197 = OpVectorShuffle %112 %190 %190 0 1 -%198 = OpIMul %112 %196 %197 -%199 = OpBitcast %14 %198 +%196 = OpVectorShuffle %112 %189 %189 0 1 +%197 = OpIMul %112 %195 %196 +%198 = OpBitcast %14 %197 OpLine %3 45 27 -%200 = OpFunctionCall %14 %75 %199 %110 +%199 = OpFunctionCall %14 %75 %198 %110 OpLine %3 46 20 -%201 = OpCompositeExtract %5 %190 2 -%202 = OpBitcast %15 %201 -%203 = OpImageFetch %24 %192 %200 Sample %202 -%204 = OpCompositeExtract %8 %203 0 +%200 = OpCompositeExtract %5 %189 2 +%201 = OpBitcast %15 %200 +%202 = OpImageFetch %24 %191 %199 Sample %201 +%203 = OpCompositeExtract %8 %202 0 OpLine %3 47 29 -%205 = OpCompositeExtract %15 %200 0 -%206 = OpConvertFToU %5 %204 -%207 = OpCompositeConstruct %120 %206 %206 %206 %206 +%204 = OpCompositeExtract %15 %199 0 +%205 = OpConvertFToU %5 %203 +%206 = OpCompositeConstruct %120 %205 %205 %205 %205 OpLine %3 47 5 -OpImageWrite %194 %205 %207 +OpImageWrite %193 %204 %206 OpReturn OpFunctionEnd -%211 = OpFunction %2 None %101 -%208 = OpLabel -%212 = OpLoad %16 %48 -%213 = OpLoad %17 %50 -%214 = OpLoad %19 %55 -%215 = OpLoad %20 %57 -%216 = OpLoad %21 %59 -%217 = OpLoad %22 %61 -%218 = OpLoad %23 %63 -OpBranch %219 -%219 = OpLabel +%210 = OpFunction %2 None %101 +%207 = OpLabel +%211 = OpLoad %16 %48 +%212 = OpLoad %17 %50 +%213 = OpLoad %19 %55 +%214 = OpLoad %20 %57 +%215 = OpLoad %21 %59 +%216 = OpLoad %22 %61 +%217 = OpLoad %23 %63 +OpBranch %218 +%218 = OpLabel OpLine %3 72 18 -%221 = OpImageQuerySizeLod %5 %212 %220 +%220 = OpImageQuerySizeLod %5 %211 %219 OpLine %3 73 22 -%222 = OpBitcast %15 %221 -%223 = OpImageQuerySizeLod %5 %212 %222 +%221 = OpBitcast %15 %220 +%222 = OpImageQuerySizeLod %5 %211 %221 OpLine %3 74 18 -%224 = OpImageQuerySizeLod %112 %213 %220 +%223 = OpImageQuerySizeLod %112 %212 %219 OpLine %3 75 22 -%225 = OpImageQuerySizeLod %112 %213 %30 +%224 = OpImageQuerySizeLod %112 %212 %30 OpLine %3 76 24 -%226 = OpImageQuerySizeLod %13 %214 %220 -%227 = OpVectorShuffle %112 %226 %226 0 1 +%225 = OpImageQuerySizeLod %13 %213 %219 +%226 = OpVectorShuffle %112 %225 %225 0 1 OpLine %3 77 28 -%228 = OpImageQuerySizeLod %13 %214 %30 -%229 = OpVectorShuffle %112 %228 %228 0 1 +%227 = OpImageQuerySizeLod %13 %213 %30 +%228 = OpVectorShuffle %112 %227 %227 0 1 OpLine %3 78 20 -%230 = OpImageQuerySizeLod %112 %215 %220 +%229 = OpImageQuerySizeLod %112 %214 %219 OpLine %3 79 24 -%231 = OpImageQuerySizeLod %112 %215 %30 +%230 = OpImageQuerySizeLod %112 %214 %30 OpLine %3 80 26 -%232 = OpImageQuerySizeLod %13 %216 %220 -%233 = OpVectorShuffle %112 %232 %232 0 0 +%231 = OpImageQuerySizeLod %13 %215 %219 +%232 = OpVectorShuffle %112 %231 %231 0 0 OpLine %3 81 30 -%234 = OpImageQuerySizeLod %13 %216 %30 -%235 = OpVectorShuffle %112 %234 %234 0 0 +%233 = OpImageQuerySizeLod %13 %215 %30 +%234 = OpVectorShuffle %112 %233 %233 0 0 OpLine %3 82 18 -%236 = OpImageQuerySizeLod %13 %217 %220 +%235 = OpImageQuerySizeLod %13 %216 %219 OpLine %3 83 22 -%237 = OpImageQuerySizeLod %13 %217 %30 +%236 = OpImageQuerySizeLod %13 %216 %30 OpLine %3 84 21 -%238 = OpImageQuerySize %112 %218 +%237 = OpImageQuerySize %112 %217 OpLine %3 86 15 -%239 = OpCompositeExtract %5 %224 1 -%240 = OpIAdd %5 %221 %239 -%241 = OpCompositeExtract %5 %225 1 -%242 = OpIAdd %5 %240 %241 -%243 = OpCompositeExtract %5 %227 1 -%244 = OpIAdd %5 %242 %243 -%245 = OpCompositeExtract %5 %229 1 -%246 = OpIAdd %5 %244 %245 -%247 = OpCompositeExtract %5 %230 1 -%248 = OpIAdd %5 %246 %247 -%249 = OpCompositeExtract %5 %231 1 -%250 = OpIAdd %5 %248 %249 -%251 = OpCompositeExtract %5 %233 1 -%252 = OpIAdd %5 %250 %251 -%253 = OpCompositeExtract %5 %235 1 -%254 = OpIAdd %5 %252 %253 -%255 = OpCompositeExtract %5 %236 2 -%256 = OpIAdd %5 %254 %255 -%257 = OpCompositeExtract %5 %237 2 -%258 = OpIAdd %5 %256 %257 +%238 = OpCompositeExtract %5 %223 1 +%239 = OpIAdd %5 %220 %238 +%240 = OpCompositeExtract %5 %224 1 +%241 = OpIAdd %5 %239 %240 +%242 = OpCompositeExtract %5 %226 1 +%243 = OpIAdd %5 %241 %242 +%244 = OpCompositeExtract %5 %228 1 +%245 = OpIAdd %5 %243 %244 +%246 = OpCompositeExtract %5 %229 1 +%247 = OpIAdd %5 %245 %246 +%248 = OpCompositeExtract %5 %230 1 +%249 = OpIAdd %5 %247 %248 +%250 = OpCompositeExtract %5 %232 1 +%251 = OpIAdd %5 %249 %250 +%252 = OpCompositeExtract %5 %234 1 +%253 = OpIAdd %5 %251 %252 +%254 = OpCompositeExtract %5 %235 2 +%255 = OpIAdd %5 %253 %254 +%256 = OpCompositeExtract %5 %236 2 +%257 = OpIAdd %5 %255 %256 OpLine %3 89 12 -%259 = OpConvertUToF %8 %258 -%260 = OpCompositeConstruct %24 %259 %259 %259 %259 -OpStore %209 %260 +%258 = OpConvertUToF %8 %257 +%259 = OpCompositeConstruct %24 %258 %258 %258 %258 +OpStore %208 %259 OpReturn OpFunctionEnd -%263 = OpFunction %2 None %101 -%261 = OpLabel -%264 = OpLoad %17 %50 -%265 = OpLoad %19 %55 -%266 = OpLoad %20 %57 -%267 = OpLoad %21 %59 -%268 = OpLoad %22 %61 -%269 = OpLoad %23 %63 -OpBranch %270 -%270 = OpLabel +%262 = OpFunction %2 None %101 +%260 = OpLabel +%263 = OpLoad %17 %50 +%264 = OpLoad %19 %55 +%265 = OpLoad %20 %57 +%266 = OpLoad %21 %59 +%267 = OpLoad %22 %61 +%268 = OpLoad %23 %63 +OpBranch %269 +%269 = OpLabel OpLine %3 94 25 -%271 = OpImageQueryLevels %5 %264 +%270 = OpImageQueryLevels %5 %263 OpLine %3 95 25 -%272 = OpImageQuerySizeLod %13 %265 %220 -%273 = OpCompositeExtract %5 %272 2 +%271 = OpImageQuerySizeLod %13 %264 %219 +%272 = OpCompositeExtract %5 %271 2 OpLine %3 96 31 -%274 = OpImageQueryLevels %5 %265 +%273 = OpImageQueryLevels %5 %264 OpLine %3 97 31 -%275 = OpImageQuerySizeLod %13 %265 %220 -%276 = OpCompositeExtract %5 %275 2 +%274 = OpImageQuerySizeLod %13 %264 %219 +%275 = OpCompositeExtract %5 %274 2 OpLine %3 98 27 -%277 = OpImageQueryLevels %5 %266 +%276 = OpImageQueryLevels %5 %265 OpLine %3 99 33 -%278 = OpImageQueryLevels %5 %267 +%277 = OpImageQueryLevels %5 %266 OpLine %3 100 27 -%279 = OpImageQuerySizeLod %13 %267 %220 -%280 = OpCompositeExtract %5 %279 2 +%278 = OpImageQuerySizeLod %13 %266 %219 +%279 = OpCompositeExtract %5 %278 2 OpLine %3 101 25 -%281 = OpImageQueryLevels %5 %268 +%280 = OpImageQueryLevels %5 %267 OpLine %3 102 26 -%282 = OpImageQuerySamples %5 %269 +%281 = OpImageQuerySamples %5 %268 OpLine %3 104 15 -%283 = OpIAdd %5 %273 %280 -%284 = OpIAdd %5 %283 %282 -%285 = OpIAdd %5 %284 %271 -%286 = OpIAdd %5 %285 %274 -%287 = OpIAdd %5 %286 %281 +%282 = OpIAdd %5 %272 %279 +%283 = OpIAdd %5 %282 %281 +%284 = OpIAdd %5 %283 %270 +%285 = OpIAdd %5 %284 %273 +%286 = OpIAdd %5 %285 %280 +%287 = OpIAdd %5 %286 %276 %288 = OpIAdd %5 %287 %277 -%289 = OpIAdd %5 %288 %278 OpLine %3 106 12 -%290 = OpConvertUToF %8 %289 -%291 = OpCompositeConstruct %24 %290 %290 %290 %290 -OpStore %262 %291 +%289 = OpConvertUToF %8 %288 +%290 = OpCompositeConstruct %24 %289 %289 %289 %289 +OpStore %261 %290 OpReturn OpFunctionEnd -%294 = OpFunction %2 None %101 -%292 = OpLabel -%307 = OpVariable %308 Function %309 -%295 = OpLoad %16 %48 -%296 = OpLoad %17 %50 -%297 = OpLoad %19 %55 -%298 = OpLoad %21 %59 -%299 = OpLoad %25 %65 -OpBranch %310 -%310 = OpLabel +%293 = OpFunction %2 None %101 +%291 = OpLabel +%306 = OpVariable %307 Function %308 +%294 = OpLoad %16 %48 +%295 = OpLoad %17 %50 +%296 = OpLoad %19 %55 +%297 = OpLoad %21 %59 +%298 = OpLoad %25 %65 +OpBranch %309 +%309 = OpLabel OpLine %3 114 14 OpLine %3 115 15 OpLine %3 118 5 -%311 = OpCompositeExtract %8 %302 0 -%313 = OpSampledImage %312 %295 %299 -%314 = OpImageSampleImplicitLod %24 %313 %311 -%315 = OpLoad %24 %307 -%316 = OpFAdd %24 %315 %314 +%310 = OpCompositeExtract %8 %301 0 +%312 = OpSampledImage %311 %294 %298 +%313 = OpImageSampleImplicitLod %24 %312 %310 +%314 = OpLoad %24 %306 +%315 = OpFAdd %24 %314 %313 OpLine %3 118 5 -OpStore %307 %316 +OpStore %306 %315 OpLine %3 119 5 -%318 = OpSampledImage %317 %296 %299 -%319 = OpImageSampleImplicitLod %24 %318 %302 -%320 = OpLoad %24 %307 -%321 = OpFAdd %24 %320 %319 +%317 = OpSampledImage %316 %295 %298 +%318 = OpImageSampleImplicitLod %24 %317 %301 +%319 = OpLoad %24 %306 +%320 = OpFAdd %24 %319 %318 OpLine %3 119 5 -OpStore %307 %321 +OpStore %306 %320 OpLine %3 120 5 -%322 = OpSampledImage %317 %296 %299 -%323 = OpImageSampleImplicitLod %24 %322 %302 ConstOffset %31 -%324 = OpLoad %24 %307 -%325 = OpFAdd %24 %324 %323 +%321 = OpSampledImage %316 %295 %298 +%322 = OpImageSampleImplicitLod %24 %321 %301 ConstOffset %31 +%323 = OpLoad %24 %306 +%324 = OpFAdd %24 %323 %322 OpLine %3 120 5 -OpStore %307 %325 +OpStore %306 %324 OpLine %3 121 5 -%326 = OpSampledImage %317 %296 %299 -%327 = OpImageSampleExplicitLod %24 %326 %302 Lod %305 -%328 = OpLoad %24 %307 -%329 = OpFAdd %24 %328 %327 +%325 = OpSampledImage %316 %295 %298 +%326 = OpImageSampleExplicitLod %24 %325 %301 Lod %304 +%327 = OpLoad %24 %306 +%328 = OpFAdd %24 %327 %326 OpLine %3 121 5 -OpStore %307 %329 +OpStore %306 %328 OpLine %3 122 5 -%330 = OpSampledImage %317 %296 %299 -%331 = OpImageSampleExplicitLod %24 %330 %302 Lod|ConstOffset %305 %31 -%332 = OpLoad %24 %307 -%333 = OpFAdd %24 %332 %331 +%329 = OpSampledImage %316 %295 %298 +%330 = OpImageSampleExplicitLod %24 %329 %301 Lod|ConstOffset %304 %31 +%331 = OpLoad %24 %306 +%332 = OpFAdd %24 %331 %330 OpLine %3 122 5 -OpStore %307 %333 +OpStore %306 %332 OpLine %3 123 5 -%334 = OpSampledImage %317 %296 %299 -%335 = OpImageSampleImplicitLod %24 %334 %302 Bias|ConstOffset %306 %31 -%336 = OpLoad %24 %307 -%337 = OpFAdd %24 %336 %335 +%333 = OpSampledImage %316 %295 %298 +%334 = OpImageSampleImplicitLod %24 %333 %301 Bias|ConstOffset %305 %31 +%335 = OpLoad %24 %306 +%336 = OpFAdd %24 %335 %334 OpLine %3 123 5 -OpStore %307 %337 +OpStore %306 %336 OpLine %3 124 5 -%339 = OpConvertUToF %8 %220 -%340 = OpCompositeConstruct %303 %302 %339 -%341 = OpSampledImage %338 %297 %299 -%342 = OpImageSampleImplicitLod %24 %341 %340 -%343 = OpLoad %24 %307 -%344 = OpFAdd %24 %343 %342 +%338 = OpConvertUToF %8 %219 +%339 = OpCompositeConstruct %302 %301 %338 +%340 = OpSampledImage %337 %296 %298 +%341 = OpImageSampleImplicitLod %24 %340 %339 +%342 = OpLoad %24 %306 +%343 = OpFAdd %24 %342 %341 OpLine %3 124 5 -OpStore %307 %344 +OpStore %306 %343 OpLine %3 125 5 -%345 = OpConvertUToF %8 %220 -%346 = OpCompositeConstruct %303 %302 %345 -%347 = OpSampledImage %338 %297 %299 -%348 = OpImageSampleImplicitLod %24 %347 %346 ConstOffset %31 -%349 = OpLoad %24 %307 -%350 = OpFAdd %24 %349 %348 +%344 = OpConvertUToF %8 %219 +%345 = OpCompositeConstruct %302 %301 %344 +%346 = OpSampledImage %337 %296 %298 +%347 = OpImageSampleImplicitLod %24 %346 %345 ConstOffset %31 +%348 = OpLoad %24 %306 +%349 = OpFAdd %24 %348 %347 OpLine %3 125 5 -OpStore %307 %350 +OpStore %306 %349 OpLine %3 126 5 -%351 = OpConvertUToF %8 %220 -%352 = OpCompositeConstruct %303 %302 %351 -%353 = OpSampledImage %338 %297 %299 -%354 = OpImageSampleExplicitLod %24 %353 %352 Lod %305 -%355 = OpLoad %24 %307 -%356 = OpFAdd %24 %355 %354 +%350 = OpConvertUToF %8 %219 +%351 = OpCompositeConstruct %302 %301 %350 +%352 = OpSampledImage %337 %296 %298 +%353 = OpImageSampleExplicitLod %24 %352 %351 Lod %304 +%354 = OpLoad %24 %306 +%355 = OpFAdd %24 %354 %353 OpLine %3 126 5 -OpStore %307 %356 +OpStore %306 %355 OpLine %3 127 5 -%357 = OpConvertUToF %8 %220 -%358 = OpCompositeConstruct %303 %302 %357 -%359 = OpSampledImage %338 %297 %299 -%360 = OpImageSampleExplicitLod %24 %359 %358 Lod|ConstOffset %305 %31 -%361 = OpLoad %24 %307 -%362 = OpFAdd %24 %361 %360 +%356 = OpConvertUToF %8 %219 +%357 = OpCompositeConstruct %302 %301 %356 +%358 = OpSampledImage %337 %296 %298 +%359 = OpImageSampleExplicitLod %24 %358 %357 Lod|ConstOffset %304 %31 +%360 = OpLoad %24 %306 +%361 = OpFAdd %24 %360 %359 OpLine %3 127 5 -OpStore %307 %362 +OpStore %306 %361 OpLine %3 128 5 -%363 = OpConvertUToF %8 %220 -%364 = OpCompositeConstruct %303 %302 %363 -%365 = OpSampledImage %338 %297 %299 -%366 = OpImageSampleImplicitLod %24 %365 %364 Bias|ConstOffset %306 %31 -%367 = OpLoad %24 %307 -%368 = OpFAdd %24 %367 %366 +%362 = OpConvertUToF %8 %219 +%363 = OpCompositeConstruct %302 %301 %362 +%364 = OpSampledImage %337 %296 %298 +%365 = OpImageSampleImplicitLod %24 %364 %363 Bias|ConstOffset %305 %31 +%366 = OpLoad %24 %306 +%367 = OpFAdd %24 %366 %365 OpLine %3 128 5 -OpStore %307 %368 +OpStore %306 %367 OpLine %3 129 5 -%369 = OpConvertSToF %8 %82 -%370 = OpCompositeConstruct %303 %302 %369 -%371 = OpSampledImage %338 %297 %299 -%372 = OpImageSampleImplicitLod %24 %371 %370 -%373 = OpLoad %24 %307 -%374 = OpFAdd %24 %373 %372 +%368 = OpConvertSToF %8 %82 +%369 = OpCompositeConstruct %302 %301 %368 +%370 = OpSampledImage %337 %296 %298 +%371 = OpImageSampleImplicitLod %24 %370 %369 +%372 = OpLoad %24 %306 +%373 = OpFAdd %24 %372 %371 OpLine %3 129 5 -OpStore %307 %374 +OpStore %306 %373 OpLine %3 130 5 -%375 = OpConvertSToF %8 %82 -%376 = OpCompositeConstruct %303 %302 %375 -%377 = OpSampledImage %338 %297 %299 -%378 = OpImageSampleImplicitLod %24 %377 %376 ConstOffset %31 -%379 = OpLoad %24 %307 -%380 = OpFAdd %24 %379 %378 +%374 = OpConvertSToF %8 %82 +%375 = OpCompositeConstruct %302 %301 %374 +%376 = OpSampledImage %337 %296 %298 +%377 = OpImageSampleImplicitLod %24 %376 %375 ConstOffset %31 +%378 = OpLoad %24 %306 +%379 = OpFAdd %24 %378 %377 OpLine %3 130 5 -OpStore %307 %380 +OpStore %306 %379 OpLine %3 131 5 -%381 = OpConvertSToF %8 %82 -%382 = OpCompositeConstruct %303 %302 %381 -%383 = OpSampledImage %338 %297 %299 -%384 = OpImageSampleExplicitLod %24 %383 %382 Lod %305 -%385 = OpLoad %24 %307 -%386 = OpFAdd %24 %385 %384 +%380 = OpConvertSToF %8 %82 +%381 = OpCompositeConstruct %302 %301 %380 +%382 = OpSampledImage %337 %296 %298 +%383 = OpImageSampleExplicitLod %24 %382 %381 Lod %304 +%384 = OpLoad %24 %306 +%385 = OpFAdd %24 %384 %383 OpLine %3 131 5 -OpStore %307 %386 +OpStore %306 %385 OpLine %3 132 5 -%387 = OpConvertSToF %8 %82 -%388 = OpCompositeConstruct %303 %302 %387 -%389 = OpSampledImage %338 %297 %299 -%390 = OpImageSampleExplicitLod %24 %389 %388 Lod|ConstOffset %305 %31 -%391 = OpLoad %24 %307 -%392 = OpFAdd %24 %391 %390 +%386 = OpConvertSToF %8 %82 +%387 = OpCompositeConstruct %302 %301 %386 +%388 = OpSampledImage %337 %296 %298 +%389 = OpImageSampleExplicitLod %24 %388 %387 Lod|ConstOffset %304 %31 +%390 = OpLoad %24 %306 +%391 = OpFAdd %24 %390 %389 OpLine %3 132 5 -OpStore %307 %392 +OpStore %306 %391 OpLine %3 133 5 -%393 = OpConvertSToF %8 %82 -%394 = OpCompositeConstruct %303 %302 %393 -%395 = OpSampledImage %338 %297 %299 -%396 = OpImageSampleImplicitLod %24 %395 %394 Bias|ConstOffset %306 %31 -%397 = OpLoad %24 %307 -%398 = OpFAdd %24 %397 %396 +%392 = OpConvertSToF %8 %82 +%393 = OpCompositeConstruct %302 %301 %392 +%394 = OpSampledImage %337 %296 %298 +%395 = OpImageSampleImplicitLod %24 %394 %393 Bias|ConstOffset %305 %31 +%396 = OpLoad %24 %306 +%397 = OpFAdd %24 %396 %395 OpLine %3 133 5 -OpStore %307 %398 +OpStore %306 %397 OpLine %3 134 5 -%400 = OpConvertUToF %8 %220 -%401 = OpCompositeConstruct %24 %304 %400 -%402 = OpSampledImage %399 %298 %299 -%403 = OpImageSampleImplicitLod %24 %402 %401 -%404 = OpLoad %24 %307 -%405 = OpFAdd %24 %404 %403 +%399 = OpConvertUToF %8 %219 +%400 = OpCompositeConstruct %24 %303 %399 +%401 = OpSampledImage %398 %297 %298 +%402 = OpImageSampleImplicitLod %24 %401 %400 +%403 = OpLoad %24 %306 +%404 = OpFAdd %24 %403 %402 OpLine %3 134 5 -OpStore %307 %405 +OpStore %306 %404 OpLine %3 135 5 -%406 = OpConvertUToF %8 %220 -%407 = OpCompositeConstruct %24 %304 %406 -%408 = OpSampledImage %399 %298 %299 -%409 = OpImageSampleExplicitLod %24 %408 %407 Lod %305 -%410 = OpLoad %24 %307 -%411 = OpFAdd %24 %410 %409 +%405 = OpConvertUToF %8 %219 +%406 = OpCompositeConstruct %24 %303 %405 +%407 = OpSampledImage %398 %297 %298 +%408 = OpImageSampleExplicitLod %24 %407 %406 Lod %304 +%409 = OpLoad %24 %306 +%410 = OpFAdd %24 %409 %408 OpLine %3 135 5 -OpStore %307 %411 +OpStore %306 %410 OpLine %3 136 5 -%412 = OpConvertUToF %8 %220 -%413 = OpCompositeConstruct %24 %304 %412 -%414 = OpSampledImage %399 %298 %299 -%415 = OpImageSampleImplicitLod %24 %414 %413 Bias %306 -%416 = OpLoad %24 %307 -%417 = OpFAdd %24 %416 %415 +%411 = OpConvertUToF %8 %219 +%412 = OpCompositeConstruct %24 %303 %411 +%413 = OpSampledImage %398 %297 %298 +%414 = OpImageSampleImplicitLod %24 %413 %412 Bias %305 +%415 = OpLoad %24 %306 +%416 = OpFAdd %24 %415 %414 OpLine %3 136 5 -OpStore %307 %417 +OpStore %306 %416 OpLine %3 137 5 -%418 = OpConvertSToF %8 %82 -%419 = OpCompositeConstruct %24 %304 %418 -%420 = OpSampledImage %399 %298 %299 -%421 = OpImageSampleImplicitLod %24 %420 %419 -%422 = OpLoad %24 %307 -%423 = OpFAdd %24 %422 %421 +%417 = OpConvertSToF %8 %82 +%418 = OpCompositeConstruct %24 %303 %417 +%419 = OpSampledImage %398 %297 %298 +%420 = OpImageSampleImplicitLod %24 %419 %418 +%421 = OpLoad %24 %306 +%422 = OpFAdd %24 %421 %420 OpLine %3 137 5 -OpStore %307 %423 +OpStore %306 %422 OpLine %3 138 5 -%424 = OpConvertSToF %8 %82 -%425 = OpCompositeConstruct %24 %304 %424 -%426 = OpSampledImage %399 %298 %299 -%427 = OpImageSampleExplicitLod %24 %426 %425 Lod %305 -%428 = OpLoad %24 %307 -%429 = OpFAdd %24 %428 %427 +%423 = OpConvertSToF %8 %82 +%424 = OpCompositeConstruct %24 %303 %423 +%425 = OpSampledImage %398 %297 %298 +%426 = OpImageSampleExplicitLod %24 %425 %424 Lod %304 +%427 = OpLoad %24 %306 +%428 = OpFAdd %24 %427 %426 OpLine %3 138 5 -OpStore %307 %429 +OpStore %306 %428 OpLine %3 139 5 -%430 = OpConvertSToF %8 %82 -%431 = OpCompositeConstruct %24 %304 %430 -%432 = OpSampledImage %399 %298 %299 -%433 = OpImageSampleImplicitLod %24 %432 %431 Bias %306 -%434 = OpLoad %24 %307 -%435 = OpFAdd %24 %434 %433 +%429 = OpConvertSToF %8 %82 +%430 = OpCompositeConstruct %24 %303 %429 +%431 = OpSampledImage %398 %297 %298 +%432 = OpImageSampleImplicitLod %24 %431 %430 Bias %305 +%433 = OpLoad %24 %306 +%434 = OpFAdd %24 %433 %432 OpLine %3 139 5 -OpStore %307 %435 +OpStore %306 %434 OpLine %3 1 1 -%436 = OpLoad %24 %307 -OpStore %293 %436 +%435 = OpLoad %24 %306 +OpStore %292 %435 OpReturn OpFunctionEnd -%440 = OpFunction %2 None %101 -%437 = OpLabel -%445 = OpVariable %446 Function %447 -%441 = OpLoad %25 %67 -%442 = OpLoad %26 %69 -%443 = OpLoad %27 %71 -%444 = OpLoad %28 %73 -OpBranch %448 -%448 = OpLabel +%439 = OpFunction %2 None %101 +%436 = OpLabel +%444 = OpVariable %445 Function %446 +%440 = OpLoad %25 %67 +%441 = OpLoad %26 %69 +%442 = OpLoad %27 %71 +%443 = OpLoad %28 %73 +OpBranch %447 +%447 = OpLabel OpLine %3 154 14 OpLine %3 155 15 OpLine %3 158 5 -%450 = OpSampledImage %449 %442 %441 -%451 = OpImageSampleDrefImplicitLod %8 %450 %302 %300 -%452 = OpLoad %8 %445 -%453 = OpFAdd %8 %452 %451 +%449 = OpSampledImage %448 %441 %440 +%450 = OpImageSampleDrefImplicitLod %8 %449 %301 %299 +%451 = OpLoad %8 %444 +%452 = OpFAdd %8 %451 %450 OpLine %3 158 5 -OpStore %445 %453 +OpStore %444 %452 OpLine %3 159 5 -%455 = OpConvertUToF %8 %220 -%456 = OpCompositeConstruct %303 %302 %455 -%457 = OpSampledImage %454 %443 %441 -%458 = OpImageSampleDrefImplicitLod %8 %457 %456 %300 -%459 = OpLoad %8 %445 -%460 = OpFAdd %8 %459 %458 +%454 = OpConvertUToF %8 %219 +%455 = OpCompositeConstruct %302 %301 %454 +%456 = OpSampledImage %453 %442 %440 +%457 = OpImageSampleDrefImplicitLod %8 %456 %455 %299 +%458 = OpLoad %8 %444 +%459 = OpFAdd %8 %458 %457 OpLine %3 159 5 -OpStore %445 %460 +OpStore %444 %459 OpLine %3 160 5 -%461 = OpConvertSToF %8 %82 -%462 = OpCompositeConstruct %303 %302 %461 -%463 = OpSampledImage %454 %443 %441 -%464 = OpImageSampleDrefImplicitLod %8 %463 %462 %300 -%465 = OpLoad %8 %445 -%466 = OpFAdd %8 %465 %464 +%460 = OpConvertSToF %8 %82 +%461 = OpCompositeConstruct %302 %301 %460 +%462 = OpSampledImage %453 %442 %440 +%463 = OpImageSampleDrefImplicitLod %8 %462 %461 %299 +%464 = OpLoad %8 %444 +%465 = OpFAdd %8 %464 %463 OpLine %3 160 5 -OpStore %445 %466 +OpStore %444 %465 OpLine %3 161 5 -%468 = OpSampledImage %467 %444 %441 -%469 = OpImageSampleDrefImplicitLod %8 %468 %304 %300 -%470 = OpLoad %8 %445 -%471 = OpFAdd %8 %470 %469 +%467 = OpSampledImage %466 %443 %440 +%468 = OpImageSampleDrefImplicitLod %8 %467 %303 %299 +%469 = OpLoad %8 %444 +%470 = OpFAdd %8 %469 %468 OpLine %3 161 5 -OpStore %445 %471 +OpStore %444 %470 OpLine %3 162 5 -%472 = OpSampledImage %449 %442 %441 -%473 = OpImageSampleDrefExplicitLod %8 %472 %302 %300 Lod %474 -%475 = OpLoad %8 %445 -%476 = OpFAdd %8 %475 %473 +%471 = OpSampledImage %448 %441 %440 +%472 = OpImageSampleDrefExplicitLod %8 %471 %301 %299 Lod %473 +%474 = OpLoad %8 %444 +%475 = OpFAdd %8 %474 %472 OpLine %3 162 5 -OpStore %445 %476 +OpStore %444 %475 OpLine %3 163 5 -%477 = OpConvertUToF %8 %220 -%478 = OpCompositeConstruct %303 %302 %477 -%479 = OpSampledImage %454 %443 %441 -%480 = OpImageSampleDrefExplicitLod %8 %479 %478 %300 Lod %474 -%481 = OpLoad %8 %445 -%482 = OpFAdd %8 %481 %480 +%476 = OpConvertUToF %8 %219 +%477 = OpCompositeConstruct %302 %301 %476 +%478 = OpSampledImage %453 %442 %440 +%479 = OpImageSampleDrefExplicitLod %8 %478 %477 %299 Lod %473 +%480 = OpLoad %8 %444 +%481 = OpFAdd %8 %480 %479 OpLine %3 163 5 -OpStore %445 %482 +OpStore %444 %481 OpLine %3 164 5 -%483 = OpConvertSToF %8 %82 -%484 = OpCompositeConstruct %303 %302 %483 -%485 = OpSampledImage %454 %443 %441 -%486 = OpImageSampleDrefExplicitLod %8 %485 %484 %300 Lod %474 -%487 = OpLoad %8 %445 -%488 = OpFAdd %8 %487 %486 +%482 = OpConvertSToF %8 %82 +%483 = OpCompositeConstruct %302 %301 %482 +%484 = OpSampledImage %453 %442 %440 +%485 = OpImageSampleDrefExplicitLod %8 %484 %483 %299 Lod %473 +%486 = OpLoad %8 %444 +%487 = OpFAdd %8 %486 %485 OpLine %3 164 5 -OpStore %445 %488 +OpStore %444 %487 OpLine %3 165 5 -%489 = OpSampledImage %467 %444 %441 -%490 = OpImageSampleDrefExplicitLod %8 %489 %304 %300 Lod %474 -%491 = OpLoad %8 %445 -%492 = OpFAdd %8 %491 %490 +%488 = OpSampledImage %466 %443 %440 +%489 = OpImageSampleDrefExplicitLod %8 %488 %303 %299 Lod %473 +%490 = OpLoad %8 %444 +%491 = OpFAdd %8 %490 %489 OpLine %3 165 5 -OpStore %445 %492 +OpStore %444 %491 OpLine %3 1 1 -%493 = OpLoad %8 %445 -OpStore %438 %493 +%492 = OpLoad %8 %444 +OpStore %437 %492 OpReturn OpFunctionEnd -%496 = OpFunction %2 None %101 -%494 = OpLabel -%497 = OpLoad %17 %50 -%498 = OpLoad %4 %52 -%499 = OpLoad %18 %53 -%500 = OpLoad %25 %65 -%501 = OpLoad %25 %67 -%502 = OpLoad %26 %69 -OpBranch %503 -%503 = OpLabel +%495 = OpFunction %2 None %101 +%493 = OpLabel +%496 = OpLoad %17 %50 +%497 = OpLoad %4 %52 +%498 = OpLoad %18 %53 +%499 = OpLoad %25 %65 +%500 = OpLoad %25 %67 +%501 = OpLoad %26 %69 +OpBranch %502 +%502 = OpLabel OpLine %3 171 14 OpLine %3 173 15 -%504 = OpSampledImage %317 %497 %500 -%505 = OpImageGather %24 %504 %302 %506 +%503 = OpSampledImage %316 %496 %499 +%504 = OpImageGather %24 %503 %301 %505 OpLine %3 174 22 -%507 = OpSampledImage %317 %497 %500 -%508 = OpImageGather %24 %507 %302 %509 ConstOffset %31 +%506 = OpSampledImage %316 %496 %499 +%507 = OpImageGather %24 %506 %301 %508 ConstOffset %31 OpLine %3 175 21 -%510 = OpSampledImage %449 %502 %501 -%511 = OpImageDrefGather %24 %510 %302 %300 +%509 = OpSampledImage %448 %501 %500 +%510 = OpImageDrefGather %24 %509 %301 %299 OpLine %3 176 28 -%512 = OpSampledImage %449 %502 %501 -%513 = OpImageDrefGather %24 %512 %302 %300 ConstOffset %31 +%511 = OpSampledImage %448 %501 %500 +%512 = OpImageDrefGather %24 %511 %301 %299 ConstOffset %31 OpLine %3 178 13 -%515 = OpSampledImage %514 %498 %500 -%516 = OpImageGather %120 %515 %302 %220 +%514 = OpSampledImage %513 %497 %499 +%515 = OpImageGather %120 %514 %301 %219 OpLine %3 179 13 -%519 = OpSampledImage %518 %499 %500 -%520 = OpImageGather %517 %519 %302 %220 +%518 = OpSampledImage %517 %498 %499 +%519 = OpImageGather %516 %518 %301 %219 OpLine %3 180 13 -%521 = OpConvertUToF %24 %516 -%522 = OpConvertSToF %24 %520 -%523 = OpFAdd %24 %521 %522 +%520 = OpConvertUToF %24 %515 +%521 = OpConvertSToF %24 %519 +%522 = OpFAdd %24 %520 %521 OpLine %3 182 12 -%524 = OpFAdd %24 %505 %508 -%525 = OpFAdd %24 %524 %511 -%526 = OpFAdd %24 %525 %513 -%527 = OpFAdd %24 %526 %523 -OpStore %495 %527 +%523 = OpFAdd %24 %504 %507 +%524 = OpFAdd %24 %523 %510 +%525 = OpFAdd %24 %524 %512 +%526 = OpFAdd %24 %525 %522 +OpStore %494 %526 OpReturn OpFunctionEnd -%530 = OpFunction %2 None %101 -%528 = OpLabel -%531 = OpLoad %25 %65 -%532 = OpLoad %26 %69 -OpBranch %533 -%533 = OpLabel +%529 = OpFunction %2 None %101 +%527 = OpLabel +%530 = OpLoad %25 %65 +%531 = OpLoad %26 %69 +OpBranch %532 +%532 = OpLabel OpLine %3 187 14 OpLine %3 189 15 -%534 = OpSampledImage %449 %532 %531 -%535 = OpImageSampleImplicitLod %24 %534 %302 -%536 = OpCompositeExtract %8 %535 0 +%533 = OpSampledImage %448 %531 %530 +%534 = OpImageSampleImplicitLod %24 %533 %301 +%535 = OpCompositeExtract %8 %534 0 OpLine %3 190 22 -%537 = OpSampledImage %449 %532 %531 -%538 = OpImageGather %24 %537 %302 %220 +%536 = OpSampledImage %448 %531 %530 +%537 = OpImageGather %24 %536 %301 %219 OpLine %3 191 21 -%539 = OpSampledImage %449 %532 %531 -%541 = OpConvertSToF %8 %30 -%540 = OpImageSampleExplicitLod %24 %539 %302 Lod %541 -%542 = OpCompositeExtract %8 %540 0 +%538 = OpSampledImage %448 %531 %530 +%540 = OpConvertSToF %8 %30 +%539 = OpImageSampleExplicitLod %24 %538 %301 Lod %540 +%541 = OpCompositeExtract %8 %539 0 OpLine %3 189 15 -%543 = OpCompositeConstruct %24 %536 %536 %536 %536 -%544 = OpFAdd %24 %543 %538 -%545 = OpCompositeConstruct %24 %542 %542 %542 %542 -%546 = OpFAdd %24 %544 %545 -OpStore %529 %546 +%542 = OpCompositeConstruct %24 %535 %535 %535 %535 +%543 = OpFAdd %24 %542 %537 +%544 = OpCompositeConstruct %24 %541 %541 %541 %541 +%545 = OpFAdd %24 %543 %544 +OpStore %528 %545 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/wgsl/extra.wgsl b/naga/tests/out/wgsl/extra.wgsl index 25f894e7f5..574c35ab0b 100644 --- a/naga/tests/out/wgsl/extra.wgsl +++ b/naga/tests/out/wgsl/extra.wgsl @@ -1,6 +1,6 @@ struct PushConstants { index: u32, - double: vec2, + double: vec2, } struct FragmentIn { diff --git a/naga/tests/out/wgsl/f16-glsl.comp.wgsl b/naga/tests/out/wgsl/f16-glsl.comp.wgsl new file mode 100644 index 0000000000..e12dcb1408 --- /dev/null +++ b/naga/tests/out/wgsl/f16-glsl.comp.wgsl @@ -0,0 +1,47 @@ +enable f16; + +struct A { + a_1_: f16, + a_vec2_: vec2, + a_vec3_: vec3, + a_vec4_: vec4, +} + +struct B { + b_1_: f16, + b_vec2_: vec2, + b_vec3_: vec3, + b_vec4_: vec4, + b_mat2_: mat2x2, + b_mat2x3_: mat2x3, + b_mat2x4_: mat2x4, + b_mat3x2_: mat3x2, + b_mat3_: mat3x3, + b_mat3x4_: mat3x4, + b_mat4x2_: mat4x2, + b_mat4x3_: mat4x3, + b_mat4_: mat4x4, +} + +@group(0) @binding(0) +var global: A; +@group(0) @binding(1) +var global_1: B; + +fn main_1() { + let _e16 = global.a_1_; + global_1.b_1_ = _e16; + let _e17 = global.a_vec2_; + global_1.b_vec2_ = _e17; + let _e18 = global.a_vec3_; + global_1.b_vec3_ = _e18; + let _e19 = global.a_vec4_; + global_1.b_vec4_ = _e19; + return; +} + +@compute @workgroup_size(1, 1, 1) +fn main() { + main_1(); + return; +} diff --git a/naga/tests/out/wgsl/f16-spv.wgsl b/naga/tests/out/wgsl/f16-spv.wgsl new file mode 100644 index 0000000000..4c29ace8c3 --- /dev/null +++ b/naga/tests/out/wgsl/f16-spv.wgsl @@ -0,0 +1,46 @@ +enable f16; + +struct B { + b_1_: f16, + b_vec2_: vec2, + b_vec3_: vec3, + b_vec4_: vec4, + b_mat2_: mat2x2, + b_mat2x3_: mat2x3, + b_mat2x4_: mat2x4, + b_mat3x2_: mat3x2, + b_mat3_: mat3x3, + b_mat3x4_: mat3x4, + b_mat4x2_: mat4x2, + b_mat4x3_: mat4x3, + b_mat4_: mat4x4, +} + +struct A { + a_1_: f16, + a_vec2_: vec2, + a_vec3_: vec3, + a_vec4_: vec4, +} + +@group(0) @binding(1) +var unnamed: B; +@group(0) @binding(0) +var unnamed_1: A; + +fn main_1() { + let _e3 = unnamed_1.a_1_; + unnamed.b_1_ = _e3; + let _e6 = unnamed_1.a_vec2_; + unnamed.b_vec2_ = _e6; + let _e9 = unnamed_1.a_vec3_; + unnamed.b_vec3_ = _e9; + let _e12 = unnamed_1.a_vec4_; + unnamed.b_vec4_ = _e12; + return; +} + +@compute @workgroup_size(1, 1, 1) +fn main() { + main_1(); +} diff --git a/naga/tests/out/wgsl/f16.wgsl b/naga/tests/out/wgsl/f16.wgsl new file mode 100644 index 0000000000..a45edce3b5 --- /dev/null +++ b/naga/tests/out/wgsl/f16.wgsl @@ -0,0 +1,167 @@ +enable f16; + +struct UniformCompatible { + val_u32_: u32, + val_i32_: i32, + val_f32_: f32, + val_f16_: f16, + val_f16_2_: vec2, + val_f16_3_: vec3, + val_f16_4_: vec4, + final_value: f16, + val_mat2x2_: mat2x2, + val_mat2x3_: mat2x3, + val_mat2x4_: mat2x4, + val_mat3x2_: mat3x2, + val_mat3x3_: mat3x3, + val_mat3x4_: mat3x4, + val_mat4x2_: mat4x2, + val_mat4x3_: mat4x3, + val_mat4x4_: mat4x4, +} + +struct StorageCompatible { + val_f16_array_2_: array, +} + +struct LayoutTest { + scalar1_: f16, + scalar2_: f16, + v3_: vec3, + tuck_in: f16, + scalar4_: f16, + larger: u32, +} + +const constant_variable: f16 = 15.203125h; + +var private_variable: f16 = 1h; +@group(0) @binding(0) +var input_uniform: UniformCompatible; +@group(0) @binding(1) +var input_storage: UniformCompatible; +@group(0) @binding(2) +var input_arrays: StorageCompatible; +@group(0) @binding(3) +var output: UniformCompatible; +@group(0) @binding(4) +var output_arrays: StorageCompatible; + +fn f16_function(x: f16) -> f16 { + var val: f16 = 15.203125h; + + let _e4 = val; + val = (_e4 + -33344h); + let _e6 = val; + let _e9 = val; + val = (_e9 + (_e6 + 5h)); + let _e13 = input_uniform.val_f32_; + let _e14 = val; + let _e18 = val; + val = (_e18 + f16((_e13 + f32(_e14)))); + let _e22 = input_uniform.val_f16_; + let _e25 = val; + val = (_e25 + vec3(_e22).z); + let _e31 = input_uniform.val_f16_; + let _e34 = input_storage.val_f16_; + output.val_f16_ = (_e31 + _e34); + let _e40 = input_uniform.val_f16_2_; + let _e43 = input_storage.val_f16_2_; + output.val_f16_2_ = (_e40 + _e43); + let _e49 = input_uniform.val_f16_3_; + let _e52 = input_storage.val_f16_3_; + output.val_f16_3_ = (_e49 + _e52); + let _e58 = input_uniform.val_f16_4_; + let _e61 = input_storage.val_f16_4_; + output.val_f16_4_ = (_e58 + _e61); + let _e67 = input_uniform.val_mat2x2_; + let _e70 = input_storage.val_mat2x2_; + output.val_mat2x2_ = (_e67 + _e70); + let _e76 = input_uniform.val_mat2x3_; + let _e79 = input_storage.val_mat2x3_; + output.val_mat2x3_ = (_e76 + _e79); + let _e85 = input_uniform.val_mat2x4_; + let _e88 = input_storage.val_mat2x4_; + output.val_mat2x4_ = (_e85 + _e88); + let _e94 = input_uniform.val_mat3x2_; + let _e97 = input_storage.val_mat3x2_; + output.val_mat3x2_ = (_e94 + _e97); + let _e103 = input_uniform.val_mat3x3_; + let _e106 = input_storage.val_mat3x3_; + output.val_mat3x3_ = (_e103 + _e106); + let _e112 = input_uniform.val_mat3x4_; + let _e115 = input_storage.val_mat3x4_; + output.val_mat3x4_ = (_e112 + _e115); + let _e121 = input_uniform.val_mat4x2_; + let _e124 = input_storage.val_mat4x2_; + output.val_mat4x2_ = (_e121 + _e124); + let _e130 = input_uniform.val_mat4x3_; + let _e133 = input_storage.val_mat4x3_; + output.val_mat4x3_ = (_e130 + _e133); + let _e139 = input_uniform.val_mat4x4_; + let _e142 = input_storage.val_mat4x4_; + output.val_mat4x4_ = (_e139 + _e142); + let _e148 = input_arrays.val_f16_array_2_; + output_arrays.val_f16_array_2_ = _e148; + let _e149 = val; + let _e151 = val; + val = (_e151 + abs(_e149)); + let _e153 = val; + let _e154 = val; + let _e155 = val; + let _e157 = val; + val = (_e157 + clamp(_e153, _e154, _e155)); + let _e159 = val; + let _e161 = val; + let _e164 = val; + val = (_e164 + dot(vec2(_e159), vec2(_e161))); + let _e166 = val; + let _e167 = val; + let _e169 = val; + val = (_e169 + max(_e166, _e167)); + let _e171 = val; + let _e172 = val; + let _e174 = val; + val = (_e174 + min(_e171, _e172)); + let _e176 = val; + let _e178 = val; + val = (_e178 + sign(_e176)); + let _e181 = val; + val = (_e181 + 1h); + let _e185 = input_uniform.val_f16_2_; + let float_vec2_ = vec2(_e185); + output.val_f16_2_ = vec2(float_vec2_); + let _e192 = input_uniform.val_f16_3_; + let float_vec3_ = vec3(_e192); + output.val_f16_3_ = vec3(float_vec3_); + let _e199 = input_uniform.val_f16_4_; + let float_vec4_ = vec4(_e199); + output.val_f16_4_ = vec4(float_vec4_); + let _e208 = input_uniform.val_mat2x2_; + output.val_mat2x2_ = mat2x2(mat2x2(_e208)); + let _e215 = input_uniform.val_mat2x3_; + output.val_mat2x3_ = mat2x3(mat2x3(_e215)); + let _e222 = input_uniform.val_mat2x4_; + output.val_mat2x4_ = mat2x4(mat2x4(_e222)); + let _e229 = input_uniform.val_mat3x2_; + output.val_mat3x2_ = mat3x2(mat3x2(_e229)); + let _e236 = input_uniform.val_mat3x3_; + output.val_mat3x3_ = mat3x3(mat3x3(_e236)); + let _e243 = input_uniform.val_mat3x4_; + output.val_mat3x4_ = mat3x4(mat3x4(_e243)); + let _e250 = input_uniform.val_mat4x2_; + output.val_mat4x2_ = mat4x2(mat4x2(_e250)); + let _e257 = input_uniform.val_mat4x3_; + output.val_mat4x3_ = mat4x3(mat4x3(_e257)); + let _e264 = input_uniform.val_mat4x4_; + output.val_mat4x4_ = mat4x4(mat4x4(_e264)); + let _e267 = val; + return _e267; +} + +@compute @workgroup_size(1, 1, 1) +fn main() { + let _e3 = f16_function(2h); + output.final_value = _e3; + return; +} diff --git a/naga/tests/spirv_capabilities.rs b/naga/tests/spirv_capabilities.rs index f221c7896e..2d46e37f72 100644 --- a/naga/tests/spirv_capabilities.rs +++ b/naga/tests/spirv_capabilities.rs @@ -208,3 +208,8 @@ fn int64() { "#, ); } + +#[test] +fn float16() { + require(&[Ca::Float16], "enable f16; fn f(x: f16) { }"); +} diff --git a/naga/tests/wgsl_errors.rs b/naga/tests/wgsl_errors.rs index 2dcd0588ca..34e1e18ece 100644 --- a/naga/tests/wgsl_errors.rs +++ b/naga/tests/wgsl_errors.rs @@ -3,9 +3,11 @@ Tests for the WGSL front end. */ #![cfg(feature = "wgsl-in")] +#[track_caller] fn check(input: &str, snapshot: &str) { let output = naga::front::wgsl::parse_str(input) - .expect_err("expected parser error") + .map(|_| panic!("expected parser error, but parsing succeeded!")) + .unwrap_err() .emit_to_string(input); if output != snapshot { for diff in diff::lines(snapshot, &output) { @@ -19,6 +21,19 @@ fn check(input: &str, snapshot: &str) { } } +#[track_caller] +fn check_success(input: &str) { + match naga::front::wgsl::parse_str(input) { + Ok(_) => {} + Err(err) => { + panic!( + "expected success, but parsing failed with:\n{}", + err.emit_to_string(input) + ); + } + } +} + #[test] fn very_negative_integers() { // wgpu#4492 @@ -824,6 +839,50 @@ fn matrix_constructor_inferred() { ); } +#[test] +fn float16_requires_enable() { + check( + r#" + const a: f16 = 1.0; + "#, + r#"error: the `f16` language extension is not enabled + ┌─ wgsl:2:22 + │ +2 │ const a: f16 = 1.0; + │ ^^^ the `f16` language extension is needed for this functionality, but it is not currently enabled. + │ + = note: You can enable this extension by adding `enable f16;` at the top of the shader. + +"#, + ); + + check( + r#" + const a = 1.0h; + "#, + r#"error: the `f16` language extension is not enabled + ┌─ wgsl:2:23 + │ +2 │ const a = 1.0h; + │ ^^^^ the `f16` language extension is needed for this functionality, but it is not currently enabled. + │ + = note: You can enable this extension by adding `enable f16;` at the top of the shader. + +"#, + ); +} + +#[test] +fn multiple_enables_valid() { + check_success( + r#" + enable f16; + enable f16; + const a: f16 = 1.0h; + "#, + ); +} + /// Check the result of validating a WGSL program against a pattern. /// /// Unless you are generating code programmatically, the @@ -900,6 +959,7 @@ macro_rules! check_validation { } } +#[track_caller] fn validation_error( source: &str, caps: naga::valid::Capabilities, @@ -928,6 +988,53 @@ fn int64_capability() { } } +#[test] +fn float16_capability() { + check_validation! { + "enable f16; var input: f16;", + "enable f16; var input: vec2;": + Err(naga::valid::ValidationError::Type { + source: naga::valid::TypeError::WidthError(naga::valid::WidthError::MissingCapability {flag: "FLOAT16",..}), + .. + }) + } +} + +#[test] +fn float16_in_push_constant() { + check_validation! { + "enable f16; var input: f16;", + "enable f16; var input: vec2;", + "enable f16; var input: mat4x4;", + "enable f16; struct S { a: f16 }; var input: S;", + "enable f16; struct S1 { a: f16 }; struct S2 { a : S1 } var input: S2;": + Err(naga::valid::ValidationError::GlobalVariable { + source: naga::valid::GlobalVariableError::InvalidPushConstantType( + naga::valid::PushConstantError::InvalidScalar( + naga::Scalar::F16 + ) + ), + .. + }), + naga::valid::Capabilities::SHADER_FLOAT16 | naga::valid::Capabilities::PUSH_CONSTANT + } +} + +#[test] +fn float16_in_atomic() { + check_validation! { + "enable f16; var a: atomic;": + Err(naga::valid::ValidationError::Type { + source: naga::valid::TypeError::InvalidAtomicWidth( + naga::ScalarKind::Float, + 2 + ), + .. + }), + naga::valid::Capabilities::SHADER_FLOAT16 + } +} + #[test] fn invalid_arrays() { check_validation! { diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 5ee691ba05..bc2659da8a 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -43,6 +43,7 @@ cfg-if.workspace = true ctor.workspace = true futures-lite.workspace = true glam.workspace = true +half = { workspace = true, features = ["bytemuck"] } itertools.workspace = true image.workspace = true libtest-mimic.workspace = true diff --git a/tests/tests/shader/mod.rs b/tests/tests/shader/mod.rs index 9a3bae0d40..15d4c5e5cf 100644 --- a/tests/tests/shader/mod.rs +++ b/tests/tests/shader/mod.rs @@ -143,8 +143,8 @@ impl ShaderTest { body, input_type: String::from("CustomStruct"), output_type: String::from("array"), - input_values: bytemuck::cast_slice(input_values).to_vec(), - output_values: vec![bytemuck::cast_slice(output_values).to_vec()], + input_values: bytemuck::pod_collect_to_vec(input_values), + output_values: vec![bytemuck::pod_collect_to_vec(output_values)], output_comparison_fn: Self::default_comparison_function::, output_initialization: u32::MAX, failures: Backends::empty(), diff --git a/tests/tests/shader/struct_layout.rs b/tests/tests/shader/struct_layout.rs index 38a040fcad..25bd32bf77 100644 --- a/tests/tests/shader/struct_layout.rs +++ b/tests/tests/shader/struct_layout.rs @@ -5,6 +5,60 @@ use wgpu::{Backends, DownlevelFlags, Features, Limits}; use crate::shader::{shader_input_output_test, InputStorageType, ShaderTest, MAX_BUFFER_SIZE}; use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters}; +#[gpu_test] +static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + // Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/wgpu/issues/4371 + .expect_fail( + FailureCase::backend(wgpu::Backends::VULKAN) + .validation_error("a matrix with stride 8 not satisfying alignment to 16"), + ) + .limits(Limits::downlevel_defaults()), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::Uniform, + create_struct_layout_tests(InputStorageType::Uniform), + ) + }); + +#[gpu_test] +static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits::downlevel_defaults()), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::Storage, + create_struct_layout_tests(InputStorageType::Storage), + ) + }); + +#[gpu_test] +static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(Features::PUSH_CONSTANTS) + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits { + max_push_constant_size: MAX_BUFFER_SIZE as u32, + ..Limits::downlevel_defaults() + }), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::PushConstant, + create_struct_layout_tests(InputStorageType::PushConstant), + ) + }); + fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec { let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect(); @@ -253,6 +307,57 @@ fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec tests } +#[gpu_test] +static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(Features::SHADER_INT64) + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits::downlevel_defaults()), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::Storage, + create_64bit_struct_layout_tests(), + ) + }); + +#[gpu_test] +static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(Features::SHADER_INT64) + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits::downlevel_defaults()), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::Storage, + create_64bit_struct_layout_tests(), + ) + }); + +#[gpu_test] +static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS) + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits { + max_push_constant_size: MAX_BUFFER_SIZE as u32, + ..Limits::downlevel_defaults() + }), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::PushConstant, + create_64bit_struct_layout_tests(), + ) + }); + fn create_64bit_struct_layout_tests() -> Vec { let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect(); @@ -356,29 +461,26 @@ fn create_64bit_struct_layout_tests() -> Vec { } #[gpu_test] -static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() +static UNIFORM_INPUT_F16: GpuTestConfiguration = GpuTestConfiguration::new() .parameters( TestParameters::default() + .features(Features::SHADER_F16) .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) - // Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/wgpu/issues/4371 - .expect_fail( - FailureCase::backend(wgpu::Backends::VULKAN) - .validation_error("a matrix with stride 8 not satisfying alignment to 16"), - ) .limits(Limits::downlevel_defaults()), ) .run_async(|ctx| { shader_input_output_test( ctx, - InputStorageType::Uniform, - create_struct_layout_tests(InputStorageType::Uniform), + InputStorageType::Storage, + create_16bit_struct_layout_test(), ) }); #[gpu_test] -static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() +static STORAGE_INPUT_F16: GpuTestConfiguration = GpuTestConfiguration::new() .parameters( TestParameters::default() + .features(Features::SHADER_F16) .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) .limits(Limits::downlevel_defaults()), ) @@ -386,76 +488,205 @@ static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() shader_input_output_test( ctx, InputStorageType::Storage, - create_struct_layout_tests(InputStorageType::Storage), + create_16bit_struct_layout_test(), ) }); -#[gpu_test] -static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() - .parameters( - TestParameters::default() - .features(Features::PUSH_CONSTANTS) - .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) - .limits(Limits { - max_push_constant_size: MAX_BUFFER_SIZE as u32, - ..Limits::downlevel_defaults() - }), - ) - .run_async(|ctx| { - shader_input_output_test( - ctx, - InputStorageType::PushConstant, - create_struct_layout_tests(InputStorageType::PushConstant), - ) - }); +fn create_16bit_struct_layout_test() -> Vec { + let mut tests = Vec::new(); -#[gpu_test] -static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new() - .parameters( - TestParameters::default() - .features(Features::SHADER_INT64) - .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) - .limits(Limits::downlevel_defaults()), - ) - .run_async(|ctx| { - shader_input_output_test( - ctx, - InputStorageType::Storage, - create_64bit_struct_layout_tests(), - ) - }); + fn f16asu16(f32: f32) -> u16 { + half::f16::from_f32(f32).to_bits() + } -#[gpu_test] -static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new() - .parameters( - TestParameters::default() - .features(Features::SHADER_INT64) - .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) - .limits(Limits::downlevel_defaults()), - ) - .run_async(|ctx| { - shader_input_output_test( - ctx, - InputStorageType::Storage, - create_64bit_struct_layout_tests(), - ) - }); + // 16 bit alignment tests + { + let members = + "scalar1: f16, scalar2: f16, v3: vec3, tuck_in: f16, scalar4: f16, larger: u32"; + let direct = String::from( + "\ + output[0] = u32(input.scalar1); + output[1] = u32(input.scalar2); + output[2] = u32(input.v3.x); + output[3] = u32(input.v3.y); + output[4] = u32(input.v3.z); + output[5] = u32(input.tuck_in); + output[6] = u32(input.scalar4); + output[7] = u32(extractBits(input.larger, 0u, 16u)); + output[8] = u32(extractBits(input.larger, 16u, 16u)); + ", + ); -#[gpu_test] -static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new() - .parameters( - TestParameters::default() - .features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS) - .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) - .limits(Limits { - max_push_constant_size: MAX_BUFFER_SIZE as u32, - ..Limits::downlevel_defaults() - }), - ) - .run_async(|ctx| { - shader_input_output_test( - ctx, - InputStorageType::PushConstant, - create_64bit_struct_layout_tests(), - ) - }); + tests.push(ShaderTest::new( + "f16 alignment".into(), + members.into(), + direct, + &[ + f16asu16(0.0), + f16asu16(1.0), + f16asu16(2.0), + f16asu16(3.0), + f16asu16(4.0), + f16asu16(5.0), + f16asu16(6.0), + f16asu16(7.0), + f16asu16(8.0), + f16asu16(9.0), + 10_u16, + 11_u16, + // Some extra values to help debug if the test fails. + 12_u16, + 13_u16, + 14_u16, + 15_u16, + 16_u16, + 17_u16, + 18_u16, + 19_u16, + 20_u16, + ], + &[ + 0, // scalar1 + 1, // scalar2 + 4, 5, 6, // v3 + 7, // tuck_in + 8, // scalar4 + 10, // larger[0..16] + 11, // larger[16..32] + ], + )); + } + + // Matrix tests + { + let members = "m2: mat2x2h, m3: mat3x3h, m4: mat4x4h"; + let direct = String::from( + "\ + output[0] = u32(input.m2[0].x); + output[1] = u32(input.m2[0].y); + output[2] = u32(input.m2[1].x); + output[3] = u32(input.m2[1].y); + + output[4] = u32(input.m3[0].x); + output[5] = u32(input.m3[0].y); + output[6] = u32(input.m3[0].z); + output[7] = u32(input.m3[1].x); + output[8] = u32(input.m3[1].y); + output[9] = u32(input.m3[1].z); + output[10] = u32(input.m3[2].x); + output[11] = u32(input.m3[2].y); + output[12] = u32(input.m3[2].z); + + output[13] = u32(input.m4[0].x); + output[14] = u32(input.m4[0].y); + output[15] = u32(input.m4[0].z); + output[16] = u32(input.m4[0].w); + output[17] = u32(input.m4[1].x); + output[18] = u32(input.m4[1].y); + output[19] = u32(input.m4[1].z); + output[20] = u32(input.m4[1].w); + output[21] = u32(input.m4[2].x); + output[22] = u32(input.m4[2].y); + output[23] = u32(input.m4[2].z); + output[24] = u32(input.m4[2].w); + output[25] = u32(input.m4[3].x); + output[26] = u32(input.m4[3].y); + output[27] = u32(input.m4[3].z); + output[28] = u32(input.m4[3].w); + ", + ); + + tests.push(ShaderTest::new( + "f16 matrix alignment".into(), + members.into(), + direct, + &(0..32).map(|x| f16asu16(x as f32)).collect::>(), + &[ + 0, 1, // m2[0] + 2, 3, // m2[1] + // + 4, 5, 6, // m3[0] + 8, 9, 10, // m3[1] + 12, 13, 14, // m3[2] + // + 16, 17, 18, 19, // m4[0] + 20, 21, 22, 23, // m4[1] + 24, 25, 26, 27, // m4[2] + 28, 29, 30, 31, // m4[3] + ], + )); + } + + // // Nested struct and array test. + // // + // // This tries to exploit all the weird edge cases of the struct layout algorithm. + // // We dont go as all-out as the other nested struct test because + // // all our primitives are twice as wide and we have only so much buffer to spare. + // { + // let header = String::from( + // "struct Inner { scalar: u64, scalar32: u32, member: array, 2> }", + // ); + // let members = String::from("inner: Inner"); + // let direct = String::from( + // "\ + + // ", + // ); + + // tests.push( + // ShaderTest::new( + // String::from("nested struct and array"), + // members, + // direct, + // &input_values, + // &[ + // 0, 1, // inner.scalar + // 2, // inner.scalar32 + // 8, 9, 10, 11, 12, 13, // inner.member[0] + // 16, 17, 18, 19, 20, 21, // inner.member[1] + // ], + // ) + // .header(header), + // ); + // } + // { + // let header = String::from("struct Inner { scalar32: u32, scalar: u64, scalar32_2: u32 }"); + // let members = String::from("inner: Inner, vector: vec3"); + // let direct = String::from( + // "\ + // output[0] = bitcast(input.inner.scalar32); + // output[1] = u32(bitcast(input.inner.scalar) & 0xFFFFFFFF); + // output[2] = u32((bitcast(input.inner.scalar) >> 32) & 0xFFFFFFFF); + // output[3] = bitcast(input.inner.scalar32_2); + // output[4] = u32(bitcast(input.vector.x) & 0xFFFFFFFF); + // output[5] = u32((bitcast(input.vector.x) >> 32) & 0xFFFFFFFF); + // output[6] = u32(bitcast(input.vector.y) & 0xFFFFFFFF); + // output[7] = u32((bitcast(input.vector.y) >> 32) & 0xFFFFFFFF); + // output[8] = u32(bitcast(input.vector.z) & 0xFFFFFFFF); + // output[9] = u32((bitcast(input.vector.z) >> 32) & 0xFFFFFFFF); + // ", + // ); + + // tests.push( + // ShaderTest::new( + // String::from("nested struct and array"), + // members, + // direct, + // &input_values, + // &[ + // 0, // inner.scalar32 + // 2, 3, // inner.scalar + // 4, // inner.scalar32_2 + // 8, 9, 10, 11, 12, 13, // vector + // ], + // ) + // .header(header), + // ); + // } + + // Insert `enable f16;` header + tests + .into_iter() + .map(|test| test.header("enable f16;".into())) + .collect() +} diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 5bb1aad6f2..78fb02566c 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -383,6 +383,10 @@ pub fn create_validator( features.contains(wgt::Features::PUSH_CONSTANTS), ); caps.set(Caps::FLOAT64, features.contains(wgt::Features::SHADER_F64)); + caps.set( + Caps::SHADER_FLOAT16, + features.contains(wgt::Features::SHADER_F16), + ); caps.set( Caps::PRIMITIVE_INDEX, features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX), diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index bc5c943d88..34438c6eb9 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -1,5 +1,5 @@ use std::{ - mem::{size_of, size_of_val}, + mem::{self, size_of, size_of_val}, ptr, string::String, sync::Arc, @@ -432,6 +432,24 @@ impl super::Adapter { && features1.Int64ShaderOps.as_bool(), ); + let float16_supported = { + let mut features4: Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS4 = + unsafe { mem::zeroed() }; + let hr = unsafe { + device.CheckFeatureSupport( + Direct3D12::D3D12_FEATURE_D3D12_OPTIONS4, // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_feature#syntax + ptr::from_mut(&mut features4).cast(), + size_of::() as _, + ) + }; + hr.is_ok() && features4.Native16BitShaderOpsSupported.as_bool() + }; + + features.set( + wgt::Features::SHADER_F16, + shader_model >= naga::back::hlsl::ShaderModel::V6_2 && float16_supported, + ); + features.set( wgt::Features::TEXTURE_INT64_ATOMIC, shader_model >= naga::back::hlsl::ShaderModel::V6_6 @@ -619,7 +637,7 @@ impl crate::Adapter for super::Adapter { unsafe fn open( &self, - _features: wgt::Features, + features: wgt::Features, limits: &wgt::Limits, memory_hints: &wgt::MemoryHints, ) -> Result, crate::DeviceError> { @@ -640,6 +658,7 @@ impl crate::Adapter for super::Adapter { let device = super::Device::new( self.device.clone(), queue.clone(), + features, limits, memory_hints, self.private_caps, diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index a5fbe283db..9aa8573ad0 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -34,9 +34,11 @@ use crate::{ const NAGA_LOCATION_SEMANTIC: &[u8] = b"LOC\0"; impl super::Device { + #[allow(clippy::too_many_arguments)] pub(super) fn new( raw: Direct3D12::ID3D12Device, present_queue: Direct3D12::ID3D12CommandQueue, + features: wgt::Features, limits: &wgt::Limits, memory_hints: &wgt::MemoryHints, private_caps: super::PrivateCapabilities, @@ -179,6 +181,7 @@ impl super::Device { event: Event::create(false, false)?, }, private_caps, + features, shared: Arc::new(shared), rtv_pool: Mutex::new(rtv_pool), dsv_pool: Mutex::new(descriptor::CpuPool::new( diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 99e29e87c9..151f109a51 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -629,6 +629,7 @@ pub struct Device { present_queue: Direct3D12::ID3D12CommandQueue, idler: Idler, private_caps: PrivateCapabilities, + features: wgt::Features, shared: Arc, // CPU only pools rtv_pool: Mutex, diff --git a/wgpu-hal/src/dx12/shader_compilation.rs b/wgpu-hal/src/dx12/shader_compilation.rs index f9f3a6b740..e12784605d 100644 --- a/wgpu-hal/src/dx12/shader_compilation.rs +++ b/wgpu-hal/src/dx12/shader_compilation.rs @@ -268,7 +268,7 @@ pub(super) fn compile_dxc( let raw_ep = OPCWSTR::new(raw_ep); let full_stage = OPCWSTR::new(full_stage); - let mut compile_args = arrayvec::ArrayVec::::new_const(); + let mut compile_args = arrayvec::ArrayVec::::new_const(); if let Some(source_name) = source_name.as_ref() { compile_args.push(source_name.ptr()) @@ -298,6 +298,10 @@ pub(super) fn compile_dxc( compile_args.push(Dxc::DXC_ARG_SKIP_OPTIMIZATIONS); } + if device.features.contains(wgt::Features::SHADER_F16) { + compile_args.push(windows::core::w!("-enable-16bit-types")); + } + let buffer = Dxc::DxcBuffer { Ptr: source.as_ptr().cast(), Size: source.len(), diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 23b7cdd94b..08cba45ce0 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -378,6 +378,7 @@ impl PhysicalDeviceFeatures { vk::PhysicalDeviceShaderFloat16Int8Features::default().shader_float16(true), vk::PhysicalDevice16BitStorageFeatures::default() .storage_buffer16_bit_access(true) + .storage_input_output16(true) .uniform_and_storage_buffer16_bit_access(true), )) } else { @@ -668,7 +669,8 @@ impl PhysicalDeviceFeatures { F::SHADER_F16, f16_i8.shader_float16 != 0 && bit16.storage_buffer16_bit_access != 0 - && bit16.uniform_and_storage_buffer16_bit_access != 0, + && bit16.uniform_and_storage_buffer16_bit_access != 0 + && bit16.storage_input_output16 != 0, ); } @@ -1846,6 +1848,10 @@ impl super::Adapter { capabilities.push(spv::Capability::Int64); } + if features.contains(wgt::Features::SHADER_F16) { + capabilities.push(spv::Capability::Float16); + } + if features.intersects( wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX