diff --git a/Cargo.lock b/Cargo.lock index 5c781ca2a44..84c28f3c333 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1757,6 +1757,20 @@ dependencies = [ "crunchy", ] +[[package]] +name = "half-2" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c36518ae13d88b7cbdc61401256df5d9fc27921ea66353a16660869b47af8864" +dependencies = [ + "arbitrary", + "bytemuck", + "cfg-if", + "crunchy", + "num-traits", + "serde", +] + [[package]] name = "hashbrown" version = "0.14.5" @@ -2137,6 +2151,12 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + [[package]] name = "libredox" version = "0.1.3" @@ -2337,12 +2357,14 @@ dependencies = [ "codespan-reporting", "diff", "env_logger", + "half-2", "hashbrown", "hexf-parse", "hlsl-snapshots", "indexmap", "itertools 0.13.0", "log", + "num-traits", "petgraph 0.7.1", "pp-rs", "ron", @@ -2354,6 +2376,7 @@ dependencies = [ "termcolor", "thiserror 2.0.11", "unicode-ident", + "walkdir", ] [[package]] @@ -2516,6 +2539,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -4703,6 +4727,7 @@ dependencies = [ "env_logger", "futures-lite", "glam", + "half-2", "image", "itertools 0.13.0", "js-sys", diff --git a/Cargo.toml b/Cargo.toml index 926d3bd53ba..80fa8b32e8e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,6 +96,8 @@ flume = "0.11" futures-lite = "2" getrandom = "0.2" glam = "0.29" +# TODO: Use `half` directly again after is released upstream. +half = { package = "half-2", version = "2.4.1" } hashbrown = { version = "0.14.5", default-features = false, features = [ "ahash", "inline-more", @@ -142,6 +144,7 @@ strum = { version = "0.26.3", default-features = false, features = ["derive"] } trybuild = "1" tracy-client = "0.17" thiserror = { version = "2", default-features = false } +walkdir = "2" winit = { version = "0.29", features = ["android-native-activity"] } # Metal dependencies diff --git a/naga/Cargo.toml b/naga/Cargo.toml index 4458405bbfd..a84ab9d2ba7 100644 --- a/naga/Cargo.toml +++ b/naga/Cargo.toml @@ -41,10 +41,17 @@ msl-out = [] ## If you want to enable MSL output it regardless of the target platform, use `naga/msl-out`. msl-out-if-target-apple = [] -serialize = ["dep:serde", "bitflags/serde", "hashbrown/serde", "indexmap/serde"] +serialize = [ + "dep:serde", + "bitflags/serde", + "half/serde", + "hashbrown/serde", + "indexmap/serde", +] deserialize = [ "dep:serde", "bitflags/serde", + "half/serde", "hashbrown/serde", "indexmap/serde", ] @@ -84,9 +91,11 @@ termcolor = { version = "1.4.1" } # https://github.com/brendanzab/codespan/commit/e99c867339a877731437e7ee6a903a3d03b5439e codespan-reporting = { version = "0.11.0" } hashbrown.workspace = true +half = { workspace = true, features = ["arbitrary", "num-traits"] } rustc-hash.workspace = true indexmap.workspace = true log = "0.4" +num-traits = "0.2" strum = { workspace = true, optional = true } spirv = { version = "0.3", optional = true } thiserror.workspace = true @@ -118,3 +127,4 @@ ron = "0.8.0" rspirv = { version = "0.11", git = "https://github.com/gfx-rs/rspirv", rev = "b969f175d5663258b4891e44b76c1544da9661ab" } serde = { workspace = true, features = ["default", "derive"] } spirv = { version = "0.3", features = ["deserialize"] } +walkdir.workspace = true diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs index 0798fac82d1..3c93084a2f4 100644 --- a/naga/src/back/glsl/mod.rs +++ b/naga/src/back/glsl/mod.rs @@ -2669,6 +2669,9 @@ impl<'a, W: Write> Writer<'a, W> { // decimal part even it's zero which is needed for a valid glsl float constant crate::Literal::F64(value) => write!(self.out, "{value:?}LF")?, crate::Literal::F32(value) => write!(self.out, "{value:?}")?, + crate::Literal::F16(_) => { + return Err(Error::Custom("GLSL has no 16-bit float type".into())); + } // Unsigned integers need a `u` at the end // // While `core` doesn't necessarily need it, it's allowed and since `es` needs it we diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index a8283388ceb..eac1cae5749 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -2634,6 +2634,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { // decimal part even it's zero crate::Literal::F64(value) => write!(self.out, "{value:?}L")?, crate::Literal::F32(value) => write!(self.out, "{value:?}")?, + crate::Literal::F16(value) => write!(self.out, "{value:?}h")?, crate::Literal::U32(value) => write!(self.out, "{value}u")?, // HLSL has no suffix for explicit i32 literals, but not using any suffix // makes the type ambiguous which prevents overload resolution from diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index bc77a7f6d0c..0fe26270f82 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -5,6 +5,8 @@ use crate::{ proc::{self, index, ExpressionKindTracker, NameKey, TypeResolution}, valid, FastHashMap, FastHashSet, }; +use half::f16; +use num_traits::real::Real; #[cfg(test)] use std::ptr; use std::{ @@ -174,9 +176,11 @@ impl Display for TypeContext<'_> { write!(out, "{}::atomic_{}", NAMESPACE, scalar.to_msl_name()) } crate::TypeInner::Vector { size, scalar } => put_numeric_type(out, scalar, &[size]), - crate::TypeInner::Matrix { columns, rows, .. } => { - put_numeric_type(out, crate::Scalar::F32, &[rows, columns]) - } + crate::TypeInner::Matrix { + columns, + rows, + scalar, + } => put_numeric_type(out, scalar, &[rows, columns]), crate::TypeInner::Pointer { base, space } => { let sub = Self { handle: base, @@ -413,8 +417,12 @@ impl crate::Scalar { match self { Self { kind: Sk::Float, - width: _, + width: 4, } => "float", + Self { + kind: Sk::Float, + width: 2, + } => "half", Self { kind: Sk::Sint, width: 4, @@ -471,7 +479,7 @@ fn should_pack_struct_member( match *ty_inner { crate::TypeInner::Vector { size: crate::VectorSize::Tri, - scalar: scalar @ crate::Scalar { width: 4, .. }, + scalar: scalar @ crate::Scalar { width: 4 | 2, .. }, } if is_tight => Some(scalar), _ => None, } @@ -1446,6 +1454,21 @@ impl Writer { crate::Literal::F64(_) => { return Err(Error::CapabilityNotSupported(valid::Capabilities::FLOAT64)) } + crate::Literal::F16(value) => { + if value.is_infinite() { + let sign = if value.is_sign_negative() { "-" } else { "" }; + write!(self.out, "{sign}INFINITY")?; + } else if value.is_nan() { + write!(self.out, "NAN")?; + } else { + let suffix = if value.fract() == f16::from_f32(0.0) { + ".0h" + } else { + "h" + }; + write!(self.out, "{value}{suffix}")?; + } + } crate::Literal::F32(value) => { if value.is_infinite() { let sign = if value.is_sign_negative() { "-" } else { "" }; diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 6b26c1c2aac..9c85608b706 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -8,6 +8,7 @@ use super::{ WriterFlags, }; use crate::{arena::Handle, proc::index::GuardedIndex, Statement}; +use arrayvec::ArrayVec; use spirv::Word; fn get_dimension(type_inner: &crate::TypeInner) -> Dimension { @@ -1476,159 +1477,7 @@ impl BlockContext<'_> { expr, kind, convert, - } => { - use crate::ScalarKind as Sk; - - let expr_id = self.cached[expr]; - let (src_scalar, src_size, is_matrix) = - match *self.fun_info[expr].ty.inner_with(&self.ir_module.types) { - crate::TypeInner::Scalar(scalar) => (scalar, None, false), - crate::TypeInner::Vector { scalar, size } => (scalar, Some(size), false), - crate::TypeInner::Matrix { scalar, .. } => (scalar, None, true), - ref other => { - log::error!("As source {:?}", other); - return Err(Error::Validation("Unexpected Expression::As source")); - } - }; - - enum Cast { - Identity, - Unary(spirv::Op), - Binary(spirv::Op, Word), - Ternary(spirv::Op, Word, Word), - } - - let cast = if is_matrix { - // we only support identity casts for matrices - Cast::Unary(spirv::Op::CopyObject) - } else { - match (src_scalar.kind, kind, convert) { - // Filter out identity casts. Some Adreno drivers are - // confused by no-op OpBitCast instructions. - (src_kind, kind, convert) - if src_kind == kind - && convert.filter(|&width| width != src_scalar.width).is_none() => - { - Cast::Identity - } - (Sk::Bool, Sk::Bool, _) => Cast::Unary(spirv::Op::CopyObject), - (_, _, None) => Cast::Unary(spirv::Op::Bitcast), - // casting to a bool - generate `OpXxxNotEqual` - (_, Sk::Bool, Some(_)) => { - let op = match src_scalar.kind { - Sk::Sint | Sk::Uint => spirv::Op::INotEqual, - Sk::Float => spirv::Op::FUnordNotEqual, - Sk::Bool | Sk::AbstractInt | Sk::AbstractFloat => unreachable!(), - }; - let zero_scalar_id = - self.writer.get_constant_scalar_with(0, src_scalar)?; - let zero_id = match src_size { - Some(size) => { - let ty = LocalType::Numeric(NumericType::Vector { - size, - scalar: src_scalar, - }) - .into(); - - self.temp_list.clear(); - self.temp_list.resize(size as _, zero_scalar_id); - - self.writer.get_constant_composite(ty, &self.temp_list) - } - None => zero_scalar_id, - }; - - Cast::Binary(op, zero_id) - } - // casting from a bool - generate `OpSelect` - (Sk::Bool, _, Some(dst_width)) => { - let dst_scalar = crate::Scalar { - kind, - width: dst_width, - }; - let zero_scalar_id = - self.writer.get_constant_scalar_with(0, dst_scalar)?; - let one_scalar_id = - self.writer.get_constant_scalar_with(1, dst_scalar)?; - let (accept_id, reject_id) = match src_size { - Some(size) => { - let ty = LocalType::Numeric(NumericType::Vector { - size, - scalar: dst_scalar, - }) - .into(); - - self.temp_list.clear(); - self.temp_list.resize(size as _, zero_scalar_id); - - let vec0_id = - self.writer.get_constant_composite(ty, &self.temp_list); - - self.temp_list.fill(one_scalar_id); - - let vec1_id = - self.writer.get_constant_composite(ty, &self.temp_list); - - (vec1_id, vec0_id) - } - None => (one_scalar_id, zero_scalar_id), - }; - - Cast::Ternary(spirv::Op::Select, accept_id, reject_id) - } - (Sk::Float, Sk::Uint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToU), - (Sk::Float, Sk::Sint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToS), - (Sk::Float, Sk::Float, Some(dst_width)) - if src_scalar.width != dst_width => - { - Cast::Unary(spirv::Op::FConvert) - } - (Sk::Sint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertSToF), - (Sk::Sint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => { - Cast::Unary(spirv::Op::SConvert) - } - (Sk::Uint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertUToF), - (Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => { - Cast::Unary(spirv::Op::UConvert) - } - (Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => { - Cast::Unary(spirv::Op::SConvert) - } - (Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => { - Cast::Unary(spirv::Op::UConvert) - } - // We assume it's either an identity cast, or int-uint. - _ => Cast::Unary(spirv::Op::Bitcast), - } - }; - - let id = self.gen_id(); - let instruction = match cast { - Cast::Identity => None, - Cast::Unary(op) => Some(Instruction::unary(op, result_type_id, id, expr_id)), - Cast::Binary(op, operand) => Some(Instruction::binary( - op, - result_type_id, - id, - expr_id, - operand, - )), - Cast::Ternary(op, op1, op2) => Some(Instruction::ternary( - op, - result_type_id, - id, - expr_id, - op1, - op2, - )), - }; - if let Some(instruction) = instruction { - block.body.push(instruction); - id - } else { - expr_id - } - } + } => self.write_as_expression(expr, convert, kind, block, result_type_id)?, crate::Expression::ImageLoad { image, coordinate, @@ -1784,6 +1633,237 @@ impl BlockContext<'_> { Ok(()) } + /// Helper which focuses on generating the `As` expressions and the various conversions + /// that need to happen because of that. + fn write_as_expression( + &mut self, + expr: Handle, + convert: Option, + kind: crate::ScalarKind, + + block: &mut Block, + result_type_id: u32, + ) -> Result { + use crate::ScalarKind as Sk; + let expr_id = self.cached[expr]; + let ty = self.fun_info[expr].ty.inner_with(&self.ir_module.types); + + // Matrix casts needs special treatment in SPIR-V, as the cast functions + // can take vectors or scalars, but not matrices. In order to cast a matrix + // we need to cast each column of the matrix individually and construct a new + // matrix from the converted columns. + if let crate::TypeInner::Matrix { + columns, + rows, + scalar, + } = *ty + { + let Some(convert) = convert else { + // No conversion needs to be done, passes through. + return Ok(expr_id); + }; + + if convert == scalar.width { + // No conversion needs to be done, passes through. + return Ok(expr_id); + } + + if kind != Sk::Float { + // Only float conversions are supported for matrices. + return Err(Error::Validation("Matrices must be floats")); + } + + // Type of each extracted column + let column_src_ty = + self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector { + size: rows, + scalar, + }))); + + // Type of the column after conversion + let column_dst_ty = + self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector { + size: rows, + scalar: crate::Scalar { + kind, + width: convert, + }, + }))); + + let mut components = ArrayVec::::new(); + + for column in 0..columns as usize { + let column_id = self.gen_id(); + block.body.push(Instruction::composite_extract( + column_src_ty, + column_id, + expr_id, + &[column as u32], + )); + + let column_conv_id = self.gen_id(); + block.body.push(Instruction::unary( + spirv::Op::FConvert, + column_dst_ty, + column_conv_id, + column_id, + )); + + components.push(column_conv_id); + } + + let construct_id = self.gen_id(); + + block.body.push(Instruction::composite_construct( + result_type_id, + construct_id, + &components, + )); + + return Ok(construct_id); + } + + let (src_scalar, src_size) = match *ty { + crate::TypeInner::Scalar(scalar) => (scalar, None), + crate::TypeInner::Vector { scalar, size } => (scalar, Some(size)), + ref other => { + log::error!("As source {:?}", other); + return Err(Error::Validation("Unexpected Expression::As source")); + } + }; + + enum Cast { + Identity, + Unary(spirv::Op), + Binary(spirv::Op, Word), + Ternary(spirv::Op, Word, Word), + } + let cast = match (src_scalar.kind, kind, convert) { + // Filter out identity casts. Some Adreno drivers are + // confused by no-op OpBitCast instructions. + (src_kind, kind, convert) + if src_kind == kind + && convert.filter(|&width| width != src_scalar.width).is_none() => + { + Cast::Identity + } + (Sk::Bool, Sk::Bool, _) => Cast::Unary(spirv::Op::CopyObject), + (_, _, None) => Cast::Unary(spirv::Op::Bitcast), + // casting to a bool - generate `OpXxxNotEqual` + (_, Sk::Bool, Some(_)) => { + let op = match src_scalar.kind { + Sk::Sint | Sk::Uint => spirv::Op::INotEqual, + Sk::Float => spirv::Op::FUnordNotEqual, + Sk::Bool | Sk::AbstractInt | Sk::AbstractFloat => unreachable!(), + }; + let zero_scalar_id = self.writer.get_constant_scalar_with(0, src_scalar)?; + let zero_id = match src_size { + Some(size) => { + let ty = LocalType::Numeric(NumericType::Vector { + size, + scalar: src_scalar, + }) + .into(); + + self.temp_list.clear(); + self.temp_list.resize(size as _, zero_scalar_id); + + self.writer.get_constant_composite(ty, &self.temp_list) + } + None => zero_scalar_id, + }; + + Cast::Binary(op, zero_id) + } + // casting from a bool - generate `OpSelect` + (Sk::Bool, _, Some(dst_width)) => { + let dst_scalar = crate::Scalar { + kind, + width: dst_width, + }; + let zero_scalar_id = self.writer.get_constant_scalar_with(0, dst_scalar)?; + let one_scalar_id = self.writer.get_constant_scalar_with(1, dst_scalar)?; + let (accept_id, reject_id) = match src_size { + Some(size) => { + let ty = LocalType::Numeric(NumericType::Vector { + size, + scalar: dst_scalar, + }) + .into(); + + self.temp_list.clear(); + self.temp_list.resize(size as _, zero_scalar_id); + + let vec0_id = self.writer.get_constant_composite(ty, &self.temp_list); + + self.temp_list.fill(one_scalar_id); + + let vec1_id = self.writer.get_constant_composite(ty, &self.temp_list); + + (vec1_id, vec0_id) + } + None => (one_scalar_id, zero_scalar_id), + }; + + Cast::Ternary(spirv::Op::Select, accept_id, reject_id) + } + (Sk::Float, Sk::Uint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToU), + (Sk::Float, Sk::Sint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToS), + (Sk::Float, Sk::Float, Some(dst_width)) if src_scalar.width != dst_width => { + Cast::Unary(spirv::Op::FConvert) + } + (Sk::Sint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertSToF), + (Sk::Sint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => { + Cast::Unary(spirv::Op::SConvert) + } + (Sk::Uint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertUToF), + (Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => { + Cast::Unary(spirv::Op::UConvert) + } + (Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => { + Cast::Unary(spirv::Op::SConvert) + } + (Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => { + Cast::Unary(spirv::Op::UConvert) + } + // We assume it's either an identity cast, or int-uint. + _ => Cast::Unary(spirv::Op::Bitcast), + }; + Ok(match cast { + Cast::Identity => expr_id, + Cast::Unary(op) => { + let id = self.gen_id(); + block + .body + .push(Instruction::unary(op, result_type_id, id, expr_id)); + id + } + Cast::Binary(op, operand) => { + let id = self.gen_id(); + block.body.push(Instruction::binary( + op, + result_type_id, + id, + expr_id, + operand, + )); + id + } + Cast::Ternary(op, op1, op2) => { + let id = self.gen_id(); + block.body.push(Instruction::ternary( + op, + result_type_id, + id, + expr_id, + op1, + op2, + )); + id + } + }) + } + /// Build an `OpAccessChain` instruction. /// /// Emit any needed bounds-checking expressions to `block`. diff --git a/naga/src/back/spv/instructions.rs b/naga/src/back/spv/instructions.rs index 57bfb4e9f31..bb4b4f98c1e 100644 --- a/naga/src/back/spv/instructions.rs +++ b/naga/src/back/spv/instructions.rs @@ -406,6 +406,10 @@ impl super::Instruction { instruction } + pub(super) fn constant_16bit(result_type_id: Word, id: Word, low: Word) -> Self { + Self::constant(result_type_id, id, &[low]) + } + pub(super) fn constant_32bit(result_type_id: Word, id: Word, value: Word) -> Self { Self::constant(result_type_id, id, &[value]) } diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index cc0c227bece..56f9985790d 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -1049,6 +1049,15 @@ impl Writer { if bits == 64 { self.capabilities_used.insert(spirv::Capability::Float64); } + if bits == 16 { + self.capabilities_used.insert(spirv::Capability::Float16); + self.capabilities_used + .insert(spirv::Capability::StorageBuffer16BitAccess); + self.capabilities_used + .insert(spirv::Capability::UniformAndStorageBuffer16BitAccess); + self.capabilities_used + .insert(spirv::Capability::StorageInputOutput16); + } Instruction::type_float(id, bits) } Sk::Bool => Instruction::type_bool(id), @@ -1117,6 +1126,19 @@ impl Writer { )?; self.use_extension("SPV_EXT_shader_atomic_float_add"); } + // 16 bit floating-point support requires Float16 capability + crate::TypeInner::Matrix { + scalar: crate::Scalar::F16, + .. + } + | crate::TypeInner::Vector { + scalar: crate::Scalar::F16, + .. + } + | crate::TypeInner::Scalar(crate::Scalar::F16) => { + self.require_any("16 bit floating-point", &[spirv::Capability::Float16])?; + self.use_extension("SPV_KHR_16bit_storage"); + } _ => {} } Ok(()) @@ -1410,6 +1432,10 @@ impl Writer { Instruction::constant_64bit(type_id, id, bits as u32, (bits >> 32) as u32) } crate::Literal::F32(value) => Instruction::constant_32bit(type_id, id, value.to_bits()), + crate::Literal::F16(value) => { + let low = value.to_bits(); + Instruction::constant_16bit(type_id, id, low as u32) + } crate::Literal::U32(value) => Instruction::constant_32bit(type_id, id, value), crate::Literal::I32(value) => Instruction::constant_32bit(type_id, id, value as u32), crate::Literal::U64(value) => { diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs index a7cd8f95c9b..cbcf1079856 100644 --- a/naga/src/back/wgsl/writer.rs +++ b/naga/src/back/wgsl/writer.rs @@ -1,3 +1,5 @@ +use hashbrown::HashSet; + use super::Error; use crate::back::wgsl::polyfill::InversePolyfill; use crate::{ @@ -125,6 +127,9 @@ impl Writer { } } + // Write all `enable` declarations + self.write_enable_declarations(module)?; + // Write all structs for (handle, ty) in module.types.iter() { if let TypeInner::Struct { ref members, .. } = ty.inner { @@ -217,6 +222,41 @@ impl Writer { Ok(()) } + /// Helper method which writes all the `enable` declarations + /// needed for a module. + fn write_enable_declarations(&mut self, module: &Module) -> BackendResult { + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + enum WrittenDeclarations { + F16, + } + + let mut written_declarations = HashSet::new(); + + // Write all the `enable` declarations + for (_, ty) in module.types.iter() { + match ty.inner { + TypeInner::Scalar(scalar) + | TypeInner::Vector { scalar, .. } + | TypeInner::Matrix { scalar, .. } => { + if scalar == crate::Scalar::F16 + && !written_declarations.contains(&WrittenDeclarations::F16) + { + writeln!(self.out, "enable f16;")?; + written_declarations.insert(WrittenDeclarations::F16); + } + } + _ => {} + } + } + + if !written_declarations.is_empty() { + // Empty line for readability + writeln!(self.out)?; + } + + Ok(()) + } + /// Helper method used to write struct name /// /// # Notes @@ -1262,6 +1302,7 @@ impl Writer { match expressions[expr] { Expression::Literal(literal) => match literal { + crate::Literal::F16(value) => write!(self.out, "{value}h")?, crate::Literal::F32(value) => write!(self.out, "{value}f")?, crate::Literal::U32(value) => write!(self.out, "{value}u")?, crate::Literal::I32(value) => { @@ -2026,6 +2067,10 @@ const fn scalar_kind_str(scalar: crate::Scalar) -> &'static str { kind: Sk::Float, width: 4, } => "f32", + Scalar { + kind: Sk::Float, + width: 2, + } => "f16", Scalar { kind: Sk::Sint, width: 4, diff --git a/naga/src/front/glsl/error.rs b/naga/src/front/glsl/error.rs index 92962db00d7..201ad5333d2 100644 --- a/naga/src/front/glsl/error.rs +++ b/naga/src/front/glsl/error.rs @@ -102,9 +102,15 @@ pub enum ErrorKind { /// Unsupported matrix of the form matCx2 /// /// Our IR expects matrices of the form matCx2 to have a stride of 8 however - /// matrices in the std140 layout have a stride of at least 16 - #[error("unsupported matrix of the form matCx2 in std140 block layout")] - UnsupportedMatrixTypeInStd140, + /// matrices in the std140 layout have a stride of at least 16. + #[error("unsupported matrix of the form matCx2 (in this case mat{columns}x2) in std140 block layout. See https://github.com/gfx-rs/wgpu/issues/4375")] + UnsupportedMatrixWithTwoRowsInStd140 { columns: u8 }, + /// Unsupported matrix of the form f16matCxR + /// + /// Our IR expects matrices of the form f16matCxR to have a stride of 4/8/8 depending on row-count, + /// however matrices in the std140 layout have a stride of at least 16. + #[error("unsupported matrix of the form f16matCxR (in this case f16mat{columns}x{rows}) in std140 block layout. See https://github.com/gfx-rs/wgpu/issues/4375")] + UnsupportedF16MatrixInStd140 { columns: u8, rows: u8 }, /// A variable with the same name already exists in the current scope. #[error("Variable already declared: {0}")] VariableAlreadyDeclared(String), diff --git a/naga/src/front/glsl/offset.rs b/naga/src/front/glsl/offset.rs index 6e8d5ada107..d3d29ca4285 100644 --- a/naga/src/front/glsl/offset.rs +++ b/naga/src/front/glsl/offset.rs @@ -120,11 +120,25 @@ pub fn calculate_offset( } // See comment on the error kind - if StructLayout::Std140 == layout && rows == crate::VectorSize::Bi { - errors.push(Error { - kind: ErrorKind::UnsupportedMatrixTypeInStd140, - meta, - }); + if StructLayout::Std140 == layout { + // Do the f16 test first, as it's more specific + if scalar == Scalar::F16 { + errors.push(Error { + kind: ErrorKind::UnsupportedF16MatrixInStd140 { + columns: columns as u8, + rows: rows as u8, + }, + meta, + }); + } + if rows == crate::VectorSize::Bi { + errors.push(Error { + kind: ErrorKind::UnsupportedMatrixWithTwoRowsInStd140 { + columns: columns as u8, + }, + meta, + }); + } } (align, align * columns as u32) diff --git a/naga/src/front/glsl/types.rs b/naga/src/front/glsl/types.rs index ad5e188fd95..d4bd5a69fa4 100644 --- a/naga/src/front/glsl/types.rs +++ b/naga/src/front/glsl/types.rs @@ -10,6 +10,10 @@ pub fn parse_type(type_name: &str) -> Option { name: None, inner: TypeInner::Scalar(Scalar::BOOL), }), + "float16_t" => Some(Type { + name: None, + inner: TypeInner::Scalar(Scalar::F16), + }), "float" => Some(Type { name: None, inner: TypeInner::Scalar(Scalar::F32), @@ -40,6 +44,7 @@ pub fn parse_type(type_name: &str) -> Option { "i" => Scalar::I32, "u" => Scalar::U32, "d" => Scalar::F64, + "f16" => Scalar::F16, _ => return None, }) } diff --git a/naga/src/front/spv/mod.rs b/naga/src/front/spv/mod.rs index b8087fc8b0f..473011dbf2f 100644 --- a/naga/src/front/spv/mod.rs +++ b/naga/src/front/spv/mod.rs @@ -36,6 +36,7 @@ mod null; use convert::*; pub use error::Error; use function::*; +use half::f16; use crate::{ arena::{Arena, Handle, UniqueArena}, @@ -80,6 +81,7 @@ pub const SUPPORTED_EXTENSIONS: &[&str] = &[ "SPV_KHR_vulkan_memory_model", "SPV_KHR_multiview", "SPV_EXT_shader_atomic_float_add", + "SPV_KHR_16bit_storage", ]; pub const SUPPORTED_EXT_SETS: &[&str] = &["GLSL.std.450"]; @@ -5598,6 +5600,9 @@ impl> Frontend { }) => { let low = self.next()?; match width { + // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#Literal + // If a numeric type’s bit width is less than 32-bits, the value appears in the low-order bits of the word. + 2 => crate::Literal::F16(f16::from_bits(low as u16)), 4 => crate::Literal::F32(f32::from_bits(low)), 8 => { inst.expect(5)?; diff --git a/naga/src/front/wgsl/error.rs b/naga/src/front/wgsl/error.rs index 7bdbf12d2c7..504c0a1f904 100644 --- a/naga/src/front/wgsl/error.rs +++ b/naga/src/front/wgsl/error.rs @@ -146,8 +146,6 @@ pub enum NumberError { Invalid, #[error("numeric literal not representable by target type")] NotRepresentable, - #[error("unimplemented f16 type")] - UnimplementedF16, } #[derive(Copy, Clone, Debug, PartialEq)] @@ -1008,19 +1006,18 @@ impl<'a> Error<'a> { )], }, Error::EnableExtensionNotEnabled { kind, span } => ParseError { - message: format!("`{}` enable-extension is not enabled", kind.to_ident()), + message: format!("the `{}` language extension is not enabled", kind.to_ident()), labels: vec![( span, format!( concat!( - "the `{}` enable-extension is needed for this functionality, ", - "but it is not currently enabled" + "the `{0}` language extension is needed for this functionality, ", + "but it is not currently enabled." ), kind.to_ident() ) .into(), )], - #[allow(irrefutable_let_patterns)] notes: if let EnableExtension::Unimplemented(kind) = kind { vec![format!( concat!( @@ -1032,7 +1029,12 @@ impl<'a> Error<'a> { kind.tracking_issue_num() )] } else { - vec![] + vec![ + format!( + "You can enable this extension by adding `enable {};` at the top of the shader.", + kind.to_ident() + ), + ] }, }, Error::LanguageExtensionNotYetImplemented { kind, span } => ParseError { diff --git a/naga/src/front/wgsl/lower/mod.rs b/naga/src/front/wgsl/lower/mod.rs index 7cd3ef90f81..14b969a685d 100644 --- a/naga/src/front/wgsl/lower/mod.rs +++ b/naga/src/front/wgsl/lower/mod.rs @@ -1886,6 +1886,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { let expr: Typed = match *expr { ast::Expression::Literal(literal) => { let literal = match literal { + ast::Literal::Number(Number::F16(f)) => crate::Literal::F16(f), ast::Literal::Number(Number::F32(f)) => crate::Literal::F32(f), ast::Literal::Number(Number::I32(i)) => crate::Literal::I32(i), ast::Literal::Number(Number::U32(u)) => crate::Literal::U32(u), diff --git a/naga/src/front/wgsl/parse/conv.rs b/naga/src/front/wgsl/parse/conv.rs index 00c19d877da..1e07ff39e6c 100644 --- a/naga/src/front/wgsl/parse/conv.rs +++ b/naga/src/front/wgsl/parse/conv.rs @@ -1,4 +1,7 @@ use super::Error; +use crate::front::wgsl::parse::directive::enable_extension::{ + EnableExtensions, ImplementedEnableExtension, +}; use crate::front::wgsl::Scalar; use crate::Span; @@ -112,10 +115,17 @@ pub fn map_storage_format(word: &str, span: Span) -> Result Option { +pub fn get_scalar_type( + enable_extensions: &EnableExtensions, + span: Span, + word: &str, +) -> Result, Error<'static>> { use crate::ScalarKind as Sk; - match word { - // "f16" => Some(Scalar { kind: Sk::Float, width: 2 }), + let scalar = match word { + "f16" => Some(Scalar { + kind: Sk::Float, + width: 2, + }), "f32" => Some(Scalar { kind: Sk::Float, width: 4, @@ -145,7 +155,18 @@ pub fn get_scalar_type(word: &str) -> Option { width: crate::BOOL_WIDTH, }), _ => None, + }; + + if matches!(scalar, Some(Scalar::F16)) + && !enable_extensions.contains(ImplementedEnableExtension::F16) + { + return Err(Error::EnableExtensionNotEnabled { + span, + kind: ImplementedEnableExtension::F16.into(), + }); } + + Ok(scalar) } pub fn map_derivative(word: &str) -> Option<(crate::DerivativeAxis, crate::DerivativeControl)> { diff --git a/naga/src/front/wgsl/parse/directive/enable_extension.rs b/naga/src/front/wgsl/parse/directive/enable_extension.rs index 147ec0b5e04..3efa3b52835 100644 --- a/naga/src/front/wgsl/parse/directive/enable_extension.rs +++ b/naga/src/front/wgsl/parse/directive/enable_extension.rs @@ -5,24 +5,29 @@ use crate::{front::wgsl::error::Error, Span}; /// Tracks the status of every enable-extension known to Naga. #[derive(Clone, Debug, Eq, PartialEq)] -pub struct EnableExtensions {} +pub struct EnableExtensions { + /// Whether `enable f16;` was written earlier in the shader module. + f16: bool, +} impl EnableExtensions { pub(crate) const fn empty() -> Self { - Self {} + Self { f16: false } } /// Add an enable-extension to the set requested by a module. - #[allow(unreachable_code)] pub(crate) fn add(&mut self, ext: ImplementedEnableExtension) { - let _field: &mut bool = match ext {}; - *_field = true; + let field = match ext { + ImplementedEnableExtension::F16 => &mut self.f16, + }; + *field = true; } /// Query whether an enable-extension tracked here has been requested. - #[allow(unused)] pub(crate) const fn contains(&self, ext: ImplementedEnableExtension) -> bool { - match ext {} + match ext { + ImplementedEnableExtension::F16 => self.f16, + } } } @@ -37,7 +42,6 @@ impl Default for EnableExtensions { /// WGSL spec.: #[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] pub enum EnableExtension { - #[allow(unused)] Implemented(ImplementedEnableExtension), Unimplemented(UnimplementedEnableExtension), } @@ -50,7 +54,7 @@ impl EnableExtension { /// Convert from a sentinel word in WGSL into its associated [`EnableExtension`], if possible. pub(crate) fn from_ident(word: &str, span: Span) -> Result> { Ok(match word { - Self::F16 => Self::Unimplemented(UnimplementedEnableExtension::F16), + Self::F16 => Self::Implemented(ImplementedEnableExtension::F16), Self::CLIP_DISTANCES => { Self::Unimplemented(UnimplementedEnableExtension::ClipDistances) } @@ -64,9 +68,10 @@ impl EnableExtension { /// Maps this [`EnableExtension`] into the sentinel word associated with it in WGSL. pub const fn to_ident(self) -> &'static str { match self { - Self::Implemented(kind) => match kind {}, + Self::Implemented(kind) => match kind { + ImplementedEnableExtension::F16 => Self::F16, + }, Self::Unimplemented(kind) => match kind { - UnimplementedEnableExtension::F16 => Self::F16, UnimplementedEnableExtension::ClipDistances => Self::CLIP_DISTANCES, UnimplementedEnableExtension::DualSourceBlending => Self::DUAL_SOURCE_BLENDING, }, @@ -76,17 +81,24 @@ impl EnableExtension { /// A variant of [`EnableExtension::Implemented`]. #[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] -pub enum ImplementedEnableExtension {} - -/// A variant of [`EnableExtension::Unimplemented`]. -#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] -pub enum UnimplementedEnableExtension { +pub enum ImplementedEnableExtension { /// Enables `f16`/`half` primitive support in all shader languages. /// /// In the WGSL standard, this corresponds to [`enable f16;`]. /// /// [`enable f16;`]: https://www.w3.org/TR/WGSL/#extension-f16 F16, +} + +impl From for EnableExtension { + fn from(value: ImplementedEnableExtension) -> Self { + Self::Implemented(value) + } +} + +/// A variant of [`EnableExtension::Unimplemented`]. +#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] +pub enum UnimplementedEnableExtension { /// Enables the `clip_distances` variable in WGSL. /// /// In the WGSL standard, this corresponds to [`enable clip_distances;`]. @@ -104,7 +116,6 @@ pub enum UnimplementedEnableExtension { impl UnimplementedEnableExtension { pub(crate) const fn tracking_issue_num(self) -> u16 { match self { - Self::F16 => 4384, Self::ClipDistances => 6236, Self::DualSourceBlending => 6402, } diff --git a/naga/src/front/wgsl/parse/lexer.rs b/naga/src/front/wgsl/parse/lexer.rs index d55720972e9..8d64f52ac6a 100644 --- a/naga/src/front/wgsl/parse/lexer.rs +++ b/naga/src/front/wgsl/parse/lexer.rs @@ -408,14 +408,17 @@ impl<'a> Lexer<'a> { /// Parses a generic scalar type, for example ``. pub(in crate::front::wgsl) fn next_scalar_generic(&mut self) -> Result> { self.expect_generic_paren('<')?; - let pair = match self.next() { + let (scalar, _span) = match self.next() { (Token::Word(word), span) => { - conv::get_scalar_type(word).ok_or(Error::UnknownScalarType(span)) + conv::get_scalar_type(&self.enable_extensions, span, word)? + .map(|scalar| (scalar, span)) + .ok_or(Error::UnknownScalarType(span))? } - (_, span) => Err(Error::UnknownScalarType(span)), - }?; + (_, span) => return Err(Error::UnknownScalarType(span)), + }; + self.expect_generic_paren('>')?; - Ok(pair) + Ok(scalar) } /// Parses a generic scalar type, for example ``. @@ -425,14 +428,18 @@ impl<'a> Lexer<'a> { &mut self, ) -> Result<(Scalar, Span), Error<'a>> { self.expect_generic_paren('<')?; - let pair = match self.next() { - (Token::Word(word), span) => conv::get_scalar_type(word) - .map(|scalar| (scalar, span)) - .ok_or(Error::UnknownScalarType(span)), - (_, span) => Err(Error::UnknownScalarType(span)), - }?; + + let (scalar, span) = match self.next() { + (Token::Word(word), span) => { + conv::get_scalar_type(&self.enable_extensions, span, word)? + .map(|scalar| (scalar, span)) + .ok_or(Error::UnknownScalarType(span))? + } + (_, span) => return Err(Error::UnknownScalarType(span)), + }; + self.expect_generic_paren('>')?; - Ok(pair) + Ok((scalar, span)) } pub(in crate::front::wgsl) fn next_storage_access( @@ -493,6 +500,7 @@ fn sub_test(source: &str, expected_tokens: &[Token]) { #[test] fn test_numbers() { + use half::f16; // WGSL spec examples // // decimal integer @@ -517,14 +525,16 @@ fn test_numbers() { Token::Number(Ok(Number::AbstractFloat(0.01))), Token::Number(Ok(Number::AbstractFloat(12.34))), Token::Number(Ok(Number::F32(0.))), - Token::Number(Err(NumberError::UnimplementedF16)), + Token::Number(Ok(Number::F16(f16::from_f32(0.)))), Token::Number(Ok(Number::AbstractFloat(0.001))), Token::Number(Ok(Number::AbstractFloat(43.75))), Token::Number(Ok(Number::F32(16.))), Token::Number(Ok(Number::AbstractFloat(0.1875))), - Token::Number(Err(NumberError::UnimplementedF16)), + // https://github.com/gfx-rs/wgpu/issues/7046 + Token::Number(Err(NumberError::NotRepresentable)), // Should be 0.75 Token::Number(Ok(Number::AbstractFloat(0.12109375))), - Token::Number(Err(NumberError::UnimplementedF16)), + // https://github.com/gfx-rs/wgpu/issues/7046 + Token::Number(Err(NumberError::NotRepresentable)), // Should be 12.5 ], ); diff --git a/naga/src/front/wgsl/parse/mod.rs b/naga/src/front/wgsl/parse/mod.rs index 23b2984e75a..f4115014b8b 100644 --- a/naga/src/front/wgsl/parse/mod.rs +++ b/naga/src/front/wgsl/parse/mod.rs @@ -3,9 +3,7 @@ use crate::diagnostic_filter::{ ShouldConflictOnFullDuplicate, StandardFilterableTriggeringRule, }; use crate::front::wgsl::error::{DiagnosticAttributeNotSupportedPosition, Error, ExpectedToken}; -use crate::front::wgsl::parse::directive::enable_extension::{ - EnableExtension, EnableExtensions, UnimplementedEnableExtension, -}; +use crate::front::wgsl::parse::directive::enable_extension::{EnableExtension, EnableExtensions}; use crate::front::wgsl::parse::directive::language_extension::LanguageExtension; use crate::front::wgsl::parse::directive::DirectiveKind; use crate::front::wgsl::parse::lexer::{Lexer, Token}; @@ -346,7 +344,7 @@ impl Parser { span: Span, ctx: &mut ExpressionContext<'a, '_, '_>, ) -> Result>, Error<'a>> { - if let Some(scalar) = conv::get_scalar_type(word) { + if let Some(scalar) = conv::get_scalar_type(&lexer.enable_extensions, span, word)? { return Ok(Some(ast::ConstructorType::Scalar(scalar))); } @@ -375,6 +373,13 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "vec2h" => { + return Ok(Some(ast::ConstructorType::Vector { + size: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "vec3" => ast::ConstructorType::PartialVector { size: crate::VectorSize::Tri, }, @@ -399,6 +404,13 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "vec3h" => { + return Ok(Some(ast::ConstructorType::Vector { + size: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "vec4" => ast::ConstructorType::PartialVector { size: crate::VectorSize::Quad, }, @@ -423,6 +435,13 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "vec4h" => { + return Ok(Some(ast::ConstructorType::Vector { + size: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat2x2" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Bi, rows: crate::VectorSize::Bi, @@ -435,6 +454,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat2x2h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Bi, + rows: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat2x3" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Bi, rows: crate::VectorSize::Tri, @@ -447,6 +474,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat2x3h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Bi, + rows: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat2x4" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Bi, rows: crate::VectorSize::Quad, @@ -459,6 +494,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat2x4h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Bi, + rows: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat3x2" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Tri, rows: crate::VectorSize::Bi, @@ -471,6 +514,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat3x2h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Tri, + rows: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat3x3" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Tri, rows: crate::VectorSize::Tri, @@ -483,6 +534,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat3x3h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Tri, + rows: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat3x4" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Tri, rows: crate::VectorSize::Quad, @@ -495,6 +554,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat3x4h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Tri, + rows: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat4x2" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Quad, rows: crate::VectorSize::Bi, @@ -507,6 +574,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat4x2h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat4x3" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Quad, rows: crate::VectorSize::Tri, @@ -519,6 +594,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat4x3h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat4x4" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Quad, rows: crate::VectorSize::Quad, @@ -531,6 +614,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat4x4h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "array" => ast::ConstructorType::PartialArray, "atomic" | "binding_array" @@ -725,15 +816,17 @@ impl Parser { } (Token::Number(res), span) => { let _ = lexer.next(); - let num = res.map_err(|err| match err { - super::error::NumberError::UnimplementedF16 => { - Error::EnableExtensionNotEnabled { - kind: EnableExtension::Unimplemented(UnimplementedEnableExtension::F16), + let num = res.map_err(|err| Error::BadNumber(span, err))?; + + if let Some(enable_extension) = num.required_enable_extension() { + if !lexer.enable_extensions.contains(enable_extension) { + return Err(Error::EnableExtensionNotEnabled { + kind: enable_extension.into(), span, - } + }); } - err => Error::BadNumber(span, err), - })?; + } + ast::Expression::Literal(ast::Literal::Number(num)) } (Token::Word("RAY_FLAG_NONE"), _) => { @@ -1268,9 +1361,10 @@ impl Parser { &mut self, lexer: &mut Lexer<'a>, word: &'a str, + span: Span, ctx: &mut ExpressionContext<'a, '_, '_>, ) -> Result>, Error<'a>> { - if let Some(scalar) = conv::get_scalar_type(word) { + if let Some(scalar) = conv::get_scalar_type(&lexer.enable_extensions, span, word)? { return Ok(Some(ast::Type::Scalar(scalar))); } @@ -1298,6 +1392,11 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "vec2h" => ast::Type::Vector { + size: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "vec3" => { let (ty, ty_span) = self.singular_generic(lexer, ctx)?; ast::Type::Vector { @@ -1321,6 +1420,11 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "vec3h" => ast::Type::Vector { + size: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "vec4" => { let (ty, ty_span) = self.singular_generic(lexer, ctx)?; ast::Type::Vector { @@ -1344,6 +1448,11 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "vec4h" => ast::Type::Vector { + size: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat2x2" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Bi)? } @@ -1353,6 +1462,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat2x2h" => ast::Type::Matrix { + columns: crate::VectorSize::Bi, + rows: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat2x3" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Tri)? } @@ -1362,6 +1477,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat2x3h" => ast::Type::Matrix { + columns: crate::VectorSize::Bi, + rows: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat2x4" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Quad)? } @@ -1371,6 +1492,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat2x4h" => ast::Type::Matrix { + columns: crate::VectorSize::Bi, + rows: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat3x2" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Bi)? } @@ -1380,6 +1507,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat3x2h" => ast::Type::Matrix { + columns: crate::VectorSize::Tri, + rows: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat3x3" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Tri)? } @@ -1389,6 +1522,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat3x3h" => ast::Type::Matrix { + columns: crate::VectorSize::Tri, + rows: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat3x4" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Quad)? } @@ -1398,6 +1537,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat3x4h" => ast::Type::Matrix { + columns: crate::VectorSize::Tri, + rows: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat4x2" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Bi)? } @@ -1407,6 +1552,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat4x2h" => ast::Type::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat4x3" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Tri)? } @@ -1416,6 +1567,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat4x3h" => ast::Type::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat4x4" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Quad)? } @@ -1425,6 +1582,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat4x4h" => ast::Type::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "atomic" => { let scalar = lexer.next_scalar_generic()?; ast::Type::Atomic(scalar) @@ -1681,7 +1844,7 @@ impl Parser { let (name, span) = lexer.next_ident_with_span()?; - let ty = match this.type_decl_impl(lexer, name, ctx)? { + let ty = match this.type_decl_impl(lexer, name, span, ctx)? { Some(ty) => ty, None => { ctx.unresolved.insert(ast::Dependency { diff --git a/naga/src/front/wgsl/parse/number.rs b/naga/src/front/wgsl/parse/number.rs index 72795de6b42..87dfb0a02cb 100644 --- a/naga/src/front/wgsl/parse/number.rs +++ b/naga/src/front/wgsl/parse/number.rs @@ -1,5 +1,7 @@ use crate::front::wgsl::error::NumberError; +use crate::front::wgsl::parse::directive::enable_extension::ImplementedEnableExtension; use crate::front::wgsl::parse::lexer::Token; +use half::f16; /// When using this type assume no Abstract Int/Float for now #[derive(Copy, Clone, Debug, PartialEq)] @@ -16,12 +18,23 @@ pub enum Number { I64(i64), /// Concrete u64 U64(u64), + /// Concrete f16 + F16(f16), /// Concrete f32 F32(f32), /// Concrete f64 F64(f64), } +impl Number { + pub(super) const fn required_enable_extension(&self) -> Option { + match *self { + Number::F16(_) => Some(ImplementedEnableExtension::F16), + _ => None, + } + } +} + pub(in crate::front::wgsl) fn consume_number(input: &str) -> (Token<'_>, &str) { let (result, rest) = parse(input); (Token::Number(result), rest) @@ -367,7 +380,8 @@ fn parse_hex_float(input: &str, kind: Option) -> Result Err(NumberError::NotRepresentable), }, - Some(FloatKind::F16) => Err(NumberError::UnimplementedF16), + // TODO: f16 is not supported by hexf_parse + Some(FloatKind::F16) => Err(NumberError::NotRepresentable), Some(FloatKind::F32) => match hexf_parse::parse_hexf32(input, false) { Ok(num) => Ok(Number::F32(num)), // can only be ParseHexfErrorKind::Inexact but we can't check since it's private @@ -403,7 +417,12 @@ fn parse_dec_float(input: &str, kind: Option) -> Result Err(NumberError::UnimplementedF16), + Some(FloatKind::F16) => { + let num = input.parse::().unwrap(); // will never fail + num.is_finite() + .then_some(Number::F16(num)) + .ok_or(NumberError::NotRepresentable) + } } } diff --git a/naga/src/lib.rs b/naga/src/lib.rs index 2e917d34e04..d78bdfb7afb 100644 --- a/naga/src/lib.rs +++ b/naga/src/lib.rs @@ -271,6 +271,7 @@ pub use crate::span::{SourceLocation, Span, SpanContext, WithSpan}; #[cfg(feature = "arbitrary")] use arbitrary::Arbitrary; use diagnostic_filter::DiagnosticFilterNode; +use half::f16; #[cfg(feature = "deserialize")] use serde::Deserialize; #[cfg(feature = "serialize")] @@ -895,6 +896,7 @@ pub enum Literal { F64(f64), /// May not be NaN or infinity. F32(f32), + F16(f16), U32(u32), I32(i32), U64(u64), diff --git a/naga/src/proc/constant_evaluator.rs b/naga/src/proc/constant_evaluator.rs index be7c503a017..ed713bf9210 100644 --- a/naga/src/proc/constant_evaluator.rs +++ b/naga/src/proc/constant_evaluator.rs @@ -1,6 +1,8 @@ use std::iter; use arrayvec::ArrayVec; +use half::f16; +use num_traits::{real::Real, FromPrimitive, One, ToPrimitive, Zero}; use crate::{ arena::{Arena, Handle, HandleVec, UniqueArena}, @@ -199,6 +201,7 @@ gen_component_wise_extractor! { literals: [ AbstractFloat => AbstractFloat: f64, F32 => F32: f32, + F16 => F16: f16, AbstractInt => AbstractInt: i64, U32 => U32: u32, I32 => I32: i32, @@ -219,6 +222,7 @@ gen_component_wise_extractor! { literals: [ AbstractFloat => Abstract: f64, F32 => F32: f32, + F16 => F16: f16, ], scalar_kinds: [ Float, @@ -244,6 +248,7 @@ gen_component_wise_extractor! { AbstractFloat => AbstractFloat: f64, AbstractInt => AbstractInt: i64, F32 => F32: f32, + F16 => F16: f16, I32 => I32: i32, ], scalar_kinds: [ @@ -1103,6 +1108,7 @@ impl<'a> ConstantEvaluator<'a> { component_wise_scalar(self, span, [arg], |args| match args { Scalar::AbstractFloat([e]) => Ok(Scalar::AbstractFloat([e.abs()])), Scalar::F32([e]) => Ok(Scalar::F32([e.abs()])), + Scalar::F16([e]) => Ok(Scalar::F16([e.abs()])), Scalar::AbstractInt([e]) => Ok(Scalar::AbstractInt([e.abs()])), Scalar::I32([e]) => Ok(Scalar::I32([e.wrapping_abs()])), Scalar::U32([e]) => Ok(Scalar::U32([e])), // TODO: just re-use the expression, ezpz @@ -1134,9 +1140,13 @@ impl<'a> ConstantEvaluator<'a> { } ) } - crate::MathFunction::Saturate => { - component_wise_float!(self, span, [arg], |e| { Ok([e.clamp(0., 1.)]) }) - } + crate::MathFunction::Saturate => component_wise_float(self, span, [arg], |e| match e { + Float::F16([e]) => Ok(Float::F16( + [e.clamp(f16::from_f32(0.0), f16::from_f32(1.0))], + )), + Float::F32([e]) => Ok(Float::F32([e.clamp(0., 1.)])), + Float::Abstract([e]) => Ok(Float::Abstract([e.clamp(0., 1.)])), + }), // trigonometry crate::MathFunction::Cos => { @@ -1213,6 +1223,9 @@ impl<'a> ConstantEvaluator<'a> { component_wise_float(self, span, [arg], |e| match e { Float::Abstract([e]) => Ok(Float::Abstract([round_ties_even(e)])), Float::F32([e]) => Ok(Float::F32([(round_ties_even(e as f64) as f32)])), + Float::F16([e]) => { + Ok(Float::F16([(f16::from_f64(round_ties_even(f64::from(e))))])) + } }) } crate::MathFunction::Fract => { @@ -1258,15 +1271,27 @@ impl<'a> ConstantEvaluator<'a> { ) } crate::MathFunction::Step => { - component_wise_float!(self, span, [arg, arg1.unwrap()], |edge, x| { - Ok([if edge <= x { 1.0 } else { 0.0 }]) + component_wise_float(self, span, [arg, arg1.unwrap()], |x| match x { + Float::Abstract([edge, x]) => { + Ok(Float::Abstract([if edge <= x { 1.0 } else { 0.0 }])) + } + Float::F32([edge, x]) => Ok(Float::F32([if edge <= x { 1.0 } else { 0.0 }])), + Float::F16([edge, x]) => Ok(Float::F16([if edge <= x { + f16::one() + } else { + f16::zero() + }])), }) } crate::MathFunction::Sqrt => { component_wise_float!(self, span, [arg], |e| { Ok([e.sqrt()]) }) } crate::MathFunction::InverseSqrt => { - component_wise_float!(self, span, [arg], |e| { Ok([1. / e.sqrt()]) }) + component_wise_float(self, span, [arg], |e| match e { + Float::Abstract([e]) => Ok(Float::Abstract([1. / e.sqrt()])), + Float::F32([e]) => Ok(Float::F32([1. / e.sqrt()])), + Float::F16([e]) => Ok(Float::F16([f16::from_f32(1. / f32::from(e).sqrt())])), + }) } // bits @@ -1561,6 +1586,7 @@ impl<'a> ConstantEvaluator<'a> { Literal::I32(v) => v, Literal::U32(v) => v as i32, Literal::F32(v) => v as i32, + Literal::F16(v) => f16::to_i32(&v).unwrap(), //Only None on NaN or Inf Literal::Bool(v) => v as i32, Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => { return make_error(); @@ -1572,6 +1598,7 @@ impl<'a> ConstantEvaluator<'a> { Literal::I32(v) => v as u32, Literal::U32(v) => v, Literal::F32(v) => v as u32, + Literal::F16(v) => f16::to_u32(&v).unwrap(), //Only None on NaN or Inf Literal::Bool(v) => v as u32, Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => { return make_error(); @@ -1587,6 +1614,7 @@ impl<'a> ConstantEvaluator<'a> { Literal::F64(v) => v as i64, Literal::I64(v) => v, Literal::U64(v) => v as i64, + Literal::F16(v) => f16::to_i64(&v).unwrap(), //Only None on NaN or Inf Literal::AbstractInt(v) => i64::try_from_abstract(v)?, Literal::AbstractFloat(v) => i64::try_from_abstract(v)?, }), @@ -1598,9 +1626,22 @@ impl<'a> ConstantEvaluator<'a> { Literal::F64(v) => v as u64, Literal::I64(v) => v as u64, Literal::U64(v) => v, + Literal::F16(v) => f16::to_u64(&v).unwrap(), //Only None on NaN or Inf Literal::AbstractInt(v) => u64::try_from_abstract(v)?, Literal::AbstractFloat(v) => u64::try_from_abstract(v)?, }), + Sc::F16 => Literal::F16(match literal { + Literal::F16(v) => v, + Literal::F32(v) => f16::from_f32(v), + Literal::F64(v) => f16::from_f64(v), + Literal::Bool(v) => f16::from_u32(v as u32).unwrap(), + Literal::I64(v) => f16::from_i64(v).unwrap(), + Literal::U64(v) => f16::from_u64(v).unwrap(), + Literal::I32(v) => f16::from_i32(v).unwrap(), + Literal::U32(v) => f16::from_u32(v).unwrap(), + Literal::AbstractFloat(v) => f16::try_from_abstract(v)?, + Literal::AbstractInt(v) => f16::try_from_abstract(v)?, + }), Sc::F32 => Literal::F32(match literal { Literal::I32(v) => v as f32, Literal::U32(v) => v as f32, @@ -1609,12 +1650,14 @@ impl<'a> ConstantEvaluator<'a> { Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => { return make_error(); } + Literal::F16(v) => f16::to_f32(v), Literal::AbstractInt(v) => f32::try_from_abstract(v)?, Literal::AbstractFloat(v) => f32::try_from_abstract(v)?, }), Sc::F64 => Literal::F64(match literal { Literal::I32(v) => v as f64, Literal::U32(v) => v as f64, + Literal::F16(v) => f16::to_f64(v), Literal::F32(v) => v as f64, Literal::F64(v) => v, Literal::Bool(v) => v as u32 as f64, @@ -1626,6 +1669,7 @@ impl<'a> ConstantEvaluator<'a> { Literal::I32(v) => v != 0, Literal::U32(v) => v != 0, Literal::F32(v) => v != 0.0, + Literal::F16(v) => v != f16::zero(), Literal::Bool(v) => v, Literal::F64(_) | Literal::I64(_) @@ -1780,6 +1824,7 @@ impl<'a> ConstantEvaluator<'a> { Literal::I32(v) => Literal::I32(v.wrapping_neg()), Literal::I64(v) => Literal::I64(v.wrapping_neg()), Literal::F32(v) => Literal::F32(-v), + Literal::F16(v) => Literal::F16(-v), Literal::AbstractInt(v) => Literal::AbstractInt(v.wrapping_neg()), Literal::AbstractFloat(v) => Literal::AbstractFloat(-v), _ => return Err(ConstantEvaluatorError::InvalidUnaryOpArg), @@ -1928,6 +1973,14 @@ impl<'a> ConstantEvaluator<'a> { _ => return Err(ConstantEvaluatorError::InvalidBinaryOpArgs), }) } + (Literal::F16(a), Literal::F16(b)) => Literal::F16(match op { + BinaryOperator::Add => a + b, + BinaryOperator::Subtract => a - b, + BinaryOperator::Multiply => a * b, + BinaryOperator::Divide => a / b, + BinaryOperator::Modulo => a % b, + _ => return Err(ConstantEvaluatorError::InvalidBinaryOpArgs), + }), (Literal::AbstractInt(a), Literal::AbstractInt(b)) => { Literal::AbstractInt(match op { BinaryOperator::Add => a.checked_add(b).ok_or_else(|| { @@ -2532,6 +2585,32 @@ impl TryFromAbstract for u64 { } } +impl TryFromAbstract for f16 { + fn try_from_abstract(value: f64) -> Result { + let f = f16::from_f64(value); + if f.is_infinite() { + return Err(ConstantEvaluatorError::AutomaticConversionLossy { + value: format!("{value:?}"), + to_type: "f16", + }); + } + Ok(f) + } +} + +impl TryFromAbstract for f16 { + fn try_from_abstract(value: i64) -> Result { + let f = f16::from_i64(value); + if f.is_none() { + return Err(ConstantEvaluatorError::AutomaticConversionLossy { + value: format!("{value:?}"), + to_type: "f16", + }); + } + Ok(f.unwrap()) + } +} + #[cfg(test)] mod tests { use std::vec; diff --git a/naga/src/proc/mod.rs b/naga/src/proc/mod.rs index a6a19f70eda..ef095cff399 100644 --- a/naga/src/proc/mod.rs +++ b/naga/src/proc/mod.rs @@ -79,6 +79,7 @@ impl From for super::Scalar { pub enum HashableLiteral { F64(u64), F32(u32), + F16(u16), U32(u32), I32(i32), U64(u64), @@ -93,6 +94,7 @@ impl From for HashableLiteral { match l { crate::Literal::F64(v) => Self::F64(v.to_bits()), crate::Literal::F32(v) => Self::F32(v.to_bits()), + crate::Literal::F16(v) => Self::F16(v.to_bits()), crate::Literal::U32(v) => Self::U32(v), crate::Literal::I32(v) => Self::I32(v), crate::Literal::U64(v) => Self::U64(v), @@ -131,6 +133,7 @@ impl crate::Literal { match *self { Self::F64(_) | Self::I64(_) | Self::U64(_) => 8, Self::F32(_) | Self::U32(_) | Self::I32(_) => 4, + Self::F16(_) => 2, Self::Bool(_) => crate::BOOL_WIDTH, Self::AbstractInt(_) | Self::AbstractFloat(_) => crate::ABSTRACT_WIDTH, } @@ -139,6 +142,7 @@ impl crate::Literal { match *self { Self::F64(_) => crate::Scalar::F64, Self::F32(_) => crate::Scalar::F32, + Self::F16(_) => crate::Scalar::F16, Self::U32(_) => crate::Scalar::U32, Self::I32(_) => crate::Scalar::I32, Self::U64(_) => crate::Scalar::U64, diff --git a/naga/src/proc/type_methods.rs b/naga/src/proc/type_methods.rs index 351f4c53686..9b44f05db54 100644 --- a/naga/src/proc/type_methods.rs +++ b/naga/src/proc/type_methods.rs @@ -20,6 +20,10 @@ impl crate::ScalarKind { } impl crate::Scalar { + pub const F16: Self = Self { + kind: crate::ScalarKind::Float, + width: 2, + }; pub const I32: Self = Self { kind: crate::ScalarKind::Sint, width: 4, diff --git a/naga/src/valid/expression.rs b/naga/src/valid/expression.rs index b0c54a3df4a..15e01b661ce 100644 --- a/naga/src/valid/expression.rs +++ b/naga/src/valid/expression.rs @@ -1767,7 +1767,7 @@ impl super::Validator { } pub fn validate_literal(&self, literal: crate::Literal) -> Result<(), LiteralError> { - self.check_width(literal.scalar())?; + let _ = self.check_width(literal.scalar())?; check_literal_value(literal)?; Ok(()) diff --git a/naga/src/valid/interface.rs b/naga/src/valid/interface.rs index a6fff61d826..3ae16b232bb 100644 --- a/naga/src/valid/interface.rs +++ b/naga/src/valid/interface.rs @@ -1,6 +1,6 @@ use super::{ analyzer::{FunctionInfo, GlobalUse}, - Capabilities, Disalignment, FunctionError, ModuleInfo, + Capabilities, Disalignment, FunctionError, ModuleInfo, PushConstantError, }; use crate::arena::{Handle, UniqueArena}; @@ -39,6 +39,8 @@ pub enum GlobalVariableError { InitializerNotAllowed(crate::AddressSpace), #[error("Storage address space doesn't support write-only access")] StorageAddressSpaceWriteOnlyNotSupported, + #[error("Type is not valid for use as a push constant")] + InvalidPushConstantType(#[source] PushConstantError), } #[derive(Clone, Debug, thiserror::Error)] @@ -569,6 +571,9 @@ impl super::Validator { Capabilities::PUSH_CONSTANT, )); } + if let Err(ref err) = type_info.push_constant_compatibility { + return Err(GlobalVariableError::InvalidPushConstantType(err.clone())); + } ( TypeFlags::DATA | TypeFlags::COPY diff --git a/naga/src/valid/mod.rs b/naga/src/valid/mod.rs index 8560404703a..3ffed6a8e42 100644 --- a/naga/src/valid/mod.rs +++ b/naga/src/valid/mod.rs @@ -28,7 +28,7 @@ pub use expression::{check_literal_value, LiteralError}; pub use expression::{ConstExpressionError, ExpressionError}; pub use function::{CallError, FunctionError, LocalVariableError}; pub use interface::{EntryPointError, GlobalVariableError, VaryingError}; -pub use r#type::{Disalignment, TypeError, TypeFlags, WidthError}; +pub use r#type::{Disalignment, PushConstantError, TypeError, TypeFlags, WidthError}; use self::handles::InvalidHandleError; @@ -158,6 +158,8 @@ bitflags::bitflags! { const TEXTURE_ATOMIC = 1 << 23; /// Support for atomic operations on 64-bit images. const TEXTURE_INT64_ATOMIC = 1 << 24; + /// Support for 16-bit floating-point types. + const SHADER_FLOAT16 = 1 << 25; } } diff --git a/naga/src/valid/type.rs b/naga/src/valid/type.rs index 8c6825b842e..837e00e2399 100644 --- a/naga/src/valid/type.rs +++ b/naga/src/valid/type.rs @@ -167,8 +167,16 @@ pub enum WidthError { Abstract, } +#[derive(Clone, Debug, thiserror::Error)] +#[cfg_attr(test, derive(PartialEq))] +pub enum PushConstantError { + #[error("The scalar type {0:?} is not supported in push constants")] + InvalidScalar(crate::Scalar), +} + // Only makes sense if `flags.contains(HOST_SHAREABLE)` type LayoutCompatibility = Result, Disalignment)>; +type PushConstantCompatibility = Result<(), PushConstantError>; fn check_member_layout( accum: &mut LayoutCompatibility, @@ -219,6 +227,7 @@ pub(super) struct TypeInfo { pub flags: TypeFlags, pub uniform_layout: LayoutCompatibility, pub storage_layout: LayoutCompatibility, + pub push_constant_compatibility: PushConstantCompatibility, } impl TypeInfo { @@ -227,6 +236,7 @@ impl TypeInfo { flags: TypeFlags::empty(), uniform_layout: Ok(Alignment::ONE), storage_layout: Ok(Alignment::ONE), + push_constant_compatibility: Ok(()), } } @@ -235,6 +245,7 @@ impl TypeInfo { flags, uniform_layout: Ok(alignment), storage_layout: Ok(alignment), + push_constant_compatibility: Ok(()), } } } @@ -248,11 +259,15 @@ impl super::Validator { } } - pub(super) const fn check_width(&self, scalar: crate::Scalar) -> Result<(), WidthError> { + pub(super) const fn check_width( + &self, + scalar: crate::Scalar, + ) -> Result { + let mut push_constant_compatibility = Ok(()); let good = match scalar.kind { crate::ScalarKind::Bool => scalar.width == crate::BOOL_WIDTH, - crate::ScalarKind::Float => { - if scalar.width == 8 { + crate::ScalarKind::Float => match scalar.width { + 8 => { if !self.capabilities.contains(Capabilities::FLOAT64) { return Err(WidthError::MissingCapability { name: "f64", @@ -260,10 +275,21 @@ impl super::Validator { }); } true - } else { - scalar.width == 4 } - } + 2 => { + if !self.capabilities.contains(Capabilities::SHADER_FLOAT16) { + return Err(WidthError::MissingCapability { + name: "f16", + flag: "FLOAT16", + }); + } + + push_constant_compatibility = Err(PushConstantError::InvalidScalar(scalar)); + + true + } + _ => scalar.width == 4, + }, crate::ScalarKind::Sint => { if scalar.width == 8 { if !self.capabilities.contains(Capabilities::SHADER_INT64) { @@ -295,7 +321,7 @@ impl super::Validator { } }; if good { - Ok(()) + Ok(push_constant_compatibility) } else { Err(WidthError::Invalid(scalar.kind, scalar.width)) } @@ -315,13 +341,13 @@ impl super::Validator { use crate::TypeInner as Ti; Ok(match gctx.types[handle].inner { Ti::Scalar(scalar) => { - self.check_width(scalar)?; + let push_constant_compatibility = self.check_width(scalar)?; let shareable = if scalar.kind.is_numeric() { TypeFlags::IO_SHAREABLE | TypeFlags::HOST_SHAREABLE } else { TypeFlags::empty() }; - TypeInfo::new( + let mut type_info = TypeInfo::new( TypeFlags::DATA | TypeFlags::SIZED | TypeFlags::COPY @@ -330,16 +356,18 @@ impl super::Validator { | TypeFlags::CREATION_RESOLVED | shareable, Alignment::from_width(scalar.width), - ) + ); + type_info.push_constant_compatibility = push_constant_compatibility; + type_info } Ti::Vector { size, scalar } => { - self.check_width(scalar)?; + let push_constant_compatibility = self.check_width(scalar)?; let shareable = if scalar.kind.is_numeric() { TypeFlags::IO_SHAREABLE | TypeFlags::HOST_SHAREABLE } else { TypeFlags::empty() }; - TypeInfo::new( + let mut type_info = TypeInfo::new( TypeFlags::DATA | TypeFlags::SIZED | TypeFlags::COPY @@ -348,7 +376,9 @@ impl super::Validator { | TypeFlags::CREATION_RESOLVED | shareable, Alignment::from(size) * Alignment::from_width(scalar.width), - ) + ); + type_info.push_constant_compatibility = push_constant_compatibility; + type_info } Ti::Matrix { columns: _, @@ -358,8 +388,8 @@ impl super::Validator { if scalar.kind != crate::ScalarKind::Float { return Err(TypeError::MatrixElementNotFloat); } - self.check_width(scalar)?; - TypeInfo::new( + let push_constant_compatibility = self.check_width(scalar)?; + let mut type_info = TypeInfo::new( TypeFlags::DATA | TypeFlags::SIZED | TypeFlags::COPY @@ -368,7 +398,9 @@ impl super::Validator { | TypeFlags::CONSTRUCTIBLE | TypeFlags::CREATION_RESOLVED, Alignment::from(rows) * Alignment::from_width(scalar.width), - ) + ); + type_info.push_constant_compatibility = push_constant_compatibility; + type_info } Ti::Atomic(scalar) => { match scalar { @@ -463,7 +495,7 @@ impl super::Validator { // However, some cases are trivial: All our implicit base types // are DATA and SIZED, so we can never return // `InvalidPointerBase` or `InvalidPointerToUnsized`. - self.check_width(scalar)?; + let _ = self.check_width(scalar)?; // `Validator::validate_function` actually checks the address // space of pointer arguments explicitly before checking the @@ -549,6 +581,7 @@ impl super::Validator { flags: base_info.flags & type_info_mask, uniform_layout, storage_layout, + push_constant_compatibility: base_info.push_constant_compatibility.clone(), } } Ti::Struct { ref members, span } => { @@ -629,6 +662,10 @@ impl super::Validator { base_info.storage_layout, handle, ); + if base_info.push_constant_compatibility.is_err() { + ti.push_constant_compatibility = + base_info.push_constant_compatibility.clone(); + } // Validate rule: If a structure member itself has a structure type S, // then the number of bytes between the start of that member and diff --git a/naga/tests/example_wgsl.rs b/naga/tests/example_wgsl.rs index 115d8769a7f..cc6a942a096 100644 --- a/naga/tests/example_wgsl.rs +++ b/naga/tests/example_wgsl.rs @@ -1,55 +1,51 @@ #![cfg(feature = "wgsl-in")] use naga::{front::wgsl, valid::Validator}; -use std::{fs, path::PathBuf}; +use std::{ffi::OsStr, fs, path::Path}; /// Runs through all example shaders and ensures they are valid wgsl. #[test] pub fn parse_example_wgsl() { - let read_dir = match PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("examples") - .read_dir() - { - Ok(iter) => iter, - Err(e) => { - log::error!("Unable to open the examples folder: {:?}", e); - return; - } - }; - for example_entry in read_dir { - let read_files = match example_entry { - Ok(dir_entry) => match dir_entry.path().read_dir() { - Ok(iter) => iter, - Err(_) => continue, - }, - Err(e) => { - log::warn!("Skipping example: {:?}", e); - continue; - } + let example_path = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .unwrap() + .join("examples"); + + println!("Looking for examples in {}", example_path.display()); + + let mut example_paths = Vec::new(); + for example_entry in walkdir::WalkDir::new(example_path) { + let Ok(dir_entry) = example_entry else { + continue; }; - for file_entry in read_files { - let shader = match file_entry { - Ok(entry) => match entry.path().extension() { - Some(ostr) if ostr == "wgsl" => { - println!("Validating {:?}", entry.path()); - fs::read_to_string(entry.path()).unwrap_or_default() - } - _ => continue, - }, - Err(e) => { - log::warn!("Skipping file: {:?}", e); - continue; - } - }; - - let module = wgsl::parse_str(&shader).unwrap(); - //TODO: re-use the validator - Validator::new( - naga::valid::ValidationFlags::all(), - naga::valid::Capabilities::all(), - ) - .validate(&module) - .unwrap(); + + if !dir_entry.file_type().is_file() { + continue; + } + + let path = dir_entry.path(); + + if path.extension().map(OsStr::to_string_lossy).as_deref() == Some("wgsl") { + example_paths.push(path.to_path_buf()); } } + + assert_ne!(example_paths.len(), 0, "No examples found!"); + + println!("Found {} examples", example_paths.len()); + + for example_path in example_paths { + println!("\tParsing {}", example_path.display()); + + let shader = fs::read_to_string(&example_path).unwrap(); + + let module = wgsl::parse_str(&shader).unwrap(); + //TODO: re-use the validator + Validator::new( + naga::valid::ValidationFlags::all(), + naga::valid::Capabilities::all(), + ) + .validate(&module) + .unwrap(); + } } diff --git a/naga/tests/in/extra.wgsl b/naga/tests/in/extra.wgsl index ef68f4aa80e..1a7ab91ff56 100644 --- a/naga/tests/in/extra.wgsl +++ b/naga/tests/in/extra.wgsl @@ -1,6 +1,6 @@ struct PushConstants { index: u32, - double: vec2, + double: vec2, } var pc: PushConstants; diff --git a/naga/tests/in/f16.param.ron b/naga/tests/in/f16.param.ron new file mode 100644 index 00000000000..cf105af810b --- /dev/null +++ b/naga/tests/in/f16.param.ron @@ -0,0 +1,22 @@ +( + god_mode: true, + spv: ( + version: (1, 0), + ), + hlsl: ( + shader_model: V6_2, + binding_map: {}, + fake_missing_bindings: true, + special_constants_binding: Some((space: 1, register: 0)), + push_constants_target: Some((space: 0, register: 0)), + zero_initialize_workgroup_memory: true, + ), + msl: ( + lang_version: (1, 0), + per_entry_point_map: {}, + inline_samplers: [], + spirv_cross_compatibility: false, + fake_missing_bindings: true, + zero_initialize_workgroup_memory: true, + ), +) diff --git a/naga/tests/in/f16.wgsl b/naga/tests/in/f16.wgsl new file mode 100644 index 00000000000..dbef9aedfc9 --- /dev/null +++ b/naga/tests/in/f16.wgsl @@ -0,0 +1,127 @@ +enable f16; + +var private_variable: f16 = 1h; +const constant_variable: f16 = f16(15.2); + +struct UniformCompatible { + // Other types + val_u32: u32, + val_i32: i32, + val_f32: f32, + + // f16 + val_f16: f16, + val_f16_2: vec2, + val_f16_3: vec3, + val_f16_4: vec4, + final_value: f16, + + val_mat2x2: mat2x2, + val_mat2x3: mat2x3, + val_mat2x4: mat2x4, + val_mat3x2: mat3x2, + val_mat3x3: mat3x3, + val_mat3x4: mat3x4, + val_mat4x2: mat4x2, + val_mat4x3: mat4x3, + val_mat4x4: mat4x4, +} + +struct StorageCompatible { + val_f16_array_2: array, +} + +struct LayoutTest { + scalar1: f16, scalar2: f16, v3: vec3, tuck_in: f16, scalar4: f16, larger: u32 +} + +@group(0) @binding(0) +var input_uniform: UniformCompatible; + +@group(0) @binding(1) +var input_storage: UniformCompatible; + +@group(0) @binding(2) +var input_arrays: StorageCompatible; + +@group(0) @binding(3) +var output: UniformCompatible; + +@group(0) @binding(4) +var output_arrays: StorageCompatible; + +fn f16_function(x: f16) -> f16 { + var val: f16 = f16(constant_variable); + // A number too big for f16 + val += 1h - 33333h; + // Constructing an f16 from an AbstractInt + val += val + f16(5.); + // Constructing a f16 from other types and other types from f16. + val += f16(input_uniform.val_f32 + f32(val)); + // Constructing a vec3 from a i64 + val += vec3(input_uniform.val_f16).z; + + // Reading/writing to a uniform/storage buffer + output.val_f16 = input_uniform.val_f16 + input_storage.val_f16; + output.val_f16_2 = input_uniform.val_f16_2 + input_storage.val_f16_2; + output.val_f16_3 = input_uniform.val_f16_3 + input_storage.val_f16_3; + output.val_f16_4 = input_uniform.val_f16_4 + input_storage.val_f16_4; + + output.val_mat2x2 = input_uniform.val_mat2x2 + input_storage.val_mat2x2; + output.val_mat2x3 = input_uniform.val_mat2x3 + input_storage.val_mat2x3; + output.val_mat2x4 = input_uniform.val_mat2x4 + input_storage.val_mat2x4; + output.val_mat3x2 = input_uniform.val_mat3x2 + input_storage.val_mat3x2; + output.val_mat3x3 = input_uniform.val_mat3x3 + input_storage.val_mat3x3; + output.val_mat3x4 = input_uniform.val_mat3x4 + input_storage.val_mat3x4; + output.val_mat4x2 = input_uniform.val_mat4x2 + input_storage.val_mat4x2; + output.val_mat4x3 = input_uniform.val_mat4x3 + input_storage.val_mat4x3; + output.val_mat4x4 = input_uniform.val_mat4x4 + input_storage.val_mat4x4; + + output_arrays.val_f16_array_2 = input_arrays.val_f16_array_2; + + // We make sure not to use 32 in these arguments, so it's clear in the results which are builtin + // constants based on the size of the type, and which are arguments. + + // Numeric functions + val += abs(val); + val += clamp(val, val, val); + val += dot(vec2(val), vec2(val)); + val += max(val, val); + val += min(val, val); + val += sign(val); + + val += f16(1.0); + + // We use the shorthand aliases here to ensure the aliases + // work correctly. + + // Cast vectors to/from f32 + let float_vec2 = vec2f(input_uniform.val_f16_2); + output.val_f16_2 = vec2h(float_vec2); + + let float_vec3 = vec3f(input_uniform.val_f16_3); + output.val_f16_3 = vec3h(float_vec3); + + let float_vec4 = vec4f(input_uniform.val_f16_4); + output.val_f16_4 = vec4h(float_vec4); + + // Cast matrices to/from f32 + output.val_mat2x2 = mat2x2h(mat2x2f(input_uniform.val_mat2x2)); + output.val_mat2x3 = mat2x3h(mat2x3f(input_uniform.val_mat2x3)); + output.val_mat2x4 = mat2x4h(mat2x4f(input_uniform.val_mat2x4)); + output.val_mat3x2 = mat3x2h(mat3x2f(input_uniform.val_mat3x2)); + output.val_mat3x3 = mat3x3h(mat3x3f(input_uniform.val_mat3x3)); + output.val_mat3x4 = mat3x4h(mat3x4f(input_uniform.val_mat3x4)); + output.val_mat4x2 = mat4x2h(mat4x2f(input_uniform.val_mat4x2)); + output.val_mat4x3 = mat4x3h(mat4x3f(input_uniform.val_mat4x3)); + output.val_mat4x4 = mat4x4h(mat4x4f(input_uniform.val_mat4x4)); + + // Make sure all the variables are used. + return val; +} + +@compute @workgroup_size(1) +fn main() { + output.final_value = f16_function(2h); +} + diff --git a/naga/tests/in/glsl/f16-glsl.comp b/naga/tests/in/glsl/f16-glsl.comp new file mode 100644 index 00000000000..af8c89cb8a3 --- /dev/null +++ b/naga/tests/in/glsl/f16-glsl.comp @@ -0,0 +1,57 @@ +#version 460 + +#extension GL_AMD_gpu_shader_half_float: enable + +layout(set = 0, binding = 0) uniform A { + float16_t a_1; + f16vec2 a_vec2; + f16vec3 a_vec3; + f16vec4 a_vec4; + // So the rules here are particularly nasty for any f16 matries in uniform buffers + // as the stride is always rounded up to 16, meaning that _every_ f16 matrix in a uniform + // buffer is over-aligned to what naga-ir wants. + // + // This is https://github.com/gfx-rs/wgpu/issues/4375. + + // f16mat2 a_mat2; + // f16mat2x3 a_mat2x3; + // f16mat2x4 a_mat2x4; + // f16mat3x2 a_mat3x2; + // f16mat3 a_mat3; + // f16mat3x4 a_mat3x4; + // f16mat4x2 a_mat4x2; + // f16mat4x3 a_mat4x3; + // f16mat4 a_mat4; +}; + +layout(set = 0, binding = 1) buffer B { + float16_t b_1; + f16vec2 b_vec2; + f16vec3 b_vec3; + f16vec4 b_vec4; + f16mat2 b_mat2; + f16mat2x3 b_mat2x3; + f16mat2x4 b_mat2x4; + f16mat3x2 b_mat3x2; + f16mat3 b_mat3; + f16mat3x4 b_mat3x4; + f16mat4x2 b_mat4x2; + f16mat4x3 b_mat4x3; + f16mat4 b_mat4; +}; + +void main() { + b_1 = a_1; + b_vec2 = a_vec2; + b_vec3 = a_vec3; + b_vec4 = a_vec4; + // b_mat2 = a_mat2; + // b_mat2x3 = a_mat2x3; + // b_mat2x4 = a_mat2x4; + // b_mat3x2 = a_mat3x2; + // b_mat3 = a_mat3; + // b_mat3x4 = a_mat3x4; + // b_mat4x2 = a_mat4x2; + // b_mat4x3 = a_mat4x3; + // b_mat4 = a_mat4; +} diff --git a/naga/tests/in/spv/f16-spv.comp b/naga/tests/in/spv/f16-spv.comp new file mode 100644 index 00000000000..af8c89cb8a3 --- /dev/null +++ b/naga/tests/in/spv/f16-spv.comp @@ -0,0 +1,57 @@ +#version 460 + +#extension GL_AMD_gpu_shader_half_float: enable + +layout(set = 0, binding = 0) uniform A { + float16_t a_1; + f16vec2 a_vec2; + f16vec3 a_vec3; + f16vec4 a_vec4; + // So the rules here are particularly nasty for any f16 matries in uniform buffers + // as the stride is always rounded up to 16, meaning that _every_ f16 matrix in a uniform + // buffer is over-aligned to what naga-ir wants. + // + // This is https://github.com/gfx-rs/wgpu/issues/4375. + + // f16mat2 a_mat2; + // f16mat2x3 a_mat2x3; + // f16mat2x4 a_mat2x4; + // f16mat3x2 a_mat3x2; + // f16mat3 a_mat3; + // f16mat3x4 a_mat3x4; + // f16mat4x2 a_mat4x2; + // f16mat4x3 a_mat4x3; + // f16mat4 a_mat4; +}; + +layout(set = 0, binding = 1) buffer B { + float16_t b_1; + f16vec2 b_vec2; + f16vec3 b_vec3; + f16vec4 b_vec4; + f16mat2 b_mat2; + f16mat2x3 b_mat2x3; + f16mat2x4 b_mat2x4; + f16mat3x2 b_mat3x2; + f16mat3 b_mat3; + f16mat3x4 b_mat3x4; + f16mat4x2 b_mat4x2; + f16mat4x3 b_mat4x3; + f16mat4 b_mat4; +}; + +void main() { + b_1 = a_1; + b_vec2 = a_vec2; + b_vec3 = a_vec3; + b_vec4 = a_vec4; + // b_mat2 = a_mat2; + // b_mat2x3 = a_mat2x3; + // b_mat2x4 = a_mat2x4; + // b_mat3x2 = a_mat3x2; + // b_mat3 = a_mat3; + // b_mat3x4 = a_mat3x4; + // b_mat4x2 = a_mat4x2; + // b_mat4x3 = a_mat4x3; + // b_mat4 = a_mat4; +} diff --git a/naga/tests/in/spv/f16-spv.param.ron b/naga/tests/in/spv/f16-spv.param.ron new file mode 100644 index 00000000000..c70ada99392 --- /dev/null +++ b/naga/tests/in/spv/f16-spv.param.ron @@ -0,0 +1,3 @@ +( + god_mode: true, +) \ No newline at end of file diff --git a/naga/tests/in/spv/f16-spv.spv b/naga/tests/in/spv/f16-spv.spv new file mode 100644 index 00000000000..7e7f1763ef8 Binary files /dev/null and b/naga/tests/in/spv/f16-spv.spv differ diff --git a/naga/tests/in/spv/f16-spv.spvasm b/naga/tests/in/spv/f16-spv.spvasm new file mode 100644 index 00000000000..806de327548 --- /dev/null +++ b/naga/tests/in/spv/f16-spv.spvasm @@ -0,0 +1,130 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 11 +; Bound: 46 +; Schema: 0 + OpCapability Shader + OpCapability StorageBuffer16BitAccess + OpCapability UniformAndStorageBuffer16BitAccess + OpExtension "SPV_KHR_16bit_storage" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 460 + OpSourceExtension "GL_AMD_gpu_shader_half_float" + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %B "B" + OpMemberName %B 0 "b_1" + OpMemberName %B 1 "b_vec2" + OpMemberName %B 2 "b_vec3" + OpMemberName %B 3 "b_vec4" + OpMemberName %B 4 "b_mat2" + OpMemberName %B 5 "b_mat2x3" + OpMemberName %B 6 "b_mat2x4" + OpMemberName %B 7 "b_mat3x2" + OpMemberName %B 8 "b_mat3" + OpMemberName %B 9 "b_mat3x4" + OpMemberName %B 10 "b_mat4x2" + OpMemberName %B 11 "b_mat4x3" + OpMemberName %B 12 "b_mat4" + OpName %_ "" + OpName %A "A" + OpMemberName %A 0 "a_1" + OpMemberName %A 1 "a_vec2" + OpMemberName %A 2 "a_vec3" + OpMemberName %A 3 "a_vec4" + OpName %__0 "" + OpDecorate %B BufferBlock + OpMemberDecorate %B 0 Offset 0 + OpMemberDecorate %B 1 Offset 4 + OpMemberDecorate %B 2 Offset 8 + OpMemberDecorate %B 3 Offset 16 + OpMemberDecorate %B 4 ColMajor + OpMemberDecorate %B 4 MatrixStride 4 + OpMemberDecorate %B 4 Offset 24 + OpMemberDecorate %B 5 ColMajor + OpMemberDecorate %B 5 MatrixStride 8 + OpMemberDecorate %B 5 Offset 32 + OpMemberDecorate %B 6 ColMajor + OpMemberDecorate %B 6 MatrixStride 8 + OpMemberDecorate %B 6 Offset 48 + OpMemberDecorate %B 7 ColMajor + OpMemberDecorate %B 7 MatrixStride 4 + OpMemberDecorate %B 7 Offset 64 + OpMemberDecorate %B 8 ColMajor + OpMemberDecorate %B 8 MatrixStride 8 + OpMemberDecorate %B 8 Offset 80 + OpMemberDecorate %B 9 ColMajor + OpMemberDecorate %B 9 MatrixStride 8 + OpMemberDecorate %B 9 Offset 104 + OpMemberDecorate %B 10 ColMajor + OpMemberDecorate %B 10 MatrixStride 4 + OpMemberDecorate %B 10 Offset 128 + OpMemberDecorate %B 11 ColMajor + OpMemberDecorate %B 11 MatrixStride 8 + OpMemberDecorate %B 11 Offset 144 + OpMemberDecorate %B 12 ColMajor + OpMemberDecorate %B 12 MatrixStride 8 + OpMemberDecorate %B 12 Offset 176 + OpDecorate %_ Binding 1 + OpDecorate %_ DescriptorSet 0 + OpDecorate %A Block + OpMemberDecorate %A 0 Offset 0 + OpMemberDecorate %A 1 Offset 4 + OpMemberDecorate %A 2 Offset 8 + OpMemberDecorate %A 3 Offset 16 + OpDecorate %__0 Binding 0 + OpDecorate %__0 DescriptorSet 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %half = OpTypeFloat 16 + %v2half = OpTypeVector %half 2 + %v3half = OpTypeVector %half 3 + %v4half = OpTypeVector %half 4 + %mat2v2half = OpTypeMatrix %v2half 2 + %mat2v3half = OpTypeMatrix %v3half 2 + %mat2v4half = OpTypeMatrix %v4half 2 + %mat3v2half = OpTypeMatrix %v2half 3 + %mat3v3half = OpTypeMatrix %v3half 3 + %mat3v4half = OpTypeMatrix %v4half 3 + %mat4v2half = OpTypeMatrix %v2half 4 + %mat4v3half = OpTypeMatrix %v3half 4 + %mat4v4half = OpTypeMatrix %v4half 4 + %B = OpTypeStruct %half %v2half %v3half %v4half %mat2v2half %mat2v3half %mat2v4half %mat3v2half %mat3v3half %mat3v4half %mat4v2half %mat4v3half %mat4v4half +%_ptr_Uniform_B = OpTypePointer Uniform %B + %_ = OpVariable %_ptr_Uniform_B Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %A = OpTypeStruct %half %v2half %v3half %v4half +%_ptr_Uniform_A = OpTypePointer Uniform %A + %__0 = OpVariable %_ptr_Uniform_A Uniform +%_ptr_Uniform_half = OpTypePointer Uniform %half + %int_1 = OpConstant %int 1 +%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half + %int_2 = OpConstant %int 2 +%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half + %int_3 = OpConstant %int 3 +%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half + %main = OpFunction %void None %3 + %5 = OpLabel + %28 = OpAccessChain %_ptr_Uniform_half %__0 %int_0 + %29 = OpLoad %half %28 + %30 = OpAccessChain %_ptr_Uniform_half %_ %int_0 + OpStore %30 %29 + %33 = OpAccessChain %_ptr_Uniform_v2half %__0 %int_1 + %34 = OpLoad %v2half %33 + %35 = OpAccessChain %_ptr_Uniform_v2half %_ %int_1 + OpStore %35 %34 + %38 = OpAccessChain %_ptr_Uniform_v3half %__0 %int_2 + %39 = OpLoad %v3half %38 + %40 = OpAccessChain %_ptr_Uniform_v3half %_ %int_2 + OpStore %40 %39 + %43 = OpAccessChain %_ptr_Uniform_v4half %__0 %int_3 + %44 = OpLoad %v4half %43 + %45 = OpAccessChain %_ptr_Uniform_v4half %_ %int_3 + OpStore %45 %44 + OpReturn + OpFunctionEnd diff --git a/naga/tests/out/hlsl/f16.hlsl b/naga/tests/out/hlsl/f16.hlsl new file mode 100644 index 00000000000..05e23872122 --- /dev/null +++ b/naga/tests/out/hlsl/f16.hlsl @@ -0,0 +1,351 @@ +struct NagaConstants { + int first_vertex; + int first_instance; + uint other; +}; +ConstantBuffer _NagaConstants: register(b0, space1); + +struct UniformCompatible { + uint val_u32_; + int val_i32_; + float val_f32_; + half val_f16_; + half2 val_f16_2_; + int _pad5_0; + half3 val_f16_3_; + half4 val_f16_4_; + half final_value; + half2 val_mat2x2__0; half2 val_mat2x2__1; + int _pad9_0; + row_major half2x3 val_mat2x3_; + row_major half2x4 val_mat2x4_; + half2 val_mat3x2__0; half2 val_mat3x2__1; half2 val_mat3x2__2; + int _pad12_0; + row_major half3x3 val_mat3x3_; + row_major half3x4 val_mat3x4_; + half2 val_mat4x2__0; half2 val_mat4x2__1; half2 val_mat4x2__2; half2 val_mat4x2__3; + row_major half4x3 val_mat4x3_; + row_major half4x4 val_mat4x4_; +}; + +struct StorageCompatible { + half val_f16_array_2_[2]; +}; + +struct LayoutTest { + half scalar1_; + half scalar2_; + int _pad2_0; + half3 v3_; + half tuck_in; + half scalar4_; + uint larger; +}; + +static const half constant_variable = 15.203125h; + +static half private_variable = 1.0h; +cbuffer input_uniform : register(b0) { UniformCompatible input_uniform; } +ByteAddressBuffer input_storage : register(t1); +ByteAddressBuffer input_arrays : register(t2); +RWByteAddressBuffer output : register(u3); +RWByteAddressBuffer output_arrays : register(u4); + +half2x2 GetMatval_mat2x2_OnUniformCompatible(UniformCompatible obj) { + return half2x2(obj.val_mat2x2__0, obj.val_mat2x2__1); +} + +void SetMatval_mat2x2_OnUniformCompatible(UniformCompatible obj, half2x2 mat) { + obj.val_mat2x2__0 = mat[0]; + obj.val_mat2x2__1 = mat[1]; +} + +void SetMatVecval_mat2x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) { + switch(mat_idx) { + case 0: { obj.val_mat2x2__0 = vec; break; } + case 1: { obj.val_mat2x2__1 = vec; break; } + } +} + +void SetMatScalarval_mat2x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) { + switch(mat_idx) { + case 0: { obj.val_mat2x2__0[vec_idx] = scalar; break; } + case 1: { obj.val_mat2x2__1[vec_idx] = scalar; break; } + } +} + +half3x2 GetMatval_mat3x2_OnUniformCompatible(UniformCompatible obj) { + return half3x2(obj.val_mat3x2__0, obj.val_mat3x2__1, obj.val_mat3x2__2); +} + +void SetMatval_mat3x2_OnUniformCompatible(UniformCompatible obj, half3x2 mat) { + obj.val_mat3x2__0 = mat[0]; + obj.val_mat3x2__1 = mat[1]; + obj.val_mat3x2__2 = mat[2]; +} + +void SetMatVecval_mat3x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) { + switch(mat_idx) { + case 0: { obj.val_mat3x2__0 = vec; break; } + case 1: { obj.val_mat3x2__1 = vec; break; } + case 2: { obj.val_mat3x2__2 = vec; break; } + } +} + +void SetMatScalarval_mat3x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) { + switch(mat_idx) { + case 0: { obj.val_mat3x2__0[vec_idx] = scalar; break; } + case 1: { obj.val_mat3x2__1[vec_idx] = scalar; break; } + case 2: { obj.val_mat3x2__2[vec_idx] = scalar; break; } + } +} + +half4x2 GetMatval_mat4x2_OnUniformCompatible(UniformCompatible obj) { + return half4x2(obj.val_mat4x2__0, obj.val_mat4x2__1, obj.val_mat4x2__2, obj.val_mat4x2__3); +} + +void SetMatval_mat4x2_OnUniformCompatible(UniformCompatible obj, half4x2 mat) { + obj.val_mat4x2__0 = mat[0]; + obj.val_mat4x2__1 = mat[1]; + obj.val_mat4x2__2 = mat[2]; + obj.val_mat4x2__3 = mat[3]; +} + +void SetMatVecval_mat4x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) { + switch(mat_idx) { + case 0: { obj.val_mat4x2__0 = vec; break; } + case 1: { obj.val_mat4x2__1 = vec; break; } + case 2: { obj.val_mat4x2__2 = vec; break; } + case 3: { obj.val_mat4x2__3 = vec; break; } + } +} + +void SetMatScalarval_mat4x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) { + switch(mat_idx) { + case 0: { obj.val_mat4x2__0[vec_idx] = scalar; break; } + case 1: { obj.val_mat4x2__1[vec_idx] = scalar; break; } + case 2: { obj.val_mat4x2__2[vec_idx] = scalar; break; } + case 3: { obj.val_mat4x2__3[vec_idx] = scalar; break; } + } +} + +typedef half ret_Constructarray2_half_[2]; +ret_Constructarray2_half_ Constructarray2_half_(half arg0, half arg1) { + half ret[2] = { arg0, arg1 }; + return ret; +} + +half f16_function(half x) +{ + half val = 15.203125h; + + half _e4 = val; + val = (_e4 + -33344.0h); + half _e6 = val; + half _e9 = val; + val = (_e9 + (_e6 + 5.0h)); + float _e13 = input_uniform.val_f32_; + half _e14 = val; + half _e18 = val; + val = (_e18 + half((_e13 + float(_e14)))); + half _e22 = input_uniform.val_f16_; + half _e25 = val; + val = (_e25 + (_e22).xxx.z); + half _e31 = input_uniform.val_f16_; + half _e34 = input_storage.Load(12); + output.Store(12, (_e31 + _e34)); + half2 _e40 = input_uniform.val_f16_2_; + half2 _e43 = input_storage.Load(16); + output.Store(16, (_e40 + _e43)); + half3 _e49 = input_uniform.val_f16_3_; + half3 _e52 = input_storage.Load(24); + output.Store(24, (_e49 + _e52)); + half4 _e58 = input_uniform.val_f16_4_; + half4 _e61 = input_storage.Load(32); + output.Store(32, (_e58 + _e61)); + half2x2 _e67 = GetMatval_mat2x2_OnUniformCompatible(input_uniform); + half2x2 _e70 = half2x2(input_storage.Load(44+0), input_storage.Load(44+4)); + { + half2x2 _value2 = (_e67 + _e70); + output.Store(44+0, _value2[0]); + output.Store(44+4, _value2[1]); + } + half2x3 _e76 = input_uniform.val_mat2x3_; + half2x3 _e79 = half2x3(input_storage.Load(56+0), input_storage.Load(56+8)); + { + half2x3 _value2 = (_e76 + _e79); + output.Store(56+0, _value2[0]); + output.Store(56+8, _value2[1]); + } + half2x4 _e85 = input_uniform.val_mat2x4_; + half2x4 _e88 = half2x4(input_storage.Load(72+0), input_storage.Load(72+8)); + { + half2x4 _value2 = (_e85 + _e88); + output.Store(72+0, _value2[0]); + output.Store(72+8, _value2[1]); + } + half3x2 _e94 = GetMatval_mat3x2_OnUniformCompatible(input_uniform); + half3x2 _e97 = half3x2(input_storage.Load(88+0), input_storage.Load(88+4), input_storage.Load(88+8)); + { + half3x2 _value2 = (_e94 + _e97); + output.Store(88+0, _value2[0]); + output.Store(88+4, _value2[1]); + output.Store(88+8, _value2[2]); + } + half3x3 _e103 = input_uniform.val_mat3x3_; + half3x3 _e106 = half3x3(input_storage.Load(104+0), input_storage.Load(104+8), input_storage.Load(104+16)); + { + half3x3 _value2 = (_e103 + _e106); + output.Store(104+0, _value2[0]); + output.Store(104+8, _value2[1]); + output.Store(104+16, _value2[2]); + } + half3x4 _e112 = input_uniform.val_mat3x4_; + half3x4 _e115 = half3x4(input_storage.Load(128+0), input_storage.Load(128+8), input_storage.Load(128+16)); + { + half3x4 _value2 = (_e112 + _e115); + output.Store(128+0, _value2[0]); + output.Store(128+8, _value2[1]); + output.Store(128+16, _value2[2]); + } + half4x2 _e121 = GetMatval_mat4x2_OnUniformCompatible(input_uniform); + half4x2 _e124 = half4x2(input_storage.Load(152+0), input_storage.Load(152+4), input_storage.Load(152+8), input_storage.Load(152+12)); + { + half4x2 _value2 = (_e121 + _e124); + output.Store(152+0, _value2[0]); + output.Store(152+4, _value2[1]); + output.Store(152+8, _value2[2]); + output.Store(152+12, _value2[3]); + } + half4x3 _e130 = input_uniform.val_mat4x3_; + half4x3 _e133 = half4x3(input_storage.Load(168+0), input_storage.Load(168+8), input_storage.Load(168+16), input_storage.Load(168+24)); + { + half4x3 _value2 = (_e130 + _e133); + output.Store(168+0, _value2[0]); + output.Store(168+8, _value2[1]); + output.Store(168+16, _value2[2]); + output.Store(168+24, _value2[3]); + } + half4x4 _e139 = input_uniform.val_mat4x4_; + half4x4 _e142 = half4x4(input_storage.Load(200+0), input_storage.Load(200+8), input_storage.Load(200+16), input_storage.Load(200+24)); + { + half4x4 _value2 = (_e139 + _e142); + output.Store(200+0, _value2[0]); + output.Store(200+8, _value2[1]); + output.Store(200+16, _value2[2]); + output.Store(200+24, _value2[3]); + } + half _e148[2] = Constructarray2_half_(input_arrays.Load(0+0), input_arrays.Load(0+2)); + { + half _value2[2] = _e148; + output_arrays.Store(0+0, _value2[0]); + output_arrays.Store(0+2, _value2[1]); + } + half _e149 = val; + half _e151 = val; + val = (_e151 + abs(_e149)); + half _e153 = val; + half _e154 = val; + half _e155 = val; + half _e157 = val; + val = (_e157 + clamp(_e153, _e154, _e155)); + half _e159 = val; + half _e161 = val; + half _e164 = val; + val = (_e164 + dot((_e159).xx, (_e161).xx)); + half _e166 = val; + half _e167 = val; + half _e169 = val; + val = (_e169 + max(_e166, _e167)); + half _e171 = val; + half _e172 = val; + half _e174 = val; + val = (_e174 + min(_e171, _e172)); + half _e176 = val; + half _e178 = val; + val = (_e178 + sign(_e176)); + half _e181 = val; + val = (_e181 + 1.0h); + half2 _e185 = input_uniform.val_f16_2_; + float2 float_vec2_ = float2(_e185); + output.Store(16, half2(float_vec2_)); + half3 _e192 = input_uniform.val_f16_3_; + float3 float_vec3_ = float3(_e192); + output.Store(24, half3(float_vec3_)); + half4 _e199 = input_uniform.val_f16_4_; + float4 float_vec4_ = float4(_e199); + output.Store(32, half4(float_vec4_)); + half2x2 _e208 = GetMatval_mat2x2_OnUniformCompatible(input_uniform); + { + half2x2 _value2 = half2x2(float2x2(_e208)); + output.Store(44+0, _value2[0]); + output.Store(44+4, _value2[1]); + } + half2x3 _e215 = input_uniform.val_mat2x3_; + { + half2x3 _value2 = half2x3(float2x3(_e215)); + output.Store(56+0, _value2[0]); + output.Store(56+8, _value2[1]); + } + half2x4 _e222 = input_uniform.val_mat2x4_; + { + half2x4 _value2 = half2x4(float2x4(_e222)); + output.Store(72+0, _value2[0]); + output.Store(72+8, _value2[1]); + } + half3x2 _e229 = GetMatval_mat3x2_OnUniformCompatible(input_uniform); + { + half3x2 _value2 = half3x2(float3x2(_e229)); + output.Store(88+0, _value2[0]); + output.Store(88+4, _value2[1]); + output.Store(88+8, _value2[2]); + } + half3x3 _e236 = input_uniform.val_mat3x3_; + { + half3x3 _value2 = half3x3(float3x3(_e236)); + output.Store(104+0, _value2[0]); + output.Store(104+8, _value2[1]); + output.Store(104+16, _value2[2]); + } + half3x4 _e243 = input_uniform.val_mat3x4_; + { + half3x4 _value2 = half3x4(float3x4(_e243)); + output.Store(128+0, _value2[0]); + output.Store(128+8, _value2[1]); + output.Store(128+16, _value2[2]); + } + half4x2 _e250 = GetMatval_mat4x2_OnUniformCompatible(input_uniform); + { + half4x2 _value2 = half4x2(float4x2(_e250)); + output.Store(152+0, _value2[0]); + output.Store(152+4, _value2[1]); + output.Store(152+8, _value2[2]); + output.Store(152+12, _value2[3]); + } + half4x3 _e257 = input_uniform.val_mat4x3_; + { + half4x3 _value2 = half4x3(float4x3(_e257)); + output.Store(168+0, _value2[0]); + output.Store(168+8, _value2[1]); + output.Store(168+16, _value2[2]); + output.Store(168+24, _value2[3]); + } + half4x4 _e264 = input_uniform.val_mat4x4_; + { + half4x4 _value2 = half4x4(float4x4(_e264)); + output.Store(200+0, _value2[0]); + output.Store(200+8, _value2[1]); + output.Store(200+16, _value2[2]); + output.Store(200+24, _value2[3]); + } + half _e267 = val; + return _e267; +} + +[numthreads(1, 1, 1)] +void main() +{ + const half _e3 = f16_function(2.0h); + output.Store(40, _e3); + return; +} diff --git a/naga/tests/out/hlsl/f16.ron b/naga/tests/out/hlsl/f16.ron new file mode 100644 index 00000000000..b396a4626e3 --- /dev/null +++ b/naga/tests/out/hlsl/f16.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"main", + target_profile:"cs_6_2", + ), + ], +) diff --git a/naga/tests/out/msl/extra.msl b/naga/tests/out/msl/extra.msl index 8288dfad92f..4d6bb568f3a 100644 --- a/naga/tests/out/msl/extra.msl +++ b/naga/tests/out/msl/extra.msl @@ -6,7 +6,7 @@ using metal::uint; struct PushConstants { uint index; - char _pad1[12]; + char _pad1[4]; metal::float2 double_; }; struct FragmentIn { diff --git a/naga/tests/out/msl/f16.msl b/naga/tests/out/msl/f16.msl new file mode 100644 index 00000000000..40be58e9015 --- /dev/null +++ b/naga/tests/out/msl/f16.msl @@ -0,0 +1,177 @@ +// language: metal1.0 +#include +#include + +using metal::uint; + +struct UniformCompatible { + uint val_u32_; + int val_i32_; + float val_f32_; + half val_f16_; + char _pad4[2]; + metal::half2 val_f16_2_; + char _pad5[4]; + metal::half3 val_f16_3_; + metal::half4 val_f16_4_; + half final_value; + char _pad8[2]; + metal::half2x2 val_mat2x2_; + char _pad9[4]; + metal::half2x3 val_mat2x3_; + metal::half2x4 val_mat2x4_; + metal::half3x2 val_mat3x2_; + char _pad12[4]; + metal::half3x3 val_mat3x3_; + metal::half3x4 val_mat3x4_; + metal::half4x2 val_mat4x2_; + metal::half4x3 val_mat4x3_; + metal::half4x4 val_mat4x4_; +}; +struct type_16 { + half inner[2]; +}; +struct StorageCompatible { + type_16 val_f16_array_2_; +}; +struct LayoutTest { + half scalar1_; + half scalar2_; + char _pad2[4]; + metal::packed_half3 v3_; + half tuck_in; + half scalar4_; + char _pad5[2]; + uint larger; +}; +constant half constant_variable = 15.203125h; + +half f16_function( + half x, + constant UniformCompatible& input_uniform, + device UniformCompatible const& input_storage, + device StorageCompatible const& input_arrays, + device UniformCompatible& output, + device StorageCompatible& output_arrays +) { + half val = 15.203125h; + half _e4 = val; + val = _e4 + -33344.0h; + half _e6 = val; + half _e9 = val; + val = _e9 + (_e6 + 5.0h); + float _e13 = input_uniform.val_f32_; + half _e14 = val; + half _e18 = val; + val = _e18 + static_cast(_e13 + static_cast(_e14)); + half _e22 = input_uniform.val_f16_; + half _e25 = val; + val = _e25 + metal::half3(_e22).z; + half _e31 = input_uniform.val_f16_; + half _e34 = input_storage.val_f16_; + output.val_f16_ = _e31 + _e34; + metal::half2 _e40 = input_uniform.val_f16_2_; + metal::half2 _e43 = input_storage.val_f16_2_; + output.val_f16_2_ = _e40 + _e43; + metal::half3 _e49 = input_uniform.val_f16_3_; + metal::half3 _e52 = input_storage.val_f16_3_; + output.val_f16_3_ = _e49 + _e52; + metal::half4 _e58 = input_uniform.val_f16_4_; + metal::half4 _e61 = input_storage.val_f16_4_; + output.val_f16_4_ = _e58 + _e61; + metal::half2x2 _e67 = input_uniform.val_mat2x2_; + metal::half2x2 _e70 = input_storage.val_mat2x2_; + output.val_mat2x2_ = _e67 + _e70; + metal::half2x3 _e76 = input_uniform.val_mat2x3_; + metal::half2x3 _e79 = input_storage.val_mat2x3_; + output.val_mat2x3_ = _e76 + _e79; + metal::half2x4 _e85 = input_uniform.val_mat2x4_; + metal::half2x4 _e88 = input_storage.val_mat2x4_; + output.val_mat2x4_ = _e85 + _e88; + metal::half3x2 _e94 = input_uniform.val_mat3x2_; + metal::half3x2 _e97 = input_storage.val_mat3x2_; + output.val_mat3x2_ = _e94 + _e97; + metal::half3x3 _e103 = input_uniform.val_mat3x3_; + metal::half3x3 _e106 = input_storage.val_mat3x3_; + output.val_mat3x3_ = _e103 + _e106; + metal::half3x4 _e112 = input_uniform.val_mat3x4_; + metal::half3x4 _e115 = input_storage.val_mat3x4_; + output.val_mat3x4_ = _e112 + _e115; + metal::half4x2 _e121 = input_uniform.val_mat4x2_; + metal::half4x2 _e124 = input_storage.val_mat4x2_; + output.val_mat4x2_ = _e121 + _e124; + metal::half4x3 _e130 = input_uniform.val_mat4x3_; + metal::half4x3 _e133 = input_storage.val_mat4x3_; + output.val_mat4x3_ = _e130 + _e133; + metal::half4x4 _e139 = input_uniform.val_mat4x4_; + metal::half4x4 _e142 = input_storage.val_mat4x4_; + output.val_mat4x4_ = _e139 + _e142; + type_16 _e148 = input_arrays.val_f16_array_2_; + output_arrays.val_f16_array_2_ = _e148; + half _e149 = val; + half _e151 = val; + val = _e151 + metal::abs(_e149); + half _e153 = val; + half _e154 = val; + half _e155 = val; + half _e157 = val; + val = _e157 + metal::clamp(_e153, _e154, _e155); + half _e159 = val; + half _e161 = val; + half _e164 = val; + val = _e164 + metal::dot(metal::half2(_e159), metal::half2(_e161)); + half _e166 = val; + half _e167 = val; + half _e169 = val; + val = _e169 + metal::max(_e166, _e167); + half _e171 = val; + half _e172 = val; + half _e174 = val; + val = _e174 + metal::min(_e171, _e172); + half _e176 = val; + half _e178 = val; + val = _e178 + metal::sign(_e176); + half _e181 = val; + val = _e181 + 1.0h; + metal::half2 _e185 = input_uniform.val_f16_2_; + metal::float2 float_vec2_ = static_cast(_e185); + output.val_f16_2_ = static_cast(float_vec2_); + metal::half3 _e192 = input_uniform.val_f16_3_; + metal::float3 float_vec3_ = static_cast(_e192); + output.val_f16_3_ = static_cast(float_vec3_); + metal::half4 _e199 = input_uniform.val_f16_4_; + metal::float4 float_vec4_ = static_cast(_e199); + output.val_f16_4_ = static_cast(float_vec4_); + metal::half2x2 _e208 = input_uniform.val_mat2x2_; + output.val_mat2x2_ = metal::half2x2(metal::float2x2(_e208)); + metal::half2x3 _e215 = input_uniform.val_mat2x3_; + output.val_mat2x3_ = metal::half2x3(metal::float2x3(_e215)); + metal::half2x4 _e222 = input_uniform.val_mat2x4_; + output.val_mat2x4_ = metal::half2x4(metal::float2x4(_e222)); + metal::half3x2 _e229 = input_uniform.val_mat3x2_; + output.val_mat3x2_ = metal::half3x2(metal::float3x2(_e229)); + metal::half3x3 _e236 = input_uniform.val_mat3x3_; + output.val_mat3x3_ = metal::half3x3(metal::float3x3(_e236)); + metal::half3x4 _e243 = input_uniform.val_mat3x4_; + output.val_mat3x4_ = metal::half3x4(metal::float3x4(_e243)); + metal::half4x2 _e250 = input_uniform.val_mat4x2_; + output.val_mat4x2_ = metal::half4x2(metal::float4x2(_e250)); + metal::half4x3 _e257 = input_uniform.val_mat4x3_; + output.val_mat4x3_ = metal::half4x3(metal::float4x3(_e257)); + metal::half4x4 _e264 = input_uniform.val_mat4x4_; + output.val_mat4x4_ = metal::half4x4(metal::float4x4(_e264)); + half _e267 = val; + return _e267; +} + +kernel void main_( + constant UniformCompatible& input_uniform [[user(fake0)]] +, device UniformCompatible const& input_storage [[user(fake0)]] +, device StorageCompatible const& input_arrays [[user(fake0)]] +, device UniformCompatible& output [[user(fake0)]] +, device StorageCompatible& output_arrays [[user(fake0)]] +) { + half _e3 = f16_function(2.0h, input_uniform, input_storage, input_arrays, output, output_arrays); + output.final_value = _e3; + return; +} diff --git a/naga/tests/out/spv/atomicCompareExchange-int64.spvasm b/naga/tests/out/spv/atomicCompareExchange-int64.spvasm index f174ad3b382..7fa0c3f0de4 100644 --- a/naga/tests/out/spv/atomicCompareExchange-int64.spvasm +++ b/naga/tests/out/spv/atomicCompareExchange-int64.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.0 ; Generator: rspirv -; Bound: 125 +; Bound: 123 OpCapability Shader OpCapability Int64Atomics OpCapability Int64 @@ -9,9 +9,9 @@ OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %19 "test_atomic_compare_exchange_i64" -OpEntryPoint GLCompute %77 "test_atomic_compare_exchange_u64" +OpEntryPoint GLCompute %76 "test_atomic_compare_exchange_u64" OpExecutionMode %19 LocalSize 1 1 1 -OpExecutionMode %77 LocalSize 1 1 1 +OpExecutionMode %76 LocalSize 1 1 1 OpDecorate %5 ArrayStride 8 OpDecorate %8 ArrayStride 8 OpMemberDecorate %10 0 Offset 0 @@ -57,12 +57,12 @@ OpMemberDecorate %16 0 Offset 0 %51 = OpTypeInt 32 1 %50 = OpConstant %51 1 %52 = OpConstant %3 64 -%78 = OpTypePointer StorageBuffer %8 -%80 = OpConstant %7 10 -%83 = OpTypePointer Function %7 -%84 = OpConstantNull %7 -%86 = OpConstantNull %9 -%99 = OpTypePointer StorageBuffer %7 +%77 = OpTypePointer StorageBuffer %8 +%79 = OpConstant %7 10 +%82 = OpTypePointer Function %7 +%83 = OpConstantNull %7 +%85 = OpConstantNull %9 +%98 = OpTypePointer StorageBuffer %7 %19 = OpFunction %2 None %20 %18 = OpLabel %27 = OpVariable %28 Function %22 @@ -106,16 +106,16 @@ OpBranch %61 %61 = OpLabel %63 = OpLoad %4 %29 %64 = OpIAdd %4 %63 %25 -%66 = OpLoad %3 %27 -%67 = OpLoad %4 %29 -%69 = OpAccessChain %47 %23 %66 -%70 = OpAtomicCompareExchange %4 %69 %50 %52 %52 %64 %67 -%71 = OpIEqual %9 %70 %67 -%68 = OpCompositeConstruct %10 %70 %71 -%72 = OpCompositeExtract %4 %68 0 -OpStore %29 %72 -%73 = OpCompositeExtract %9 %68 1 -OpStore %32 %73 +%65 = OpLoad %3 %27 +%66 = OpLoad %4 %29 +%68 = OpAccessChain %47 %23 %65 +%69 = OpAtomicCompareExchange %4 %68 %50 %52 %52 %64 %66 +%70 = OpIEqual %9 %69 %66 +%67 = OpCompositeConstruct %10 %69 %70 +%71 = OpCompositeExtract %4 %67 0 +OpStore %29 %71 +%72 = OpCompositeExtract %9 %67 1 +OpStore %32 %72 OpBranch %62 %62 = OpLabel OpBranch %56 @@ -126,80 +126,80 @@ OpBranch %45 %45 = OpLabel OpBranch %39 %39 = OpLabel -%74 = OpLoad %3 %27 -%75 = OpIAdd %3 %74 %26 -OpStore %27 %75 +%73 = OpLoad %3 %27 +%74 = OpIAdd %3 %73 %26 +OpStore %27 %74 OpBranch %36 %37 = OpLabel OpReturn OpFunctionEnd -%77 = OpFunction %2 None %20 -%76 = OpLabel -%81 = OpVariable %28 Function %22 -%82 = OpVariable %83 Function %84 -%85 = OpVariable %33 Function %86 -%79 = OpAccessChain %78 %15 %22 +%76 = OpFunction %2 None %20 +%75 = OpLabel +%80 = OpVariable %28 Function %22 +%81 = OpVariable %82 Function %83 +%84 = OpVariable %33 Function %85 +%78 = OpAccessChain %77 %15 %22 +OpBranch %86 +%86 = OpLabel OpBranch %87 %87 = OpLabel -OpBranch %88 -%88 = OpLabel -OpLoopMerge %89 %91 None -OpBranch %90 -%90 = OpLabel -%92 = OpLoad %3 %81 -%93 = OpULessThan %9 %92 %6 -OpSelectionMerge %94 None -OpBranchConditional %93 %94 %95 -%95 = OpLabel +OpLoopMerge %88 %90 None OpBranch %89 +%89 = OpLabel +%91 = OpLoad %3 %80 +%92 = OpULessThan %9 %91 %6 +OpSelectionMerge %93 None +OpBranchConditional %92 %93 %94 %94 = OpLabel -OpBranch %96 -%96 = OpLabel -%98 = OpLoad %3 %81 -%100 = OpAccessChain %99 %79 %98 -%101 = OpAtomicLoad %7 %100 %50 %52 -OpStore %82 %101 -OpStore %85 %24 -OpBranch %102 -%102 = OpLabel -OpLoopMerge %103 %105 None -OpBranch %104 -%104 = OpLabel -%106 = OpLoad %9 %85 -%107 = OpLogicalNot %9 %106 -OpSelectionMerge %108 None -OpBranchConditional %107 %108 %109 -%109 = OpLabel +OpBranch %88 +%93 = OpLabel +OpBranch %95 +%95 = OpLabel +%97 = OpLoad %3 %80 +%99 = OpAccessChain %98 %78 %97 +%100 = OpAtomicLoad %7 %99 %50 %52 +OpStore %81 %100 +OpStore %84 %24 +OpBranch %101 +%101 = OpLabel +OpLoopMerge %102 %104 None OpBranch %103 +%103 = OpLabel +%105 = OpLoad %9 %84 +%106 = OpLogicalNot %9 %105 +OpSelectionMerge %107 None +OpBranchConditional %106 %107 %108 %108 = OpLabel +OpBranch %102 +%107 = OpLabel +OpBranch %109 +%109 = OpLabel +%111 = OpLoad %7 %81 +%112 = OpIAdd %7 %111 %79 +%113 = OpLoad %3 %80 +%114 = OpLoad %7 %81 +%116 = OpAccessChain %98 %78 %113 +%117 = OpAtomicCompareExchange %7 %116 %50 %52 %52 %112 %114 +%118 = OpIEqual %9 %117 %114 +%115 = OpCompositeConstruct %11 %117 %118 +%119 = OpCompositeExtract %7 %115 0 +OpStore %81 %119 +%120 = OpCompositeExtract %9 %115 1 +OpStore %84 %120 OpBranch %110 %110 = OpLabel -%112 = OpLoad %7 %82 -%113 = OpIAdd %7 %112 %80 -%115 = OpLoad %3 %81 -%116 = OpLoad %7 %82 -%118 = OpAccessChain %99 %79 %115 -%119 = OpAtomicCompareExchange %7 %118 %50 %52 %52 %113 %116 -%120 = OpIEqual %9 %119 %116 -%117 = OpCompositeConstruct %11 %119 %120 -%121 = OpCompositeExtract %7 %117 0 -OpStore %82 %121 -%122 = OpCompositeExtract %9 %117 1 -OpStore %85 %122 -OpBranch %111 -%111 = OpLabel -OpBranch %105 -%105 = OpLabel -OpBranch %102 -%103 = OpLabel -OpBranch %97 -%97 = OpLabel -OpBranch %91 -%91 = OpLabel -%123 = OpLoad %3 %81 -%124 = OpIAdd %3 %123 %26 -OpStore %81 %124 -OpBranch %88 -%89 = OpLabel +OpBranch %104 +%104 = OpLabel +OpBranch %101 +%102 = OpLabel +OpBranch %96 +%96 = OpLabel +OpBranch %90 +%90 = OpLabel +%121 = OpLoad %3 %80 +%122 = OpIAdd %3 %121 %26 +OpStore %80 %122 +OpBranch %87 +%88 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/extra.spvasm b/naga/tests/out/spv/extra.spvasm index 9c434a8ce26..0e84427bad4 100644 --- a/naga/tests/out/spv/extra.spvasm +++ b/naga/tests/out/spv/extra.spvasm @@ -1,76 +1,74 @@ ; SPIR-V ; Version: 1.2 ; Generator: rspirv -; Bound: 48 +; Bound: 47 OpCapability Shader -OpCapability Float64 OpCapability Geometry %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint Fragment %23 "main" %15 %18 %21 -OpExecutionMode %23 OriginUpperLeft +OpEntryPoint Fragment %22 "main" %14 %17 %20 +OpExecutionMode %22 OriginUpperLeft OpMemberDecorate %6 0 Offset 0 -OpMemberDecorate %6 1 Offset 16 -OpMemberDecorate %9 0 Offset 0 -OpMemberDecorate %9 1 Offset 16 -OpDecorate %11 Block -OpMemberDecorate %11 0 Offset 0 -OpDecorate %15 Location 0 -OpDecorate %18 BuiltIn PrimitiveId -OpDecorate %18 Flat -OpDecorate %21 Location 0 +OpMemberDecorate %6 1 Offset 8 +OpMemberDecorate %8 0 Offset 0 +OpMemberDecorate %8 1 Offset 16 +OpDecorate %10 Block +OpMemberDecorate %10 0 Offset 0 +OpDecorate %14 Location 0 +OpDecorate %17 BuiltIn PrimitiveId +OpDecorate %17 Flat +OpDecorate %20 Location 0 %2 = OpTypeVoid %3 = OpTypeInt 32 0 -%5 = OpTypeFloat 64 +%5 = OpTypeFloat 32 %4 = OpTypeVector %5 2 %6 = OpTypeStruct %3 %4 -%8 = OpTypeFloat 32 -%7 = OpTypeVector %8 4 -%9 = OpTypeStruct %7 %3 -%11 = OpTypeStruct %6 -%12 = OpTypePointer PushConstant %11 -%10 = OpVariable %12 PushConstant -%16 = OpTypePointer Input %7 -%15 = OpVariable %16 Input -%19 = OpTypePointer Input %3 -%18 = OpVariable %19 Input -%22 = OpTypePointer Output %7 -%21 = OpVariable %22 Output -%24 = OpTypeFunction %2 -%25 = OpTypePointer PushConstant %6 -%26 = OpConstant %3 0 -%28 = OpConstant %8 1.0 -%29 = OpTypeVector %8 3 -%30 = OpConstantComposite %29 %28 %28 %28 -%33 = OpTypePointer PushConstant %3 -%36 = OpTypeBool -%23 = OpFunction %2 None %24 -%13 = OpLabel -%17 = OpLoad %7 %15 -%20 = OpLoad %3 %18 -%14 = OpCompositeConstruct %9 %17 %20 -%27 = OpAccessChain %25 %10 %26 -OpBranch %31 -%31 = OpLabel -%32 = OpCompositeExtract %3 %14 1 -%34 = OpAccessChain %33 %27 %26 -%35 = OpLoad %3 %34 -%37 = OpIEqual %36 %32 %35 -OpSelectionMerge %38 None -OpBranchConditional %37 %39 %40 -%39 = OpLabel -%41 = OpCompositeExtract %7 %14 0 -OpStore %21 %41 +%7 = OpTypeVector %5 4 +%8 = OpTypeStruct %7 %3 +%10 = OpTypeStruct %6 +%11 = OpTypePointer PushConstant %10 +%9 = OpVariable %11 PushConstant +%15 = OpTypePointer Input %7 +%14 = OpVariable %15 Input +%18 = OpTypePointer Input %3 +%17 = OpVariable %18 Input +%21 = OpTypePointer Output %7 +%20 = OpVariable %21 Output +%23 = OpTypeFunction %2 +%24 = OpTypePointer PushConstant %6 +%25 = OpConstant %3 0 +%27 = OpConstant %5 1.0 +%28 = OpTypeVector %5 3 +%29 = OpConstantComposite %28 %27 %27 %27 +%32 = OpTypePointer PushConstant %3 +%35 = OpTypeBool +%22 = OpFunction %2 None %23 +%12 = OpLabel +%16 = OpLoad %7 %14 +%19 = OpLoad %3 %17 +%13 = OpCompositeConstruct %8 %16 %19 +%26 = OpAccessChain %24 %9 %25 +OpBranch %30 +%30 = OpLabel +%31 = OpCompositeExtract %3 %13 1 +%33 = OpAccessChain %32 %26 %25 +%34 = OpLoad %3 %33 +%36 = OpIEqual %35 %31 %34 +OpSelectionMerge %37 None +OpBranchConditional %36 %38 %39 +%38 = OpLabel +%40 = OpCompositeExtract %7 %13 0 +OpStore %20 %40 OpReturn -%40 = OpLabel -%42 = OpCompositeExtract %7 %14 0 -%43 = OpVectorShuffle %29 %42 %42 0 1 2 -%44 = OpFSub %29 %30 %43 -%45 = OpCompositeExtract %7 %14 0 -%46 = OpCompositeExtract %8 %45 3 -%47 = OpCompositeConstruct %7 %44 %46 -OpStore %21 %47 +%39 = OpLabel +%41 = OpCompositeExtract %7 %13 0 +%42 = OpVectorShuffle %28 %41 %41 0 1 2 +%43 = OpFSub %28 %29 %42 +%44 = OpCompositeExtract %7 %13 0 +%45 = OpCompositeExtract %5 %44 3 +%46 = OpCompositeConstruct %7 %43 %45 +OpStore %20 %46 OpReturn -%38 = OpLabel +%37 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/f16.spvasm b/naga/tests/out/spv/f16.spvasm new file mode 100644 index 00000000000..3d8fcea7a75 --- /dev/null +++ b/naga/tests/out/spv/f16.spvasm @@ -0,0 +1,633 @@ +; SPIR-V +; Version: 1.0 +; Generator: rspirv +; Bound: 511 +OpCapability Shader +OpCapability Float16 +OpCapability StorageBuffer16BitAccess +OpCapability UniformAndStorageBuffer16BitAccess +OpCapability StorageInputOutput16 +OpExtension "SPV_KHR_storage_buffer_storage_class" +OpExtension "SPV_KHR_16bit_storage" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %499 "main" +OpExecutionMode %499 LocalSize 1 1 1 +OpMemberDecorate %19 0 Offset 0 +OpMemberDecorate %19 1 Offset 4 +OpMemberDecorate %19 2 Offset 8 +OpMemberDecorate %19 3 Offset 12 +OpMemberDecorate %19 4 Offset 16 +OpMemberDecorate %19 5 Offset 24 +OpMemberDecorate %19 6 Offset 32 +OpMemberDecorate %19 7 Offset 40 +OpMemberDecorate %19 8 Offset 44 +OpMemberDecorate %19 8 ColMajor +OpMemberDecorate %19 8 MatrixStride 4 +OpMemberDecorate %19 9 Offset 56 +OpMemberDecorate %19 9 ColMajor +OpMemberDecorate %19 9 MatrixStride 8 +OpMemberDecorate %19 10 Offset 72 +OpMemberDecorate %19 10 ColMajor +OpMemberDecorate %19 10 MatrixStride 8 +OpMemberDecorate %19 11 Offset 88 +OpMemberDecorate %19 11 ColMajor +OpMemberDecorate %19 11 MatrixStride 4 +OpMemberDecorate %19 12 Offset 104 +OpMemberDecorate %19 12 ColMajor +OpMemberDecorate %19 12 MatrixStride 8 +OpMemberDecorate %19 13 Offset 128 +OpMemberDecorate %19 13 ColMajor +OpMemberDecorate %19 13 MatrixStride 8 +OpMemberDecorate %19 14 Offset 152 +OpMemberDecorate %19 14 ColMajor +OpMemberDecorate %19 14 MatrixStride 4 +OpMemberDecorate %19 15 Offset 168 +OpMemberDecorate %19 15 ColMajor +OpMemberDecorate %19 15 MatrixStride 8 +OpMemberDecorate %19 16 Offset 200 +OpMemberDecorate %19 16 ColMajor +OpMemberDecorate %19 16 MatrixStride 8 +OpDecorate %20 ArrayStride 2 +OpMemberDecorate %22 0 Offset 0 +OpMemberDecorate %23 0 Offset 0 +OpMemberDecorate %23 1 Offset 2 +OpMemberDecorate %23 2 Offset 8 +OpMemberDecorate %23 3 Offset 14 +OpMemberDecorate %23 4 Offset 16 +OpMemberDecorate %23 5 Offset 20 +OpDecorate %28 DescriptorSet 0 +OpDecorate %28 Binding 0 +OpDecorate %29 Block +OpMemberDecorate %29 0 Offset 0 +OpDecorate %31 NonWritable +OpDecorate %31 DescriptorSet 0 +OpDecorate %31 Binding 1 +OpDecorate %32 Block +OpMemberDecorate %32 0 Offset 0 +OpDecorate %34 NonWritable +OpDecorate %34 DescriptorSet 0 +OpDecorate %34 Binding 2 +OpDecorate %35 Block +OpMemberDecorate %35 0 Offset 0 +OpDecorate %37 DescriptorSet 0 +OpDecorate %37 Binding 3 +OpDecorate %38 Block +OpMemberDecorate %38 0 Offset 0 +OpDecorate %40 DescriptorSet 0 +OpDecorate %40 Binding 4 +OpDecorate %41 Block +OpMemberDecorate %41 0 Offset 0 +%2 = OpTypeVoid +%3 = OpTypeFloat 16 +%4 = OpTypeInt 32 0 +%5 = OpTypeInt 32 1 +%6 = OpTypeFloat 32 +%7 = OpTypeVector %3 2 +%8 = OpTypeVector %3 3 +%9 = OpTypeVector %3 4 +%10 = OpTypeMatrix %7 2 +%11 = OpTypeMatrix %8 2 +%12 = OpTypeMatrix %9 2 +%13 = OpTypeMatrix %7 3 +%14 = OpTypeMatrix %8 3 +%15 = OpTypeMatrix %9 3 +%16 = OpTypeMatrix %7 4 +%17 = OpTypeMatrix %8 4 +%18 = OpTypeMatrix %9 4 +%19 = OpTypeStruct %4 %5 %6 %3 %7 %8 %9 %3 %10 %11 %12 %13 %14 %15 %16 %17 %18 +%21 = OpConstant %4 2 +%20 = OpTypeArray %3 %21 +%22 = OpTypeStruct %20 +%23 = OpTypeStruct %3 %3 %8 %3 %3 %4 +%24 = OpConstant %3 2.1524e-41 +%25 = OpConstant %3 2.7121e-41 +%27 = OpTypePointer Private %3 +%26 = OpVariable %27 Private %24 +%29 = OpTypeStruct %19 +%30 = OpTypePointer Uniform %29 +%28 = OpVariable %30 Uniform +%32 = OpTypeStruct %19 +%33 = OpTypePointer StorageBuffer %32 +%31 = OpVariable %33 StorageBuffer +%35 = OpTypeStruct %22 +%36 = OpTypePointer StorageBuffer %35 +%34 = OpVariable %36 StorageBuffer +%38 = OpTypeStruct %19 +%39 = OpTypePointer StorageBuffer %38 +%37 = OpVariable %39 StorageBuffer +%41 = OpTypeStruct %22 +%42 = OpTypePointer StorageBuffer %41 +%40 = OpVariable %42 StorageBuffer +%46 = OpTypeFunction %3 %3 +%47 = OpTypePointer Uniform %19 +%48 = OpConstant %4 0 +%50 = OpTypePointer StorageBuffer %19 +%52 = OpTypePointer StorageBuffer %22 +%56 = OpConstant %3 8.8991e-41 +%57 = OpConstant %3 2.4753e-41 +%59 = OpTypePointer Function %3 +%67 = OpTypePointer Uniform %6 +%76 = OpTypePointer Uniform %3 +%77 = OpConstant %4 3 +%84 = OpTypePointer StorageBuffer %3 +%91 = OpTypePointer StorageBuffer %7 +%92 = OpTypePointer Uniform %7 +%93 = OpConstant %4 4 +%100 = OpTypePointer StorageBuffer %8 +%101 = OpTypePointer Uniform %8 +%102 = OpConstant %4 5 +%109 = OpTypePointer StorageBuffer %9 +%110 = OpTypePointer Uniform %9 +%111 = OpConstant %4 6 +%118 = OpTypePointer StorageBuffer %10 +%119 = OpTypePointer Uniform %10 +%120 = OpConstant %4 8 +%133 = OpTypePointer StorageBuffer %11 +%134 = OpTypePointer Uniform %11 +%135 = OpConstant %4 9 +%148 = OpTypePointer StorageBuffer %12 +%149 = OpTypePointer Uniform %12 +%150 = OpConstant %4 10 +%163 = OpTypePointer StorageBuffer %13 +%164 = OpTypePointer Uniform %13 +%165 = OpConstant %4 11 +%181 = OpTypePointer StorageBuffer %14 +%182 = OpTypePointer Uniform %14 +%183 = OpConstant %4 12 +%199 = OpTypePointer StorageBuffer %15 +%200 = OpTypePointer Uniform %15 +%201 = OpConstant %4 13 +%217 = OpTypePointer StorageBuffer %16 +%218 = OpTypePointer Uniform %16 +%219 = OpConstant %4 14 +%238 = OpTypePointer StorageBuffer %17 +%239 = OpTypePointer Uniform %17 +%240 = OpConstant %4 15 +%259 = OpTypePointer StorageBuffer %18 +%260 = OpTypePointer Uniform %18 +%261 = OpConstant %4 16 +%280 = OpTypePointer StorageBuffer %20 +%319 = OpTypeVector %6 2 +%325 = OpTypeVector %6 3 +%331 = OpTypeVector %6 4 +%337 = OpTypeMatrix %319 2 +%351 = OpTypeMatrix %325 2 +%365 = OpTypeMatrix %331 2 +%379 = OpTypeMatrix %319 3 +%397 = OpTypeMatrix %325 3 +%415 = OpTypeMatrix %331 3 +%433 = OpTypeMatrix %319 4 +%455 = OpTypeMatrix %325 4 +%477 = OpTypeMatrix %331 4 +%500 = OpTypeFunction %2 +%506 = OpConstant %3 2.2959e-41 +%509 = OpConstant %4 7 +%45 = OpFunction %3 None %46 +%44 = OpFunctionParameter %3 +%43 = OpLabel +%58 = OpVariable %59 Function %25 +%49 = OpAccessChain %47 %28 %48 +%51 = OpAccessChain %50 %31 %48 +%53 = OpAccessChain %52 %34 %48 +%54 = OpAccessChain %50 %37 %48 +%55 = OpAccessChain %52 %40 %48 +OpBranch %60 +%60 = OpLabel +%61 = OpLoad %3 %58 +%62 = OpFAdd %3 %61 %56 +OpStore %58 %62 +%63 = OpLoad %3 %58 +%64 = OpFAdd %3 %63 %57 +%65 = OpLoad %3 %58 +%66 = OpFAdd %3 %65 %64 +OpStore %58 %66 +%68 = OpAccessChain %67 %49 %21 +%69 = OpLoad %6 %68 +%70 = OpLoad %3 %58 +%71 = OpFConvert %6 %70 +%72 = OpFAdd %6 %69 %71 +%73 = OpFConvert %3 %72 +%74 = OpLoad %3 %58 +%75 = OpFAdd %3 %74 %73 +OpStore %58 %75 +%78 = OpAccessChain %76 %49 %77 +%79 = OpLoad %3 %78 +%80 = OpCompositeConstruct %8 %79 %79 %79 +%81 = OpCompositeExtract %3 %80 2 +%82 = OpLoad %3 %58 +%83 = OpFAdd %3 %82 %81 +OpStore %58 %83 +%85 = OpAccessChain %76 %49 %77 +%86 = OpLoad %3 %85 +%87 = OpAccessChain %84 %51 %77 +%88 = OpLoad %3 %87 +%89 = OpFAdd %3 %86 %88 +%90 = OpAccessChain %84 %54 %77 +OpStore %90 %89 +%94 = OpAccessChain %92 %49 %93 +%95 = OpLoad %7 %94 +%96 = OpAccessChain %91 %51 %93 +%97 = OpLoad %7 %96 +%98 = OpFAdd %7 %95 %97 +%99 = OpAccessChain %91 %54 %93 +OpStore %99 %98 +%103 = OpAccessChain %101 %49 %102 +%104 = OpLoad %8 %103 +%105 = OpAccessChain %100 %51 %102 +%106 = OpLoad %8 %105 +%107 = OpFAdd %8 %104 %106 +%108 = OpAccessChain %100 %54 %102 +OpStore %108 %107 +%112 = OpAccessChain %110 %49 %111 +%113 = OpLoad %9 %112 +%114 = OpAccessChain %109 %51 %111 +%115 = OpLoad %9 %114 +%116 = OpFAdd %9 %113 %115 +%117 = OpAccessChain %109 %54 %111 +OpStore %117 %116 +%121 = OpAccessChain %119 %49 %120 +%122 = OpLoad %10 %121 +%123 = OpAccessChain %118 %51 %120 +%124 = OpLoad %10 %123 +%126 = OpCompositeExtract %7 %122 0 +%127 = OpCompositeExtract %7 %124 0 +%128 = OpFAdd %7 %126 %127 +%129 = OpCompositeExtract %7 %122 1 +%130 = OpCompositeExtract %7 %124 1 +%131 = OpFAdd %7 %129 %130 +%125 = OpCompositeConstruct %10 %128 %131 +%132 = OpAccessChain %118 %54 %120 +OpStore %132 %125 +%136 = OpAccessChain %134 %49 %135 +%137 = OpLoad %11 %136 +%138 = OpAccessChain %133 %51 %135 +%139 = OpLoad %11 %138 +%141 = OpCompositeExtract %8 %137 0 +%142 = OpCompositeExtract %8 %139 0 +%143 = OpFAdd %8 %141 %142 +%144 = OpCompositeExtract %8 %137 1 +%145 = OpCompositeExtract %8 %139 1 +%146 = OpFAdd %8 %144 %145 +%140 = OpCompositeConstruct %11 %143 %146 +%147 = OpAccessChain %133 %54 %135 +OpStore %147 %140 +%151 = OpAccessChain %149 %49 %150 +%152 = OpLoad %12 %151 +%153 = OpAccessChain %148 %51 %150 +%154 = OpLoad %12 %153 +%156 = OpCompositeExtract %9 %152 0 +%157 = OpCompositeExtract %9 %154 0 +%158 = OpFAdd %9 %156 %157 +%159 = OpCompositeExtract %9 %152 1 +%160 = OpCompositeExtract %9 %154 1 +%161 = OpFAdd %9 %159 %160 +%155 = OpCompositeConstruct %12 %158 %161 +%162 = OpAccessChain %148 %54 %150 +OpStore %162 %155 +%166 = OpAccessChain %164 %49 %165 +%167 = OpLoad %13 %166 +%168 = OpAccessChain %163 %51 %165 +%169 = OpLoad %13 %168 +%171 = OpCompositeExtract %7 %167 0 +%172 = OpCompositeExtract %7 %169 0 +%173 = OpFAdd %7 %171 %172 +%174 = OpCompositeExtract %7 %167 1 +%175 = OpCompositeExtract %7 %169 1 +%176 = OpFAdd %7 %174 %175 +%177 = OpCompositeExtract %7 %167 2 +%178 = OpCompositeExtract %7 %169 2 +%179 = OpFAdd %7 %177 %178 +%170 = OpCompositeConstruct %13 %173 %176 %179 +%180 = OpAccessChain %163 %54 %165 +OpStore %180 %170 +%184 = OpAccessChain %182 %49 %183 +%185 = OpLoad %14 %184 +%186 = OpAccessChain %181 %51 %183 +%187 = OpLoad %14 %186 +%189 = OpCompositeExtract %8 %185 0 +%190 = OpCompositeExtract %8 %187 0 +%191 = OpFAdd %8 %189 %190 +%192 = OpCompositeExtract %8 %185 1 +%193 = OpCompositeExtract %8 %187 1 +%194 = OpFAdd %8 %192 %193 +%195 = OpCompositeExtract %8 %185 2 +%196 = OpCompositeExtract %8 %187 2 +%197 = OpFAdd %8 %195 %196 +%188 = OpCompositeConstruct %14 %191 %194 %197 +%198 = OpAccessChain %181 %54 %183 +OpStore %198 %188 +%202 = OpAccessChain %200 %49 %201 +%203 = OpLoad %15 %202 +%204 = OpAccessChain %199 %51 %201 +%205 = OpLoad %15 %204 +%207 = OpCompositeExtract %9 %203 0 +%208 = OpCompositeExtract %9 %205 0 +%209 = OpFAdd %9 %207 %208 +%210 = OpCompositeExtract %9 %203 1 +%211 = OpCompositeExtract %9 %205 1 +%212 = OpFAdd %9 %210 %211 +%213 = OpCompositeExtract %9 %203 2 +%214 = OpCompositeExtract %9 %205 2 +%215 = OpFAdd %9 %213 %214 +%206 = OpCompositeConstruct %15 %209 %212 %215 +%216 = OpAccessChain %199 %54 %201 +OpStore %216 %206 +%220 = OpAccessChain %218 %49 %219 +%221 = OpLoad %16 %220 +%222 = OpAccessChain %217 %51 %219 +%223 = OpLoad %16 %222 +%225 = OpCompositeExtract %7 %221 0 +%226 = OpCompositeExtract %7 %223 0 +%227 = OpFAdd %7 %225 %226 +%228 = OpCompositeExtract %7 %221 1 +%229 = OpCompositeExtract %7 %223 1 +%230 = OpFAdd %7 %228 %229 +%231 = OpCompositeExtract %7 %221 2 +%232 = OpCompositeExtract %7 %223 2 +%233 = OpFAdd %7 %231 %232 +%234 = OpCompositeExtract %7 %221 3 +%235 = OpCompositeExtract %7 %223 3 +%236 = OpFAdd %7 %234 %235 +%224 = OpCompositeConstruct %16 %227 %230 %233 %236 +%237 = OpAccessChain %217 %54 %219 +OpStore %237 %224 +%241 = OpAccessChain %239 %49 %240 +%242 = OpLoad %17 %241 +%243 = OpAccessChain %238 %51 %240 +%244 = OpLoad %17 %243 +%246 = OpCompositeExtract %8 %242 0 +%247 = OpCompositeExtract %8 %244 0 +%248 = OpFAdd %8 %246 %247 +%249 = OpCompositeExtract %8 %242 1 +%250 = OpCompositeExtract %8 %244 1 +%251 = OpFAdd %8 %249 %250 +%252 = OpCompositeExtract %8 %242 2 +%253 = OpCompositeExtract %8 %244 2 +%254 = OpFAdd %8 %252 %253 +%255 = OpCompositeExtract %8 %242 3 +%256 = OpCompositeExtract %8 %244 3 +%257 = OpFAdd %8 %255 %256 +%245 = OpCompositeConstruct %17 %248 %251 %254 %257 +%258 = OpAccessChain %238 %54 %240 +OpStore %258 %245 +%262 = OpAccessChain %260 %49 %261 +%263 = OpLoad %18 %262 +%264 = OpAccessChain %259 %51 %261 +%265 = OpLoad %18 %264 +%267 = OpCompositeExtract %9 %263 0 +%268 = OpCompositeExtract %9 %265 0 +%269 = OpFAdd %9 %267 %268 +%270 = OpCompositeExtract %9 %263 1 +%271 = OpCompositeExtract %9 %265 1 +%272 = OpFAdd %9 %270 %271 +%273 = OpCompositeExtract %9 %263 2 +%274 = OpCompositeExtract %9 %265 2 +%275 = OpFAdd %9 %273 %274 +%276 = OpCompositeExtract %9 %263 3 +%277 = OpCompositeExtract %9 %265 3 +%278 = OpFAdd %9 %276 %277 +%266 = OpCompositeConstruct %18 %269 %272 %275 %278 +%279 = OpAccessChain %259 %54 %261 +OpStore %279 %266 +%281 = OpAccessChain %280 %53 %48 +%282 = OpLoad %20 %281 +%283 = OpAccessChain %280 %55 %48 +OpStore %283 %282 +%284 = OpLoad %3 %58 +%285 = OpExtInst %3 %1 FAbs %284 +%286 = OpLoad %3 %58 +%287 = OpFAdd %3 %286 %285 +OpStore %58 %287 +%288 = OpLoad %3 %58 +%289 = OpLoad %3 %58 +%290 = OpLoad %3 %58 +%291 = OpExtInst %3 %1 FClamp %288 %289 %290 +%292 = OpLoad %3 %58 +%293 = OpFAdd %3 %292 %291 +OpStore %58 %293 +%294 = OpLoad %3 %58 +%295 = OpCompositeConstruct %7 %294 %294 +%296 = OpLoad %3 %58 +%297 = OpCompositeConstruct %7 %296 %296 +%298 = OpDot %3 %295 %297 +%299 = OpLoad %3 %58 +%300 = OpFAdd %3 %299 %298 +OpStore %58 %300 +%301 = OpLoad %3 %58 +%302 = OpLoad %3 %58 +%303 = OpExtInst %3 %1 FMax %301 %302 +%304 = OpLoad %3 %58 +%305 = OpFAdd %3 %304 %303 +OpStore %58 %305 +%306 = OpLoad %3 %58 +%307 = OpLoad %3 %58 +%308 = OpExtInst %3 %1 FMin %306 %307 +%309 = OpLoad %3 %58 +%310 = OpFAdd %3 %309 %308 +OpStore %58 %310 +%311 = OpLoad %3 %58 +%312 = OpExtInst %3 %1 FSign %311 +%313 = OpLoad %3 %58 +%314 = OpFAdd %3 %313 %312 +OpStore %58 %314 +%315 = OpLoad %3 %58 +%316 = OpFAdd %3 %315 %24 +OpStore %58 %316 +%317 = OpAccessChain %92 %49 %93 +%318 = OpLoad %7 %317 +%320 = OpFConvert %319 %318 +%321 = OpFConvert %7 %320 +%322 = OpAccessChain %91 %54 %93 +OpStore %322 %321 +%323 = OpAccessChain %101 %49 %102 +%324 = OpLoad %8 %323 +%326 = OpFConvert %325 %324 +%327 = OpFConvert %8 %326 +%328 = OpAccessChain %100 %54 %102 +OpStore %328 %327 +%329 = OpAccessChain %110 %49 %111 +%330 = OpLoad %9 %329 +%332 = OpFConvert %331 %330 +%333 = OpFConvert %9 %332 +%334 = OpAccessChain %109 %54 %111 +OpStore %334 %333 +%335 = OpAccessChain %119 %49 %120 +%336 = OpLoad %10 %335 +%338 = OpCompositeExtract %7 %336 0 +%339 = OpFConvert %319 %338 +%340 = OpCompositeExtract %7 %336 1 +%341 = OpFConvert %319 %340 +%342 = OpCompositeConstruct %337 %339 %341 +%343 = OpCompositeExtract %319 %342 0 +%344 = OpFConvert %7 %343 +%345 = OpCompositeExtract %319 %342 1 +%346 = OpFConvert %7 %345 +%347 = OpCompositeConstruct %10 %344 %346 +%348 = OpAccessChain %118 %54 %120 +OpStore %348 %347 +%349 = OpAccessChain %134 %49 %135 +%350 = OpLoad %11 %349 +%352 = OpCompositeExtract %8 %350 0 +%353 = OpFConvert %325 %352 +%354 = OpCompositeExtract %8 %350 1 +%355 = OpFConvert %325 %354 +%356 = OpCompositeConstruct %351 %353 %355 +%357 = OpCompositeExtract %325 %356 0 +%358 = OpFConvert %8 %357 +%359 = OpCompositeExtract %325 %356 1 +%360 = OpFConvert %8 %359 +%361 = OpCompositeConstruct %11 %358 %360 +%362 = OpAccessChain %133 %54 %135 +OpStore %362 %361 +%363 = OpAccessChain %149 %49 %150 +%364 = OpLoad %12 %363 +%366 = OpCompositeExtract %9 %364 0 +%367 = OpFConvert %331 %366 +%368 = OpCompositeExtract %9 %364 1 +%369 = OpFConvert %331 %368 +%370 = OpCompositeConstruct %365 %367 %369 +%371 = OpCompositeExtract %331 %370 0 +%372 = OpFConvert %9 %371 +%373 = OpCompositeExtract %331 %370 1 +%374 = OpFConvert %9 %373 +%375 = OpCompositeConstruct %12 %372 %374 +%376 = OpAccessChain %148 %54 %150 +OpStore %376 %375 +%377 = OpAccessChain %164 %49 %165 +%378 = OpLoad %13 %377 +%380 = OpCompositeExtract %7 %378 0 +%381 = OpFConvert %319 %380 +%382 = OpCompositeExtract %7 %378 1 +%383 = OpFConvert %319 %382 +%384 = OpCompositeExtract %7 %378 2 +%385 = OpFConvert %319 %384 +%386 = OpCompositeConstruct %379 %381 %383 %385 +%387 = OpCompositeExtract %319 %386 0 +%388 = OpFConvert %7 %387 +%389 = OpCompositeExtract %319 %386 1 +%390 = OpFConvert %7 %389 +%391 = OpCompositeExtract %319 %386 2 +%392 = OpFConvert %7 %391 +%393 = OpCompositeConstruct %13 %388 %390 %392 +%394 = OpAccessChain %163 %54 %165 +OpStore %394 %393 +%395 = OpAccessChain %182 %49 %183 +%396 = OpLoad %14 %395 +%398 = OpCompositeExtract %8 %396 0 +%399 = OpFConvert %325 %398 +%400 = OpCompositeExtract %8 %396 1 +%401 = OpFConvert %325 %400 +%402 = OpCompositeExtract %8 %396 2 +%403 = OpFConvert %325 %402 +%404 = OpCompositeConstruct %397 %399 %401 %403 +%405 = OpCompositeExtract %325 %404 0 +%406 = OpFConvert %8 %405 +%407 = OpCompositeExtract %325 %404 1 +%408 = OpFConvert %8 %407 +%409 = OpCompositeExtract %325 %404 2 +%410 = OpFConvert %8 %409 +%411 = OpCompositeConstruct %14 %406 %408 %410 +%412 = OpAccessChain %181 %54 %183 +OpStore %412 %411 +%413 = OpAccessChain %200 %49 %201 +%414 = OpLoad %15 %413 +%416 = OpCompositeExtract %9 %414 0 +%417 = OpFConvert %331 %416 +%418 = OpCompositeExtract %9 %414 1 +%419 = OpFConvert %331 %418 +%420 = OpCompositeExtract %9 %414 2 +%421 = OpFConvert %331 %420 +%422 = OpCompositeConstruct %415 %417 %419 %421 +%423 = OpCompositeExtract %331 %422 0 +%424 = OpFConvert %9 %423 +%425 = OpCompositeExtract %331 %422 1 +%426 = OpFConvert %9 %425 +%427 = OpCompositeExtract %331 %422 2 +%428 = OpFConvert %9 %427 +%429 = OpCompositeConstruct %15 %424 %426 %428 +%430 = OpAccessChain %199 %54 %201 +OpStore %430 %429 +%431 = OpAccessChain %218 %49 %219 +%432 = OpLoad %16 %431 +%434 = OpCompositeExtract %7 %432 0 +%435 = OpFConvert %319 %434 +%436 = OpCompositeExtract %7 %432 1 +%437 = OpFConvert %319 %436 +%438 = OpCompositeExtract %7 %432 2 +%439 = OpFConvert %319 %438 +%440 = OpCompositeExtract %7 %432 3 +%441 = OpFConvert %319 %440 +%442 = OpCompositeConstruct %433 %435 %437 %439 %441 +%443 = OpCompositeExtract %319 %442 0 +%444 = OpFConvert %7 %443 +%445 = OpCompositeExtract %319 %442 1 +%446 = OpFConvert %7 %445 +%447 = OpCompositeExtract %319 %442 2 +%448 = OpFConvert %7 %447 +%449 = OpCompositeExtract %319 %442 3 +%450 = OpFConvert %7 %449 +%451 = OpCompositeConstruct %16 %444 %446 %448 %450 +%452 = OpAccessChain %217 %54 %219 +OpStore %452 %451 +%453 = OpAccessChain %239 %49 %240 +%454 = OpLoad %17 %453 +%456 = OpCompositeExtract %8 %454 0 +%457 = OpFConvert %325 %456 +%458 = OpCompositeExtract %8 %454 1 +%459 = OpFConvert %325 %458 +%460 = OpCompositeExtract %8 %454 2 +%461 = OpFConvert %325 %460 +%462 = OpCompositeExtract %8 %454 3 +%463 = OpFConvert %325 %462 +%464 = OpCompositeConstruct %455 %457 %459 %461 %463 +%465 = OpCompositeExtract %325 %464 0 +%466 = OpFConvert %8 %465 +%467 = OpCompositeExtract %325 %464 1 +%468 = OpFConvert %8 %467 +%469 = OpCompositeExtract %325 %464 2 +%470 = OpFConvert %8 %469 +%471 = OpCompositeExtract %325 %464 3 +%472 = OpFConvert %8 %471 +%473 = OpCompositeConstruct %17 %466 %468 %470 %472 +%474 = OpAccessChain %238 %54 %240 +OpStore %474 %473 +%475 = OpAccessChain %260 %49 %261 +%476 = OpLoad %18 %475 +%478 = OpCompositeExtract %9 %476 0 +%479 = OpFConvert %331 %478 +%480 = OpCompositeExtract %9 %476 1 +%481 = OpFConvert %331 %480 +%482 = OpCompositeExtract %9 %476 2 +%483 = OpFConvert %331 %482 +%484 = OpCompositeExtract %9 %476 3 +%485 = OpFConvert %331 %484 +%486 = OpCompositeConstruct %477 %479 %481 %483 %485 +%487 = OpCompositeExtract %331 %486 0 +%488 = OpFConvert %9 %487 +%489 = OpCompositeExtract %331 %486 1 +%490 = OpFConvert %9 %489 +%491 = OpCompositeExtract %331 %486 2 +%492 = OpFConvert %9 %491 +%493 = OpCompositeExtract %331 %486 3 +%494 = OpFConvert %9 %493 +%495 = OpCompositeConstruct %18 %488 %490 %492 %494 +%496 = OpAccessChain %259 %54 %261 +OpStore %496 %495 +%497 = OpLoad %3 %58 +OpReturnValue %497 +OpFunctionEnd +%499 = OpFunction %2 None %500 +%498 = OpLabel +%501 = OpAccessChain %47 %28 %48 +%502 = OpAccessChain %50 %31 %48 +%503 = OpAccessChain %52 %34 %48 +%504 = OpAccessChain %50 %37 %48 +%505 = OpAccessChain %52 %40 %48 +OpBranch %507 +%507 = OpLabel +%508 = OpFunctionCall %3 %45 %506 +%510 = OpAccessChain %84 %504 %509 +OpStore %510 %508 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/image.spvasm b/naga/tests/out/spv/image.spvasm index 7a34bed86e0..94d8521267e 100644 --- a/naga/tests/out/spv/image.spvasm +++ b/naga/tests/out/spv/image.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 546 +; Bound: 545 OpCapability Shader OpCapability Image1D OpCapability Sampled1D @@ -10,19 +10,19 @@ OpCapability ImageQuery %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %99 "main" %96 -OpEntryPoint GLCompute %190 "depth_load" %188 -OpEntryPoint Vertex %210 "queries" %208 -OpEntryPoint Vertex %262 "levels_queries" %261 -OpEntryPoint Fragment %293 "texture_sample" %292 -OpEntryPoint Fragment %439 "texture_sample_comparison" %437 -OpEntryPoint Fragment %495 "gather" %494 -OpEntryPoint Fragment %529 "depth_no_comparison" %528 +OpEntryPoint GLCompute %189 "depth_load" %187 +OpEntryPoint Vertex %209 "queries" %207 +OpEntryPoint Vertex %261 "levels_queries" %260 +OpEntryPoint Fragment %292 "texture_sample" %291 +OpEntryPoint Fragment %438 "texture_sample_comparison" %436 +OpEntryPoint Fragment %494 "gather" %493 +OpEntryPoint Fragment %528 "depth_no_comparison" %527 OpExecutionMode %99 LocalSize 16 1 1 -OpExecutionMode %190 LocalSize 16 1 1 -OpExecutionMode %293 OriginUpperLeft -OpExecutionMode %439 OriginUpperLeft -OpExecutionMode %495 OriginUpperLeft -OpExecutionMode %529 OriginUpperLeft +OpExecutionMode %189 LocalSize 16 1 1 +OpExecutionMode %292 OriginUpperLeft +OpExecutionMode %438 OriginUpperLeft +OpExecutionMode %494 OriginUpperLeft +OpExecutionMode %528 OriginUpperLeft OpName %31 "image_mipmapped_src" OpName %33 "image_multisampled_src" OpName %35 "image_depth_multisampled_src" @@ -50,16 +50,16 @@ OpName %76 "lhs" OpName %77 "rhs" OpName %96 "local_id" OpName %99 "main" -OpName %188 "local_id" -OpName %190 "depth_load" -OpName %210 "queries" -OpName %262 "levels_queries" -OpName %293 "texture_sample" -OpName %306 "a" -OpName %439 "texture_sample_comparison" -OpName %444 "a" -OpName %495 "gather" -OpName %529 "depth_no_comparison" +OpName %187 "local_id" +OpName %189 "depth_load" +OpName %209 "queries" +OpName %261 "levels_queries" +OpName %292 "texture_sample" +OpName %305 "a" +OpName %438 "texture_sample_comparison" +OpName %443 "a" +OpName %494 "gather" +OpName %528 "depth_no_comparison" OpDecorate %31 DescriptorSet 0 OpDecorate %31 Binding 0 OpDecorate %33 DescriptorSet 0 @@ -108,13 +108,13 @@ OpDecorate %70 Binding 3 OpDecorate %72 DescriptorSet 1 OpDecorate %72 Binding 4 OpDecorate %96 BuiltIn LocalInvocationId -OpDecorate %188 BuiltIn LocalInvocationId -OpDecorate %208 BuiltIn Position -OpDecorate %261 BuiltIn Position -OpDecorate %292 Location 0 -OpDecorate %437 Location 0 -OpDecorate %494 Location 0 -OpDecorate %528 Location 0 +OpDecorate %187 BuiltIn LocalInvocationId +OpDecorate %207 BuiltIn Position +OpDecorate %260 BuiltIn Position +OpDecorate %291 Location 0 +OpDecorate %436 Location 0 +OpDecorate %493 Location 0 +OpDecorate %527 Location 0 %2 = OpTypeVoid %4 = OpTypeInt 32 0 %3 = OpTypeImage %4 2D 0 0 0 1 Unknown @@ -206,40 +206,40 @@ OpDecorate %528 Location 0 %111 = OpTypeVector %4 2 %119 = OpTypeVector %4 4 %130 = OpTypeVector %14 3 -%188 = OpVariable %97 Input -%209 = OpTypePointer Output %23 -%208 = OpVariable %209 Output -%219 = OpConstant %4 0 -%261 = OpVariable %209 Output -%292 = OpVariable %209 Output -%299 = OpConstant %7 0.5 -%300 = OpTypeVector %7 2 -%301 = OpConstantComposite %300 %299 %299 -%302 = OpTypeVector %7 3 -%303 = OpConstantComposite %302 %299 %299 %299 -%304 = OpConstant %7 2.3 -%305 = OpConstant %7 2.0 -%307 = OpTypePointer Function %23 -%308 = OpConstantNull %23 -%311 = OpTypeSampledImage %15 -%316 = OpTypeSampledImage %16 -%337 = OpTypeSampledImage %18 -%398 = OpTypeSampledImage %20 -%438 = OpTypePointer Output %7 -%437 = OpVariable %438 Output -%445 = OpTypePointer Function %7 -%446 = OpConstantNull %7 -%448 = OpTypeSampledImage %25 -%453 = OpTypeSampledImage %26 -%466 = OpTypeSampledImage %27 -%473 = OpConstant %7 0.0 -%494 = OpVariable %209 Output -%505 = OpConstant %4 1 -%508 = OpConstant %4 3 -%513 = OpTypeSampledImage %3 -%516 = OpTypeVector %14 4 -%517 = OpTypeSampledImage %17 -%528 = OpVariable %209 Output +%187 = OpVariable %97 Input +%208 = OpTypePointer Output %23 +%207 = OpVariable %208 Output +%218 = OpConstant %4 0 +%260 = OpVariable %208 Output +%291 = OpVariable %208 Output +%298 = OpConstant %7 0.5 +%299 = OpTypeVector %7 2 +%300 = OpConstantComposite %299 %298 %298 +%301 = OpTypeVector %7 3 +%302 = OpConstantComposite %301 %298 %298 %298 +%303 = OpConstant %7 2.3 +%304 = OpConstant %7 2.0 +%306 = OpTypePointer Function %23 +%307 = OpConstantNull %23 +%310 = OpTypeSampledImage %15 +%315 = OpTypeSampledImage %16 +%336 = OpTypeSampledImage %18 +%397 = OpTypeSampledImage %20 +%437 = OpTypePointer Output %7 +%436 = OpVariable %437 Output +%444 = OpTypePointer Function %7 +%445 = OpConstantNull %7 +%447 = OpTypeSampledImage %25 +%452 = OpTypeSampledImage %26 +%465 = OpTypeSampledImage %27 +%472 = OpConstant %7 0.0 +%493 = OpVariable %208 Output +%504 = OpConstant %4 1 +%507 = OpConstant %4 3 +%512 = OpTypeSampledImage %3 +%515 = OpTypeVector %14 4 +%516 = OpTypeSampledImage %17 +%527 = OpVariable %208 Output %74 = OpFunction %13 None %75 %76 = OpFunctionParameter %13 %77 = OpFunctionParameter %13 @@ -322,404 +322,404 @@ OpBranch %110 %169 = OpCompositeConstruct %12 %162 %168 %170 = OpImageFetch %119 %104 %169 Lod %167 %171 = OpCompositeExtract %4 %98 0 -%173 = OpCompositeExtract %4 %98 2 -%174 = OpBitcast %14 %173 -%175 = OpImageFetch %119 %105 %171 Lod %174 -%176 = OpCompositeExtract %14 %116 0 -%177 = OpIAdd %119 %120 %123 -%178 = OpIAdd %119 %177 %124 -%179 = OpIAdd %119 %178 %132 -%180 = OpIAdd %119 %179 %139 -OpImageWrite %106 %176 %180 -%181 = OpCompositeExtract %14 %116 0 -%182 = OpBitcast %4 %181 -%183 = OpIAdd %119 %148 %152 -%184 = OpIAdd %119 %183 %154 -%185 = OpIAdd %119 %184 %161 -%186 = OpIAdd %119 %185 %170 -OpImageWrite %106 %182 %186 +%172 = OpCompositeExtract %4 %98 2 +%173 = OpBitcast %14 %172 +%174 = OpImageFetch %119 %105 %171 Lod %173 +%175 = OpCompositeExtract %14 %116 0 +%176 = OpIAdd %119 %120 %123 +%177 = OpIAdd %119 %176 %124 +%178 = OpIAdd %119 %177 %132 +%179 = OpIAdd %119 %178 %139 +OpImageWrite %106 %175 %179 +%180 = OpCompositeExtract %14 %116 0 +%181 = OpBitcast %4 %180 +%182 = OpIAdd %119 %148 %152 +%183 = OpIAdd %119 %182 %154 +%184 = OpIAdd %119 %183 %161 +%185 = OpIAdd %119 %184 %170 +OpImageWrite %106 %181 %185 OpReturn OpFunctionEnd -%190 = OpFunction %2 None %100 -%187 = OpLabel -%189 = OpLoad %12 %188 -%191 = OpLoad %6 %35 -%192 = OpLoad %8 %37 -%193 = OpLoad %10 %45 -OpBranch %194 -%194 = OpLabel -%195 = OpImageQuerySize %111 %192 -%196 = OpVectorShuffle %111 %189 %189 0 1 -%197 = OpIMul %111 %195 %196 -%198 = OpBitcast %13 %197 -%199 = OpFunctionCall %13 %74 %198 %109 -%200 = OpCompositeExtract %4 %189 2 -%201 = OpBitcast %14 %200 -%202 = OpImageFetch %23 %191 %199 Sample %201 -%203 = OpCompositeExtract %7 %202 0 -%204 = OpCompositeExtract %14 %199 0 -%205 = OpConvertFToU %4 %203 -%206 = OpCompositeConstruct %119 %205 %205 %205 %205 -OpImageWrite %193 %204 %206 +%189 = OpFunction %2 None %100 +%186 = OpLabel +%188 = OpLoad %12 %187 +%190 = OpLoad %6 %35 +%191 = OpLoad %8 %37 +%192 = OpLoad %10 %45 +OpBranch %193 +%193 = OpLabel +%194 = OpImageQuerySize %111 %191 +%195 = OpVectorShuffle %111 %188 %188 0 1 +%196 = OpIMul %111 %194 %195 +%197 = OpBitcast %13 %196 +%198 = OpFunctionCall %13 %74 %197 %109 +%199 = OpCompositeExtract %4 %188 2 +%200 = OpBitcast %14 %199 +%201 = OpImageFetch %23 %190 %198 Sample %200 +%202 = OpCompositeExtract %7 %201 0 +%203 = OpCompositeExtract %14 %198 0 +%204 = OpConvertFToU %4 %202 +%205 = OpCompositeConstruct %119 %204 %204 %204 %204 +OpImageWrite %192 %203 %205 OpReturn OpFunctionEnd -%210 = OpFunction %2 None %100 -%207 = OpLabel -%211 = OpLoad %15 %47 -%212 = OpLoad %16 %49 -%213 = OpLoad %18 %54 -%214 = OpLoad %19 %56 -%215 = OpLoad %20 %58 -%216 = OpLoad %21 %60 -%217 = OpLoad %22 %62 -OpBranch %218 -%218 = OpLabel -%220 = OpImageQuerySizeLod %4 %211 %219 -%221 = OpBitcast %14 %220 -%222 = OpImageQuerySizeLod %4 %211 %221 -%223 = OpImageQuerySizeLod %111 %212 %219 -%224 = OpImageQuerySizeLod %111 %212 %29 -%225 = OpImageQuerySizeLod %12 %213 %219 -%226 = OpVectorShuffle %111 %225 %225 0 1 -%227 = OpImageQuerySizeLod %12 %213 %29 -%228 = OpVectorShuffle %111 %227 %227 0 1 -%229 = OpImageQuerySizeLod %111 %214 %219 -%230 = OpImageQuerySizeLod %111 %214 %29 -%231 = OpImageQuerySizeLod %12 %215 %219 -%232 = OpVectorShuffle %111 %231 %231 0 0 -%233 = OpImageQuerySizeLod %12 %215 %29 -%234 = OpVectorShuffle %111 %233 %233 0 0 -%235 = OpImageQuerySizeLod %12 %216 %219 -%236 = OpImageQuerySizeLod %12 %216 %29 -%237 = OpImageQuerySize %111 %217 -%238 = OpCompositeExtract %4 %223 1 -%239 = OpIAdd %4 %220 %238 -%240 = OpCompositeExtract %4 %224 1 -%241 = OpIAdd %4 %239 %240 -%242 = OpCompositeExtract %4 %226 1 -%243 = OpIAdd %4 %241 %242 -%244 = OpCompositeExtract %4 %228 1 -%245 = OpIAdd %4 %243 %244 -%246 = OpCompositeExtract %4 %229 1 -%247 = OpIAdd %4 %245 %246 -%248 = OpCompositeExtract %4 %230 1 -%249 = OpIAdd %4 %247 %248 -%250 = OpCompositeExtract %4 %232 1 -%251 = OpIAdd %4 %249 %250 -%252 = OpCompositeExtract %4 %234 1 -%253 = OpIAdd %4 %251 %252 -%254 = OpCompositeExtract %4 %235 2 -%255 = OpIAdd %4 %253 %254 -%256 = OpCompositeExtract %4 %236 2 -%257 = OpIAdd %4 %255 %256 -%258 = OpConvertUToF %7 %257 -%259 = OpCompositeConstruct %23 %258 %258 %258 %258 -OpStore %208 %259 +%209 = OpFunction %2 None %100 +%206 = OpLabel +%210 = OpLoad %15 %47 +%211 = OpLoad %16 %49 +%212 = OpLoad %18 %54 +%213 = OpLoad %19 %56 +%214 = OpLoad %20 %58 +%215 = OpLoad %21 %60 +%216 = OpLoad %22 %62 +OpBranch %217 +%217 = OpLabel +%219 = OpImageQuerySizeLod %4 %210 %218 +%220 = OpBitcast %14 %219 +%221 = OpImageQuerySizeLod %4 %210 %220 +%222 = OpImageQuerySizeLod %111 %211 %218 +%223 = OpImageQuerySizeLod %111 %211 %29 +%224 = OpImageQuerySizeLod %12 %212 %218 +%225 = OpVectorShuffle %111 %224 %224 0 1 +%226 = OpImageQuerySizeLod %12 %212 %29 +%227 = OpVectorShuffle %111 %226 %226 0 1 +%228 = OpImageQuerySizeLod %111 %213 %218 +%229 = OpImageQuerySizeLod %111 %213 %29 +%230 = OpImageQuerySizeLod %12 %214 %218 +%231 = OpVectorShuffle %111 %230 %230 0 0 +%232 = OpImageQuerySizeLod %12 %214 %29 +%233 = OpVectorShuffle %111 %232 %232 0 0 +%234 = OpImageQuerySizeLod %12 %215 %218 +%235 = OpImageQuerySizeLod %12 %215 %29 +%236 = OpImageQuerySize %111 %216 +%237 = OpCompositeExtract %4 %222 1 +%238 = OpIAdd %4 %219 %237 +%239 = OpCompositeExtract %4 %223 1 +%240 = OpIAdd %4 %238 %239 +%241 = OpCompositeExtract %4 %225 1 +%242 = OpIAdd %4 %240 %241 +%243 = OpCompositeExtract %4 %227 1 +%244 = OpIAdd %4 %242 %243 +%245 = OpCompositeExtract %4 %228 1 +%246 = OpIAdd %4 %244 %245 +%247 = OpCompositeExtract %4 %229 1 +%248 = OpIAdd %4 %246 %247 +%249 = OpCompositeExtract %4 %231 1 +%250 = OpIAdd %4 %248 %249 +%251 = OpCompositeExtract %4 %233 1 +%252 = OpIAdd %4 %250 %251 +%253 = OpCompositeExtract %4 %234 2 +%254 = OpIAdd %4 %252 %253 +%255 = OpCompositeExtract %4 %235 2 +%256 = OpIAdd %4 %254 %255 +%257 = OpConvertUToF %7 %256 +%258 = OpCompositeConstruct %23 %257 %257 %257 %257 +OpStore %207 %258 OpReturn OpFunctionEnd -%262 = OpFunction %2 None %100 -%260 = OpLabel -%263 = OpLoad %16 %49 -%264 = OpLoad %18 %54 -%265 = OpLoad %19 %56 -%266 = OpLoad %20 %58 -%267 = OpLoad %21 %60 -%268 = OpLoad %22 %62 -OpBranch %269 -%269 = OpLabel -%270 = OpImageQueryLevels %4 %263 -%271 = OpImageQuerySizeLod %12 %264 %219 -%272 = OpCompositeExtract %4 %271 2 -%273 = OpImageQueryLevels %4 %264 -%274 = OpImageQuerySizeLod %12 %264 %219 -%275 = OpCompositeExtract %4 %274 2 +%261 = OpFunction %2 None %100 +%259 = OpLabel +%262 = OpLoad %16 %49 +%263 = OpLoad %18 %54 +%264 = OpLoad %19 %56 +%265 = OpLoad %20 %58 +%266 = OpLoad %21 %60 +%267 = OpLoad %22 %62 +OpBranch %268 +%268 = OpLabel +%269 = OpImageQueryLevels %4 %262 +%270 = OpImageQuerySizeLod %12 %263 %218 +%271 = OpCompositeExtract %4 %270 2 +%272 = OpImageQueryLevels %4 %263 +%273 = OpImageQuerySizeLod %12 %263 %218 +%274 = OpCompositeExtract %4 %273 2 +%275 = OpImageQueryLevels %4 %264 %276 = OpImageQueryLevels %4 %265 -%277 = OpImageQueryLevels %4 %266 -%278 = OpImageQuerySizeLod %12 %266 %219 -%279 = OpCompositeExtract %4 %278 2 -%280 = OpImageQueryLevels %4 %267 -%281 = OpImageQuerySamples %4 %268 -%282 = OpIAdd %4 %272 %279 -%283 = OpIAdd %4 %282 %281 -%284 = OpIAdd %4 %283 %270 -%285 = OpIAdd %4 %284 %273 -%286 = OpIAdd %4 %285 %280 +%277 = OpImageQuerySizeLod %12 %265 %218 +%278 = OpCompositeExtract %4 %277 2 +%279 = OpImageQueryLevels %4 %266 +%280 = OpImageQuerySamples %4 %267 +%281 = OpIAdd %4 %271 %278 +%282 = OpIAdd %4 %281 %280 +%283 = OpIAdd %4 %282 %269 +%284 = OpIAdd %4 %283 %272 +%285 = OpIAdd %4 %284 %279 +%286 = OpIAdd %4 %285 %275 %287 = OpIAdd %4 %286 %276 -%288 = OpIAdd %4 %287 %277 -%289 = OpConvertUToF %7 %288 -%290 = OpCompositeConstruct %23 %289 %289 %289 %289 -OpStore %261 %290 +%288 = OpConvertUToF %7 %287 +%289 = OpCompositeConstruct %23 %288 %288 %288 %288 +OpStore %260 %289 OpReturn OpFunctionEnd -%293 = OpFunction %2 None %100 -%291 = OpLabel -%306 = OpVariable %307 Function %308 -%294 = OpLoad %15 %47 -%295 = OpLoad %16 %49 -%296 = OpLoad %18 %54 -%297 = OpLoad %20 %58 -%298 = OpLoad %24 %64 -OpBranch %309 -%309 = OpLabel -%310 = OpCompositeExtract %7 %301 0 -%312 = OpSampledImage %311 %294 %298 -%313 = OpImageSampleImplicitLod %23 %312 %310 -%314 = OpLoad %23 %306 -%315 = OpFAdd %23 %314 %313 -OpStore %306 %315 -%317 = OpSampledImage %316 %295 %298 -%318 = OpImageSampleImplicitLod %23 %317 %301 -%319 = OpLoad %23 %306 -%320 = OpFAdd %23 %319 %318 -OpStore %306 %320 -%321 = OpSampledImage %316 %295 %298 -%322 = OpImageSampleImplicitLod %23 %321 %301 ConstOffset %30 -%323 = OpLoad %23 %306 -%324 = OpFAdd %23 %323 %322 -OpStore %306 %324 -%325 = OpSampledImage %316 %295 %298 -%326 = OpImageSampleExplicitLod %23 %325 %301 Lod %304 -%327 = OpLoad %23 %306 -%328 = OpFAdd %23 %327 %326 -OpStore %306 %328 -%329 = OpSampledImage %316 %295 %298 -%330 = OpImageSampleExplicitLod %23 %329 %301 Lod|ConstOffset %304 %30 -%331 = OpLoad %23 %306 -%332 = OpFAdd %23 %331 %330 -OpStore %306 %332 -%333 = OpSampledImage %316 %295 %298 -%334 = OpImageSampleImplicitLod %23 %333 %301 Bias|ConstOffset %305 %30 -%335 = OpLoad %23 %306 -%336 = OpFAdd %23 %335 %334 -OpStore %306 %336 -%338 = OpConvertUToF %7 %219 -%339 = OpCompositeConstruct %302 %301 %338 -%340 = OpSampledImage %337 %296 %298 -%341 = OpImageSampleImplicitLod %23 %340 %339 -%342 = OpLoad %23 %306 -%343 = OpFAdd %23 %342 %341 -OpStore %306 %343 -%344 = OpConvertUToF %7 %219 -%345 = OpCompositeConstruct %302 %301 %344 -%346 = OpSampledImage %337 %296 %298 -%347 = OpImageSampleImplicitLod %23 %346 %345 ConstOffset %30 -%348 = OpLoad %23 %306 -%349 = OpFAdd %23 %348 %347 -OpStore %306 %349 -%350 = OpConvertUToF %7 %219 -%351 = OpCompositeConstruct %302 %301 %350 -%352 = OpSampledImage %337 %296 %298 -%353 = OpImageSampleExplicitLod %23 %352 %351 Lod %304 -%354 = OpLoad %23 %306 -%355 = OpFAdd %23 %354 %353 -OpStore %306 %355 -%356 = OpConvertUToF %7 %219 -%357 = OpCompositeConstruct %302 %301 %356 -%358 = OpSampledImage %337 %296 %298 -%359 = OpImageSampleExplicitLod %23 %358 %357 Lod|ConstOffset %304 %30 -%360 = OpLoad %23 %306 -%361 = OpFAdd %23 %360 %359 -OpStore %306 %361 -%362 = OpConvertUToF %7 %219 -%363 = OpCompositeConstruct %302 %301 %362 -%364 = OpSampledImage %337 %296 %298 -%365 = OpImageSampleImplicitLod %23 %364 %363 Bias|ConstOffset %305 %30 -%366 = OpLoad %23 %306 -%367 = OpFAdd %23 %366 %365 -OpStore %306 %367 -%368 = OpConvertSToF %7 %81 -%369 = OpCompositeConstruct %302 %301 %368 -%370 = OpSampledImage %337 %296 %298 -%371 = OpImageSampleImplicitLod %23 %370 %369 -%372 = OpLoad %23 %306 -%373 = OpFAdd %23 %372 %371 -OpStore %306 %373 -%374 = OpConvertSToF %7 %81 -%375 = OpCompositeConstruct %302 %301 %374 -%376 = OpSampledImage %337 %296 %298 -%377 = OpImageSampleImplicitLod %23 %376 %375 ConstOffset %30 -%378 = OpLoad %23 %306 -%379 = OpFAdd %23 %378 %377 -OpStore %306 %379 -%380 = OpConvertSToF %7 %81 -%381 = OpCompositeConstruct %302 %301 %380 -%382 = OpSampledImage %337 %296 %298 -%383 = OpImageSampleExplicitLod %23 %382 %381 Lod %304 -%384 = OpLoad %23 %306 -%385 = OpFAdd %23 %384 %383 -OpStore %306 %385 -%386 = OpConvertSToF %7 %81 -%387 = OpCompositeConstruct %302 %301 %386 -%388 = OpSampledImage %337 %296 %298 -%389 = OpImageSampleExplicitLod %23 %388 %387 Lod|ConstOffset %304 %30 -%390 = OpLoad %23 %306 -%391 = OpFAdd %23 %390 %389 -OpStore %306 %391 -%392 = OpConvertSToF %7 %81 -%393 = OpCompositeConstruct %302 %301 %392 -%394 = OpSampledImage %337 %296 %298 -%395 = OpImageSampleImplicitLod %23 %394 %393 Bias|ConstOffset %305 %30 -%396 = OpLoad %23 %306 -%397 = OpFAdd %23 %396 %395 -OpStore %306 %397 -%399 = OpConvertUToF %7 %219 -%400 = OpCompositeConstruct %23 %303 %399 -%401 = OpSampledImage %398 %297 %298 -%402 = OpImageSampleImplicitLod %23 %401 %400 -%403 = OpLoad %23 %306 -%404 = OpFAdd %23 %403 %402 -OpStore %306 %404 -%405 = OpConvertUToF %7 %219 -%406 = OpCompositeConstruct %23 %303 %405 -%407 = OpSampledImage %398 %297 %298 -%408 = OpImageSampleExplicitLod %23 %407 %406 Lod %304 -%409 = OpLoad %23 %306 -%410 = OpFAdd %23 %409 %408 -OpStore %306 %410 -%411 = OpConvertUToF %7 %219 -%412 = OpCompositeConstruct %23 %303 %411 -%413 = OpSampledImage %398 %297 %298 -%414 = OpImageSampleImplicitLod %23 %413 %412 Bias %305 -%415 = OpLoad %23 %306 -%416 = OpFAdd %23 %415 %414 -OpStore %306 %416 -%417 = OpConvertSToF %7 %81 -%418 = OpCompositeConstruct %23 %303 %417 -%419 = OpSampledImage %398 %297 %298 -%420 = OpImageSampleImplicitLod %23 %419 %418 -%421 = OpLoad %23 %306 -%422 = OpFAdd %23 %421 %420 -OpStore %306 %422 -%423 = OpConvertSToF %7 %81 -%424 = OpCompositeConstruct %23 %303 %423 -%425 = OpSampledImage %398 %297 %298 -%426 = OpImageSampleExplicitLod %23 %425 %424 Lod %304 -%427 = OpLoad %23 %306 -%428 = OpFAdd %23 %427 %426 -OpStore %306 %428 -%429 = OpConvertSToF %7 %81 -%430 = OpCompositeConstruct %23 %303 %429 -%431 = OpSampledImage %398 %297 %298 -%432 = OpImageSampleImplicitLod %23 %431 %430 Bias %305 -%433 = OpLoad %23 %306 -%434 = OpFAdd %23 %433 %432 -OpStore %306 %434 -%435 = OpLoad %23 %306 -OpStore %292 %435 +%292 = OpFunction %2 None %100 +%290 = OpLabel +%305 = OpVariable %306 Function %307 +%293 = OpLoad %15 %47 +%294 = OpLoad %16 %49 +%295 = OpLoad %18 %54 +%296 = OpLoad %20 %58 +%297 = OpLoad %24 %64 +OpBranch %308 +%308 = OpLabel +%309 = OpCompositeExtract %7 %300 0 +%311 = OpSampledImage %310 %293 %297 +%312 = OpImageSampleImplicitLod %23 %311 %309 +%313 = OpLoad %23 %305 +%314 = OpFAdd %23 %313 %312 +OpStore %305 %314 +%316 = OpSampledImage %315 %294 %297 +%317 = OpImageSampleImplicitLod %23 %316 %300 +%318 = OpLoad %23 %305 +%319 = OpFAdd %23 %318 %317 +OpStore %305 %319 +%320 = OpSampledImage %315 %294 %297 +%321 = OpImageSampleImplicitLod %23 %320 %300 ConstOffset %30 +%322 = OpLoad %23 %305 +%323 = OpFAdd %23 %322 %321 +OpStore %305 %323 +%324 = OpSampledImage %315 %294 %297 +%325 = OpImageSampleExplicitLod %23 %324 %300 Lod %303 +%326 = OpLoad %23 %305 +%327 = OpFAdd %23 %326 %325 +OpStore %305 %327 +%328 = OpSampledImage %315 %294 %297 +%329 = OpImageSampleExplicitLod %23 %328 %300 Lod|ConstOffset %303 %30 +%330 = OpLoad %23 %305 +%331 = OpFAdd %23 %330 %329 +OpStore %305 %331 +%332 = OpSampledImage %315 %294 %297 +%333 = OpImageSampleImplicitLod %23 %332 %300 Bias|ConstOffset %304 %30 +%334 = OpLoad %23 %305 +%335 = OpFAdd %23 %334 %333 +OpStore %305 %335 +%337 = OpConvertUToF %7 %218 +%338 = OpCompositeConstruct %301 %300 %337 +%339 = OpSampledImage %336 %295 %297 +%340 = OpImageSampleImplicitLod %23 %339 %338 +%341 = OpLoad %23 %305 +%342 = OpFAdd %23 %341 %340 +OpStore %305 %342 +%343 = OpConvertUToF %7 %218 +%344 = OpCompositeConstruct %301 %300 %343 +%345 = OpSampledImage %336 %295 %297 +%346 = OpImageSampleImplicitLod %23 %345 %344 ConstOffset %30 +%347 = OpLoad %23 %305 +%348 = OpFAdd %23 %347 %346 +OpStore %305 %348 +%349 = OpConvertUToF %7 %218 +%350 = OpCompositeConstruct %301 %300 %349 +%351 = OpSampledImage %336 %295 %297 +%352 = OpImageSampleExplicitLod %23 %351 %350 Lod %303 +%353 = OpLoad %23 %305 +%354 = OpFAdd %23 %353 %352 +OpStore %305 %354 +%355 = OpConvertUToF %7 %218 +%356 = OpCompositeConstruct %301 %300 %355 +%357 = OpSampledImage %336 %295 %297 +%358 = OpImageSampleExplicitLod %23 %357 %356 Lod|ConstOffset %303 %30 +%359 = OpLoad %23 %305 +%360 = OpFAdd %23 %359 %358 +OpStore %305 %360 +%361 = OpConvertUToF %7 %218 +%362 = OpCompositeConstruct %301 %300 %361 +%363 = OpSampledImage %336 %295 %297 +%364 = OpImageSampleImplicitLod %23 %363 %362 Bias|ConstOffset %304 %30 +%365 = OpLoad %23 %305 +%366 = OpFAdd %23 %365 %364 +OpStore %305 %366 +%367 = OpConvertSToF %7 %81 +%368 = OpCompositeConstruct %301 %300 %367 +%369 = OpSampledImage %336 %295 %297 +%370 = OpImageSampleImplicitLod %23 %369 %368 +%371 = OpLoad %23 %305 +%372 = OpFAdd %23 %371 %370 +OpStore %305 %372 +%373 = OpConvertSToF %7 %81 +%374 = OpCompositeConstruct %301 %300 %373 +%375 = OpSampledImage %336 %295 %297 +%376 = OpImageSampleImplicitLod %23 %375 %374 ConstOffset %30 +%377 = OpLoad %23 %305 +%378 = OpFAdd %23 %377 %376 +OpStore %305 %378 +%379 = OpConvertSToF %7 %81 +%380 = OpCompositeConstruct %301 %300 %379 +%381 = OpSampledImage %336 %295 %297 +%382 = OpImageSampleExplicitLod %23 %381 %380 Lod %303 +%383 = OpLoad %23 %305 +%384 = OpFAdd %23 %383 %382 +OpStore %305 %384 +%385 = OpConvertSToF %7 %81 +%386 = OpCompositeConstruct %301 %300 %385 +%387 = OpSampledImage %336 %295 %297 +%388 = OpImageSampleExplicitLod %23 %387 %386 Lod|ConstOffset %303 %30 +%389 = OpLoad %23 %305 +%390 = OpFAdd %23 %389 %388 +OpStore %305 %390 +%391 = OpConvertSToF %7 %81 +%392 = OpCompositeConstruct %301 %300 %391 +%393 = OpSampledImage %336 %295 %297 +%394 = OpImageSampleImplicitLod %23 %393 %392 Bias|ConstOffset %304 %30 +%395 = OpLoad %23 %305 +%396 = OpFAdd %23 %395 %394 +OpStore %305 %396 +%398 = OpConvertUToF %7 %218 +%399 = OpCompositeConstruct %23 %302 %398 +%400 = OpSampledImage %397 %296 %297 +%401 = OpImageSampleImplicitLod %23 %400 %399 +%402 = OpLoad %23 %305 +%403 = OpFAdd %23 %402 %401 +OpStore %305 %403 +%404 = OpConvertUToF %7 %218 +%405 = OpCompositeConstruct %23 %302 %404 +%406 = OpSampledImage %397 %296 %297 +%407 = OpImageSampleExplicitLod %23 %406 %405 Lod %303 +%408 = OpLoad %23 %305 +%409 = OpFAdd %23 %408 %407 +OpStore %305 %409 +%410 = OpConvertUToF %7 %218 +%411 = OpCompositeConstruct %23 %302 %410 +%412 = OpSampledImage %397 %296 %297 +%413 = OpImageSampleImplicitLod %23 %412 %411 Bias %304 +%414 = OpLoad %23 %305 +%415 = OpFAdd %23 %414 %413 +OpStore %305 %415 +%416 = OpConvertSToF %7 %81 +%417 = OpCompositeConstruct %23 %302 %416 +%418 = OpSampledImage %397 %296 %297 +%419 = OpImageSampleImplicitLod %23 %418 %417 +%420 = OpLoad %23 %305 +%421 = OpFAdd %23 %420 %419 +OpStore %305 %421 +%422 = OpConvertSToF %7 %81 +%423 = OpCompositeConstruct %23 %302 %422 +%424 = OpSampledImage %397 %296 %297 +%425 = OpImageSampleExplicitLod %23 %424 %423 Lod %303 +%426 = OpLoad %23 %305 +%427 = OpFAdd %23 %426 %425 +OpStore %305 %427 +%428 = OpConvertSToF %7 %81 +%429 = OpCompositeConstruct %23 %302 %428 +%430 = OpSampledImage %397 %296 %297 +%431 = OpImageSampleImplicitLod %23 %430 %429 Bias %304 +%432 = OpLoad %23 %305 +%433 = OpFAdd %23 %432 %431 +OpStore %305 %433 +%434 = OpLoad %23 %305 +OpStore %291 %434 OpReturn OpFunctionEnd -%439 = OpFunction %2 None %100 -%436 = OpLabel -%444 = OpVariable %445 Function %446 -%440 = OpLoad %24 %66 -%441 = OpLoad %25 %68 -%442 = OpLoad %26 %70 -%443 = OpLoad %27 %72 -OpBranch %447 -%447 = OpLabel -%449 = OpSampledImage %448 %441 %440 -%450 = OpImageSampleDrefImplicitLod %7 %449 %301 %299 -%451 = OpLoad %7 %444 -%452 = OpFAdd %7 %451 %450 -OpStore %444 %452 -%454 = OpConvertUToF %7 %219 -%455 = OpCompositeConstruct %302 %301 %454 -%456 = OpSampledImage %453 %442 %440 -%457 = OpImageSampleDrefImplicitLod %7 %456 %455 %299 -%458 = OpLoad %7 %444 -%459 = OpFAdd %7 %458 %457 -OpStore %444 %459 -%460 = OpConvertSToF %7 %81 -%461 = OpCompositeConstruct %302 %301 %460 -%462 = OpSampledImage %453 %442 %440 -%463 = OpImageSampleDrefImplicitLod %7 %462 %461 %299 -%464 = OpLoad %7 %444 -%465 = OpFAdd %7 %464 %463 -OpStore %444 %465 -%467 = OpSampledImage %466 %443 %440 -%468 = OpImageSampleDrefImplicitLod %7 %467 %303 %299 -%469 = OpLoad %7 %444 -%470 = OpFAdd %7 %469 %468 -OpStore %444 %470 -%471 = OpSampledImage %448 %441 %440 -%472 = OpImageSampleDrefExplicitLod %7 %471 %301 %299 Lod %473 -%474 = OpLoad %7 %444 -%475 = OpFAdd %7 %474 %472 -OpStore %444 %475 -%476 = OpConvertUToF %7 %219 -%477 = OpCompositeConstruct %302 %301 %476 -%478 = OpSampledImage %453 %442 %440 -%479 = OpImageSampleDrefExplicitLod %7 %478 %477 %299 Lod %473 -%480 = OpLoad %7 %444 -%481 = OpFAdd %7 %480 %479 -OpStore %444 %481 -%482 = OpConvertSToF %7 %81 -%483 = OpCompositeConstruct %302 %301 %482 -%484 = OpSampledImage %453 %442 %440 -%485 = OpImageSampleDrefExplicitLod %7 %484 %483 %299 Lod %473 -%486 = OpLoad %7 %444 -%487 = OpFAdd %7 %486 %485 -OpStore %444 %487 -%488 = OpSampledImage %466 %443 %440 -%489 = OpImageSampleDrefExplicitLod %7 %488 %303 %299 Lod %473 -%490 = OpLoad %7 %444 -%491 = OpFAdd %7 %490 %489 -OpStore %444 %491 -%492 = OpLoad %7 %444 -OpStore %437 %492 +%438 = OpFunction %2 None %100 +%435 = OpLabel +%443 = OpVariable %444 Function %445 +%439 = OpLoad %24 %66 +%440 = OpLoad %25 %68 +%441 = OpLoad %26 %70 +%442 = OpLoad %27 %72 +OpBranch %446 +%446 = OpLabel +%448 = OpSampledImage %447 %440 %439 +%449 = OpImageSampleDrefImplicitLod %7 %448 %300 %298 +%450 = OpLoad %7 %443 +%451 = OpFAdd %7 %450 %449 +OpStore %443 %451 +%453 = OpConvertUToF %7 %218 +%454 = OpCompositeConstruct %301 %300 %453 +%455 = OpSampledImage %452 %441 %439 +%456 = OpImageSampleDrefImplicitLod %7 %455 %454 %298 +%457 = OpLoad %7 %443 +%458 = OpFAdd %7 %457 %456 +OpStore %443 %458 +%459 = OpConvertSToF %7 %81 +%460 = OpCompositeConstruct %301 %300 %459 +%461 = OpSampledImage %452 %441 %439 +%462 = OpImageSampleDrefImplicitLod %7 %461 %460 %298 +%463 = OpLoad %7 %443 +%464 = OpFAdd %7 %463 %462 +OpStore %443 %464 +%466 = OpSampledImage %465 %442 %439 +%467 = OpImageSampleDrefImplicitLod %7 %466 %302 %298 +%468 = OpLoad %7 %443 +%469 = OpFAdd %7 %468 %467 +OpStore %443 %469 +%470 = OpSampledImage %447 %440 %439 +%471 = OpImageSampleDrefExplicitLod %7 %470 %300 %298 Lod %472 +%473 = OpLoad %7 %443 +%474 = OpFAdd %7 %473 %471 +OpStore %443 %474 +%475 = OpConvertUToF %7 %218 +%476 = OpCompositeConstruct %301 %300 %475 +%477 = OpSampledImage %452 %441 %439 +%478 = OpImageSampleDrefExplicitLod %7 %477 %476 %298 Lod %472 +%479 = OpLoad %7 %443 +%480 = OpFAdd %7 %479 %478 +OpStore %443 %480 +%481 = OpConvertSToF %7 %81 +%482 = OpCompositeConstruct %301 %300 %481 +%483 = OpSampledImage %452 %441 %439 +%484 = OpImageSampleDrefExplicitLod %7 %483 %482 %298 Lod %472 +%485 = OpLoad %7 %443 +%486 = OpFAdd %7 %485 %484 +OpStore %443 %486 +%487 = OpSampledImage %465 %442 %439 +%488 = OpImageSampleDrefExplicitLod %7 %487 %302 %298 Lod %472 +%489 = OpLoad %7 %443 +%490 = OpFAdd %7 %489 %488 +OpStore %443 %490 +%491 = OpLoad %7 %443 +OpStore %436 %491 OpReturn OpFunctionEnd -%495 = OpFunction %2 None %100 -%493 = OpLabel -%496 = OpLoad %16 %49 -%497 = OpLoad %3 %51 -%498 = OpLoad %17 %52 -%499 = OpLoad %24 %64 -%500 = OpLoad %24 %66 -%501 = OpLoad %25 %68 -OpBranch %502 -%502 = OpLabel -%503 = OpSampledImage %316 %496 %499 -%504 = OpImageGather %23 %503 %301 %505 -%506 = OpSampledImage %316 %496 %499 -%507 = OpImageGather %23 %506 %301 %508 ConstOffset %30 -%509 = OpSampledImage %448 %501 %500 -%510 = OpImageDrefGather %23 %509 %301 %299 -%511 = OpSampledImage %448 %501 %500 -%512 = OpImageDrefGather %23 %511 %301 %299 ConstOffset %30 -%514 = OpSampledImage %513 %497 %499 -%515 = OpImageGather %119 %514 %301 %219 -%518 = OpSampledImage %517 %498 %499 -%519 = OpImageGather %516 %518 %301 %219 -%520 = OpConvertUToF %23 %515 -%521 = OpConvertSToF %23 %519 -%522 = OpFAdd %23 %520 %521 -%523 = OpFAdd %23 %504 %507 -%524 = OpFAdd %23 %523 %510 -%525 = OpFAdd %23 %524 %512 -%526 = OpFAdd %23 %525 %522 -OpStore %494 %526 +%494 = OpFunction %2 None %100 +%492 = OpLabel +%495 = OpLoad %16 %49 +%496 = OpLoad %3 %51 +%497 = OpLoad %17 %52 +%498 = OpLoad %24 %64 +%499 = OpLoad %24 %66 +%500 = OpLoad %25 %68 +OpBranch %501 +%501 = OpLabel +%502 = OpSampledImage %315 %495 %498 +%503 = OpImageGather %23 %502 %300 %504 +%505 = OpSampledImage %315 %495 %498 +%506 = OpImageGather %23 %505 %300 %507 ConstOffset %30 +%508 = OpSampledImage %447 %500 %499 +%509 = OpImageDrefGather %23 %508 %300 %298 +%510 = OpSampledImage %447 %500 %499 +%511 = OpImageDrefGather %23 %510 %300 %298 ConstOffset %30 +%513 = OpSampledImage %512 %496 %498 +%514 = OpImageGather %119 %513 %300 %218 +%517 = OpSampledImage %516 %497 %498 +%518 = OpImageGather %515 %517 %300 %218 +%519 = OpConvertUToF %23 %514 +%520 = OpConvertSToF %23 %518 +%521 = OpFAdd %23 %519 %520 +%522 = OpFAdd %23 %503 %506 +%523 = OpFAdd %23 %522 %509 +%524 = OpFAdd %23 %523 %511 +%525 = OpFAdd %23 %524 %521 +OpStore %493 %525 OpReturn OpFunctionEnd -%529 = OpFunction %2 None %100 -%527 = OpLabel -%530 = OpLoad %24 %64 -%531 = OpLoad %25 %68 -OpBranch %532 -%532 = OpLabel -%533 = OpSampledImage %448 %531 %530 -%534 = OpImageSampleImplicitLod %23 %533 %301 -%535 = OpCompositeExtract %7 %534 0 -%536 = OpSampledImage %448 %531 %530 -%537 = OpImageGather %23 %536 %301 %219 -%538 = OpSampledImage %448 %531 %530 -%540 = OpConvertSToF %7 %29 -%539 = OpImageSampleExplicitLod %23 %538 %301 Lod %540 -%541 = OpCompositeExtract %7 %539 0 -%542 = OpCompositeConstruct %23 %535 %535 %535 %535 -%543 = OpFAdd %23 %542 %537 -%544 = OpCompositeConstruct %23 %541 %541 %541 %541 -%545 = OpFAdd %23 %543 %544 -OpStore %528 %545 +%528 = OpFunction %2 None %100 +%526 = OpLabel +%529 = OpLoad %24 %64 +%530 = OpLoad %25 %68 +OpBranch %531 +%531 = OpLabel +%532 = OpSampledImage %447 %530 %529 +%533 = OpImageSampleImplicitLod %23 %532 %300 +%534 = OpCompositeExtract %7 %533 0 +%535 = OpSampledImage %447 %530 %529 +%536 = OpImageGather %23 %535 %300 %218 +%537 = OpSampledImage %447 %530 %529 +%539 = OpConvertSToF %7 %29 +%538 = OpImageSampleExplicitLod %23 %537 %300 Lod %539 +%540 = OpCompositeExtract %7 %538 0 +%541 = OpCompositeConstruct %23 %534 %534 %534 %534 +%542 = OpFAdd %23 %541 %536 +%543 = OpCompositeConstruct %23 %540 %540 %540 %540 +%544 = OpFAdd %23 %542 %543 +OpStore %527 %544 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/wgsl/extra.wgsl b/naga/tests/out/wgsl/extra.wgsl index 25f894e7f5d..574c35ab0b4 100644 --- a/naga/tests/out/wgsl/extra.wgsl +++ b/naga/tests/out/wgsl/extra.wgsl @@ -1,6 +1,6 @@ struct PushConstants { index: u32, - double: vec2, + double: vec2, } struct FragmentIn { diff --git a/naga/tests/out/wgsl/f16-glsl.comp.wgsl b/naga/tests/out/wgsl/f16-glsl.comp.wgsl new file mode 100644 index 00000000000..e12dcb14086 --- /dev/null +++ b/naga/tests/out/wgsl/f16-glsl.comp.wgsl @@ -0,0 +1,47 @@ +enable f16; + +struct A { + a_1_: f16, + a_vec2_: vec2, + a_vec3_: vec3, + a_vec4_: vec4, +} + +struct B { + b_1_: f16, + b_vec2_: vec2, + b_vec3_: vec3, + b_vec4_: vec4, + b_mat2_: mat2x2, + b_mat2x3_: mat2x3, + b_mat2x4_: mat2x4, + b_mat3x2_: mat3x2, + b_mat3_: mat3x3, + b_mat3x4_: mat3x4, + b_mat4x2_: mat4x2, + b_mat4x3_: mat4x3, + b_mat4_: mat4x4, +} + +@group(0) @binding(0) +var global: A; +@group(0) @binding(1) +var global_1: B; + +fn main_1() { + let _e16 = global.a_1_; + global_1.b_1_ = _e16; + let _e17 = global.a_vec2_; + global_1.b_vec2_ = _e17; + let _e18 = global.a_vec3_; + global_1.b_vec3_ = _e18; + let _e19 = global.a_vec4_; + global_1.b_vec4_ = _e19; + return; +} + +@compute @workgroup_size(1, 1, 1) +fn main() { + main_1(); + return; +} diff --git a/naga/tests/out/wgsl/f16-spv.wgsl b/naga/tests/out/wgsl/f16-spv.wgsl new file mode 100644 index 00000000000..4c29ace8c34 --- /dev/null +++ b/naga/tests/out/wgsl/f16-spv.wgsl @@ -0,0 +1,46 @@ +enable f16; + +struct B { + b_1_: f16, + b_vec2_: vec2, + b_vec3_: vec3, + b_vec4_: vec4, + b_mat2_: mat2x2, + b_mat2x3_: mat2x3, + b_mat2x4_: mat2x4, + b_mat3x2_: mat3x2, + b_mat3_: mat3x3, + b_mat3x4_: mat3x4, + b_mat4x2_: mat4x2, + b_mat4x3_: mat4x3, + b_mat4_: mat4x4, +} + +struct A { + a_1_: f16, + a_vec2_: vec2, + a_vec3_: vec3, + a_vec4_: vec4, +} + +@group(0) @binding(1) +var unnamed: B; +@group(0) @binding(0) +var unnamed_1: A; + +fn main_1() { + let _e3 = unnamed_1.a_1_; + unnamed.b_1_ = _e3; + let _e6 = unnamed_1.a_vec2_; + unnamed.b_vec2_ = _e6; + let _e9 = unnamed_1.a_vec3_; + unnamed.b_vec3_ = _e9; + let _e12 = unnamed_1.a_vec4_; + unnamed.b_vec4_ = _e12; + return; +} + +@compute @workgroup_size(1, 1, 1) +fn main() { + main_1(); +} diff --git a/naga/tests/out/wgsl/f16.wgsl b/naga/tests/out/wgsl/f16.wgsl new file mode 100644 index 00000000000..a45edce3b58 --- /dev/null +++ b/naga/tests/out/wgsl/f16.wgsl @@ -0,0 +1,167 @@ +enable f16; + +struct UniformCompatible { + val_u32_: u32, + val_i32_: i32, + val_f32_: f32, + val_f16_: f16, + val_f16_2_: vec2, + val_f16_3_: vec3, + val_f16_4_: vec4, + final_value: f16, + val_mat2x2_: mat2x2, + val_mat2x3_: mat2x3, + val_mat2x4_: mat2x4, + val_mat3x2_: mat3x2, + val_mat3x3_: mat3x3, + val_mat3x4_: mat3x4, + val_mat4x2_: mat4x2, + val_mat4x3_: mat4x3, + val_mat4x4_: mat4x4, +} + +struct StorageCompatible { + val_f16_array_2_: array, +} + +struct LayoutTest { + scalar1_: f16, + scalar2_: f16, + v3_: vec3, + tuck_in: f16, + scalar4_: f16, + larger: u32, +} + +const constant_variable: f16 = 15.203125h; + +var private_variable: f16 = 1h; +@group(0) @binding(0) +var input_uniform: UniformCompatible; +@group(0) @binding(1) +var input_storage: UniformCompatible; +@group(0) @binding(2) +var input_arrays: StorageCompatible; +@group(0) @binding(3) +var output: UniformCompatible; +@group(0) @binding(4) +var output_arrays: StorageCompatible; + +fn f16_function(x: f16) -> f16 { + var val: f16 = 15.203125h; + + let _e4 = val; + val = (_e4 + -33344h); + let _e6 = val; + let _e9 = val; + val = (_e9 + (_e6 + 5h)); + let _e13 = input_uniform.val_f32_; + let _e14 = val; + let _e18 = val; + val = (_e18 + f16((_e13 + f32(_e14)))); + let _e22 = input_uniform.val_f16_; + let _e25 = val; + val = (_e25 + vec3(_e22).z); + let _e31 = input_uniform.val_f16_; + let _e34 = input_storage.val_f16_; + output.val_f16_ = (_e31 + _e34); + let _e40 = input_uniform.val_f16_2_; + let _e43 = input_storage.val_f16_2_; + output.val_f16_2_ = (_e40 + _e43); + let _e49 = input_uniform.val_f16_3_; + let _e52 = input_storage.val_f16_3_; + output.val_f16_3_ = (_e49 + _e52); + let _e58 = input_uniform.val_f16_4_; + let _e61 = input_storage.val_f16_4_; + output.val_f16_4_ = (_e58 + _e61); + let _e67 = input_uniform.val_mat2x2_; + let _e70 = input_storage.val_mat2x2_; + output.val_mat2x2_ = (_e67 + _e70); + let _e76 = input_uniform.val_mat2x3_; + let _e79 = input_storage.val_mat2x3_; + output.val_mat2x3_ = (_e76 + _e79); + let _e85 = input_uniform.val_mat2x4_; + let _e88 = input_storage.val_mat2x4_; + output.val_mat2x4_ = (_e85 + _e88); + let _e94 = input_uniform.val_mat3x2_; + let _e97 = input_storage.val_mat3x2_; + output.val_mat3x2_ = (_e94 + _e97); + let _e103 = input_uniform.val_mat3x3_; + let _e106 = input_storage.val_mat3x3_; + output.val_mat3x3_ = (_e103 + _e106); + let _e112 = input_uniform.val_mat3x4_; + let _e115 = input_storage.val_mat3x4_; + output.val_mat3x4_ = (_e112 + _e115); + let _e121 = input_uniform.val_mat4x2_; + let _e124 = input_storage.val_mat4x2_; + output.val_mat4x2_ = (_e121 + _e124); + let _e130 = input_uniform.val_mat4x3_; + let _e133 = input_storage.val_mat4x3_; + output.val_mat4x3_ = (_e130 + _e133); + let _e139 = input_uniform.val_mat4x4_; + let _e142 = input_storage.val_mat4x4_; + output.val_mat4x4_ = (_e139 + _e142); + let _e148 = input_arrays.val_f16_array_2_; + output_arrays.val_f16_array_2_ = _e148; + let _e149 = val; + let _e151 = val; + val = (_e151 + abs(_e149)); + let _e153 = val; + let _e154 = val; + let _e155 = val; + let _e157 = val; + val = (_e157 + clamp(_e153, _e154, _e155)); + let _e159 = val; + let _e161 = val; + let _e164 = val; + val = (_e164 + dot(vec2(_e159), vec2(_e161))); + let _e166 = val; + let _e167 = val; + let _e169 = val; + val = (_e169 + max(_e166, _e167)); + let _e171 = val; + let _e172 = val; + let _e174 = val; + val = (_e174 + min(_e171, _e172)); + let _e176 = val; + let _e178 = val; + val = (_e178 + sign(_e176)); + let _e181 = val; + val = (_e181 + 1h); + let _e185 = input_uniform.val_f16_2_; + let float_vec2_ = vec2(_e185); + output.val_f16_2_ = vec2(float_vec2_); + let _e192 = input_uniform.val_f16_3_; + let float_vec3_ = vec3(_e192); + output.val_f16_3_ = vec3(float_vec3_); + let _e199 = input_uniform.val_f16_4_; + let float_vec4_ = vec4(_e199); + output.val_f16_4_ = vec4(float_vec4_); + let _e208 = input_uniform.val_mat2x2_; + output.val_mat2x2_ = mat2x2(mat2x2(_e208)); + let _e215 = input_uniform.val_mat2x3_; + output.val_mat2x3_ = mat2x3(mat2x3(_e215)); + let _e222 = input_uniform.val_mat2x4_; + output.val_mat2x4_ = mat2x4(mat2x4(_e222)); + let _e229 = input_uniform.val_mat3x2_; + output.val_mat3x2_ = mat3x2(mat3x2(_e229)); + let _e236 = input_uniform.val_mat3x3_; + output.val_mat3x3_ = mat3x3(mat3x3(_e236)); + let _e243 = input_uniform.val_mat3x4_; + output.val_mat3x4_ = mat3x4(mat3x4(_e243)); + let _e250 = input_uniform.val_mat4x2_; + output.val_mat4x2_ = mat4x2(mat4x2(_e250)); + let _e257 = input_uniform.val_mat4x3_; + output.val_mat4x3_ = mat4x3(mat4x3(_e257)); + let _e264 = input_uniform.val_mat4x4_; + output.val_mat4x4_ = mat4x4(mat4x4(_e264)); + let _e267 = val; + return _e267; +} + +@compute @workgroup_size(1, 1, 1) +fn main() { + let _e3 = f16_function(2h); + output.final_value = _e3; + return; +} diff --git a/naga/tests/snapshots.rs b/naga/tests/snapshots.rs index cc56f5e053c..f87c9c335ad 100644 --- a/naga/tests/snapshots.rs +++ b/naga/tests/snapshots.rs @@ -929,6 +929,10 @@ fn convert_wgsl() { "int64", Targets::SPIRV | Targets::HLSL | Targets::WGSL | Targets::METAL, ), + ( + "f16", + Targets::SPIRV | Targets::HLSL | Targets::WGSL | Targets::METAL, + ), ( "subgroup-operations", Targets::SPIRV | Targets::METAL | Targets::GLSL | Targets::HLSL | Targets::WGSL, @@ -1134,6 +1138,7 @@ fn convert_spv_all() { convert_spv("atomic_i_decrement", false, Targets::WGSL); convert_spv("atomic_i_add_sub", false, Targets::WGSL); convert_spv("atomic_global_struct_field_vertex", false, Targets::WGSL); + convert_spv("f16-spv", false, Targets::WGSL); convert_spv( "fetch_depth", false, diff --git a/naga/tests/spirv_capabilities.rs b/naga/tests/spirv_capabilities.rs index f221c7896e3..2d46e37f72d 100644 --- a/naga/tests/spirv_capabilities.rs +++ b/naga/tests/spirv_capabilities.rs @@ -208,3 +208,8 @@ fn int64() { "#, ); } + +#[test] +fn float16() { + require(&[Ca::Float16], "enable f16; fn f(x: f16) { }"); +} diff --git a/naga/tests/wgsl_errors.rs b/naga/tests/wgsl_errors.rs index 2dcd0588cac..34e1e18ece8 100644 --- a/naga/tests/wgsl_errors.rs +++ b/naga/tests/wgsl_errors.rs @@ -3,9 +3,11 @@ Tests for the WGSL front end. */ #![cfg(feature = "wgsl-in")] +#[track_caller] fn check(input: &str, snapshot: &str) { let output = naga::front::wgsl::parse_str(input) - .expect_err("expected parser error") + .map(|_| panic!("expected parser error, but parsing succeeded!")) + .unwrap_err() .emit_to_string(input); if output != snapshot { for diff in diff::lines(snapshot, &output) { @@ -19,6 +21,19 @@ fn check(input: &str, snapshot: &str) { } } +#[track_caller] +fn check_success(input: &str) { + match naga::front::wgsl::parse_str(input) { + Ok(_) => {} + Err(err) => { + panic!( + "expected success, but parsing failed with:\n{}", + err.emit_to_string(input) + ); + } + } +} + #[test] fn very_negative_integers() { // wgpu#4492 @@ -824,6 +839,50 @@ fn matrix_constructor_inferred() { ); } +#[test] +fn float16_requires_enable() { + check( + r#" + const a: f16 = 1.0; + "#, + r#"error: the `f16` language extension is not enabled + ┌─ wgsl:2:22 + │ +2 │ const a: f16 = 1.0; + │ ^^^ the `f16` language extension is needed for this functionality, but it is not currently enabled. + │ + = note: You can enable this extension by adding `enable f16;` at the top of the shader. + +"#, + ); + + check( + r#" + const a = 1.0h; + "#, + r#"error: the `f16` language extension is not enabled + ┌─ wgsl:2:23 + │ +2 │ const a = 1.0h; + │ ^^^^ the `f16` language extension is needed for this functionality, but it is not currently enabled. + │ + = note: You can enable this extension by adding `enable f16;` at the top of the shader. + +"#, + ); +} + +#[test] +fn multiple_enables_valid() { + check_success( + r#" + enable f16; + enable f16; + const a: f16 = 1.0h; + "#, + ); +} + /// Check the result of validating a WGSL program against a pattern. /// /// Unless you are generating code programmatically, the @@ -900,6 +959,7 @@ macro_rules! check_validation { } } +#[track_caller] fn validation_error( source: &str, caps: naga::valid::Capabilities, @@ -928,6 +988,53 @@ fn int64_capability() { } } +#[test] +fn float16_capability() { + check_validation! { + "enable f16; var input: f16;", + "enable f16; var input: vec2;": + Err(naga::valid::ValidationError::Type { + source: naga::valid::TypeError::WidthError(naga::valid::WidthError::MissingCapability {flag: "FLOAT16",..}), + .. + }) + } +} + +#[test] +fn float16_in_push_constant() { + check_validation! { + "enable f16; var input: f16;", + "enable f16; var input: vec2;", + "enable f16; var input: mat4x4;", + "enable f16; struct S { a: f16 }; var input: S;", + "enable f16; struct S1 { a: f16 }; struct S2 { a : S1 } var input: S2;": + Err(naga::valid::ValidationError::GlobalVariable { + source: naga::valid::GlobalVariableError::InvalidPushConstantType( + naga::valid::PushConstantError::InvalidScalar( + naga::Scalar::F16 + ) + ), + .. + }), + naga::valid::Capabilities::SHADER_FLOAT16 | naga::valid::Capabilities::PUSH_CONSTANT + } +} + +#[test] +fn float16_in_atomic() { + check_validation! { + "enable f16; var a: atomic;": + Err(naga::valid::ValidationError::Type { + source: naga::valid::TypeError::InvalidAtomicWidth( + naga::ScalarKind::Float, + 2 + ), + .. + }), + naga::valid::Capabilities::SHADER_FLOAT16 + } +} + #[test] fn invalid_arrays() { check_validation! { diff --git a/naga/xtask/src/validate.rs b/naga/xtask/src/validate.rs index be6a9006309..8eb72b3ea8d 100644 --- a/naga/xtask/src/validate.rs +++ b/naga/xtask/src/validate.rs @@ -1,5 +1,5 @@ use std::{ - io::{BufRead, BufReader}, + io::{BufRead, BufReader, Write}, path::Path, process::Stdio, }; @@ -214,17 +214,23 @@ fn validate_spirv(path: &Path, spirv_as: &str, spirv_val: &str) -> anyhow::Resul else { bail!("no {expected_header_prefix:?} header found in {path:?}"); }; - let file = open_file(path)?; let mut spirv_as_cmd = EasyCommand::new(spirv_as, |cmd| { - cmd.stdin(Stdio::from(file)) - .stdout(Stdio::piped()) + cmd.stdout(Stdio::piped()) .arg("--target-env") .arg(format!("spv{version}")) - .args(["-", "-o", "-"]) + .args([path.to_str().unwrap(), "-o", "-"]) }); - let child = spirv_as_cmd - .spawn() - .with_context(|| format!("failed to spawn {spirv_as_cmd:?}"))?; + let assembled_spirv = spirv_as_cmd + .output() + .with_context(|| format!("Failed to run {spirv_as_cmd}"))?; + + if !assembled_spirv.status.success() { + bail!( + "Failed to assemble {path:?} with {spirv_as_cmd}:\n{}", + String::from_utf8_lossy(&assembled_spirv.stderr) + ); + } + let error_message = || { format!( "Failed to validate {path:?}. @@ -234,9 +240,26 @@ Note: Labels and line numbers will not match the input file. path.display(), ) }; - EasyCommand::new(spirv_val, |cmd| cmd.stdin(child.stdout.unwrap())) - .success() - .with_context(error_message) + let mut spirv_val_command = EasyCommand::new(spirv_val, |cmd| cmd.stdin(Stdio::piped())); + let mut spirv_val_process = spirv_val_command + .spawn() + .with_context(|| format!("Failed to run {spirv_val_command}"))?; + + spirv_val_process + .stdin + .as_mut() + .unwrap() + .write_all(&assembled_spirv.stdout)?; + + let spirv_val_output = spirv_val_process + .wait() + .with_context(|| format!("Failed to wait for {spirv_val_command}"))?; + + if !spirv_val_output.success() { + bail!("{}", error_message()); + } + + Ok(()) } fn validate_metal(path: &Path, xcrun: &str) -> anyhow::Result<()> { @@ -257,22 +280,17 @@ fn validate_metal(path: &Path, xcrun: &str) -> anyhow::Result<()> { } else { format!("-std={language}") }; - let file = open_file(path)?; EasyCommand::new(xcrun, |cmd| { - cmd.stdin(Stdio::from(file)) - .args(["-sdk", "macosx", "metal", "-mmacosx-version-min=10.11"]) + cmd.args(["-sdk", "macosx", "metal", "-mmacosx-version-min=10.11"]) .arg(std_arg) - .args(["-x", "metal", "-", "-o", "/dev/null"]) + .args(["-x", "metal", &*path.to_string_lossy(), "-o", "/dev/null"]) }) .success() } fn validate_glsl(path: &Path, type_arg: &str, glslang_validator: &str) -> anyhow::Result<()> { - let file = open_file(path)?; EasyCommand::new(glslang_validator, |cmd| { - cmd.stdin(Stdio::from(file)) - .args(["--stdin", "-S"]) - .arg(type_arg) + cmd.args([&*path.to_string_lossy(), "-S"]).arg(type_arg) }) .success() } diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 5ee691ba05e..bc2659da8a3 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -43,6 +43,7 @@ cfg-if.workspace = true ctor.workspace = true futures-lite.workspace = true glam.workspace = true +half = { workspace = true, features = ["bytemuck"] } itertools.workspace = true image.workspace = true libtest-mimic.workspace = true diff --git a/tests/tests/shader/mod.rs b/tests/tests/shader/mod.rs index 9a3bae0d401..15d4c5e5cf7 100644 --- a/tests/tests/shader/mod.rs +++ b/tests/tests/shader/mod.rs @@ -143,8 +143,8 @@ impl ShaderTest { body, input_type: String::from("CustomStruct"), output_type: String::from("array"), - input_values: bytemuck::cast_slice(input_values).to_vec(), - output_values: vec![bytemuck::cast_slice(output_values).to_vec()], + input_values: bytemuck::pod_collect_to_vec(input_values), + output_values: vec![bytemuck::pod_collect_to_vec(output_values)], output_comparison_fn: Self::default_comparison_function::, output_initialization: u32::MAX, failures: Backends::empty(), diff --git a/tests/tests/shader/struct_layout.rs b/tests/tests/shader/struct_layout.rs index 38a040fcad5..25bd32bf77f 100644 --- a/tests/tests/shader/struct_layout.rs +++ b/tests/tests/shader/struct_layout.rs @@ -5,6 +5,60 @@ use wgpu::{Backends, DownlevelFlags, Features, Limits}; use crate::shader::{shader_input_output_test, InputStorageType, ShaderTest, MAX_BUFFER_SIZE}; use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters}; +#[gpu_test] +static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + // Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/wgpu/issues/4371 + .expect_fail( + FailureCase::backend(wgpu::Backends::VULKAN) + .validation_error("a matrix with stride 8 not satisfying alignment to 16"), + ) + .limits(Limits::downlevel_defaults()), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::Uniform, + create_struct_layout_tests(InputStorageType::Uniform), + ) + }); + +#[gpu_test] +static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits::downlevel_defaults()), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::Storage, + create_struct_layout_tests(InputStorageType::Storage), + ) + }); + +#[gpu_test] +static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(Features::PUSH_CONSTANTS) + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits { + max_push_constant_size: MAX_BUFFER_SIZE as u32, + ..Limits::downlevel_defaults() + }), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::PushConstant, + create_struct_layout_tests(InputStorageType::PushConstant), + ) + }); + fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec { let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect(); @@ -253,6 +307,57 @@ fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec tests } +#[gpu_test] +static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(Features::SHADER_INT64) + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits::downlevel_defaults()), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::Storage, + create_64bit_struct_layout_tests(), + ) + }); + +#[gpu_test] +static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(Features::SHADER_INT64) + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits::downlevel_defaults()), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::Storage, + create_64bit_struct_layout_tests(), + ) + }); + +#[gpu_test] +static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS) + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits { + max_push_constant_size: MAX_BUFFER_SIZE as u32, + ..Limits::downlevel_defaults() + }), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::PushConstant, + create_64bit_struct_layout_tests(), + ) + }); + fn create_64bit_struct_layout_tests() -> Vec { let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect(); @@ -356,29 +461,26 @@ fn create_64bit_struct_layout_tests() -> Vec { } #[gpu_test] -static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() +static UNIFORM_INPUT_F16: GpuTestConfiguration = GpuTestConfiguration::new() .parameters( TestParameters::default() + .features(Features::SHADER_F16) .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) - // Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/wgpu/issues/4371 - .expect_fail( - FailureCase::backend(wgpu::Backends::VULKAN) - .validation_error("a matrix with stride 8 not satisfying alignment to 16"), - ) .limits(Limits::downlevel_defaults()), ) .run_async(|ctx| { shader_input_output_test( ctx, - InputStorageType::Uniform, - create_struct_layout_tests(InputStorageType::Uniform), + InputStorageType::Storage, + create_16bit_struct_layout_test(), ) }); #[gpu_test] -static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() +static STORAGE_INPUT_F16: GpuTestConfiguration = GpuTestConfiguration::new() .parameters( TestParameters::default() + .features(Features::SHADER_F16) .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) .limits(Limits::downlevel_defaults()), ) @@ -386,76 +488,205 @@ static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() shader_input_output_test( ctx, InputStorageType::Storage, - create_struct_layout_tests(InputStorageType::Storage), + create_16bit_struct_layout_test(), ) }); -#[gpu_test] -static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() - .parameters( - TestParameters::default() - .features(Features::PUSH_CONSTANTS) - .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) - .limits(Limits { - max_push_constant_size: MAX_BUFFER_SIZE as u32, - ..Limits::downlevel_defaults() - }), - ) - .run_async(|ctx| { - shader_input_output_test( - ctx, - InputStorageType::PushConstant, - create_struct_layout_tests(InputStorageType::PushConstant), - ) - }); +fn create_16bit_struct_layout_test() -> Vec { + let mut tests = Vec::new(); -#[gpu_test] -static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new() - .parameters( - TestParameters::default() - .features(Features::SHADER_INT64) - .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) - .limits(Limits::downlevel_defaults()), - ) - .run_async(|ctx| { - shader_input_output_test( - ctx, - InputStorageType::Storage, - create_64bit_struct_layout_tests(), - ) - }); + fn f16asu16(f32: f32) -> u16 { + half::f16::from_f32(f32).to_bits() + } -#[gpu_test] -static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new() - .parameters( - TestParameters::default() - .features(Features::SHADER_INT64) - .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) - .limits(Limits::downlevel_defaults()), - ) - .run_async(|ctx| { - shader_input_output_test( - ctx, - InputStorageType::Storage, - create_64bit_struct_layout_tests(), - ) - }); + // 16 bit alignment tests + { + let members = + "scalar1: f16, scalar2: f16, v3: vec3, tuck_in: f16, scalar4: f16, larger: u32"; + let direct = String::from( + "\ + output[0] = u32(input.scalar1); + output[1] = u32(input.scalar2); + output[2] = u32(input.v3.x); + output[3] = u32(input.v3.y); + output[4] = u32(input.v3.z); + output[5] = u32(input.tuck_in); + output[6] = u32(input.scalar4); + output[7] = u32(extractBits(input.larger, 0u, 16u)); + output[8] = u32(extractBits(input.larger, 16u, 16u)); + ", + ); -#[gpu_test] -static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new() - .parameters( - TestParameters::default() - .features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS) - .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) - .limits(Limits { - max_push_constant_size: MAX_BUFFER_SIZE as u32, - ..Limits::downlevel_defaults() - }), - ) - .run_async(|ctx| { - shader_input_output_test( - ctx, - InputStorageType::PushConstant, - create_64bit_struct_layout_tests(), - ) - }); + tests.push(ShaderTest::new( + "f16 alignment".into(), + members.into(), + direct, + &[ + f16asu16(0.0), + f16asu16(1.0), + f16asu16(2.0), + f16asu16(3.0), + f16asu16(4.0), + f16asu16(5.0), + f16asu16(6.0), + f16asu16(7.0), + f16asu16(8.0), + f16asu16(9.0), + 10_u16, + 11_u16, + // Some extra values to help debug if the test fails. + 12_u16, + 13_u16, + 14_u16, + 15_u16, + 16_u16, + 17_u16, + 18_u16, + 19_u16, + 20_u16, + ], + &[ + 0, // scalar1 + 1, // scalar2 + 4, 5, 6, // v3 + 7, // tuck_in + 8, // scalar4 + 10, // larger[0..16] + 11, // larger[16..32] + ], + )); + } + + // Matrix tests + { + let members = "m2: mat2x2h, m3: mat3x3h, m4: mat4x4h"; + let direct = String::from( + "\ + output[0] = u32(input.m2[0].x); + output[1] = u32(input.m2[0].y); + output[2] = u32(input.m2[1].x); + output[3] = u32(input.m2[1].y); + + output[4] = u32(input.m3[0].x); + output[5] = u32(input.m3[0].y); + output[6] = u32(input.m3[0].z); + output[7] = u32(input.m3[1].x); + output[8] = u32(input.m3[1].y); + output[9] = u32(input.m3[1].z); + output[10] = u32(input.m3[2].x); + output[11] = u32(input.m3[2].y); + output[12] = u32(input.m3[2].z); + + output[13] = u32(input.m4[0].x); + output[14] = u32(input.m4[0].y); + output[15] = u32(input.m4[0].z); + output[16] = u32(input.m4[0].w); + output[17] = u32(input.m4[1].x); + output[18] = u32(input.m4[1].y); + output[19] = u32(input.m4[1].z); + output[20] = u32(input.m4[1].w); + output[21] = u32(input.m4[2].x); + output[22] = u32(input.m4[2].y); + output[23] = u32(input.m4[2].z); + output[24] = u32(input.m4[2].w); + output[25] = u32(input.m4[3].x); + output[26] = u32(input.m4[3].y); + output[27] = u32(input.m4[3].z); + output[28] = u32(input.m4[3].w); + ", + ); + + tests.push(ShaderTest::new( + "f16 matrix alignment".into(), + members.into(), + direct, + &(0..32).map(|x| f16asu16(x as f32)).collect::>(), + &[ + 0, 1, // m2[0] + 2, 3, // m2[1] + // + 4, 5, 6, // m3[0] + 8, 9, 10, // m3[1] + 12, 13, 14, // m3[2] + // + 16, 17, 18, 19, // m4[0] + 20, 21, 22, 23, // m4[1] + 24, 25, 26, 27, // m4[2] + 28, 29, 30, 31, // m4[3] + ], + )); + } + + // // Nested struct and array test. + // // + // // This tries to exploit all the weird edge cases of the struct layout algorithm. + // // We dont go as all-out as the other nested struct test because + // // all our primitives are twice as wide and we have only so much buffer to spare. + // { + // let header = String::from( + // "struct Inner { scalar: u64, scalar32: u32, member: array, 2> }", + // ); + // let members = String::from("inner: Inner"); + // let direct = String::from( + // "\ + + // ", + // ); + + // tests.push( + // ShaderTest::new( + // String::from("nested struct and array"), + // members, + // direct, + // &input_values, + // &[ + // 0, 1, // inner.scalar + // 2, // inner.scalar32 + // 8, 9, 10, 11, 12, 13, // inner.member[0] + // 16, 17, 18, 19, 20, 21, // inner.member[1] + // ], + // ) + // .header(header), + // ); + // } + // { + // let header = String::from("struct Inner { scalar32: u32, scalar: u64, scalar32_2: u32 }"); + // let members = String::from("inner: Inner, vector: vec3"); + // let direct = String::from( + // "\ + // output[0] = bitcast(input.inner.scalar32); + // output[1] = u32(bitcast(input.inner.scalar) & 0xFFFFFFFF); + // output[2] = u32((bitcast(input.inner.scalar) >> 32) & 0xFFFFFFFF); + // output[3] = bitcast(input.inner.scalar32_2); + // output[4] = u32(bitcast(input.vector.x) & 0xFFFFFFFF); + // output[5] = u32((bitcast(input.vector.x) >> 32) & 0xFFFFFFFF); + // output[6] = u32(bitcast(input.vector.y) & 0xFFFFFFFF); + // output[7] = u32((bitcast(input.vector.y) >> 32) & 0xFFFFFFFF); + // output[8] = u32(bitcast(input.vector.z) & 0xFFFFFFFF); + // output[9] = u32((bitcast(input.vector.z) >> 32) & 0xFFFFFFFF); + // ", + // ); + + // tests.push( + // ShaderTest::new( + // String::from("nested struct and array"), + // members, + // direct, + // &input_values, + // &[ + // 0, // inner.scalar32 + // 2, 3, // inner.scalar + // 4, // inner.scalar32_2 + // 8, 9, 10, 11, 12, 13, // vector + // ], + // ) + // .header(header), + // ); + // } + + // Insert `enable f16;` header + tests + .into_iter() + .map(|test| test.header("enable f16;".into())) + .collect() +} diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index bb2ffd6b033..94bfbf4258e 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -382,6 +382,10 @@ pub fn create_validator( features.contains(wgt::Features::PUSH_CONSTANTS), ); caps.set(Caps::FLOAT64, features.contains(wgt::Features::SHADER_F64)); + caps.set( + Caps::SHADER_FLOAT16, + features.contains(wgt::Features::SHADER_F16), + ); caps.set( Caps::PRIMITIVE_INDEX, features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX), diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 370b842aded..53b67d238d6 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -1,5 +1,5 @@ use std::{ - mem::{size_of, size_of_val}, + mem::{self, size_of, size_of_val}, ptr, sync::Arc, thread, @@ -430,6 +430,24 @@ impl super::Adapter { && features1.Int64ShaderOps.as_bool(), ); + let float16_supported = { + let mut features4: Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS4 = + unsafe { mem::zeroed() }; + let hr = unsafe { + device.CheckFeatureSupport( + Direct3D12::D3D12_FEATURE_D3D12_OPTIONS4, // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_feature#syntax + ptr::from_mut(&mut features4).cast(), + size_of::() as _, + ) + }; + hr.is_ok() && features4.Native16BitShaderOpsSupported.as_bool() + }; + + features.set( + wgt::Features::SHADER_F16, + shader_model >= naga::back::hlsl::ShaderModel::V6_2 && float16_supported, + ); + features.set( wgt::Features::TEXTURE_INT64_ATOMIC, shader_model >= naga::back::hlsl::ShaderModel::V6_6 @@ -617,7 +635,7 @@ impl crate::Adapter for super::Adapter { unsafe fn open( &self, - _features: wgt::Features, + features: wgt::Features, limits: &wgt::Limits, memory_hints: &wgt::MemoryHints, ) -> Result, crate::DeviceError> { @@ -638,6 +656,7 @@ impl crate::Adapter for super::Adapter { let device = super::Device::new( self.device.clone(), queue.clone(), + features, limits, memory_hints, self.private_caps, diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 9cbb6cd14e5..3876b76dae1 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -32,9 +32,11 @@ use crate::{ const NAGA_LOCATION_SEMANTIC: &[u8] = b"LOC\0"; impl super::Device { + #[allow(clippy::too_many_arguments)] pub(super) fn new( raw: Direct3D12::ID3D12Device, present_queue: Direct3D12::ID3D12CommandQueue, + features: wgt::Features, limits: &wgt::Limits, memory_hints: &wgt::MemoryHints, private_caps: super::PrivateCapabilities, @@ -177,6 +179,7 @@ impl super::Device { event: Event::create(false, false)?, }, private_caps, + features, shared: Arc::new(shared), rtv_pool: Mutex::new(rtv_pool), dsv_pool: Mutex::new(descriptor::CpuPool::new( diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index d5d6843c39c..1611fa64cf8 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -627,6 +627,7 @@ pub struct Device { present_queue: Direct3D12::ID3D12CommandQueue, idler: Idler, private_caps: PrivateCapabilities, + features: wgt::Features, shared: Arc, // CPU only pools rtv_pool: Mutex, diff --git a/wgpu-hal/src/dx12/shader_compilation.rs b/wgpu-hal/src/dx12/shader_compilation.rs index 957bd8c3c48..90a1b36741d 100644 --- a/wgpu-hal/src/dx12/shader_compilation.rs +++ b/wgpu-hal/src/dx12/shader_compilation.rs @@ -268,7 +268,7 @@ pub(super) fn compile_dxc( let raw_ep = OPCWSTR::new(raw_ep); let full_stage = OPCWSTR::new(full_stage); - let mut compile_args = arrayvec::ArrayVec::::new_const(); + let mut compile_args = arrayvec::ArrayVec::::new_const(); if let Some(source_name) = source_name.as_ref() { compile_args.push(source_name.ptr()) @@ -298,6 +298,10 @@ pub(super) fn compile_dxc( compile_args.push(Dxc::DXC_ARG_SKIP_OPTIMIZATIONS); } + if device.features.contains(wgt::Features::SHADER_F16) { + compile_args.push(windows::core::w!("-enable-16bit-types")); + } + let buffer = Dxc::DxcBuffer { Ptr: source.as_ptr().cast(), Size: source.len(), diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 8b41be69fcb..ce1aa94e346 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -378,6 +378,7 @@ impl PhysicalDeviceFeatures { vk::PhysicalDeviceShaderFloat16Int8Features::default().shader_float16(true), vk::PhysicalDevice16BitStorageFeatures::default() .storage_buffer16_bit_access(true) + .storage_input_output16(true) .uniform_and_storage_buffer16_bit_access(true), )) } else { @@ -668,7 +669,8 @@ impl PhysicalDeviceFeatures { F::SHADER_F16, f16_i8.shader_float16 != 0 && bit16.storage_buffer16_bit_access != 0 - && bit16.uniform_and_storage_buffer16_bit_access != 0, + && bit16.uniform_and_storage_buffer16_bit_access != 0 + && bit16.storage_input_output16 != 0, ); } @@ -1846,6 +1848,10 @@ impl super::Adapter { capabilities.push(spv::Capability::Int64); } + if features.contains(wgt::Features::SHADER_F16) { + capabilities.push(spv::Capability::Float16); + } + if features.intersects( wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX diff --git a/wgpu-info/src/report.rs b/wgpu-info/src/report.rs index 954908b09e5..c453b21e06d 100644 --- a/wgpu-info/src/report.rs +++ b/wgpu-info/src/report.rs @@ -3,7 +3,8 @@ use std::io; use hashbrown::HashMap; use serde::{Deserialize, Serialize}; use wgpu::{ - AdapterInfo, DownlevelCapabilities, Features, Limits, TextureFormat, TextureFormatFeatures, + AdapterInfo, DownlevelCapabilities, Dx12Compiler, Features, Limits, TextureFormat, + TextureFormatFeatures, }; use crate::texture; @@ -20,6 +21,7 @@ impl GpuReport { pub fn generate() -> Self { let instance = wgpu::Instance::new(&{ let mut desc = wgpu::InstanceDescriptor::from_env_or_default(); + desc.backend_options.dx12.shader_compiler = Dx12Compiler::StaticDxc; desc.flags = wgpu::InstanceFlags::debugging().with_env(); desc });