From 803feb5dc679556ba2d93fe616d1883d7ebc7775 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 29 Jan 2025 12:23:15 +0100 Subject: [PATCH] x86-sse2 ABI: use SSE registers for floats and SIMD --- compiler/rustc_target/src/callconv/mod.rs | 118 +++++++++++------- compiler/rustc_target/src/callconv/x86.rs | 14 ++- .../closure-inherit-target-feature.rs | 7 +- tests/assembly/x86-return-float.rs | 42 +++---- tests/codegen/abi-x86-sse.rs | 36 ++++++ tests/codegen/float/f128.rs | 82 ++++++++---- tests/codegen/float/f16.rs | 43 ++++--- tests/codegen/intrinsics/transmute-x64.rs | 9 -- tests/codegen/issues/issue-32031.rs | 6 +- .../simd-intrinsic-transmute-array.rs | 17 ++- tests/codegen/simd/packed-simd.rs | 25 ++-- tests/codegen/union-abi.rs | 16 ++- ...e-abi-checks.rs => sse-simd-abi-checks.rs} | 5 +- ...ecks.stderr => sse-simd-abi-checks.stderr} | 4 +- 14 files changed, 273 insertions(+), 151 deletions(-) create mode 100644 tests/codegen/abi-x86-sse.rs rename tests/ui/{sse-abi-checks.rs => sse-simd-abi-checks.rs} (82%) rename tests/ui/{sse-abi-checks.stderr => sse-simd-abi-checks.stderr} (94%) diff --git a/compiler/rustc_target/src/callconv/mod.rs b/compiler/rustc_target/src/callconv/mod.rs index 2bde105562218..c2a176facdfa1 100644 --- a/compiler/rustc_target/src/callconv/mod.rs +++ b/compiler/rustc_target/src/callconv/mod.rs @@ -7,7 +7,7 @@ use rustc_abi::{ }; use rustc_macros::HashStable_Generic; -use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, WasmCAbi}; +use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, RustcAbi, WasmCAbi}; mod aarch64; mod amdgpu; @@ -386,6 +386,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> { /// Pass this argument directly instead. Should NOT be used! /// Only exists because of past ABI mistakes that will take time to fix /// (see ). + #[track_caller] pub fn make_direct_deprecated(&mut self) { match self.mode { PassMode::Indirect { .. } => { @@ -398,6 +399,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> { /// Pass this argument indirectly, by passing a (thin or wide) pointer to the argument instead. /// This is valid for both sized and unsized arguments. + #[track_caller] pub fn make_indirect(&mut self) { match self.mode { PassMode::Direct(_) | PassMode::Pair(_, _) => { @@ -412,6 +414,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> { /// Same as `make_indirect`, but for arguments that are ignored. Only needed for ABIs that pass /// ZSTs indirectly. + #[track_caller] pub fn make_indirect_from_ignore(&mut self) { match self.mode { PassMode::Ignore => { @@ -716,7 +719,7 @@ impl<'a, Ty> FnAbi<'a, Ty> { C: HasDataLayout + HasTargetSpec, { let spec = cx.target_spec(); - match &spec.arch[..] { + match &*spec.arch { "x86" => x86::compute_rust_abi_info(cx, self, abi), "riscv32" | "riscv64" => riscv::compute_rust_abi_info(cx, self, abi), "loongarch64" => loongarch::compute_rust_abi_info(cx, self, abi), @@ -724,6 +727,22 @@ impl<'a, Ty> FnAbi<'a, Ty> { _ => {} }; + // Decides whether we can pass the given SIMD argument via `PassMode::Direct`. + // May only return `true` if the target will always pass those arguments the same way, + // no matter what the user does with `-Ctarget-feature`! In other words, whatever + // target features are required to pass a SIMD value in registers must be listed in + // the `abi_required_features` for the current target and ABI. + let can_pass_simd_directly = |arg: &ArgAbi<'_, Ty>| match &*spec.arch { + // On x86, if we have SSE2 (which we have by default for x86_64), we can always pass up + // to 128-bit-sized vectors. 
+ "x86" if spec.rustc_abi == Some(RustcAbi::X86Sse2) => arg.layout.size.bits() <= 128, + "x86_64" if spec.rustc_abi != Some(RustcAbi::X86Softfloat) => { + arg.layout.size.bits() <= 128 + } + // So far, we haven't implemented this logic for any other target. + _ => false, + }; + for (arg_idx, arg) in self .args .iter_mut() @@ -731,12 +750,15 @@ impl<'a, Ty> FnAbi<'a, Ty> { .map(|(idx, arg)| (Some(idx), arg)) .chain(iter::once((None, &mut self.ret))) { - if arg.is_ignore() { + // If the logic above already picked a specific type to cast the argument to, leave that + // in place. + if matches!(arg.mode, PassMode::Ignore | PassMode::Cast { .. }) { continue; } if arg_idx.is_none() && arg.layout.size > Primitive::Pointer(AddressSpace::DATA).size(cx) * 2 + && !matches!(arg.layout.backend_repr, BackendRepr::Vector { .. }) { // Return values larger than 2 registers using a return area // pointer. LLVM and Cranelift disagree about how to return @@ -746,7 +768,8 @@ impl<'a, Ty> FnAbi<'a, Ty> { // return value independently and decide to pass it in a // register or not, which would result in the return value // being passed partially in registers and partially through a - // return area pointer. + // return area pointer. For large IR-level values such as `i128`, + // cranelift will even split up the value into smaller chunks. // // While Cranelift may need to be fixed as the LLVM behavior is // generally more correct with respect to the surface language, @@ -776,53 +799,60 @@ impl<'a, Ty> FnAbi<'a, Ty> { // rustc_target already ensure any return value which doesn't // fit in the available amount of return registers is passed in // the right way for the current target. + // + // The adjustment is not necessary nor desired for types with a vector + // representation; those are handled below. arg.make_indirect(); continue; } match arg.layout.backend_repr { - BackendRepr::Memory { .. } => {} - - // This is a fun case! The gist of what this is doing is - // that we want callers and callees to always agree on the - // ABI of how they pass SIMD arguments. If we were to *not* - // make these arguments indirect then they'd be immediates - // in LLVM, which means that they'd used whatever the - // appropriate ABI is for the callee and the caller. That - // means, for example, if the caller doesn't have AVX - // enabled but the callee does, then passing an AVX argument - // across this boundary would cause corrupt data to show up. - // - // This problem is fixed by unconditionally passing SIMD - // arguments through memory between callers and callees - // which should get them all to agree on ABI regardless of - // target feature sets. Some more information about this - // issue can be found in #44367. - // - // Note that the intrinsic ABI is exempt here as - // that's how we connect up to LLVM and it's unstable - // anyway, we control all calls to it in libstd. - BackendRepr::Vector { .. } - if abi != ExternAbi::RustIntrinsic && spec.simd_types_indirect => - { - arg.make_indirect(); - continue; + BackendRepr::Memory { .. } => { + // Compute `Aggregate` ABI. + + let is_indirect_not_on_stack = + matches!(arg.mode, PassMode::Indirect { on_stack: false, .. }); + assert!(is_indirect_not_on_stack); + + let size = arg.layout.size; + if arg.layout.is_sized() + && size <= Primitive::Pointer(AddressSpace::DATA).size(cx) + { + // We want to pass small aggregates as immediates, but using + // an LLVM aggregate type for this leads to bad optimizations, + // so we pick an appropriately sized integer type instead. 
+ arg.cast_to(Reg { kind: RegKind::Integer, size }); + } } - _ => continue, - } - // Compute `Aggregate` ABI. - - let is_indirect_not_on_stack = - matches!(arg.mode, PassMode::Indirect { on_stack: false, .. }); - assert!(is_indirect_not_on_stack); - - let size = arg.layout.size; - if !arg.layout.is_unsized() && size <= Primitive::Pointer(AddressSpace::DATA).size(cx) { - // We want to pass small aggregates as immediates, but using - // an LLVM aggregate type for this leads to bad optimizations, - // so we pick an appropriately sized integer type instead. - arg.cast_to(Reg { kind: RegKind::Integer, size }); + BackendRepr::Vector { .. } => { + // This is a fun case! The gist of what this is doing is + // that we want callers and callees to always agree on the + // ABI of how they pass SIMD arguments. If we were to *not* + // make these arguments indirect then they'd be immediates + // in LLVM, which means that they'd used whatever the + // appropriate ABI is for the callee and the caller. That + // means, for example, if the caller doesn't have AVX + // enabled but the callee does, then passing an AVX argument + // across this boundary would cause corrupt data to show up. + // + // This problem is fixed by unconditionally passing SIMD + // arguments through memory between callers and callees + // which should get them all to agree on ABI regardless of + // target feature sets. Some more information about this + // issue can be found in #44367. + // + // Note that the intrinsic ABI is exempt here as those are not + // real functions anyway, and the backend expects very specific types. + if abi != ExternAbi::RustIntrinsic + && spec.simd_types_indirect + && !can_pass_simd_directly(arg) + { + arg.make_indirect(); + } + } + + _ => {} } } } diff --git a/compiler/rustc_target/src/callconv/x86.rs b/compiler/rustc_target/src/callconv/x86.rs index 5b9414536d8ca..5f4f4cd57f654 100644 --- a/compiler/rustc_target/src/callconv/x86.rs +++ b/compiler/rustc_target/src/callconv/x86.rs @@ -4,7 +4,7 @@ use rustc_abi::{ }; use crate::callconv::{ArgAttribute, FnAbi, PassMode}; -use crate::spec::HasTargetSpec; +use crate::spec::{HasTargetSpec, RustcAbi}; #[derive(PartialEq)] pub(crate) enum Flavor { @@ -236,8 +236,16 @@ where _ => false, // anyway not passed via registers on x86 }; if has_float { - if fn_abi.ret.layout.size <= Primitive::Pointer(AddressSpace::DATA).size(cx) { - // Same size or smaller than pointer, return in a register. + if cx.target_spec().rustc_abi == Some(RustcAbi::X86Sse2) + && fn_abi.ret.layout.backend_repr.is_scalar() + && fn_abi.ret.layout.size.bits() <= 128 + { + // This is a single scalar that fits into an SSE register, and the target uses the + // SSE ABI. We prefer this over integer registers as float scalars need to be in SSE + // registers for float operations, so that's the best place to pass them around. + fn_abi.ret.cast_to(Reg { kind: RegKind::Vector, size: fn_abi.ret.layout.size }); + } else if fn_abi.ret.layout.size <= Primitive::Pointer(AddressSpace::DATA).size(cx) { + // Same size or smaller than pointer, return in an integer register. fn_abi.ret.cast_to(Reg { kind: RegKind::Integer, size: fn_abi.ret.layout.size }); } else { // Larger than a pointer, return indirectly. 
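
To make the two compiler-side decisions above easier to follow outside rustc's internals, here is a minimal, self-contained Rust sketch. The `Arg` struct, its `size_bits` field, and the free functions are simplified stand-ins for rustc's `ArgAbi`, layout queries, and the closure/`cast_to` calls in the hunks above; they are illustrative, not the real `rustc_target` API:

    // Simplified stand-ins for the types used in `rustc_target` above.
    #[derive(PartialEq)]
    enum RustcAbi {
        X86Sse2,
        X86Softfloat,
    }

    #[allow(dead_code)]
    enum BackendRepr {
        Scalar,
        Vector,
        Memory,
    }

    struct Arg {
        repr: BackendRepr,
        size_bits: u64,
    }

    /// Mirrors `can_pass_simd_directly`: only allow `PassMode::Direct` for a
    /// SIMD value when the target guarantees the needed registers exist no
    /// matter what the user passes via `-Ctarget-feature`.
    fn can_pass_simd_directly(arch: &str, rustc_abi: Option<RustcAbi>, arg: &Arg) -> bool {
        match arch {
            // With SSE2 guaranteed, vectors up to 128 bits fit in XMM registers.
            "x86" if rustc_abi == Some(RustcAbi::X86Sse2) => arg.size_bits <= 128,
            "x86_64" if rustc_abi != Some(RustcAbi::X86Softfloat) => arg.size_bits <= 128,
            // No other target implements this logic yet.
            _ => false,
        }
    }

    /// Mirrors the x86 return-value change: a scalar that fits in an SSE
    /// register is returned as a vector-kind `Reg` (i.e. in %xmm0) instead of
    /// an integer register, since float code wants it in SSE registers anyway.
    fn x86_returns_scalar_in_xmm(rustc_abi: Option<RustcAbi>, ret: &Arg) -> bool {
        rustc_abi == Some(RustcAbi::X86Sse2)
            && matches!(ret.repr, BackendRepr::Scalar)
            && ret.size_bits <= 128
    }

    fn main() {
        let v128 = Arg { repr: BackendRepr::Vector, size_bits: 128 };
        let v256 = Arg { repr: BackendRepr::Vector, size_bits: 256 };
        assert!(can_pass_simd_directly("x86_64", None, &v128));
        assert!(!can_pass_simd_directly("x86_64", None, &v256)); // AVX types stay indirect
        let f64_ret = Arg { repr: BackendRepr::Scalar, size_bits: 64 };
        assert!(x86_returns_scalar_in_xmm(Some(RustcAbi::X86Sse2), &f64_ret));
    }

This is also what the assembly and codegen test updates below verify: on i686 targets whose baseline includes SSE2, `f32`/`f64` returns now come back in %xmm0 instead of through %eax or a return area pointer, and 128-bit SIMD values are passed directly.
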
diff --git a/tests/assembly/closure-inherit-target-feature.rs b/tests/assembly/closure-inherit-target-feature.rs index b629d8769edf2..069204bbd345f 100644 --- a/tests/assembly/closure-inherit-target-feature.rs +++ b/tests/assembly/closure-inherit-target-feature.rs @@ -8,8 +8,9 @@ use std::arch::x86_64::{__m128, _mm_blend_ps}; +// Use an explicit return pointer to prevent tail call optimization. #[no_mangle] -pub unsafe fn sse41_blend_nofeature(x: __m128, y: __m128) -> __m128 { +pub unsafe fn sse41_blend_nofeature(x: __m128, y: __m128, ret: *mut __m128) { let f = { // check that _mm_blend_ps is not being inlined into the closure // CHECK-LABEL: {{sse41_blend_nofeature.*closure.*:}} @@ -18,9 +19,9 @@ pub unsafe fn sse41_blend_nofeature(x: __m128, y: __m128) -> __m128 { // CHECK-NOT: blendps // CHECK: ret #[inline(never)] - |x, y| _mm_blend_ps(x, y, 0b0101) + |x, y, ret: *mut __m128| unsafe { *ret = _mm_blend_ps(x, y, 0b0101) } }; - f(x, y) + f(x, y, ret); } #[no_mangle] diff --git a/tests/assembly/x86-return-float.rs b/tests/assembly/x86-return-float.rs index ad760627b3a36..2c39c830684ec 100644 --- a/tests/assembly/x86-return-float.rs +++ b/tests/assembly/x86-return-float.rs @@ -33,19 +33,18 @@ use minicore::*; // CHECK-LABEL: return_f32: #[no_mangle] pub fn return_f32(x: f32) -> f32 { - // CHECK: movl {{.*}}(%ebp), %eax - // CHECK-NOT: ax - // CHECK: retl + // CHECK: movss {{.*}}(%ebp), %xmm0 + // CHECK-NEXT: popl %ebp + // CHECK-NEXT: retl x } // CHECK-LABEL: return_f64: #[no_mangle] pub fn return_f64(x: f64) -> f64 { - // CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]] - // CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL:.*]] - // CHECK-NEXT: movsd %[[VAL]], (%[[PTR]]) - // CHECK: retl + // CHECK: movsd {{.*}}(%ebp), %xmm0 + // CHECK-NEXT: popl %ebp + // CHECK-NEXT: retl x } @@ -157,7 +156,7 @@ pub unsafe fn call_f32(x: &mut f32) { } // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]] // CHECK: calll {{()|_}}get_f32 - // CHECK-NEXT: movl %eax, (%[[PTR]]) + // CHECK-NEXT: movss %xmm0, (%[[PTR]]) *x = get_f32(); } @@ -169,8 +168,7 @@ pub unsafe fn call_f64(x: &mut f64) { } // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]] // CHECK: calll {{()|_}}get_f64 - // CHECK: movsd {{.*}}(%{{ebp|esp}}), %[[VAL:.*]] - // CHECK-NEXT: movsd %[[VAL:.*]], (%[[PTR]]) + // CHECK-NEXT: movlps %xmm0, (%[[PTR]]) *x = get_f64(); } @@ -315,25 +313,21 @@ pub unsafe fn call_other_f64(x: &mut (usize, f64)) { #[no_mangle] pub fn return_f16(x: f16) -> f16 { // CHECK: pushl %ebp - // CHECK: movl %esp, %ebp - // CHECK: movzwl 8(%ebp), %eax - // CHECK: popl %ebp - // CHECK: retl + // CHECK-NEXT: movl %esp, %ebp + // CHECK-NEXT: pinsrw $0, 8(%ebp), %xmm0 + // CHECK-NEXT: popl %ebp + // CHECK-NEXT: retl x } // CHECK-LABEL: return_f128: #[no_mangle] pub fn return_f128(x: f128) -> f128 { - // CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]] - // CHECK-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]] - // CHECK-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]] - // CHECK-NEXT: movl [[#%d,OFFSET+12]](%ebp), %[[VAL3:.*]] - // CHECK-NEXT: movl [[#%d,OFFSET+16]](%ebp), %[[VAL4:.*]] - // CHECK-NEXT: movl %[[VAL4:.*]] 12(%[[PTR]]) - // CHECK-NEXT: movl %[[VAL3:.*]] 8(%[[PTR]]) - // CHECK-NEXT: movl %[[VAL2:.*]] 4(%[[PTR]]) - // CHECK-NEXT: movl %[[VAL1:.*]] (%[[PTR]]) - // CHECK: retl + // CHECK: pushl %ebp + // CHECK-NEXT: movl %esp, %ebp + // linux-NEXT: movaps 8(%ebp), %xmm0 + // win-NEXT: movups 8(%ebp), %xmm0 + // CHECK-NEXT: popl %ebp + // CHECK-NEXT: retl x } diff --git a/tests/codegen/abi-x86-sse.rs b/tests/codegen/abi-x86-sse.rs new file mode 
100644 index 0000000000000..837bf6134b01c --- /dev/null +++ b/tests/codegen/abi-x86-sse.rs @@ -0,0 +1,36 @@ +//@ compile-flags: -Z merge-functions=disabled + +//@ revisions: x86-64 +//@[x86-64] compile-flags: --target x86_64-unknown-linux-gnu +//@[x86-64] needs-llvm-components: x86 + +//@ revisions: x86-32 +//@[x86-32] compile-flags: --target i686-unknown-linux-gnu +//@[x86-32] needs-llvm-components: x86 + +//@ revisions: x86-32-nosse +//@[x86-32-nosse] compile-flags: --target i586-unknown-linux-gnu +//@[x86-32-nosse] needs-llvm-components: x86 + +#![feature(no_core, lang_items, rustc_attrs, repr_simd)] +#![no_core] +#![crate_type = "lib"] + +#[lang = "sized"] +trait Sized {} + +#[lang = "copy"] +trait Copy {} + +// Ensure this type is passed without ptr indirection on targets that +// require SSE2. +#[repr(simd)] +pub struct Sse([f32; 4]); + +// x86-64: <4 x float> @sse_id(<4 x float> {{[^,]*}}) +// x86-32: <4 x float> @sse_id(<4 x float> {{[^,]*}}) +// x86-32-nosse: void @sse_id(ptr{{( [^,]*)?}} sret([16 x i8]){{( .*)?}}, ptr{{( [^,]*)?}}) +#[no_mangle] +pub fn sse_id(x: Sse) -> Sse { + x +} diff --git a/tests/codegen/float/f128.rs b/tests/codegen/float/f128.rs index 562a8e6c9e9ba..d87bab1172a24 100644 --- a/tests/codegen/float/f128.rs +++ b/tests/codegen/float/f128.rs @@ -1,8 +1,11 @@ // 32-bit x86 returns float types differently to avoid the x87 stack. // 32-bit systems will return 128bit values using a return area pointer. // Emscripten aligns f128 to 8 bytes, not 16. -//@ revisions: x86 bit32 bit64 emscripten -//@[x86] only-x86 +//@ revisions: x86-sse x86-nosse bit32 bit64 emscripten +//@[x86-sse] only-x86 +//@[x86-sse] only-rustc_abi-x86-sse2 +//@[x86-nosse] only-x86 +//@[x86-nosse] ignore-rustc_abi-x86-sse2 //@[bit32] ignore-x86 //@[bit32] ignore-emscripten //@[bit32] only-32bit @@ -60,7 +63,8 @@ pub fn f128_le(a: f128, b: f128) -> bool { a <= b } -// x86-LABEL: void @f128_neg({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f128_neg({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f128_neg(fp128 // bit32-LABEL: void @f128_neg({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f128_neg( // emscripten-LABEL: void @f128_neg({{.*}}sret([16 x i8]) @@ -70,7 +74,8 @@ pub fn f128_neg(a: f128) -> f128 { -a } -// x86-LABEL: void @f128_add({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f128_add({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f128_add(fp128 // bit32-LABEL: void @f128_add({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f128_add( // emscripten-LABEL: void @f128_add({{.*}}sret([16 x i8]) @@ -80,7 +85,8 @@ pub fn f128_add(a: f128, b: f128) -> f128 { a + b } -// x86-LABEL: void @f128_sub({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f128_sub({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f128_sub(fp128 // bit32-LABEL: void @f128_sub({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f128_sub( // emscripten-LABEL: void @f128_sub({{.*}}sret([16 x i8]) @@ -90,7 +96,8 @@ pub fn f128_sub(a: f128, b: f128) -> f128 { a - b } -// x86-LABEL: void @f128_mul({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f128_mul({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f128_mul(fp128 // bit32-LABEL: void @f128_mul({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f128_mul( // emscripten-LABEL: void @f128_mul({{.*}}sret([16 x i8]) @@ -100,7 +107,8 @@ pub fn f128_mul(a: f128, b: f128) -> f128 { a * b } -// x86-LABEL: void @f128_div({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f128_div({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f128_div(fp128 // bit32-LABEL: void @f128_div({{.*}}sret([16 
x i8]) // bit64-LABEL: fp128 @f128_div( // emscripten-LABEL: void @f128_div({{.*}}sret([16 x i8]) @@ -110,7 +118,8 @@ pub fn f128_div(a: f128, b: f128) -> f128 { a / b } -// x86-LABEL: void @f128_rem({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f128_rem({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f128_rem(fp128 // bit32-LABEL: void @f128_rem({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f128_rem( // emscripten-LABEL: void @f128_rem({{.*}}sret([16 x i8]) @@ -162,7 +171,8 @@ pub fn f128_rem_assign(a: &mut f128, b: f128) { /* float to float conversions */ -// x86-LABEL: i16 @f128_as_f16( +// x86-sse-LABEL: <2 x i8> @f128_as_f16( +// x86-nosse-LABEL: i16 @f128_as_f16( // bits32-LABEL: half @f128_as_f16( // bits64-LABEL: half @f128_as_f16( #[no_mangle] @@ -171,7 +181,8 @@ pub fn f128_as_f16(a: f128) -> f16 { a as f16 } -// x86-LABEL: i32 @f128_as_f32( +// x86-sse-LABEL: <4 x i8> @f128_as_f32( +// x86-nosse-LABEL: i32 @f128_as_f32( // bit32-LABEL: float @f128_as_f32( // bit64-LABEL: float @f128_as_f32( // emscripten-LABEL: float @f128_as_f32( @@ -181,7 +192,8 @@ pub fn f128_as_f32(a: f128) -> f32 { a as f32 } -// x86-LABEL: void @f128_as_f64( +// x86-sse-LABEL: <8 x i8> @f128_as_f64( +// x86-nosse-LABEL: void @f128_as_f64({{.*}}sret([8 x i8]) // bit32-LABEL: double @f128_as_f64( // bit64-LABEL: double @f128_as_f64( // emscripten-LABEL: double @f128_as_f64( @@ -191,7 +203,8 @@ pub fn f128_as_f64(a: f128) -> f64 { a as f64 } -// x86-LABEL: void @f128_as_self({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f128_as_self( +// x86-nosse-LABEL: void @f128_as_self({{.*}}sret([16 x i8]) // bit32-LABEL: void @f128_as_self({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f128_as_self( // emscripten-LABEL: void @f128_as_self({{.*}}sret([16 x i8]) @@ -204,7 +217,8 @@ pub fn f128_as_self(a: f128) -> f128 { a as f128 } -// x86-LABEL: void @f16_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f16_as_f128( +// x86-nosse-LABEL: void @f16_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @f16_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f16_as_f128( // emscripten-LABEL: void @f16_as_f128({{.*}}sret([16 x i8]) @@ -214,7 +228,8 @@ pub fn f16_as_f128(a: f16) -> f128 { a as f128 } -// x86-LABEL: void @f32_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f32_as_f128( +// x86-nosse-LABEL: void @f32_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @f32_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f32_as_f128( // emscripten-LABEL: void @f32_as_f128({{.*}}sret([16 x i8]) @@ -224,7 +239,8 @@ pub fn f32_as_f128(a: f32) -> f128 { a as f128 } -// x86-LABEL: void @f64_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f64_as_f128( +// x86-nosse-LABEL: void @f64_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @f64_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f64_as_f128( // emscripten-LABEL: void @f64_as_f128({{.*}}sret([16 x i8]) @@ -263,7 +279,8 @@ pub fn f128_as_u64(a: f128) -> u64 { a as u64 } -// x86-LABEL: void @f128_as_u128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: void @f128_as_u128({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f128_as_u128({{.*}}sret([16 x i8]) // bit32-LABEL: void @f128_as_u128({{.*}}sret([16 x i8]) // bit64-LABEL: i128 @f128_as_u128( // emscripten-LABEL: void @f128_as_u128({{.*}}sret([16 x i8]) @@ -300,7 +317,8 @@ pub fn f128_as_i64(a: f128) -> i64 { a as i64 } -// x86-LABEL: void @f128_as_i128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: void @f128_as_i128({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f128_as_i128({{.*}}sret([16 
x i8]) // bit32-LABEL: void @f128_as_i128({{.*}}sret([16 x i8]) // bit64-LABEL: i128 @f128_as_i128( // emscripten-LABEL: void @f128_as_i128({{.*}}sret([16 x i8]) @@ -312,7 +330,8 @@ pub fn f128_as_i128(a: f128) -> i128 { /* int to float conversions */ -// x86-LABEL: void @u8_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @u8_as_f128( +// x86-nosse-LABEL: void @u8_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @u8_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @u8_as_f128( // emscripten-LABEL: void @u8_as_f128({{.*}}sret([16 x i8]) @@ -322,7 +341,8 @@ pub fn u8_as_f128(a: u8) -> f128 { a as f128 } -// x86-LABEL: void @u16_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @u16_as_f128( +// x86-nosse-LABEL: void @u16_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @u16_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @u16_as_f128( // emscripten-LABEL: void @u16_as_f128({{.*}}sret([16 x i8]) @@ -332,7 +352,8 @@ pub fn u16_as_f128(a: u16) -> f128 { a as f128 } -// x86-LABEL: void @u32_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @u32_as_f128( +// x86-nosse-LABEL: void @u32_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @u32_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @u32_as_f128( // emscripten-LABEL: void @u32_as_f128({{.*}}sret([16 x i8]) @@ -342,7 +363,8 @@ pub fn u32_as_f128(a: u32) -> f128 { a as f128 } -// x86-LABEL: void @u64_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @u64_as_f128( +// x86-nosse-LABEL: void @u64_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @u64_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @u64_as_f128( // emscripten-LABEL: void @u64_as_f128({{.*}}sret([16 x i8]) @@ -352,7 +374,8 @@ pub fn u64_as_f128(a: u64) -> f128 { a as f128 } -// x86-LABEL: void @u128_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @u128_as_f128( +// x86-nosse-LABEL: void @u128_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @u128_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @u128_as_f128( // emscripten-LABEL: void @u128_as_f128({{.*}}sret([16 x i8]) @@ -362,7 +385,8 @@ pub fn u128_as_f128(a: u128) -> f128 { a as f128 } -// x86-LABEL: void @i8_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @i8_as_f128( +// x86-nosse-LABEL: void @i8_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @i8_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @i8_as_f128( // emscripten-LABEL: void @i8_as_f128({{.*}}sret([16 x i8]) @@ -372,7 +396,8 @@ pub fn i8_as_f128(a: i8) -> f128 { a as f128 } -// x86-LABEL: void @i16_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @i16_as_f128( +// x86-nosse-LABEL: void @i16_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @i16_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @i16_as_f128( // emscripten-LABEL: void @i16_as_f128({{.*}}sret([16 x i8]) @@ -382,7 +407,8 @@ pub fn i16_as_f128(a: i16) -> f128 { a as f128 } -// x86-LABEL: void @i32_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @i32_as_f128( +// x86-nosse-LABEL: void @i32_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @i32_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @i32_as_f128( // emscripten-LABEL: void @i32_as_f128({{.*}}sret([16 x i8]) @@ -392,7 +418,8 @@ pub fn i32_as_f128(a: i32) -> f128 { a as f128 } -// x86-LABEL: void @i64_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @i64_as_f128( +// x86-nosse-LABEL: void @i64_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @i64_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @i64_as_f128( // 
emscripten-LABEL: void @i64_as_f128({{.*}}sret([16 x i8]) @@ -402,7 +429,8 @@ pub fn i64_as_f128(a: i64) -> f128 { a as f128 } -// x86-LABEL: void @i128_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @i128_as_f128( +// x86-nosse-LABEL: void @i128_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @i128_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @i128_as_f128( // emscripten-LABEL: void @i128_as_f128({{.*}}sret([16 x i8]) diff --git a/tests/codegen/float/f16.rs b/tests/codegen/float/f16.rs index 5c3a5893b9d23..0c40606ad8a42 100644 --- a/tests/codegen/float/f16.rs +++ b/tests/codegen/float/f16.rs @@ -1,7 +1,10 @@ // 32-bit x86 returns float types differently to avoid the x87 stack. // 32-bit systems will return 128bit values using a return area pointer. -//@ revisions: x86 bit32 bit64 -//@[x86] only-x86 +//@ revisions: x86-sse x86-nosse bit32 bit64 +//@[x86-sse] only-x86 +//@[x86-sse] only-rustc_abi-x86-sse2 +//@[x86-nosse] only-x86 +//@[x86-nosse] ignore-rustc_abi-x86-sse2 //@[bit32] ignore-x86 //@[bit32] only-32bit //@[bit64] ignore-x86 @@ -59,8 +62,10 @@ pub fn f16_le(a: f16, b: f16) -> bool { } // This is where we check the argument and return ABI for f16. -// other-LABEL: half @f16_neg(half -// x86-LABEL: i16 @f16_neg(half +// bit32-LABEL: half @f16_neg(half +// bit64-LABEL: half @f16_neg(half +// x86-sse-LABEL: <2 x i8> @f16_neg(half +// x86-nosse-LABEL: i16 @f16_neg(half #[no_mangle] pub fn f16_neg(a: f16) -> f16 { // CHECK: fneg half %{{.+}} @@ -144,17 +149,23 @@ pub fn f16_rem_assign(a: &mut f16, b: f16) { /* float to float conversions */ -// other-LABEL: half @f16_as_self( -// x86-LABEL: i16 @f16_as_self( +// bit32-LABEL: half @f16_as_self( +// bit64-LABEL: half @f16_as_self( +// x86-sse-LABEL: <2 x i8> @f16_as_self( +// x86-nosse-LABEL: i16 @f16_as_self( #[no_mangle] pub fn f16_as_self(a: f16) -> f16 { - // other-CHECK: ret half %{{.+}} - // x86-CHECK: bitcast half - // x86-CHECK: ret i16 + // bit32-CHECK: ret half %{{.+}} + // bit64-CHECK: ret half %{{.+}} + // x86-sse-CHECK: bitcast half + // x86-nosse-CHECK: bitcast half + // x86-sse-CHECK: ret i16 + // x86-nosse-CHECK: ret i16 a as f16 } -// x86-LABEL: i32 @f16_as_f32( +// x86-sse-LABEL: <4 x i8> @f16_as_f32( +// x86-nosse-LABEL: i32 @f16_as_f32( // bit32-LABEL: float @f16_as_f32( // bit64-LABEL: float @f16_as_f32( #[no_mangle] @@ -163,7 +174,8 @@ pub fn f16_as_f32(a: f16) -> f32 { a as f32 } -// x86-LABEL: void @f16_as_f64( +// x86-sse-LABEL: <8 x i8> @f16_as_f64( +// x86-nosse-LABEL: void @f16_as_f64({{.*}}sret([8 x i8]) // bit32-LABEL: double @f16_as_f64( // bit64-LABEL: double @f16_as_f64( #[no_mangle] @@ -172,7 +184,8 @@ pub fn f16_as_f64(a: f16) -> f64 { a as f64 } -// x86-LABEL: void @f16_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f16_as_f128( +// x86-nosse-LABEL: void @f16_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @f16_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f16_as_f128( #[no_mangle] @@ -231,7 +244,8 @@ pub fn f16_as_u64(a: f16) -> u64 { a as u64 } -// x86-LABEL: void @f16_as_u128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: void @f16_as_u128({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f16_as_u128({{.*}}sret([16 x i8]) // bit32-LABEL: void @f16_as_u128({{.*}}sret([16 x i8]) // bit64-LABEL: i128 @f16_as_u128( #[no_mangle] @@ -267,7 +281,8 @@ pub fn f16_as_i64(a: f16) -> i64 { a as i64 } -// x86-LABEL: void @f16_as_i128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: void @f16_as_i128({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f16_as_i128({{.*}}sret([16 x i8]) // 
bit32-LABEL: void @f16_as_i128({{.*}}sret([16 x i8]) // bit64-LABEL: i128 @f16_as_i128( #[no_mangle] diff --git a/tests/codegen/intrinsics/transmute-x64.rs b/tests/codegen/intrinsics/transmute-x64.rs index fe68f18366701..be45e4db90fd9 100644 --- a/tests/codegen/intrinsics/transmute-x64.rs +++ b/tests/codegen/intrinsics/transmute-x64.rs @@ -6,15 +6,6 @@ use std::arch::x86_64::{__m128, __m128i, __m256i}; use std::mem::transmute; -// CHECK-LABEL: @check_sse_float_to_int( -#[no_mangle] -pub unsafe fn check_sse_float_to_int(x: __m128) -> __m128i { - // CHECK-NOT: alloca - // CHECK: %0 = load <4 x float>, ptr %x, align 16 - // CHECK: store <4 x float> %0, ptr %_0, align 16 - transmute(x) -} - // CHECK-LABEL: @check_sse_pair_to_avx( #[no_mangle] pub unsafe fn check_sse_pair_to_avx(x: (__m128i, __m128i)) -> __m256i { diff --git a/tests/codegen/issues/issue-32031.rs b/tests/codegen/issues/issue-32031.rs index 4d6895166f1cb..559e8d947fba5 100644 --- a/tests/codegen/issues/issue-32031.rs +++ b/tests/codegen/issues/issue-32031.rs @@ -1,7 +1,7 @@ //@ compile-flags: -C no-prepopulate-passes -Copt-level=0 // 32-bit x86 returns `f32` and `f64` differently to avoid the x87 stack. //@ revisions: x86 other -//@[x86] only-x86 +//@[x86] only-rustc_abi-x86-sse2 //@[other] ignore-x86 #![crate_type = "lib"] @@ -10,7 +10,7 @@ pub struct F32(f32); // other: define{{.*}}float @add_newtype_f32(float %a, float %b) -// x86: define{{.*}}i32 @add_newtype_f32(float %a, float %b) +// x86: define{{.*}}<4 x i8> @add_newtype_f32(float %a, float %b) #[inline(never)] #[no_mangle] pub fn add_newtype_f32(a: F32, b: F32) -> F32 { @@ -21,7 +21,7 @@ pub fn add_newtype_f32(a: F32, b: F32) -> F32 { pub struct F64(f64); // other: define{{.*}}double @add_newtype_f64(double %a, double %b) -// x86: define{{.*}}void @add_newtype_f64(ptr{{.*}}sret([8 x i8]){{.*}}%_0, double %a, double %b) +// x86: define{{.*}}<8 x i8> @add_newtype_f64(double %a, double %b) #[inline(never)] #[no_mangle] pub fn add_newtype_f64(a: F64, b: F64) -> F64 { diff --git a/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs b/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs index 75f989d6e12c4..0d21d51055761 100644 --- a/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs +++ b/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs @@ -1,5 +1,14 @@ // //@ compile-flags: -C no-prepopulate-passes +// LLVM IR isn't very portable and the one tested here depends on the ABI +// which is different between x86 (where we use SSE registers) and others. +// `x86-64` and `x86-32-sse2` are identical, but compiletest does not support +// taking the union of multiple `only` annotations. 
+//@ revisions: x86-64 x86-32-sse2 other +//@[x86-64] only-x86_64 +//@[x86-32-sse2] only-rustc_abi-x86-sse2 +//@[other] ignore-rustc_abi-x86-sse2 +//@[other] ignore-x86_64 #![crate_type = "lib"] #![allow(non_camel_case_types)] @@ -38,7 +47,9 @@ pub fn build_array_s(x: [f32; 4]) -> S<4> { #[no_mangle] pub fn build_array_transmute_s(x: [f32; 4]) -> S<4> { // CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]] - // CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]] + // x86-32: ret <4 x float> %[[VAL:.+]] + // x86-64: ret <4 x float> %[[VAL:.+]] + // other: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]] unsafe { std::mem::transmute(x) } } @@ -53,6 +64,8 @@ pub fn build_array_t(x: [f32; 4]) -> T { #[no_mangle] pub fn build_array_transmute_t(x: [f32; 4]) -> T { // CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]] - // CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]] + // x86-32: ret <4 x float> %[[VAL:.+]] + // x86-64: ret <4 x float> %[[VAL:.+]] + // other: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]] unsafe { std::mem::transmute(x) } } diff --git a/tests/codegen/simd/packed-simd.rs b/tests/codegen/simd/packed-simd.rs index 1df09c96e6cc0..a27d5e3af452a 100644 --- a/tests/codegen/simd/packed-simd.rs +++ b/tests/codegen/simd/packed-simd.rs @@ -1,4 +1,5 @@ //@ revisions:opt3 noopt +//@ only-x86_64 //@[opt3] compile-flags: -Copt-level=3 //@[noopt] compile-flags: -Cno-prepopulate-passes @@ -14,14 +15,14 @@ use core::{mem, ptr}; #[repr(simd, packed)] #[derive(Copy, Clone)] -pub struct Simd([T; N]); +pub struct PackedSimd([T; N]); #[repr(simd)] #[derive(Copy, Clone)] pub struct FullSimd([T; N]); // non-powers-of-two have padding and need to be expanded to full vectors -fn load(v: Simd) -> FullSimd { +fn load(v: PackedSimd) -> FullSimd { unsafe { let mut tmp = mem::MaybeUninit::>::uninit(); ptr::copy_nonoverlapping(&v as *const _, tmp.as_mut_ptr().cast(), 1); @@ -29,18 +30,16 @@ fn load(v: Simd) -> FullSimd { } } -// CHECK-LABEL: square_packed_full -// CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align (8|16)]]{{[^%]*}} [[RET_VREG:%[_0-9]*]] -// CHECK-SAME: ptr{{[a-z_ ]*}} align 4 +// CHECK-LABEL: define <3 x float> @square_packed_full(ptr{{[a-z_ ]*}} align 4 {{[^,]*}}) #[no_mangle] -pub fn square_packed_full(x: Simd) -> FullSimd { - // CHECK-NEXT: start - // noopt: alloca [[RET_TYPE]], [[RET_ALIGN]] - // CHECK: load <3 x float> +pub fn square_packed_full(x: PackedSimd) -> FullSimd { + // The unoptimized version of this is not very interesting to check + // since `load` does not get inlined. 
+ // opt3-NEXT: start: + // opt3-NEXT: load <3 x float> let x = load(x); - // CHECK: [[VREG:%[a-z0-9_]+]] = fmul <3 x float> - // CHECK-NEXT: store <3 x float> [[VREG]], ptr [[RET_VREG]], [[RET_ALIGN]] - // CHECK-NEXT: ret void + // opt3-NEXT: [[VREG:%[a-z0-9_]+]] = fmul <3 x float> + // opt3-NEXT: ret <3 x float> [[VREG:%[a-z0-9_]+]] unsafe { intrinsics::simd_mul(x, x) } } @@ -48,7 +47,7 @@ pub fn square_packed_full(x: Simd) -> FullSimd { // CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align 4]]{{[^%]*}} [[RET_VREG:%[_0-9]*]] // CHECK-SAME: ptr{{[a-z_ ]*}} align 4 #[no_mangle] -pub fn square_packed(x: Simd) -> Simd { +pub fn square_packed(x: PackedSimd) -> PackedSimd { // CHECK-NEXT: start // CHECK-NEXT: load <3 x float> // noopt-NEXT: load <3 x float> diff --git a/tests/codegen/union-abi.rs b/tests/codegen/union-abi.rs index 92d40d8ac14cb..16022fad8af7b 100644 --- a/tests/codegen/union-abi.rs +++ b/tests/codegen/union-abi.rs @@ -2,8 +2,11 @@ //@ compile-flags: -Copt-level=3 -C no-prepopulate-passes // 32-bit x86 returns `f32` differently to avoid the x87 stack. // 32-bit systems will return 128bit values using a return area pointer. -//@ revisions: x86 bit32 bit64 -//@[x86] only-x86 +//@ revisions: x86-sse x86-nosse bit32 bit64 +//@[x86-sse] only-x86 +//@[x86-sse] only-rustc_abi-x86-sse2 +//@[x86-nosse] only-x86 +//@[x86-nosse] ignore-rustc_abi-x86-sse2 //@[bit32] ignore-x86 //@[bit32] only-32bit //@[bit64] ignore-x86 @@ -75,7 +78,8 @@ pub union UnionF32 { a: f32, } -// x86: define {{(dso_local )?}}i32 @test_UnionF32(float %_1) +// x86-sse: define {{(dso_local )?}}<4 x i8> @test_UnionF32(float %_1) +// x86-nosse: define {{(dso_local )?}}i32 @test_UnionF32(float %_1) // bit32: define {{(dso_local )?}}float @test_UnionF32(float %_1) // bit64: define {{(dso_local )?}}float @test_UnionF32(float %_1) #[no_mangle] @@ -88,7 +92,8 @@ pub union UnionF32F32 { b: f32, } -// x86: define {{(dso_local )?}}i32 @test_UnionF32F32(float %_1) +// x86-sse: define {{(dso_local )?}}<4 x i8> @test_UnionF32F32(float %_1) +// x86-nosse: define {{(dso_local )?}}i32 @test_UnionF32F32(float %_1) // bit32: define {{(dso_local )?}}float @test_UnionF32F32(float %_1) // bit64: define {{(dso_local )?}}float @test_UnionF32F32(float %_1) #[no_mangle] @@ -110,7 +115,8 @@ pub fn test_UnionF32U32(_: UnionF32U32) -> UnionF32U32 { pub union UnionU128 { a: u128, } -// x86: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1) +// x86-sse: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1) +// x86-nosse: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1) // bit32: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1) // bit64: define {{(dso_local )?}}i128 @test_UnionU128(i128 %_1) #[no_mangle] diff --git a/tests/ui/sse-abi-checks.rs b/tests/ui/sse-simd-abi-checks.rs similarity index 82% rename from tests/ui/sse-abi-checks.rs rename to tests/ui/sse-simd-abi-checks.rs index cb3128a890f79..396e9bf131880 100644 --- a/tests/ui/sse-abi-checks.rs +++ b/tests/ui/sse-simd-abi-checks.rs @@ -1,7 +1,8 @@ //! Ensure we trigger abi_unsupported_vector_types for target features that are usually enabled -//! on a target, but disabled in this file via a `-C` flag. +//! on a target via the base CPU, but disabled in this file via a `-C` flag. 
+//@ compile-flags: --crate-type=rlib --target=i586-unknown-linux-gnu +//@ compile-flags: -Ctarget-cpu=pentium4 -C target-feature=-sse,-sse2 //@ add-core-stubs -//@ compile-flags: --crate-type=rlib --target=i586-unknown-linux-gnu -C target-feature=-sse,-sse2 //@ build-pass //@ ignore-pass (test emits codegen-time warnings) //@ needs-llvm-components: x86 diff --git a/tests/ui/sse-abi-checks.stderr b/tests/ui/sse-simd-abi-checks.stderr similarity index 94% rename from tests/ui/sse-abi-checks.stderr rename to tests/ui/sse-simd-abi-checks.stderr index 9944aa093b4ed..95486f480d256 100644 --- a/tests/ui/sse-abi-checks.stderr +++ b/tests/ui/sse-simd-abi-checks.stderr @@ -1,5 +1,5 @@ warning: this function definition uses SIMD vector type `SseVector` which (with the chosen ABI) requires the `sse` target feature, which is not enabled - --> $DIR/sse-abi-checks.rs:19:1 + --> $DIR/sse-simd-abi-checks.rs:20:1 | LL | pub unsafe extern "C" fn f(_: SseVector) { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ function defined here @@ -13,7 +13,7 @@ warning: 1 warning emitted Future incompatibility report: Future breakage diagnostic: warning: this function definition uses SIMD vector type `SseVector` which (with the chosen ABI) requires the `sse` target feature, which is not enabled - --> $DIR/sse-abi-checks.rs:19:1 + --> $DIR/sse-simd-abi-checks.rs:20:1 | LL | pub unsafe extern "C" fn f(_: SseVector) { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ function defined here
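
For reference, here is a minimal sketch of the pattern the renamed UI test exercises, written against plain nightly std rather than the test's `minicore`/`add-core-stubs` scaffolding. Only the function signature is taken from the diagnostic above; the `[i64; 2]` payload of `SseVector` is an assumption for illustration. Built as an rlib for an x86 target with `-Ctarget-cpu=pentium4 -Ctarget-feature=-sse,-sse2`, the `extern "C"` definition triggers the `abi_unsupported_vector_types` future-incompatibility warning, because with SSE disabled there is no register in which to pass the vector:

    #![feature(repr_simd)]
    #![allow(dead_code, improper_ctypes_definitions)]

    // A 128-bit SIMD type that would normally travel in an XMM register.
    #[repr(simd)]
    pub struct SseVector([i64; 2]);

    // With SSE disabled via -Ctarget-feature, rustc warns on this definition:
    // "this function definition uses SIMD vector type `SseVector` which (with
    // the chosen ABI) requires the `sse` target feature, which is not enabled".
    pub unsafe extern "C" fn f(_: SseVector) {
        // The body is irrelevant; the warning fires on the signature alone.
    }
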