Skip to content

Commit

Permalink
Test codegen for repr(packed,simd) -> repr(simd)
Browse files Browse the repository at this point in the history
  • Loading branch information
workingjubilee committed Jun 2, 2024
1 parent eda9d7f commit 485add7
Showing 1 changed file with 54 additions and 0 deletions.
54 changes: 54 additions & 0 deletions tests/codegen/simd/packed-simd.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
//@ revisions:opt3 noopt
//@[opt3] compile-flags: -Copt-level=3
//@[noopt] compile-flags: -Cno-prepopulate-passes

#![crate_type = "lib"]
#![no_std]
#![feature(repr_simd, core_intrinsics)]
use core::intrinsics::simd as intrinsics;
use core::{mem, ptr};

// Test codegen for not only "packed" but also "fully aligned" SIMD types, and conversion between
// them. A repr(packed,simd) type with 3 elements can't exceed its element alignment,
// whereas the same type as repr(simd) will instead have padding.

/// "Packed" SIMD vector: `N` lanes of `T`, declared with both the `simd` and `packed` layout
/// attributes.
///
/// This is the input type of the test. It is not `Copy`, so values are moved into the functions
/// that consume them.
#[repr(simd, packed)]
pub struct Simd<T, const N: usize>([T; N]);

/// "Fully aligned" SIMD vector: `N` lanes of `T`, declared with plain `repr(simd)`.
///
/// This is the type the test performs arithmetic on and returns. It derives `Copy` so one value
/// can be passed as both operands of a SIMD intrinsic.
#[repr(simd)]
#[derive(Copy, Clone)]
pub struct FullSimd<T, const N: usize>([T; N]);

// non-powers-of-two have padding and need to be expanded to full vectors
//
// Converts a packed vector into its fully aligned counterpart by copying the bytes of `v` into
// the start of a fresh, uninitialized `FullSimd<T, N>` and returning that value.
fn load<T, const N: usize>(v: Simd<T, N>) -> FullSimd<T, N> {
// SAFETY: `&v` and `tmp` are distinct locals, so the source and destination cannot overlap.
// NOTE(review): soundness also assumes `FullSimd<T, N>` is at least as large and as aligned as
// `Simd<T, N>` — consistent with the padding remark above, but confirm for every `T`/`N` used.
unsafe {
// Destination storage; nothing is initialized until the copy below.
let mut tmp = mem::MaybeUninit::<FullSimd<T, N>>::uninit();
// `.cast()` retypes the destination to match the source pointer (`*const Simd<T, N>`), so a
// count of 1 copies exactly one packed vector's worth of bytes.
ptr::copy_nonoverlapping(&v as *const _, tmp.as_mut_ptr().cast(), 1);
// NOTE(review): any bytes of `FullSimd` past the packed size are left untouched; presumably
// those are only padding — confirm.
tmp.assume_init()
}
}

// Squares each lane of a packed 3-lane `f32` vector and returns the result as a `FullSimd`.
//
// The FileCheck directives around and inside this function pin the expected LLVM IR: the result
// is written through an `sret` pointer aligned to 8 or 16 bytes, a second pointer parameter is
// aligned to 4, and the arithmetic is one `<3 x float>` multiply whose result is stored with the
// same alignment that was captured for the return slot.
//
// CHECK-LABEL: square_packed
// CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align (8|16)]]{{[^%]*}} [[RET_VREG:%[_0-9]*]]
// CHECK-SAME: ptr{{[a-z_ ]*}} align 4
#[no_mangle]
pub fn square_packed(x: Simd<f32, 3>) -> FullSimd<f32, 3> {
// CHECK-NEXT: start
// We don't particularly care about the allocas and @llvm.lifetime.{start,end}
// These merely serve as documentation for our current codegen.
// noopt-DAG: alloca [[RET_TYPE]], [[RET_ALIGN]]
// noopt-DAG: alloca{{.*}} align 4
// noopt-NEXT: @llvm.lifetime.start
// noopt-DAG: @llvm.lifetime.start
// noopt-DAG: @llvm.memcpy.{{.*}}
// noopt-DAG: @llvm.memcpy.{{.*}}ptr align 4 %{{[a-z0-9_]+}}, ptr align 4 %{{[a-z0-9_]+}}
// With optimizations, everything up to here gets SRoA'd away
// CHECK-NEXT: load <3 x float>
// Widen the packed argument to the fully aligned type; the new binding shadows the parameter.
let x = load(x);
// noopt-NEXT: @llvm.lifetime.end
// noopt-NEXT: @llvm.lifetime.end
// CHECK: [[VREG:%[a-z0-9_]+]] = fmul <3 x float>
// CHECK-NEXT: store <3 x float> [[VREG]], ptr [[RET_VREG]], [[RET_ALIGN]]
// CHECK-NEXT: ret void
// `x` is passed as both operands, which relies on `FullSimd` being `Copy`.
// SAFETY: NOTE(review): presumably the intrinsic only requires both operands to share one SIMD
// vector type, which holds trivially here — confirm against the intrinsic's documented contract.
unsafe { intrinsics::simd_mul(x, x) }
}

0 comments on commit 485add7

Please sign in to comment.