From 485add723821cfecdd32976642a9f26adec56724 Mon Sep 17 00:00:00 2001
From: Jubilee Young <workingjubilee@gmail.com>
Date: Sun, 2 Jun 2024 14:09:14 -0700
Subject: [PATCH] Test codegen for repr(packed,simd) -> repr(simd)

---
 tests/codegen/simd/packed-simd.rs | 54 +++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 tests/codegen/simd/packed-simd.rs

diff --git a/tests/codegen/simd/packed-simd.rs b/tests/codegen/simd/packed-simd.rs
new file mode 100644
index 0000000000000..ad0d5eb6b40ad
--- /dev/null
+++ b/tests/codegen/simd/packed-simd.rs
@@ -0,0 +1,54 @@
+//@ revisions:opt3 noopt
+//@[opt3] compile-flags: -Copt-level=3
+//@[noopt] compile-flags: -Cno-prepopulate-passes
+
+#![crate_type = "lib"]
+#![no_std]
+#![feature(repr_simd, core_intrinsics)]
+use core::intrinsics::simd as intrinsics;
+use core::{mem, ptr};
+
+// Test codegen for not only "packed" but also "fully aligned" SIMD types, and conversion
+// between them. A repr(packed,simd) type with 3 elements can't exceed its element alignment,
+// whereas the same type as repr(simd) will instead have padding.
+
+#[repr(simd, packed)]
+pub struct Simd<T, const N: usize>([T; N]); // packed: alignment never exceeds the element's
+
+#[repr(simd)]
+#[derive(Copy, Clone)]
+pub struct FullSimd<T, const N: usize>([T; N]); // plain repr(simd): may carry padding
+
+// Copies a packed vector into the full repr(simd) type, which has padding for non-powers-of-two.
+fn load<T, const N: usize>(v: Simd<T, N>) -> FullSimd<T, N> {
+    unsafe { // SAFETY: assumes FullSimd<T, N> is Simd<T, N> plus trailing padding
+        let mut tmp = mem::MaybeUninit::<FullSimd<T, N>>::uninit();
+        ptr::copy_nonoverlapping(&v as *const _, tmp.as_mut_ptr().cast(), 1);
+        tmp.assume_init()
+    }
+}
+
+// CHECK-LABEL: square_packed
+// CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align (8|16)]]{{[^%]*}} [[RET_VREG:%[_0-9]*]]
+// CHECK-SAME: ptr{{[a-z_ ]*}} align 4
+#[no_mangle]
+pub fn square_packed(x: Simd<f32, 3>) -> FullSimd<f32, 3> {
+    // CHECK-NEXT: start
+    // The allocas and @llvm.lifetime.{start,end} calls are not what this test is about;
+    // their checks merely serve as documentation for our current codegen.
+    // noopt-DAG: alloca [[RET_TYPE]], [[RET_ALIGN]]
+    // noopt-DAG: alloca{{.*}} align 4
+    // noopt-NEXT: @llvm.lifetime.start
+    // noopt-DAG: @llvm.lifetime.start
+    // noopt-DAG: @llvm.memcpy.{{.*}}
+    // noopt-DAG: @llvm.memcpy.{{.*}}ptr align 4 %{{[a-z0-9_]+}}, ptr align 4 %{{[a-z0-9_]+}}
+    // With optimizations, SROA removes everything up to here, leaving just the vector load.
+    // CHECK-NEXT: load <3 x float>
+    let x = load(x); // widen the packed argument to the full vector type
+    // noopt-NEXT: @llvm.lifetime.end
+    // noopt-NEXT: @llvm.lifetime.end
+    // CHECK: [[VREG:%[a-z0-9_]+]] = fmul <3 x float>
+    // CHECK-NEXT: store <3 x float> [[VREG]], ptr [[RET_VREG]], [[RET_ALIGN]]
+    // CHECK-NEXT: ret void
+    unsafe { intrinsics::simd_mul(x, x) } // lane-wise x * x
+}