[SystemZ] ABI support for single-element vector types

Support passing and returning values of 128-bit single-element vector types (i.e. <1 x i128> and <1 x fp128>). Now that i128 is a legal type, these types can be supported simply by providing a getRegisterTypeForCallingConv implementation that handles them. Fixes llvm#61291.
topperc · Dec 15, 2023 · 59f7f35 · 59f7f35
1 parent 7113c80
commit 59f7f35
Show file tree

Hide file tree

Showing 17 changed files with 195 additions and 120 deletions.
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1429,24 +1429,6 @@ bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
   return CI->isTailCall();
 }
 
-// We do not yet support 128-bit single-element vector types.  If the user
-// attempts to use such types as function argument or return type, prefer
-// to error out instead of emitting code violating the ABI.
-static void VerifyVectorType(MVT VT, EVT ArgVT) {
-  if (ArgVT.isVector() && !VT.isVector())
-    report_fatal_error("Unsupported vector argument or return type");
-}
-
-static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
-  for (unsigned i = 0; i < Ins.size(); ++i)
-    VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
-}
-
-static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
-  for (unsigned i = 0; i < Outs.size(); ++i)
-    VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
-}
-
 // Value is a value that has been passed to us in the location described by VA
 // (and so has type VA.getLocVT()).  Convert Value to VA.getValVT(), chaining
 // any loads onto Chain.
@@ -1586,10 +1568,6 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
   auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
 
-  // Detect unsupported vector argument types.
-  if (Subtarget.hasVector())
-    VerifyVectorTypes(Ins);
-
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
   SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
@@ -1890,12 +1868,6 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
   if (Subtarget.isTargetXPLINK64())
     IsTailCall = false;
 
-  // Detect unsupported vector argument and return types.
-  if (Subtarget.hasVector()) {
-    VerifyVectorTypes(Outs);
-    VerifyVectorTypes(Ins);
-  }
-
   // Analyze the operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
   SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
@@ -2139,10 +2111,6 @@ CanLowerReturn(CallingConv::ID CallConv,
                MachineFunction &MF, bool isVarArg,
                const SmallVectorImpl<ISD::OutputArg> &Outs,
                LLVMContext &Context) const {
-  // Detect unsupported vector return types.
-  if (Subtarget.hasVector())
-    VerifyVectorTypes(Outs);
-
   // Special case that we cannot easily detect in RetCC_SystemZ since
   // i128 may not be a legal type.
   for (auto &Out : Outs)
@@ -2162,10 +2130,6 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                    const SDLoc &DL, SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
 
-  // Detect unsupported vector return types.
-  if (Subtarget.hasVector())
-    VerifyVectorTypes(Outs);
-
   // Assign locations to each returned value.
   SmallVector<CCValAssign, 16> RetLocs;
   CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());

diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -443,6 +443,15 @@ class SystemZTargetLowering : public TargetLowering {
       return 1;
     return TargetLowering::getNumRegisters(Context, VT);
   }
+  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
+                                    EVT VT) const override {
+    // 128-bit single-element vector types (<1 x i128>, <1 x fp128>) are passed
+    // like other vectors (as v16i8), not like their element type.
+    if (VT.isVector() && VT.getSizeInBits() == 128 &&
+        VT.getVectorNumElements() == 1)
+      return MVT::v16i8;
+    return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+  }
   bool isCheapToSpeculateCtlz(Type *) const override { return true; }
   bool isCheapToSpeculateCttz(Type *) const override { return true; }
   bool preferZeroCompareBranch() const override { return true; }

diff --git a/llvm/test/CodeGen/SystemZ/vec-add-01.ll b/llvm/test/CodeGen/SystemZ/vec-add-01.ll
@@ -58,3 +58,12 @@ define double @f6(<2 x double> %val1, <2 x double> %val2) {
   %ret = fadd double %scalar1, %scalar2
   ret double %ret
 }
+
+; Test a v1i128 addition.
+define <1 x i128> @f7(<1 x i128> %dummy, <1 x i128> %val1, <1 x i128> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: vaq %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = add <1 x i128> %val1, %val2
+  ret <1 x i128> %ret
+}
diff --git a/llvm/test/CodeGen/SystemZ/vec-add-02.ll b/llvm/test/CodeGen/SystemZ/vec-add-02.ll
@@ -22,3 +22,13 @@ define float @f2(<4 x float> %val1, <4 x float> %val2) {
   %ret = fadd float %scalar1, %scalar2
   ret float %ret
 }
+
+; Test a v1f128 addition.
+define <1 x fp128> @f3(<1 x fp128> %dummy, <1 x fp128> %val1,
+                       <1 x fp128> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: wfaxb %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = fadd <1 x fp128> %val1, %val2
+  ret <1 x fp128> %ret
+}
diff --git a/llvm/test/CodeGen/SystemZ/vec-args-08.ll b/llvm/test/CodeGen/SystemZ/vec-args-08.ll
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Verify that we handle single-element vector types correctly.
+
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+define void @f1(<1 x i128> %a, ptr %ptr) {
+; CHECK-LABEL: f1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vst %v24, 0(%r2), 3
+; CHECK-NEXT:    br %r14
+  store <1 x i128> %a, ptr %ptr
+  ret void
+}
+
+define <1 x i128> @f2() {
+; CHECK-LABEL: f2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vgbm %v24, 0
+; CHECK-NEXT:    br %r14
+  ret <1 x i128><i128 0>
+}
+
+declare void @bar3(<1 x i128>)
+
+define void @f3() {
+; CHECK-LABEL: f3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -160
+; CHECK-NEXT:    .cfi_def_cfa_offset 320
+; CHECK-NEXT:    vgbm %v24, 0
+; CHECK-NEXT:    brasl %r14, bar3@PLT
+; CHECK-NEXT:    lmg %r14, %r15, 272(%r15)
+; CHECK-NEXT:    br %r14
+  call void @bar3 (<1 x i128> <i128 0>)
+  ret void
+}
+
+declare <1 x i128> @bar4()
+
+define void @f4(ptr %ptr) {
+; CHECK-LABEL: f4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -160
+; CHECK-NEXT:    .cfi_def_cfa_offset 320
+; CHECK-NEXT:    lgr %r13, %r2
+; CHECK-NEXT:    brasl %r14, bar4@PLT
+; CHECK-NEXT:    vst %v24, 0(%r13), 3
+; CHECK-NEXT:    lmg %r13, %r15, 264(%r15)
+; CHECK-NEXT:    br %r14
+  %res = call <1 x i128> @bar4 ()
+  store <1 x i128> %res, ptr %ptr
+  ret void
+}
+
+define void @f5(<1 x fp128> %a, ptr %ptr) {
+; CHECK-LABEL: f5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vst %v24, 0(%r2), 3
+; CHECK-NEXT:    br %r14
+  store <1 x fp128> %a, ptr %ptr
+  ret void
+}
+
+define <1 x fp128> @f6() {
+; CHECK-LABEL: f6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lzxr %f0
+; CHECK-NEXT:    vmrhg %v24, %v0, %v2
+; CHECK-NEXT:    br %r14
+  ret <1 x fp128><fp128 0xL00000000000000000000000000000000>
+}
+
+declare void @bar7(<1 x fp128>)
+
+define void @f7() {
+; CHECK-LABEL: f7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -160
+; CHECK-NEXT:    .cfi_def_cfa_offset 320
+; CHECK-NEXT:    lzxr %f0
+; CHECK-NEXT:    vmrhg %v24, %v0, %v2
+; CHECK-NEXT:    brasl %r14, bar7@PLT
+; CHECK-NEXT:    lmg %r14, %r15, 272(%r15)
+; CHECK-NEXT:    br %r14
+  call void @bar7 (<1 x fp128> <fp128 0xL00000000000000000000000000000000>)
+  ret void
+}
+
+declare <1 x fp128> @bar8()
+
+define void @f8(ptr %ptr) {
+; CHECK-LABEL: f8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -160
+; CHECK-NEXT:    .cfi_def_cfa_offset 320
+; CHECK-NEXT:    lgr %r13, %r2
+; CHECK-NEXT:    brasl %r14, bar8@PLT
+; CHECK-NEXT:    vst %v24, 0(%r13), 3
+; CHECK-NEXT:    lmg %r13, %r15, 264(%r15)
+; CHECK-NEXT:    br %r14
+  %res = call <1 x fp128> @bar8 ()
+  store <1 x fp128> %res, ptr %ptr
+  ret void
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/vec-args-error-01.ll b/llvm/test/CodeGen/SystemZ/vec-args-error-01.ll
diff --git a/llvm/test/CodeGen/SystemZ/vec-args-error-02.ll b/llvm/test/CodeGen/SystemZ/vec-args-error-02.ll
diff --git a/llvm/test/CodeGen/SystemZ/vec-args-error-03.ll b/llvm/test/CodeGen/SystemZ/vec-args-error-03.ll
diff --git a/llvm/test/CodeGen/SystemZ/vec-args-error-04.ll b/llvm/test/CodeGen/SystemZ/vec-args-error-04.ll
diff --git a/llvm/test/CodeGen/SystemZ/vec-args-error-05.ll b/llvm/test/CodeGen/SystemZ/vec-args-error-05.ll
diff --git a/llvm/test/CodeGen/SystemZ/vec-args-error-06.ll b/llvm/test/CodeGen/SystemZ/vec-args-error-06.ll
diff --git a/llvm/test/CodeGen/SystemZ/vec-args-error-07.ll b/llvm/test/CodeGen/SystemZ/vec-args-error-07.ll
diff --git a/llvm/test/CodeGen/SystemZ/vec-args-error-08.ll b/llvm/test/CodeGen/SystemZ/vec-args-error-08.ll
diff --git a/llvm/test/CodeGen/SystemZ/vec-strict-add-02.ll b/llvm/test/CodeGen/SystemZ/vec-strict-add-02.ll
@@ -4,6 +4,7 @@
 
 declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
 declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+declare <1 x fp128> @llvm.experimental.constrained.fadd.v1f128(<1 x fp128>, <1 x fp128>, metadata, metadata)
 
 ; Test a v4f32 addition.
 define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
@@ -31,3 +32,17 @@ define float @f2(<4 x float> %val1, <4 x float> %val2) strictfp {
                         metadata !"fpexcept.strict") strictfp
   ret float %ret
 }
+
+; Test a v1f128 addition.
+define <1 x fp128> @f3(<1 x fp128> %dummy, <1 x fp128> %val1,
+                       <1 x fp128> %val2) strictfp {
+; CHECK-LABEL: f3:
+; CHECK: wfaxb %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = call <1 x fp128> @llvm.experimental.constrained.fadd.v1f128(
+                        <1 x fp128> %val1, <1 x fp128> %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") strictfp
+  ret <1 x fp128> %ret
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/vec-strict-sub-02.ll b/llvm/test/CodeGen/SystemZ/vec-strict-sub-02.ll
@@ -4,6 +4,7 @@
 
 declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
 declare <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+declare <1 x fp128> @llvm.experimental.constrained.fsub.v1f128(<1 x fp128>, <1 x fp128>, metadata, metadata)
 
 ; Test a v4f32 subtraction.
 define <4 x float> @f6(<4 x float> %dummy, <4 x float> %val1,
@@ -32,4 +33,17 @@ define float @f7(<4 x float> %val1, <4 x float> %val2) #0 {
   ret float %ret
 }
 
+; Test a v1f128 subtraction.
+define <1 x fp128> @f8(<1 x fp128> %dummy, <1 x fp128> %val1,
+                       <1 x fp128> %val2) #0 {
+; CHECK-LABEL: f8:
+; CHECK: wfsxb %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = call <1 x fp128> @llvm.experimental.constrained.fsub.v1f128(
+                        <1 x fp128> %val1, <1 x fp128> %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") #0
+  ret <1 x fp128> %ret
+}
+
 attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/SystemZ/vec-sub-01.ll b/llvm/test/CodeGen/SystemZ/vec-sub-01.ll
@@ -145,3 +145,12 @@ define <2 x float> @f14(<2 x float> %val1, <2 x float> %val2) {
   %ret = fsub <2 x float> %val1, %val2
   ret <2 x float> %ret
 }
+
+; Test a v1i128 subtraction.
+define <1 x i128> @f15(<1 x i128> %dummy, <1 x i128> %val1, <1 x i128> %val2) {
+; CHECK-LABEL: f15:
+; CHECK: vsq %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = sub <1 x i128> %val1, %val2
+  ret <1 x i128> %ret
+}