Compiler's -S output and -save-temps=obj lose the MVE feature, which leads to an assembler error for Armv8.1-M CPUs.

At the moment, clang does not generate the correct FPU directive for Armv8.1-M Mainline CPUs when emitting assembly with -S. As a result, code compiled with -S output or `-save-temps=obj` loses the MVE feature, which leads to an assembler error. There is a mismatch between the default FPU that the Armv8.1-M CPUs define and the one that clang generates. For example, clang generates fpv5-d16 for Cortex-M85, whereas ARMTargetParser.def defines FK_FP_ARMV8_FULLFP16_D16. This patch adds cases to handle this FPU directive as well, and thus makes clang generate the right FPU directive for Armv8.1-M CPUs.
ARM-software · Sep 28, 2023 · e21b8fc · e21b8fc
1 parent ac09ed9
commit e21b8fc
Showing 1 changed file with 103 additions and 0 deletions.
diff --git a/patches/llvm-project.patch b/patches/llvm-project.patch
@@ -262,3 +262,106 @@ index 2f86f8e..9c866db 100644
 +- Match: -mfloat-abi=softfp
 +  Flags: [-mfloat-abi=soft]
 +...
+diff --git a/clang/test/CodeGen/arm-v8.1m-check-fpu.ll b/clang/test/CodeGen/arm-v8.1m-check-fpu.ll
+new file mode 100644
+index 000000000000..5cff0457a190
+--- /dev/null
++++ b/clang/test/CodeGen/arm-v8.1m-check-fpu.ll
+@@ -0,0 +1,55 @@
++; REQUIRES: arm-registered-target
++; RUN: %clang --target=arm-none-eabi -mcpu=cortex-m85 -mfloat-abi=hard -save-temps=obj -S -o - %s | FileCheck %s
++; RUN: %clang --target=arm-none-eabi -mcpu=cortex-m55 -mfloat-abi=hard -save-temps=obj -S -o - %s | FileCheck %s
++; RUN: %clang --target=arm-none-eabi -mcpu=cortex-m85 -mfloat-abi=hard -O2 -c -mthumb -save-temps=obj %s
++; RUN: %clang --target=arm-none-eabi -mcpu=cortex-m55 -mfloat-abi=hard -O2 -c -mthumb -save-temps=obj %s
++; CHECK: .fpu   fp-armv8-fullfp16-d16
++target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
++target triple = "thumbv8.1m.main-none-unknown-eabihf"
++
++%struct.dummy_t = type { float, float, float, float }
++
++define dso_local signext i8 @foo(ptr noundef %handle) #0 {
++entry:
++  %handle.addr = alloca ptr, align 4
++  store ptr %handle, ptr %handle.addr, align 4
++  %0 = load ptr, ptr %handle.addr, align 4
++  %a = getelementptr inbounds %struct.dummy_t, ptr %0, i32 0, i32 0
++  %1 = load float, ptr %a, align 4
++  %sub = fsub float 0x3F5439DE40000000, %1
++  %2 = load ptr, ptr %handle.addr, align 4
++  %a1 = getelementptr inbounds %struct.dummy_t, ptr %2, i32 0, i32 0
++  %3 = load float, ptr %a1, align 4
++  %4 = call float @llvm.fmuladd.f32(float 0x3F847AE140000000, float %sub, float %3)
++  store float %4, ptr %a1, align 4
++  %5 = load ptr, ptr %handle.addr, align 4
++  %b = getelementptr inbounds %struct.dummy_t, ptr %5, i32 0, i32 1
++  %6 = load float, ptr %b, align 4
++  %sub2 = fsub float 0x3F5439DE40000000, %6
++  %7 = load ptr, ptr %handle.addr, align 4
++  %b3 = getelementptr inbounds %struct.dummy_t, ptr %7, i32 0, i32 1
++  %8 = load float, ptr %b3, align 4
++  %9 = call float @llvm.fmuladd.f32(float 0x3F947AE140000000, float %sub2, float %8)
++  store float %9, ptr %b3, align 4
++  %10 = load ptr, ptr %handle.addr, align 4
++  %c = getelementptr inbounds %struct.dummy_t, ptr %10, i32 0, i32 2
++  %11 = load float, ptr %c, align 4
++  %sub4 = fsub float 0x3F5439DE40000000, %11
++  %12 = load ptr, ptr %handle.addr, align 4
++  %c5 = getelementptr inbounds %struct.dummy_t, ptr %12, i32 0, i32 2
++  %13 = load float, ptr %c5, align 4
++  %14 = call float @llvm.fmuladd.f32(float 0x3F9EB851E0000000, float %sub4, float %13)
++  store float %14, ptr %c5, align 4
++  %15 = load ptr, ptr %handle.addr, align 4
++  %d = getelementptr inbounds %struct.dummy_t, ptr %15, i32 0, i32 3
++  %16 = load float, ptr %d, align 4
++  %sub6 = fsub float 0x3F5439DE40000000, %16
++  %17 = load ptr, ptr %handle.addr, align 4
++  %d7 = getelementptr inbounds %struct.dummy_t, ptr %17, i32 0, i32 3
++  %18 = load float, ptr %d7, align 4
++  %19 = call float @llvm.fmuladd.f32(float 0x3FA47AE140000000, float %sub6, float %18)
++  store float %19, ptr %d7, align 4
++  ret i8 0
++}
++
++declare float @llvm.fmuladd.f32(float, float, float) #1
+diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+index 9c9af6068079..e7c37767c6ed 100644
+--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
++++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+@@ -996,6 +996,8 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
+
+   // FPV5_D16 is identical to FP_ARMV8 except for the number of D registers, so
+   // uses the FP_ARMV8_D16 build attribute.
++  case ARM::FK_FP_ARMV8_FULLFP16_SP_D16:
++  case ARM::FK_FP_ARMV8_FULLFP16_D16:
+   case ARM::FK_FPV5_SP_D16:
+   case ARM::FK_FPV5_D16:
+     S.setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPARMv8B,
+diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+index b65d1b24e63d..3b2c737664f3 100644
+--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
++++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+@@ -238,14 +238,18 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
+                         ? ARMBuildAttrs::AllowNeonARMv8_1a
+                         : ARMBuildAttrs::AllowNeonARMv8);
+   } else {
+-    if (STI.hasFeature(ARM::FeatureFPARMv8_D16_SP))
++    if (STI.hasFeature(ARM::FeatureFPARMv8_D16_SP)) {
+       // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
+       // FPU, but there are two different names for it depending on the CPU.
+-      emitFPU(STI.hasFeature(ARM::FeatureD32)
+-                  ? ARM::FK_FP_ARMV8
+-                  : (STI.hasFeature(ARM::FeatureFP64) ? ARM::FK_FPV5_D16
+-                                                      : ARM::FK_FPV5_SP_D16));
+-    else if (STI.hasFeature(ARM::FeatureVFP4_D16_SP))
++      if (STI.hasFeature(ARM::FeatureD32))
++        emitFPU(ARM::FK_FP_ARMV8);
++      else if (STI.hasFeature(ARM::FeatureFP64))
++        emitFPU(STI.hasFeature(ARM::FeatureFullFP16)
++                    ? ARM::FK_FP_ARMV8_FULLFP16_D16
++                    : ARM::FK_FPV5_D16);
++      else
++        emitFPU(ARM::FK_FPV5_SP_D16);
++    } else if (STI.hasFeature(ARM::FeatureVFP4_D16_SP))
+       emitFPU(STI.hasFeature(ARM::FeatureD32)
+                   ? ARM::FK_VFPV4
+                   : (STI.hasFeature(ARM::FeatureFP64) ? ARM::FK_VFPV4_D16