
Conversation


@miloserdow miloserdow commented Sep 23, 2025

Fold away redundant AND/OR operations with all-true SVE predicates that were previously not being optimized.

Modified isAllActivePredicate to detect splat(i1 true) patterns and added InstCombine optimizations for the sve.and_z and sve.orr_z intrinsics that apply the following logical identities:

  • svand_z(all-true, a, all-true) -> a
  • svorr_z(all-true, a, all-true) -> all-true

Fixes #160279.
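
For illustration, here is a minimal ACLE-level reproducer of the kind of pattern this folds (my own sketch based on the linked issue, not code from this patch; the function name is hypothetical). With SVE enabled, the svptrue_b8() predicates generally end up as splat (i1 true) constants in the IR, as in the tests below, which is the pattern the updated isAllActivePredicate recognizes:

// Hypothetical reproducer, not part of this patch: before this change the
// svand_z call below survived InstCombine even though both its governing
// predicate and its second operand are all-true.
#include <arm_sve.h>

svbool_t redundant_and(svbool_t a) {
  // svand_z(pg, op1, op2) computes op1 & op2 and zeroes lanes inactive in pg;
  // with pg and op2 both all-true the result is simply `a`.
  return svand_z(svptrue_b8(), a, svptrue_b8());
}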

@llvmbot llvmbot added the backend:AArch64, llvm:instcombine, and llvm:transforms labels on Sep 23, 2025

llvmbot commented Sep 23, 2025

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-transforms

Author: Vladimir Miloserdov (miloserdow)

Changes

Fold away redundant AND/OR operations with all-true SVE predicates that were previously not being optimized.

Modified isAllActivePredicate to detect splat(i1 true) patterns and added InstCombine optimizations for the sve.and_z and sve.orr_z intrinsics that apply the following logical identities:

  • svand_z(all-true, a, all-true) -> a
  • svorr_z(all-true, a, all-true) -> all-true

Full diff: https://github.com/llvm/llvm-project/pull/160408.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+39-2)
  • (added) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll (+80)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 8c4b4f6e4d6de..57d69c356be28 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1587,8 +1587,21 @@ static bool isAllActivePredicate(Value *Pred) {
     if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
         cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
       Pred = UncastedPred;
-  auto *C = dyn_cast<Constant>(Pred);
-  return (C && C->isAllOnesValue());
+
+  // Also look through just convert.to.svbool if the input is an all-true splat
+  Value *ConvertArg;
+  if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
+                      m_Value(ConvertArg))))
+    Pred = ConvertArg;
+  // Check for splat(i1 true) pattern used by svptrue intrinsics
+  if (auto *C = dyn_cast<Constant>(Pred)) {
+    if (C->isAllOnesValue())
+      return true;
+    if (auto *SplatVal = C->getSplatValue())
+      if (auto *CI = dyn_cast<ConstantInt>(SplatVal))
+        return CI->isOne();
+  }
+  return false;
 }
 
 // Simplify `V` by only considering the operations that affect active lanes.
@@ -2904,6 +2917,30 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
     return instCombineSVEUxt(IC, II, 32);
   case Intrinsic::aarch64_sme_in_streaming_mode:
     return instCombineInStreamingMode(IC, II);
+  case Intrinsic::aarch64_sve_and_z:
+  case Intrinsic::aarch64_sve_orr_z: {
+    Value *Pred = II.getArgOperand(0);
+    Value *Op1 = II.getArgOperand(1);
+    Value *Op2 = II.getArgOperand(2);
+    if (isAllActivePredicate(Pred)) {
+      if (IID == Intrinsic::aarch64_sve_and_z) {
+        // svand_z(all-true, a, all-true) -> a
+        if (isAllActivePredicate(Op2))
+          return IC.replaceInstUsesWith(II, Op1);
+        // svand_z(all-true, all-true, a) -> a
+        if (isAllActivePredicate(Op1))
+          return IC.replaceInstUsesWith(II, Op2);
+      } else { // orr_z
+        // svorr_z(all-true, a, all-true) -> all-true
+        if (isAllActivePredicate(Op2))
+          return IC.replaceInstUsesWith(II, Op2);
+        // svorr_z(all-true, all-true, a) -> all-true
+        if (isAllActivePredicate(Op1))
+          return IC.replaceInstUsesWith(II, Op1);
+      }
+    }
+    break;
+  }
   }
 
   return std::nullopt;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll
new file mode 100644
index 0000000000000..9eff3acc12c99
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll
@@ -0,0 +1,80 @@
+; RUN: opt -passes=instcombine -mtriple aarch64 -mattr=+sve -S -o - < %s | FileCheck %s
+;
+; Test AArch64-specific InstCombine optimizations for SVE logical operations
+; with all-true predicates.
+; - a AND true = a
+; - a OR true = true
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.pnext.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
+
+define <vscale x 16 x i1> @test_sve_and_z_all_true_right(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: @test_sve_and_z_all_true_right(
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[A:%.*]]
+  %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %a, <vscale x 16 x i1> %all_true)
+  ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_sve_and_z_all_true_left(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: @test_sve_and_z_all_true_left(
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[A:%.*]]
+  %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %all_true, <vscale x 16 x i1> %a)
+  ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_sve_orr_z_all_true_right(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: @test_sve_orr_z_all_true_right(
+; CHECK-NEXT:    [[ALL_TRUE:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[ALL_TRUE]]
+  %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %a, <vscale x 16 x i1> %all_true)
+  ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_sve_orr_z_all_true_left(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: @test_sve_orr_z_all_true_left(
+; CHECK-NEXT:    [[ALL_TRUE:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[ALL_TRUE]]
+  %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %all_true, <vscale x 16 x i1> %a)
+  ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_original_bug_case(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %prev) {
+; CHECK-LABEL: @test_original_bug_case(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PREV:%.*]])
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pnext.nxv8i1(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i1> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP3]])
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP4]]
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %prev)
+  %3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pnext.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %2)
+  %4 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %3)
+  %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+  %6 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %4, <vscale x 16 x i1> %5)
+  ret <vscale x 16 x i1> %6
+}
+
+define <vscale x 16 x i1> @test_sve_and_z_not_all_true_predicate(<vscale x 16 x i1> %pred, <vscale x 16 x i1> %a) {
+; CHECK-LABEL: @test_sve_and_z_not_all_true_predicate(
+; CHECK-NEXT:    [[ALL_TRUE:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+; CHECK-NEXT:    [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[PRED:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[ALL_TRUE]])
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[RESULT]]
+  %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> %pred, <vscale x 16 x i1> %a, <vscale x 16 x i1> %all_true)
+  ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_sve_and_z_no_all_true_operands(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @test_sve_and_z_no_all_true_operands(
+; CHECK-NEXT:    [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[RESULT]]
+  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+  ret <vscale x 16 x i1> %result
+}

Merging this pull request may close issue #160279: [AArch64][SVE] Redundant svand_z with all-true masks is not optimized unlike GCC.