-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RegAlloc][NewPM] Plug Greedy RA in codegen pipeline #120557
base: users/Akshat-Oke/12-11-_codegen_newpm_port_regallocgreedy_to_npm
Are you sure you want to change the base?
Conversation
Warning This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
✅ With the latest revision this PR passed the C/C++ code formatter. |
c779444
to
6345993
Compare
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-backend-aarch64 Author: Akshat Oke (optimisan) ChangesUse
Full diff: https://github.com/llvm/llvm-project/pull/120557.diff 18 Files Affected:
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 28768a72c83fa3..2dbcd7122f6aa8 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -1059,7 +1059,7 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addMachineSSAOptimization(
///
/// A target that uses the standard regalloc pass order for fast or optimized
/// allocation may still override this for per-target regalloc
-/// selection. But -regalloc=... always takes precedence.
+/// selection. But -regalloc-npm=... always takes precedence.
template <typename Derived, typename TargetMachineT>
void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator(
AddMachinePass &addPass, bool Optimized) const {
@@ -1075,7 +1075,22 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator(
template <typename Derived, typename TargetMachineT>
void CodeGenPassBuilder<Derived, TargetMachineT>::addRegAllocPass(
AddMachinePass &addPass, bool Optimized) const {
- // TODO: Parse Opt.RegAlloc to add register allocator.
+ // Use the specified -regalloc-npm={basic|greedy|fast|pbqp}
+ if (Opt.RegAlloc > RegAllocType::Default) {
+ switch (Opt.RegAlloc) {
+ case RegAllocType::Fast:
+ addPass(RegAllocFastPass());
+ break;
+ case RegAllocType::Greedy:
+ addPass(RAGreedyPass());
+ break;
+ default:
+ report_fatal_error("Register allocator not supported yet.", false);
+ }
+ return;
+ }
+ // -regalloc=default or unspecified, so pick based on the optimization level.
+ derived().addTargetRegisterAllocator(addPass, Optimized);
}
template <typename Derived, typename TargetMachineT>
@@ -1146,20 +1161,23 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
// PreRA instruction scheduling.
addPass(MachineSchedulerPass());
- if (derived().addRegAssignmentOptimized(addPass)) {
- // Allow targets to expand pseudo instructions depending on the choice of
- // registers before MachineCopyPropagation.
- derived().addPostRewrite(addPass);
+ if (auto E = derived().addRegAssignmentOptimized(addPass)) {
+ // addRegAssignmentOptimized did not add a reg alloc pass, so do nothing.
+ // FIXME: This is not really an error.
+ return;
+ }
+ // Allow targets to expand pseudo instructions depending on the choice of
+ // registers before MachineCopyPropagation.
+ derived().addPostRewrite(addPass);
- // Copy propagate to forward register uses and try to eliminate COPYs that
- // were not coalesced.
- addPass(MachineCopyPropagationPass());
+ // Copy propagate to forward register uses and try to eliminate COPYs that
+ // were not coalesced.
+ addPass(MachineCopyPropagationPass());
- // Run post-ra machine LICM to hoist reloads / remats.
- //
- // FIXME: can this move into MachineLateOptimization?
- addPass(MachineLICMPass());
- }
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //
+ // FIXME: can this move into MachineLateOptimization?
+ addPass(MachineLICMPass());
}
//===---------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 5facdfa825e4cb..abcc248334ed5f 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -188,12 +188,12 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
},
"filter=reg-filter;no-clear-vregs")
+// 'all' is the default filter
MACHINE_FUNCTION_PASS_WITH_PARAMS(
"regallocgreedy", "RAGreedyPass",
[](RAGreedyPass::Options Opts) { return RAGreedyPass(Opts); },
[PB = this](StringRef Params) {
- // TODO: parseRegAllocGreedyFilterFunc(*PB, Params);
- return Expected<RAGreedyPass::Options>(RAGreedyPass::Options{});
+ return parseRegAllocGreedyFilterFunc(*PB, Params);
}, "reg-filter"
)
#undef MACHINE_FUNCTION_PASS_WITH_PARAMS
diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h
index 29bdb9c1746d3c..f146cc7b8028d8 100644
--- a/llvm/include/llvm/Target/CGPassBuilderOption.h
+++ b/llvm/include/llvm/Target/CGPassBuilderOption.h
@@ -20,7 +20,7 @@
namespace llvm {
enum class RunOutliner { TargetDefault, AlwaysOutline, NeverOutline };
-enum class RegAllocType { Default, Basic, Fast, Greedy, PBQP };
+enum class RegAllocType { Unset, Default, Basic, Fast, Greedy, PBQP };
// Not one-on-one but mostly corresponding to commandline options in
// TargetPassConfig.cpp.
@@ -53,7 +53,7 @@ struct CGPassBuilderOption {
bool RequiresCodeGenSCCOrder = false;
RunOutliner EnableMachineOutliner = RunOutliner::TargetDefault;
- StringRef RegAlloc = "default";
+ RegAllocType RegAlloc = RegAllocType::Unset;
std::optional<GlobalISelAbortMode> EnableGlobalISelAbort;
std::string FSProfileFile;
std::string FSRemappingFile;
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index f64b6e0adb2b32..f9a4af486631eb 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1315,6 +1315,20 @@ parseBoundsCheckingOptions(StringRef Params) {
return Mode;
}
+Expected<RAGreedyPass::Options>
+parseRegAllocGreedyFilterFunc(PassBuilder &PB, StringRef Params) {
+ if (Params.empty() || Params == "all") {
+ return RAGreedyPass::Options();
+ }
+ std::optional<RegAllocFilterFunc> Filter = PB.parseRegAllocFilter(Params);
+ if (!Filter) {
+ return make_error<StringError>(
+ formatv("invalid regallocgreedy register filter '{0}' ", Params).str(),
+ inconvertibleErrorCode());
+ }
+ return RAGreedyPass::Options{*Filter, Params};
+}
+
} // namespace
/// Tests whether a pass name starts with a valid prefix for a default pipeline
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 7256eec89008a5..952bf479827f48 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -68,6 +68,7 @@
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
@@ -2099,6 +2100,28 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
addPass(SIShrinkInstructionsPass());
}
+static const char RegAllocNPMNotSupportedMessage[] =
+ "-regalloc-npm not supported with amdgcn. Use -sgpr-regalloc-npm, "
+ "-wwm-regalloc-npm, and -vgpr-regalloc-npm";
+
+Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
+ AddMachinePass &addPass) const {
+ if (Opt.RegAlloc != RegAllocType::Unset)
+ report_fatal_error(RegAllocNPMNotSupportedMessage, false);
+
+ return make_error<StringError>("not implemented yet",
+ inconvertibleErrorCode());
+}
+
+Error AMDGPUCodeGenPassBuilder::addRegAssignmentFast(
+ AddMachinePass &addPass) const {
+ if (Opt.RegAlloc != RegAllocType::Unset)
+ report_fatal_error(RegAllocNPMNotSupportedMessage, false);
+
+ return make_error<StringError>("not implemented yet",
+ inconvertibleErrorCode());
+}
+
bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
CodeGenOptLevel Level) const {
if (Opt.getNumOccurrences())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 5ba58a92621edb..197476a0f80574 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -176,6 +176,8 @@ class AMDGPUCodeGenPassBuilder
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
Error addInstSelector(AddMachinePass &) const;
void addMachineSSAOptimization(AddMachinePass &) const;
+ Error addRegAssignmentOptimized(AddMachinePass &) const;
+ Error addRegAssignmentFast(AddMachinePass &) const;
/// Check if a pass is enabled given \p Opt option. The option always
/// overrides defaults if explicitly used. Otherwise its default will be used
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir b/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
index 47aa34e3c01156..a168c2891c7d6f 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
+++ b/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -passes=regallocgreedy -o - %s | FileCheck %s
--- |
define void @inst_stores_to_dead_spill_implicit_def_impdef() {
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
index a5d74ef75f0a0a..d9edda47638a3f 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
+++ b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=arm64-apple-ios -run-pass=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=arm64-apple-ios -passes=regallocgreedy -o - %s | FileCheck %s
---
name: widget
diff --git a/llvm/test/CodeGen/AArch64/pr51516.mir b/llvm/test/CodeGen/AArch64/pr51516.mir
index 910bfb858b50f6..e84f0ca2015ce5 100644
--- a/llvm/test/CodeGen/AArch64/pr51516.mir
+++ b/llvm/test/CodeGen/AArch64/pr51516.mir
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=aarch64-unknown-fuchsia -run-pass=greedy -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64-unknown-fuchsia -passes=regallocgreedy -verify-machineinstrs -o - %s | FileCheck %s
# Check that we spill %31 and do not rematerialize it since the use operand
# of ADDXri is killed by the STRXui in this block.
diff --git a/llvm/test/CodeGen/AArch64/spill-fold.mir b/llvm/test/CodeGen/AArch64/spill-fold.mir
index b1e7ebe3a7e82b..2773b5f19618a9 100644
--- a/llvm/test/CodeGen/AArch64/spill-fold.mir
+++ b/llvm/test/CodeGen/AArch64/spill-fold.mir
@@ -1,5 +1,7 @@
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
# RUN: llc -mtriple=aarch64_be-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64-none-linux-gnu -passes=regallocgreedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64_be-none-linux-gnu -passes=regallocgreedy -o - %s | FileCheck %s
--- |
define i64 @test_subreg_spill_fold() { ret i64 0 }
define i64 @test_subreg_spill_fold2() { ret i64 0 }
diff --git a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir
index 760ae6032230f5..42bba4d1504013 100644
--- a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir
+++ b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -passes=regallocgreedy -o - %s | FileCheck %s
# Initially %2 starts out with 2 subranges (one for sub0, and one for
# the rest of the lanes). After %2 is split, after refineSubRanges the
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
index 52ad7e5355207d..a54fee3a0f964c 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
@@ -12,8 +12,11 @@
; RUN: not --crash llc -verify-machineinstrs=0 -regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC %s
; RUN: not --crash llc -verify-machineinstrs=0 -regalloc=fast -O0 -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC %s
+; RUN: not llc -enable-new-pm -verify-machineinstrs=0 -regalloc-npm=fast -O0 -mtriple=amdgcn-amd-amdhsa -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC-NPM %s
+; RUN: not llc -enable-new-pm -verify-machineinstrs=0 -regalloc-npm=basic -O3 -mtriple=amdgcn-amd-amdhsa -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC-NPM %s
; REGALLOC: -regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, and -vgpr-regalloc
+; REGALLOC-NPM: -regalloc-npm not supported with amdgcn. Use -sgpr-regalloc-npm, -wwm-regalloc-npm, and -vgpr-regalloc-npm
; DEFAULT: Greedy Register Allocator
; DEFAULT-NEXT: Virtual Register Rewriter
diff --git a/llvm/test/CodeGen/MIR/Generic/runPass.mir b/llvm/test/CodeGen/MIR/Generic/runPass.mir
index 75763c5389b09e..41dd98ff909b0c 100644
--- a/llvm/test/CodeGen/MIR/Generic/runPass.mir
+++ b/llvm/test/CodeGen/MIR/Generic/runPass.mir
@@ -2,6 +2,7 @@
# RUN: llc -run-pass=regallocbasic -debug-pass=Arguments -o - %s | FileCheck %s
# RUN: llc -run-pass=regallocfast -debug-pass=Arguments -o - %s | FileCheck %s
# RUN: llc -passes=regallocfast -o - %s | FileCheck %s
+# RUN: llc -passes=regallocgreedy -o - %s | FileCheck %s
# Check that passes are initialized correctly, so that it's possible to
# use -run-pass.
diff --git a/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir b/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir
index 197c3d8551fc38..de0db97f14bf3c 100644
--- a/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir
+++ b/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir
@@ -1,4 +1,5 @@
#RUN: llc -o - %s -mtriple=s390x-ibm-linux -run-pass=greedy
+#RUN: llc -o - %s -mtriple=s390x-ibm-linux -passes=regallocgreedy
#PR34502. Check HoistSpill works properly after the live range of spilled
#virtual register is cleared.
--- |
diff --git a/llvm/test/CodeGen/Thumb/high-reg-clobber.mir b/llvm/test/CodeGen/Thumb/high-reg-clobber.mir
index 1402c7c2cbca36..e085e38ae5fe31 100644
--- a/llvm/test/CodeGen/Thumb/high-reg-clobber.mir
+++ b/llvm/test/CodeGen/Thumb/high-reg-clobber.mir
@@ -3,6 +3,7 @@
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass greedy %s -o - | FileCheck %s
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass regallocfast %s -o - | FileCheck %s --check-prefix=FAST
# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=regallocfast %s -o - | FileCheck %s --check-prefix=FAST
+# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=regallocgreedy %s -o - | FileCheck %s
...
---
diff --git a/llvm/test/CodeGen/X86/limit-split-cost.mir b/llvm/test/CodeGen/X86/limit-split-cost.mir
index 7ec0404e0f737c..eec8a3939151a6 100644
--- a/llvm/test/CodeGen/X86/limit-split-cost.mir
+++ b/llvm/test/CodeGen/X86/limit-split-cost.mir
@@ -1,5 +1,6 @@
# REQUIRES: asserts
# RUN: llc -mtriple=x86_64-- -run-pass=greedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -passes=regallocgreedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s
# Check no global region split is needed because the live range to split is trivially rematerializable.
# CHECK-NOT: Compact region bundles
--- |
diff --git a/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir b/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
index 07f2d350ffd9c0..66c9d8942f3da4 100644
--- a/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
+++ b/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
@@ -1,12 +1,17 @@
# REQUIRES: amdgpu-registered-target
-# RUN: llc -mtriple=amdgcn --passes='regallocfast<filter=sgpr>,regallocfast<filter=wwm>,regallocfast<filter=vgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=PASS
-# RUN: not llc -mtriple=amdgcn --passes='regallocfast<filter=bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=BAD-FILTER
+# RUN: llc -mtriple=amdgcn --passes='regallocfast<filter=sgpr>,regallocfast<filter=wwm>,regallocfast<filter=vgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=RAFAST
+# RUN: not llc -mtriple=amdgcn --passes='regallocfast<filter=bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=RAFAST-BAD-FILTER
-# PASS: regallocfast<filter=sgpr>
-# PASS: regallocfast<filter=wwm>
-# PASS: regallocfast<filter=vgpr>
-# BAD-FILTER: invalid regallocfast register filter 'bad-filter'
+# RUN: llc -mtriple=amdgcn -passes='regallocgreedy<sgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=RAGREEDY
+# RUN: not llc -mtriple=amdgcn -passes='regallocgreedy<bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=RAGREEDY-BAD-FILTER
+# RAFAST: regallocfast<filter=sgpr>
+# RAFAST: regallocfast<filter=wwm>
+# RAFAST: regallocfast<filter=vgpr>
+# RAFAST-BAD-FILTER: invalid regallocfast register filter 'bad-filter'
+
+# RAGREEDY: regallocgreedy<sgpr>
+# RAGREEDY-BAD-FILTER: invalid regallocgreedy register filter 'bad-filter'
---
name: f
...
diff --git a/llvm/tools/llc/NewPMDriver.cpp b/llvm/tools/llc/NewPMDriver.cpp
index 3892fbb8c74f78..0f7aa6284962a2 100644
--- a/llvm/tools/llc/NewPMDriver.cpp
+++ b/llvm/tools/llc/NewPMDriver.cpp
@@ -48,10 +48,17 @@
using namespace llvm;
-static cl::opt<std::string>
- RegAlloc("regalloc-npm",
- cl::desc("Register allocator to use for new pass manager"),
- cl::Hidden, cl::init("default"));
+static cl::opt<RegAllocType> RegAlloc(
+ "regalloc-npm", cl::desc("Register allocator to use for new pass manager"),
+ cl::Hidden, cl::init(RegAllocType::Unset),
+ cl::values(
+ clEnumValN(RegAllocType::Default, "default",
+ "Default register allocator"),
+ clEnumValN(RegAllocType::PBQP, "pbqp", "PBQP register allocator"),
+ clEnumValN(RegAllocType::Fast, "fast", "Fast register allocator"),
+ clEnumValN(RegAllocType::Basic, "basic", "Basic register allocator"),
+ clEnumValN(RegAllocType::Greedy, "greedy",
+ "Greedy register allocator")));
static cl::opt<bool>
DebugPM("debug-pass-manager", cl::Hidden,
|
@llvm/pr-subscribers-backend-systemz Author: Akshat Oke (optimisan) ChangesUse
Full diff: https://github.com/llvm/llvm-project/pull/120557.diff 18 Files Affected:
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 28768a72c83fa3..2dbcd7122f6aa8 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -1059,7 +1059,7 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addMachineSSAOptimization(
///
/// A target that uses the standard regalloc pass order for fast or optimized
/// allocation may still override this for per-target regalloc
-/// selection. But -regalloc=... always takes precedence.
+/// selection. But -regalloc-npm=... always takes precedence.
template <typename Derived, typename TargetMachineT>
void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator(
AddMachinePass &addPass, bool Optimized) const {
@@ -1075,7 +1075,22 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator(
template <typename Derived, typename TargetMachineT>
void CodeGenPassBuilder<Derived, TargetMachineT>::addRegAllocPass(
AddMachinePass &addPass, bool Optimized) const {
- // TODO: Parse Opt.RegAlloc to add register allocator.
+ // Use the specified -regalloc-npm={basic|greedy|fast|pbqp}
+ if (Opt.RegAlloc > RegAllocType::Default) {
+ switch (Opt.RegAlloc) {
+ case RegAllocType::Fast:
+ addPass(RegAllocFastPass());
+ break;
+ case RegAllocType::Greedy:
+ addPass(RAGreedyPass());
+ break;
+ default:
+ report_fatal_error("Register allocator not supported yet.", false);
+ }
+ return;
+ }
+ // -regalloc=default or unspecified, so pick based on the optimization level.
+ derived().addTargetRegisterAllocator(addPass, Optimized);
}
template <typename Derived, typename TargetMachineT>
@@ -1146,20 +1161,23 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
// PreRA instruction scheduling.
addPass(MachineSchedulerPass());
- if (derived().addRegAssignmentOptimized(addPass)) {
- // Allow targets to expand pseudo instructions depending on the choice of
- // registers before MachineCopyPropagation.
- derived().addPostRewrite(addPass);
+ if (auto E = derived().addRegAssignmentOptimized(addPass)) {
+ // addRegAssignmentOptimized did not add a reg alloc pass, so do nothing.
+ // FIXME: This is not really an error.
+ return;
+ }
+ // Allow targets to expand pseudo instructions depending on the choice of
+ // registers before MachineCopyPropagation.
+ derived().addPostRewrite(addPass);
- // Copy propagate to forward register uses and try to eliminate COPYs that
- // were not coalesced.
- addPass(MachineCopyPropagationPass());
+ // Copy propagate to forward register uses and try to eliminate COPYs that
+ // were not coalesced.
+ addPass(MachineCopyPropagationPass());
- // Run post-ra machine LICM to hoist reloads / remats.
- //
- // FIXME: can this move into MachineLateOptimization?
- addPass(MachineLICMPass());
- }
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //
+ // FIXME: can this move into MachineLateOptimization?
+ addPass(MachineLICMPass());
}
//===---------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 5facdfa825e4cb..abcc248334ed5f 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -188,12 +188,12 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
},
"filter=reg-filter;no-clear-vregs")
+// 'all' is the default filter
MACHINE_FUNCTION_PASS_WITH_PARAMS(
"regallocgreedy", "RAGreedyPass",
[](RAGreedyPass::Options Opts) { return RAGreedyPass(Opts); },
[PB = this](StringRef Params) {
- // TODO: parseRegAllocGreedyFilterFunc(*PB, Params);
- return Expected<RAGreedyPass::Options>(RAGreedyPass::Options{});
+ return parseRegAllocGreedyFilterFunc(*PB, Params);
}, "reg-filter"
)
#undef MACHINE_FUNCTION_PASS_WITH_PARAMS
diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h
index 29bdb9c1746d3c..f146cc7b8028d8 100644
--- a/llvm/include/llvm/Target/CGPassBuilderOption.h
+++ b/llvm/include/llvm/Target/CGPassBuilderOption.h
@@ -20,7 +20,7 @@
namespace llvm {
enum class RunOutliner { TargetDefault, AlwaysOutline, NeverOutline };
-enum class RegAllocType { Default, Basic, Fast, Greedy, PBQP };
+enum class RegAllocType { Unset, Default, Basic, Fast, Greedy, PBQP };
// Not one-on-one but mostly corresponding to commandline options in
// TargetPassConfig.cpp.
@@ -53,7 +53,7 @@ struct CGPassBuilderOption {
bool RequiresCodeGenSCCOrder = false;
RunOutliner EnableMachineOutliner = RunOutliner::TargetDefault;
- StringRef RegAlloc = "default";
+ RegAllocType RegAlloc = RegAllocType::Unset;
std::optional<GlobalISelAbortMode> EnableGlobalISelAbort;
std::string FSProfileFile;
std::string FSRemappingFile;
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index f64b6e0adb2b32..f9a4af486631eb 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1315,6 +1315,20 @@ parseBoundsCheckingOptions(StringRef Params) {
return Mode;
}
+Expected<RAGreedyPass::Options>
+parseRegAllocGreedyFilterFunc(PassBuilder &PB, StringRef Params) {
+ if (Params.empty() || Params == "all") {
+ return RAGreedyPass::Options();
+ }
+ std::optional<RegAllocFilterFunc> Filter = PB.parseRegAllocFilter(Params);
+ if (!Filter) {
+ return make_error<StringError>(
+ formatv("invalid regallocgreedy register filter '{0}' ", Params).str(),
+ inconvertibleErrorCode());
+ }
+ return RAGreedyPass::Options{*Filter, Params};
+}
+
} // namespace
/// Tests whether a pass name starts with a valid prefix for a default pipeline
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 7256eec89008a5..952bf479827f48 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -68,6 +68,7 @@
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
@@ -2099,6 +2100,28 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
addPass(SIShrinkInstructionsPass());
}
+static const char RegAllocNPMNotSupportedMessage[] =
+ "-regalloc-npm not supported with amdgcn. Use -sgpr-regalloc-npm, "
+ "-wwm-regalloc-npm, and -vgpr-regalloc-npm";
+
+Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
+ AddMachinePass &addPass) const {
+ if (Opt.RegAlloc != RegAllocType::Unset)
+ report_fatal_error(RegAllocNPMNotSupportedMessage, false);
+
+ return make_error<StringError>("not implemented yet",
+ inconvertibleErrorCode());
+}
+
+Error AMDGPUCodeGenPassBuilder::addRegAssignmentFast(
+ AddMachinePass &addPass) const {
+ if (Opt.RegAlloc != RegAllocType::Unset)
+ report_fatal_error(RegAllocNPMNotSupportedMessage, false);
+
+ return make_error<StringError>("not implemented yet",
+ inconvertibleErrorCode());
+}
+
bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
CodeGenOptLevel Level) const {
if (Opt.getNumOccurrences())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 5ba58a92621edb..197476a0f80574 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -176,6 +176,8 @@ class AMDGPUCodeGenPassBuilder
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
Error addInstSelector(AddMachinePass &) const;
void addMachineSSAOptimization(AddMachinePass &) const;
+ Error addRegAssignmentOptimized(AddMachinePass &) const;
+ Error addRegAssignmentFast(AddMachinePass &) const;
/// Check if a pass is enabled given \p Opt option. The option always
/// overrides defaults if explicitly used. Otherwise its default will be used
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir b/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
index 47aa34e3c01156..a168c2891c7d6f 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
+++ b/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -passes=regallocgreedy -o - %s | FileCheck %s
--- |
define void @inst_stores_to_dead_spill_implicit_def_impdef() {
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
index a5d74ef75f0a0a..d9edda47638a3f 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
+++ b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=arm64-apple-ios -run-pass=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=arm64-apple-ios -passes=regallocgreedy -o - %s | FileCheck %s
---
name: widget
diff --git a/llvm/test/CodeGen/AArch64/pr51516.mir b/llvm/test/CodeGen/AArch64/pr51516.mir
index 910bfb858b50f6..e84f0ca2015ce5 100644
--- a/llvm/test/CodeGen/AArch64/pr51516.mir
+++ b/llvm/test/CodeGen/AArch64/pr51516.mir
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=aarch64-unknown-fuchsia -run-pass=greedy -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64-unknown-fuchsia -passes=regallocgreedy -verify-machineinstrs -o - %s | FileCheck %s
# Check that we spill %31 and do not rematerialize it since the use operand
# of ADDXri is killed by the STRXui in this block.
diff --git a/llvm/test/CodeGen/AArch64/spill-fold.mir b/llvm/test/CodeGen/AArch64/spill-fold.mir
index b1e7ebe3a7e82b..2773b5f19618a9 100644
--- a/llvm/test/CodeGen/AArch64/spill-fold.mir
+++ b/llvm/test/CodeGen/AArch64/spill-fold.mir
@@ -1,5 +1,7 @@
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
# RUN: llc -mtriple=aarch64_be-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64-none-linux-gnu -passes=regallocgreedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64_be-none-linux-gnu -passes=regallocgreedy -o - %s | FileCheck %s
--- |
define i64 @test_subreg_spill_fold() { ret i64 0 }
define i64 @test_subreg_spill_fold2() { ret i64 0 }
diff --git a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir
index 760ae6032230f5..42bba4d1504013 100644
--- a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir
+++ b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -passes=regallocgreedy -o - %s | FileCheck %s
# Initially %2 starts out with 2 subranges (one for sub0, and one for
# the rest of the lanes). After %2 is split, after refineSubRanges the
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
index 52ad7e5355207d..a54fee3a0f964c 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
@@ -12,8 +12,11 @@
; RUN: not --crash llc -verify-machineinstrs=0 -regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC %s
; RUN: not --crash llc -verify-machineinstrs=0 -regalloc=fast -O0 -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC %s
+; RUN: not llc -enable-new-pm -verify-machineinstrs=0 -regalloc-npm=fast -O0 -mtriple=amdgcn-amd-amdhsa -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC-NPM %s
+; RUN: not llc -enable-new-pm -verify-machineinstrs=0 -regalloc-npm=basic -O3 -mtriple=amdgcn-amd-amdhsa -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC-NPM %s
; REGALLOC: -regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, and -vgpr-regalloc
+; REGALLOC-NPM: -regalloc-npm not supported with amdgcn. Use -sgpr-regalloc-npm, -wwm-regalloc-npm, and -vgpr-regalloc-npm
; DEFAULT: Greedy Register Allocator
; DEFAULT-NEXT: Virtual Register Rewriter
diff --git a/llvm/test/CodeGen/MIR/Generic/runPass.mir b/llvm/test/CodeGen/MIR/Generic/runPass.mir
index 75763c5389b09e..41dd98ff909b0c 100644
--- a/llvm/test/CodeGen/MIR/Generic/runPass.mir
+++ b/llvm/test/CodeGen/MIR/Generic/runPass.mir
@@ -2,6 +2,7 @@
# RUN: llc -run-pass=regallocbasic -debug-pass=Arguments -o - %s | FileCheck %s
# RUN: llc -run-pass=regallocfast -debug-pass=Arguments -o - %s | FileCheck %s
# RUN: llc -passes=regallocfast -o - %s | FileCheck %s
+# RUN: llc -passes=regallocgreedy -o - %s | FileCheck %s
# Check that passes are initialized correctly, so that it's possible to
# use -run-pass.
diff --git a/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir b/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir
index 197c3d8551fc38..de0db97f14bf3c 100644
--- a/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir
+++ b/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir
@@ -1,4 +1,5 @@
#RUN: llc -o - %s -mtriple=s390x-ibm-linux -run-pass=greedy
+#RUN: llc -o - %s -mtriple=s390x-ibm-linux -passes=regallocgreedy
#PR34502. Check HoistSpill works properly after the live range of spilled
#virtual register is cleared.
--- |
diff --git a/llvm/test/CodeGen/Thumb/high-reg-clobber.mir b/llvm/test/CodeGen/Thumb/high-reg-clobber.mir
index 1402c7c2cbca36..e085e38ae5fe31 100644
--- a/llvm/test/CodeGen/Thumb/high-reg-clobber.mir
+++ b/llvm/test/CodeGen/Thumb/high-reg-clobber.mir
@@ -3,6 +3,7 @@
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass greedy %s -o - | FileCheck %s
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass regallocfast %s -o - | FileCheck %s --check-prefix=FAST
# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=regallocfast %s -o - | FileCheck %s --check-prefix=FAST
+# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=regallocgreedy %s -o - | FileCheck %s
...
---
diff --git a/llvm/test/CodeGen/X86/limit-split-cost.mir b/llvm/test/CodeGen/X86/limit-split-cost.mir
index 7ec0404e0f737c..eec8a3939151a6 100644
--- a/llvm/test/CodeGen/X86/limit-split-cost.mir
+++ b/llvm/test/CodeGen/X86/limit-split-cost.mir
@@ -1,5 +1,6 @@
# REQUIRES: asserts
# RUN: llc -mtriple=x86_64-- -run-pass=greedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -passes=regallocgreedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s
# Check no global region split is needed because the live range to split is trivially rematerializable.
# CHECK-NOT: Compact region bundles
--- |
diff --git a/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir b/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
index 07f2d350ffd9c0..66c9d8942f3da4 100644
--- a/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
+++ b/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
@@ -1,12 +1,17 @@
# REQUIRES: amdgpu-registered-target
-# RUN: llc -mtriple=amdgcn --passes='regallocfast<filter=sgpr>,regallocfast<filter=wwm>,regallocfast<filter=vgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=PASS
-# RUN: not llc -mtriple=amdgcn --passes='regallocfast<filter=bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=BAD-FILTER
+# RUN: llc -mtriple=amdgcn --passes='regallocfast<filter=sgpr>,regallocfast<filter=wwm>,regallocfast<filter=vgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=RAFAST
+# RUN: not llc -mtriple=amdgcn --passes='regallocfast<filter=bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=RAFAST-BAD-FILTER
-# PASS: regallocfast<filter=sgpr>
-# PASS: regallocfast<filter=wwm>
-# PASS: regallocfast<filter=vgpr>
-# BAD-FILTER: invalid regallocfast register filter 'bad-filter'
+# RUN: llc -mtriple=amdgcn -passes='regallocgreedy<sgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=RAGREEDY
+# RUN: not llc -mtriple=amdgcn -passes='regallocgreedy<bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=RAGREEDY-BAD-FILTER
+# RAFAST: regallocfast<filter=sgpr>
+# RAFAST: regallocfast<filter=wwm>
+# RAFAST: regallocfast<filter=vgpr>
+# RAFAST-BAD-FILTER: invalid regallocfast register filter 'bad-filter'
+
+# RAGREEDY: regallocgreedy<sgpr>
+# RAGREEDY-BAD-FILTER: invalid regallocgreedy register filter 'bad-filter'
---
name: f
...
diff --git a/llvm/tools/llc/NewPMDriver.cpp b/llvm/tools/llc/NewPMDriver.cpp
index 3892fbb8c74f78..0f7aa6284962a2 100644
--- a/llvm/tools/llc/NewPMDriver.cpp
+++ b/llvm/tools/llc/NewPMDriver.cpp
@@ -48,10 +48,17 @@
using namespace llvm;
-static cl::opt<std::string>
- RegAlloc("regalloc-npm",
- cl::desc("Register allocator to use for new pass manager"),
- cl::Hidden, cl::init("default"));
+static cl::opt<RegAllocType> RegAlloc(
+ "regalloc-npm", cl::desc("Register allocator to use for new pass manager"),
+ cl::Hidden, cl::init(RegAllocType::Unset),
+ cl::values(
+ clEnumValN(RegAllocType::Default, "default",
+ "Default register allocator"),
+ clEnumValN(RegAllocType::PBQP, "pbqp", "PBQP register allocator"),
+ clEnumValN(RegAllocType::Fast, "fast", "Fast register allocator"),
+ clEnumValN(RegAllocType::Basic, "basic", "Basic register allocator"),
+ clEnumValN(RegAllocType::Greedy, "greedy",
+ "Greedy register allocator")));
static cl::opt<bool>
DebugPM("debug-pass-manager", cl::Hidden,
|
addPass(RAGreedyPass()); | ||
break; | ||
default: | ||
report_fatal_error("Register allocator not supported yet.", false); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
report_fatal_error("Register allocator not supported yet.", false); | |
report_fatal_error("register allocator not supported yet.", false); |
if (Params.empty() || Params == "all") { | ||
return RAGreedyPass::Options(); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (Params.empty() || Params == "all") { | |
return RAGreedyPass::Options(); | |
} | |
if (Params.empty() || Params == "all") | |
return RAGreedyPass::Options(); |
if (!Filter) { | ||
return make_error<StringError>( | ||
formatv("invalid regallocgreedy register filter '{0}' ", Params).str(), | ||
inconvertibleErrorCode()); | ||
} | ||
return RAGreedyPass::Options{*Filter, Params}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Less indentation if you invert the order here
Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized( | ||
AddMachinePass &addPass) const { | ||
if (Opt.RegAlloc != RegAllocType::Unset) | ||
report_fatal_error(RegAllocNPMNotSupportedMessage, false); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why mix fatal errors and returned errors?
516482f
to
7f36763
Compare
5c2a6b2
to
513da6b
Compare
7f36763
to
686d1cd
Compare
513da6b
to
9ec74b7
Compare
f18aa9e
to
0294923
Compare
9ec74b7
to
0604cbd
Compare
06f0a25
to
dee6387
Compare
0604cbd
to
e2d881e
Compare
dee6387
to
1b48c6b
Compare
e2d881e
to
8f6b898
Compare
1b48c6b
to
acaba02
Compare
8f6b898
to
007935f
Compare
acaba02
to
8ed3dbf
Compare
007935f
to
e8a5d2d
Compare
8ed3dbf
to
7a53889
Compare
e8a5d2d
to
8721b1f
Compare
7a53889
to
4e2e8cb
Compare
8721b1f
to
6f49c89
Compare
4e2e8cb
to
df4bdd8
Compare
df4bdd8
to
56f7930
Compare
6f49c89
to
c31b0c4
Compare
Use
-passes="regallocgreedy<[all|sgpr|wwm|vgpr]>
to insert the greedy RA with a filter and-regalloc-npm=<type>
to control which RA to use in existing pipeline.-regalloc-npm=<type>
is not for AMDGPU. Adding options likesgpr-regalloc-npm
for it next.