Skip to content

Commit c1606b3

Browse files
AMDGPU: Fix correctness issues in Hot Block Register Renaming pass
Three critical correctness fixes for the Hot Block Register Renaming pass: Fix #0 (Kernel-Only): Restrict pass to kernel functions only. Post-RA passes cannot safely modify non-kernel functions because they have no mechanism to update RegMask operands in caller's call instructions, which would lead to inter-procedural register corruption. Fix #1a (Redefinitions): Check that target free register is not redefined by any instruction within the virtual register's live range. Without this check, moving a value to a register that gets overwritten mid-range causes segfaults. Fix #1b (Call Clobbers): Use LiveIntervals::checkRegMaskInterference() to verify that target register is not clobbered by any call instruction within the live range. Prevents incorrect register assignments across function calls. All fixes verified on aomp-complex test case (segfault fixed) and rocRAND MTGP32 kernel (117 values remapped, original optimization preserved).
1 parent 8cc658b commit c1606b3

File tree

1 file changed

+78
-7
lines changed

1 file changed

+78
-7
lines changed

llvm/lib/Target/AMDGPU/AMDGPUHotBlockRegisterRenaming.cpp

Lines changed: 78 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
4545
#include "llvm/CodeGen/MachineFunctionPass.h"
4646
#include "llvm/CodeGen/VirtRegMap.h"
47+
#include "llvm/IR/CallingConv.h"
4748
#include "llvm/InitializePasses.h"
4849
#include "llvm/Support/CommandLine.h"
4950
#include "llvm/Support/Debug.h"
@@ -57,6 +58,8 @@ STATISTIC(NumBlocksProcessed, "Number of hot blocks processed");
5758
STATISTIC(NumValuesRemapped, "Number of values remapped to reduce density");
5859
STATISTIC(NumBlocksSkipped,
5960
"Number of blocks skipped (no dense regs or no free regs)");
61+
STATISTIC(NumNonKernelsSkipped,
62+
"Number of non-kernel functions skipped for safety");
6063

6164
namespace {
6265

@@ -110,7 +113,8 @@ class AMDGPUHotBlockRegisterRenamingImpl {
110113

111114
/// Try to move a value from DenseReg to FreeReg
112115
bool tryMoveValue(MCRegister DenseReg, MCRegister FreeReg,
113-
MachineBasicBlock *MBB, SlotIndex BBStart, SlotIndex BBEnd);
116+
MachineBasicBlock *MBB, SlotIndex BBStart, SlotIndex BBEnd,
117+
const DenseMap<MCRegister, SmallVector<SlotIndex, 4>> &PhysRegDefs);
114118
};
115119

116120
class AMDGPUHotBlockRegisterRenamingLegacy : public MachineFunctionPass {
@@ -173,6 +177,17 @@ bool AMDGPUHotBlockRegisterRenamingImpl::run(MachineFunction &MF) {
173177
LLVM_DEBUG(dbgs() << "AMDGPUHotBlockRegisterRenaming: Processing "
174178
<< MF.getName() << "\n");
175179

180+
// Fix #0: Skip non-kernel functions to avoid RegMask corruption issues.
181+
// Post-RA pass cannot update RegMask operands in caller's call instructions,
182+
// which would lead to incorrect assumptions about clobbered registers.
183+
CallingConv::ID CC = MF.getFunction().getCallingConv();
184+
if (CC != CallingConv::AMDGPU_KERNEL) {
185+
LLVM_DEBUG(dbgs() << " Skipping non-kernel function (CC=" << CC
186+
<< "): Post-RA pass cannot safely modify callees\n");
187+
++NumNonKernelsSkipped;
188+
return false;
189+
}
190+
176191
TRI = ST->getRegisterInfo();
177192
MRI = &MF.getRegInfo();
178193

@@ -236,6 +251,33 @@ bool AMDGPUHotBlockRegisterRenamingImpl::processBasicBlock(
236251
<< " registers with values, " << FreeRegs.size()
237252
<< " free registers\n");
238253

254+
// Step 2a: Build PhysReg definitions cache (Fix #1a)
255+
// Track all SlotIndexes where each physical register is defined
256+
const TargetRegisterClass *VGPR_32_RC =
257+
TRI->getRegClass(AMDGPU::VGPR_32RegClassID);
258+
DenseMap<MCRegister, SmallVector<SlotIndex, 4>> PhysRegDefs;
259+
260+
for (MachineInstr &MI : *MBB) {
261+
SlotIndex Idx = LIS->getInstructionIndex(MI);
262+
for (const MachineOperand &MO : MI.operands()) {
263+
if (MO.isReg() && MO.isDef() && MO.getReg().isPhysical()) {
264+
MCRegister PhysReg = MO.getReg();
265+
if (VGPR_32_RC->contains(PhysReg)) {
266+
PhysRegDefs[PhysReg].push_back(Idx);
267+
// Also track superregs for aliasing
268+
for (MCRegister Super : TRI->superregs(PhysReg)) {
269+
PhysRegDefs[Super].push_back(Idx);
270+
}
271+
}
272+
}
273+
}
274+
}
275+
276+
LLVM_DEBUG({
277+
dbgs() << " Built PhysRegDefs cache: " << PhysRegDefs.size()
278+
<< " registers have definitions in this BB\n";
279+
});
280+
239281
// Step 3: Create max heap of dense registers
240282
auto Comparator = [&ValueDensity](MCRegister A, MCRegister B) {
241283
return ValueDensity[A] < ValueDensity[B]; // max heap
@@ -266,7 +308,7 @@ bool AMDGPUHotBlockRegisterRenamingImpl::processBasicBlock(
266308

267309
MCRegister FreeReg = FreeRegs[FreeRegIdx++];
268310

269-
if (tryMoveValue(DenseReg, FreeReg, MBB, BBStart, BBEnd)) {
311+
if (tryMoveValue(DenseReg, FreeReg, MBB, BBStart, BBEnd, PhysRegDefs)) {
270312
Changed = true;
271313
++NumValuesRemapped;
272314

@@ -450,11 +492,10 @@ bool AMDGPUHotBlockRegisterRenamingImpl::isVirtRegMovable(Register VirtReg,
450492
return true;
451493
}
452494

453-
bool AMDGPUHotBlockRegisterRenamingImpl::tryMoveValue(MCRegister DenseReg,
454-
MCRegister FreeReg,
455-
MachineBasicBlock *MBB,
456-
SlotIndex BBStart,
457-
SlotIndex BBEnd) {
495+
bool AMDGPUHotBlockRegisterRenamingImpl::tryMoveValue(
496+
MCRegister DenseReg, MCRegister FreeReg, MachineBasicBlock *MBB,
497+
SlotIndex BBStart, SlotIndex BBEnd,
498+
const DenseMap<MCRegister, SmallVector<SlotIndex, 4>> &PhysRegDefs) {
458499
// Find a movable local value in DenseReg
459500
for (MCRegUnit Unit : TRI->regunits(DenseReg)) {
460501
LiveIntervalUnion &LIU = LRM->getLiveUnions()[Unit];
@@ -512,6 +553,36 @@ bool AMDGPUHotBlockRegisterRenamingImpl::tryMoveValue(MCRegister DenseReg,
512553
continue;
513554
}
514555

556+
// Fix #1a: Check that FreeReg is not redefined in VirtReg's live range
557+
auto DefIt = PhysRegDefs.find(FreeReg);
558+
if (DefIt != PhysRegDefs.end()) {
559+
bool HasConflict = false;
560+
for (SlotIndex DefIdx : DefIt->second) {
561+
// Check if definition is strictly inside the live range (not at endpoints)
562+
if (DefIdx > SegStart && DefIdx < SegEnd) {
563+
LLVM_DEBUG(dbgs() << " Cannot move to " << printReg(FreeReg, TRI)
564+
<< ": redefined at " << DefIdx << " inside live range ["
565+
<< SegStart << ", " << SegEnd << ")\n");
566+
HasConflict = true;
567+
break;
568+
}
569+
}
570+
if (HasConflict)
571+
continue; // Try next VirtReg
572+
}
573+
574+
// Fix #1b: Check that FreeReg is not clobbered by any call in the live range
575+
BitVector UsableRegs;
576+
if (LIS->checkRegMaskInterference(VirtRegLI, UsableRegs)) {
577+
// checkRegMaskInterference returns true if LI crosses RegMask instructions
578+
// UsableRegs now contains registers NOT clobbered by any RegMask
579+
if (!UsableRegs.test(FreeReg)) {
580+
LLVM_DEBUG(dbgs() << " Cannot move to " << printReg(FreeReg, TRI)
581+
<< ": clobbered by call RegMask in live range\n");
582+
continue; // Try next VirtReg
583+
}
584+
}
585+
515586
// This VirtReg is movable! Perform the remap
516587
LLVM_DEBUG(dbgs() << " Moving " << printReg(VirtReg, TRI) << " from "
517588
<< printReg(DenseReg, TRI) << " to "

0 commit comments

Comments
 (0)