4444#include " llvm/CodeGen/MachineBlockFrequencyInfo.h"
4545#include " llvm/CodeGen/MachineFunctionPass.h"
4646#include " llvm/CodeGen/VirtRegMap.h"
47+ #include " llvm/IR/CallingConv.h"
4748#include " llvm/InitializePasses.h"
4849#include " llvm/Support/CommandLine.h"
4950#include " llvm/Support/Debug.h"
@@ -57,6 +58,8 @@ STATISTIC(NumBlocksProcessed, "Number of hot blocks processed");
5758STATISTIC (NumValuesRemapped, " Number of values remapped to reduce density" );
5859STATISTIC (NumBlocksSkipped,
5960 " Number of blocks skipped (no dense regs or no free regs)" );
61+ STATISTIC (NumNonKernelsSkipped,
62+ " Number of non-kernel functions skipped for safety" );
6063
6164namespace {
6265
@@ -110,7 +113,8 @@ class AMDGPUHotBlockRegisterRenamingImpl {
110113
111114 // / Try to move a value from DenseReg to FreeReg
112115 bool tryMoveValue (MCRegister DenseReg, MCRegister FreeReg,
113- MachineBasicBlock *MBB, SlotIndex BBStart, SlotIndex BBEnd);
116+ MachineBasicBlock *MBB, SlotIndex BBStart, SlotIndex BBEnd,
117+ const DenseMap<MCRegister, SmallVector<SlotIndex, 4 >> &PhysRegDefs);
114118};
115119
116120class AMDGPUHotBlockRegisterRenamingLegacy : public MachineFunctionPass {
@@ -173,6 +177,17 @@ bool AMDGPUHotBlockRegisterRenamingImpl::run(MachineFunction &MF) {
173177 LLVM_DEBUG (dbgs () << " AMDGPUHotBlockRegisterRenaming: Processing "
174178 << MF.getName () << " \n " );
175179
180+ // Fix #0: Skip non-kernel functions to avoid RegMask corruption issues.
181+ // Post-RA pass cannot update RegMask operands in caller's call instructions,
182+ // which would lead to incorrect assumptions about clobbered registers.
183+ CallingConv::ID CC = MF.getFunction ().getCallingConv ();
184+ if (CC != CallingConv::AMDGPU_KERNEL) {
185+ LLVM_DEBUG (dbgs () << " Skipping non-kernel function (CC=" << CC
186+ << " ): Post-RA pass cannot safely modify callees\n " );
187+ ++NumNonKernelsSkipped;
188+ return false ;
189+ }
190+
176191 TRI = ST->getRegisterInfo ();
177192 MRI = &MF.getRegInfo ();
178193
@@ -236,6 +251,33 @@ bool AMDGPUHotBlockRegisterRenamingImpl::processBasicBlock(
236251 << " registers with values, " << FreeRegs.size ()
237252 << " free registers\n " );
238253
254+ // Step 2a: Build PhysReg definitions cache (Fix #1a)
255+ // Track all SlotIndexes where each physical register is defined
256+ const TargetRegisterClass *VGPR_32_RC =
257+ TRI->getRegClass (AMDGPU::VGPR_32RegClassID);
258+ DenseMap<MCRegister, SmallVector<SlotIndex, 4 >> PhysRegDefs;
259+
260+ for (MachineInstr &MI : *MBB) {
261+ SlotIndex Idx = LIS->getInstructionIndex (MI);
262+ for (const MachineOperand &MO : MI.operands ()) {
263+ if (MO.isReg () && MO.isDef () && MO.getReg ().isPhysical ()) {
264+ MCRegister PhysReg = MO.getReg ();
265+ if (VGPR_32_RC->contains (PhysReg)) {
266+ PhysRegDefs[PhysReg].push_back (Idx);
267+ // Also track superregs for aliasing
268+ for (MCRegister Super : TRI->superregs (PhysReg)) {
269+ PhysRegDefs[Super].push_back (Idx);
270+ }
271+ }
272+ }
273+ }
274+ }
275+
276+ LLVM_DEBUG ({
277+ dbgs () << " Built PhysRegDefs cache: " << PhysRegDefs.size ()
278+ << " registers have definitions in this BB\n " ;
279+ });
280+
239281 // Step 3: Create max heap of dense registers
240282 auto Comparator = [&ValueDensity](MCRegister A, MCRegister B) {
241283 return ValueDensity[A] < ValueDensity[B]; // max heap
@@ -266,7 +308,7 @@ bool AMDGPUHotBlockRegisterRenamingImpl::processBasicBlock(
266308
267309 MCRegister FreeReg = FreeRegs[FreeRegIdx++];
268310
269- if (tryMoveValue (DenseReg, FreeReg, MBB, BBStart, BBEnd)) {
311+ if (tryMoveValue (DenseReg, FreeReg, MBB, BBStart, BBEnd, PhysRegDefs )) {
270312 Changed = true ;
271313 ++NumValuesRemapped;
272314
@@ -450,11 +492,10 @@ bool AMDGPUHotBlockRegisterRenamingImpl::isVirtRegMovable(Register VirtReg,
450492 return true ;
451493}
452494
453- bool AMDGPUHotBlockRegisterRenamingImpl::tryMoveValue (MCRegister DenseReg,
454- MCRegister FreeReg,
455- MachineBasicBlock *MBB,
456- SlotIndex BBStart,
457- SlotIndex BBEnd) {
495+ bool AMDGPUHotBlockRegisterRenamingImpl::tryMoveValue (
496+ MCRegister DenseReg, MCRegister FreeReg, MachineBasicBlock *MBB,
497+ SlotIndex BBStart, SlotIndex BBEnd,
498+ const DenseMap<MCRegister, SmallVector<SlotIndex, 4 >> &PhysRegDefs) {
458499 // Find a movable local value in DenseReg
459500 for (MCRegUnit Unit : TRI->regunits (DenseReg)) {
460501 LiveIntervalUnion &LIU = LRM->getLiveUnions ()[Unit];
@@ -512,6 +553,36 @@ bool AMDGPUHotBlockRegisterRenamingImpl::tryMoveValue(MCRegister DenseReg,
512553 continue ;
513554 }
514555
556+ // Fix #1a: Check that FreeReg is not redefined in VirtReg's live range
557+ auto DefIt = PhysRegDefs.find (FreeReg);
558+ if (DefIt != PhysRegDefs.end ()) {
559+ bool HasConflict = false ;
560+ for (SlotIndex DefIdx : DefIt->second ) {
561+ // Check if definition is strictly inside the live range (not at endpoints)
562+ if (DefIdx > SegStart && DefIdx < SegEnd) {
563+ LLVM_DEBUG (dbgs () << " Cannot move to " << printReg (FreeReg, TRI)
564+ << " : redefined at " << DefIdx << " inside live range ["
565+ << SegStart << " , " << SegEnd << " )\n " );
566+ HasConflict = true ;
567+ break ;
568+ }
569+ }
570+ if (HasConflict)
571+ continue ; // Try next VirtReg
572+ }
573+
574+ // Fix #1b: Check that FreeReg is not clobbered by any call in the live range
575+ BitVector UsableRegs;
576+ if (LIS->checkRegMaskInterference (VirtRegLI, UsableRegs)) {
577+ // checkRegMaskInterference returns true if LI crosses RegMask instructions
578+ // UsableRegs now contains registers NOT clobbered by any RegMask
579+ if (!UsableRegs.test (FreeReg)) {
580+ LLVM_DEBUG (dbgs () << " Cannot move to " << printReg (FreeReg, TRI)
581+ << " : clobbered by call RegMask in live range\n " );
582+ continue ; // Try next VirtReg
583+ }
584+ }
585+
515586 // This VirtReg is movable! Perform the remap
516587 LLVM_DEBUG (dbgs () << " Moving " << printReg (VirtReg, TRI) << " from "
517588 << printReg (DenseReg, TRI) << " to "
0 commit comments