diff --git a/visa/G4_Kernel.cpp b/visa/G4_Kernel.cpp
index 40f9e3c4d90b..0d4e62de64c3 100644
--- a/visa/G4_Kernel.cpp
+++ b/visa/G4_Kernel.cpp
@@ -635,7 +635,8 @@ bool G4_Kernel::updateKernelToLargerGRF() {
 //
 // Updates kernel's related structures based on register pressure
 //
-void G4_Kernel::updateKernelByRegPressure(unsigned regPressure) {
+void G4_Kernel::updateKernelByRegPressure(unsigned regPressure,
+                                          bool forceGRFModeUp) {
   unsigned largestInputReg = getLargestInputRegister();
   if (m_kernelAttrs->isKernelAttrSet(Attributes::ATTR_MaxRegThreadDispatch)) {
     unsigned maxRegPayloadDispatch = m_kernelAttrs->getInt32KernelAttr(
@@ -643,7 +644,7 @@ void G4_Kernel::updateKernelByRegPressure(unsigned regPressure) {
     largestInputReg = std::max(largestInputReg, maxRegPayloadDispatch);
   }
 
-  unsigned newGRF = grfMode.setModeByRegPressure(regPressure, largestInputReg);
+  unsigned newGRF = grfMode.setModeByRegPressure(regPressure, largestInputReg, forceGRFModeUp);
   if (newGRF == numRegTotal)
     return;
 
@@ -2167,10 +2168,15 @@ GRFMode::GRFMode(const TARGET_PLATFORM platform, Options *op) : options(op) {
   unsigned maxGRF = op->getuInt32Option(vISA_MaxGRFNum);
   upperBoundGRF = maxGRF > 0 ? maxGRF : configs.back().numGRF;
   vISA_ASSERT(isValidNumGRFs(upperBoundGRF), "Invalid upper bound for GRF number");
+
+  // Number of GRF modes to step up beyond the heuristic's choice (0 = none)
+  GRFModeUpValue = op->getuInt32Option(vISA_ForceGRFModeUp);
+  vISA_ASSERT(GRFModeUpValue <= configs.size(),
+              "Invalid value for selecting a higher GRF mode");
 }
 
-unsigned GRFMode::setModeByRegPressure(unsigned maxRP,
-                                       unsigned largestInputReg) {
+unsigned GRFMode::setModeByRegPressure(unsigned maxRP, unsigned largestInputReg,
+                                       bool forceGRFModeUp) {
   unsigned size = configs.size(), i = 0;
   bool spillAllowed = 0;
   spillAllowed = options->getuInt32Option(vISA_SpillAllowed) > 256;
@@ -2184,6 +2190,13 @@ unsigned GRFMode::setModeByRegPressure(unsigned maxRP,
           // those blocked for kernel input. This helps cases
           // where an 8 GRF variable shows up in entry BB.
           (largestInputReg + 8) <= configs[i].numGRF) {
+        if (forceGRFModeUp && GRFModeUpValue > 0) {
+          // The user is forcing a higher GRF mode; clamp to the max mode
+          unsigned newGRFMode = currentMode + GRFModeUpValue;
+          unsigned maxGRFMode = getMaxGRFMode();
+          currentMode = newGRFMode < maxGRFMode ? newGRFMode : maxGRFMode;
+        }
+
         if (spillAllowed && currentMode > 0)
           return configs[--currentMode].numGRF;
         else
diff --git a/visa/G4_Kernel.hpp b/visa/G4_Kernel.hpp
index f5a73311eb7f..7c61ba1537e7 100644
--- a/visa/G4_Kernel.hpp
+++ b/visa/G4_Kernel.hpp
@@ -152,7 +152,8 @@ class GRFMode {
     return iter != configs.end();
   }
 
-  unsigned setModeByRegPressure(unsigned maxRP, unsigned largestInputReg);
+  unsigned setModeByRegPressure(unsigned maxRP, unsigned largestInputReg,
+                                bool forceGRFModeUp = false);
   bool hasLargerGRFSameThreads() const;
 
   unsigned getNumGRF() const { return configs[currentMode].numGRF; }
@@ -189,6 +190,14 @@ class GRFMode {
     return found->numGRF;
   }
 
+  unsigned getMaxGRFMode() const {
+    auto found =
+        std::find_if(configs.rbegin(), configs.rend(), [this](const Config &c) {
+          return c.VRTEnable && c.numGRF <= upperBoundGRF;
+        });
+    return configs.size() - std::distance(configs.rbegin(), found) - 1;
+  }
+
   // Get GRF number for initial kernel creation
   unsigned getInitalGRFNum() const {
     // Max GRF number is used when GRF selection is enabled.
@@ -255,6 +264,7 @@ class GRFMode {
   unsigned currentMode;
   unsigned lowerBoundGRF;
   unsigned upperBoundGRF;
+  unsigned GRFModeUpValue;
   Options *options;
 };
 
@@ -712,7 +722,8 @@ class G4_Kernel {
   const char *getName() const { return name; }
 
   bool updateKernelToLargerGRF();
-  void updateKernelByRegPressure(unsigned regPressure);
+  void updateKernelByRegPressure(unsigned regPressure,
+                                 bool forceGRFModeUp = false);
   bool updateKernelFromNumGRFAttr();
 
   void evalAddrExp();
diff --git a/visa/LocalScheduler/G4_Sched.cpp b/visa/LocalScheduler/G4_Sched.cpp
index 77ea256a4647..bb0ebe30a523 100644
--- a/visa/LocalScheduler/G4_Sched.cpp
+++ b/visa/LocalScheduler/G4_Sched.cpp
@@ -747,7 +747,7 @@ bool preRA_Scheduler::runWithGRFSelection(unsigned &KernelPressure) {
     KernelPressure = rp.getMaxRP();
   }
 
-  kernel.updateKernelByRegPressure(KernelPressure);
+  kernel.updateKernelByRegPressure(KernelPressure, true);
 
   return Changed;
 }
diff --git a/visa/Optimizer.h b/visa/Optimizer.h
index f786fc3f51a5..30e42e25ee09 100644
--- a/visa/Optimizer.h
+++ b/visa/Optimizer.h
@@ -157,7 +157,10 @@ class Optimizer {
     unsigned KernelPressure = 0;
     preRA_Scheduler Sched(kernel);
     if (kernel.useAutoGRFSelection()) {
+      unsigned InitialGRFNumber = kernel.getNumRegTotal();
       Sched.runWithGRFSelection(KernelPressure);
+      if (InitialGRFNumber != kernel.getNumRegTotal())
+        Sched.run(KernelPressure);
     } else {
       Sched.run(KernelPressure);
     }
diff --git a/visa/include/VISAOptionsDefs.h b/visa/include/VISAOptionsDefs.h
index 97b6fc88c7c6..11e6a322410d 100644
--- a/visa/include/VISAOptionsDefs.h
+++ b/visa/include/VISAOptionsDefs.h
@@ -168,6 +168,11 @@ DEF_VISA_OPTION(
     "Spill size allowed without increasing GRF number in VRT."
     "0 means VRT will always bump up the GRF number to avoid spills",
     256)
+DEF_VISA_OPTION(vISA_ForceGRFModeUp, ET_INT32, "-forceGRFModeUp",
+                "USAGE: -forceGRFModeUp <k>.\n"
+                "Set the GRF mode k higher than the one selected by default "
+                "heuristics. 0 means no increase in GRF mode.",
+                0)
 DEF_VISA_OPTION(vISA_ScalarPipe, ET_INT32, "-scalarPipe",
                 "USAGE: -scalarPipe \n", 0)
 DEF_VISA_OPTION(vISA_LVN, ET_BOOL, "-nolvn", UNUSED, true)