Skip to content

Commit

Permalink
SWDEV-451380 - Disable kernel args for non-XGMI if HDP flush register is invalid
Browse files (browse the repository at this point in the history)

Change-Id: I227e046e2b9cb25476a50240f5d070adbd558f21
  • Loading branch information
iassiour authored and yanyao-wang committed Apr 3, 2024
1 parent 25c3b47 commit a8dbc0c
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 20 deletions.
14 changes: 11 additions & 3 deletions hipamd/src/hip_graph_internal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,9 +397,17 @@ hipError_t GraphExec::CaptureAQLPackets() {
}

if (device_kernarg_pool_ && !device->isXgmi()) {
*device->info().hdpMemFlushCntl = 1u;
if (*device->info().hdpMemFlushCntl != UINT32_MAX) {
LogError("Unexpected HDP Register readback value!");
if (device->info().hdpMemFlushCntl != nullptr) {
*device->info().hdpMemFlushCntl = 1u;
if (*device->info().hdpMemFlushCntl != UINT32_MAX) {
LogError("Unexpected HDP Register readback value!");
}
} else {
amd::Command* command = new amd::Marker(*stream, true);
if (command != nullptr) {
command->enqueue();
command->release();
}
}
}

Expand Down
35 changes: 22 additions & 13 deletions rocclr/device/rocm/rocdevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,27 @@ bool Device::create() {
return false;
}

setupCpuAgent();

// Get Agent HDP Flush Register Memory
hsa_amd_hdp_flush_t hdpInfo;
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(bkendDevice_,
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_HDP_FLUSH), &hdpInfo)) {
LogPrintfError("Unable to determine HDP flush info for HSA device %s", agent_name);
return false;
}

info_.hdpMemFlushCntl = hdpInfo.HDP_MEM_FLUSH_CNTL;
info_.hdpRegFlushCntl = hdpInfo.HDP_REG_FLUSH_CNTL;

bool device_kernel_args = true;
if (!isXgmi_ && ((info_.hdpMemFlushCntl == nullptr) || (info_.hdpRegFlushCntl == nullptr))) {
LogWarning("Unable to determine HDP flush register address. "
"Device kernel arguments are not supported");
device_kernel_args = false;
}

// Create HSA settings
assert(!settings_);
roc::Settings* hsaSettings = new roc::Settings();
Expand All @@ -712,7 +733,7 @@ bool Device::create() {
!hsaSettings->create((agent_profile_ == HSA_PROFILE_FULL), isa->versionMajor(),
isa->versionMinor(), isa->versionStepping(),
isa->xnack() == amd::Isa::Feature::Enabled,
coop_groups)) {
coop_groups, device_kernel_args)) {
LogPrintfError("Unable to create settings for HSA device %s (PCI ID %x)", agent_name,
pciDeviceId_);
return false;
Expand Down Expand Up @@ -753,17 +774,6 @@ bool Device::create() {
}
info_.pciDomainID = pci_domain_id;

// Get Agent HDP Flush Register Memory
hsa_amd_hdp_flush_t hdpInfo;
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(bkendDevice_,
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_HDP_FLUSH), &hdpInfo)) {
LogPrintfError("Unable to determine HDP flush info for HSA device %s", agent_name);
return false;
}
info_.hdpMemFlushCntl = hdpInfo.HDP_MEM_FLUSH_CNTL;
info_.hdpRegFlushCntl = hdpInfo.HDP_REG_FLUSH_CNTL;

if (populateOCLDeviceConstants() == false) {
LogPrintfError("populateOCLDeviceConstants failed for HSA device %s (PCI ID %x)", agent_name,
pciDeviceId_);
Expand Down Expand Up @@ -1256,7 +1266,6 @@ bool Device::populateOCLDeviceConstants() {
engineAssignMap_[1 << i] = 0;
}

setupCpuAgent();

checkAtomicSupport();

Expand Down
5 changes: 3 additions & 2 deletions rocclr/device/rocm/rocsettings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ Settings::Settings() {

// ================================================================================================
bool Settings::create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor,
uint32_t gfxStepping, bool enableXNACK, bool coop_groups) {
uint32_t gfxStepping, bool enableXNACK, bool coop_groups,
bool device_kernel_args) {
customHostAllocator_ = false;

if (fullProfile) {
Expand Down Expand Up @@ -167,7 +168,7 @@ bool Settings::create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor
// Enable device kernel args for MI300* for now
if (gfxipMajor == 9 && gfxipMinor == 4 &&
(gfxStepping == 0 || gfxStepping == 1 || gfxStepping == 2)) {
device_kernel_args_ = HIP_FORCE_DEV_KERNARG;
device_kernel_args_ = HIP_FORCE_DEV_KERNARG && device_kernel_args;
}

if (gfxipMajor >= 10) {
Expand Down
3 changes: 2 additions & 1 deletion rocclr/device/rocm/rocsettings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ class Settings : public device::Settings {

//! Creates settings
bool create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor, uint32_t gfxStepping,
bool enableXNACK, bool coop_groups = false);
bool enableXNACK, bool coop_groups = false,
bool device_kernel_args = true);

private:
//! Disable copy constructor
Expand Down
4 changes: 3 additions & 1 deletion rocclr/device/rocm/rocvirtual.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3211,7 +3211,9 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes,
}
}

const auto pcieKernargs = !dev().isXgmi() && dev().settings().device_kernel_args_;
const auto pcieKernargs = !dev().isXgmi() &&
dev().settings().device_kernel_args_ &&
roc_device_.info().largeBar_;
address argBuffer = hidden_arguments;
bool isGraphCapture = vcmd != nullptr && vcmd->getCapturingState();

Expand Down

0 comments on commit a8dbc0c

Please sign in to comment.