Skip to content

Commit

Permalink
Deltas and Hash-return handled
Browse files Browse the repository at this point in the history
  • Loading branch information
Solomon Bekele committed Oct 22, 2024
1 parent 720ccd0 commit 320a81e
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 93 deletions.
203 changes: 115 additions & 88 deletions ze/btx_zeinterval_callbacks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -765,8 +765,8 @@ static void zeEventDestroy_exit_callback(void *btx_handle, void *usr_data, int64
* Sampling
*/

DeviceHash get_device_hash(void *usr_data, const char *hostname, int64_t vpid,
ze_device_handle_t hDevice) {
std::optional<DeviceHash> get_device_hash(void *usr_data, const char *hostname, int64_t vpid,
ze_device_handle_t hDevice) {
auto *data = static_cast<data_t *>(usr_data);
const auto it0 = data->sampling_device_property.find({hostname, vpid, hDevice});
if (it0 != data->sampling_device_property.cend()) {
Expand All @@ -778,9 +778,23 @@ DeviceHash get_device_hash(void *usr_data, const char *hostname, int64_t vpid,
hash *= 0x100000001b3; // FNV prime
}

return {hash, deviceIdx};
return DeviceHash{hash, deviceIdx}; // Return tuple optional
}
return std::nullopt; // Return empty optional if not found
}

/**
 * Difference between two samples of a 64-bit counter, tolerant of wrap-around.
 *
 * Unsigned arithmetic is performed modulo 2^64, so one subtraction covers every
 * case: it yields 0 when both samples are equal, the plain difference when the
 * counter moved forward, and current_val + (UINT64_MAX - prev_val) + 1 when the
 * counter wrapped between the two samples.
 *
 * @param current_val most recent counter sample
 * @param prev_val    previous counter sample
 * @return distance from prev_val to current_val, modulo 2^64
 */
uint64_t calculate_delta(uint64_t current_val, uint64_t prev_val) {
  return current_val - prev_val;
}

static void lttng_ust_ze_sampling_fabricPort_callback(
Expand Down Expand Up @@ -817,25 +831,30 @@ static void lttng_ust_ze_sampling_fabricPort_callback(
/* Per doc: When taking the delta, the difference between timestamp samples
* could be 0, if the frequency of sampling the snapshots is higher than the
* frequency of the timestamp update. */
if (pFabricPortThroughput_val->timestamp == prev_throughput.timestamp)
double time_delta = static_cast<double>(
calculate_delta(pFabricPortThroughput_val->timestamp, prev_throughput.timestamp));
if (time_delta == 0)
return;

double time_diff =
static_cast<double>(pFabricPortThroughput_val->timestamp - prev_throughput.timestamp);
// Calculate the RX and TX throughput
double rxThroughput =
static_cast<double>(pFabricPortThroughput_val->rxCounter - prev_throughput.rxCounter) /
time_diff;
double txThroughput =
static_cast<double>(pFabricPortThroughput_val->txCounter - prev_throughput.txCounter) /
time_diff;
DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice);
if (rxThroughput != 0 || txThroughput != 0)
btx_push_message_sampling_fabricPort(
btx_handle, hostname, prev_ts, uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hFabricPort,
subDevice, fabricId, remotePortId, rxThroughput, txThroughput, rxSpeed, txSpeed);
// Update the stored values
it->second = {*pFabricPortThroughput_val, ts};
double rxThroughput = static_cast<double>(calculate_delta(pFabricPortThroughput_val->rxCounter,
prev_throughput.rxCounter)) /
time_delta;
double txThroughput = static_cast<double>(calculate_delta(pFabricPortThroughput_val->txCounter,
prev_throughput.txCounter)) /
time_delta;
auto uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice);
if (uuid_idx) {
auto [hash, deviceIdx] = *uuid_idx;
if (rxThroughput != 0 || txThroughput != 0)
btx_push_message_sampling_fabricPort(
btx_handle, hostname, prev_ts, hash, deviceIdx, (uint64_t)hFabricPort, subDevice,
fabricId, remotePortId, rxThroughput, txThroughput, rxSpeed, txSpeed);
// Update the stored values
it->second = {*pFabricPortThroughput_val, ts};
} else {
std::cerr << "Device property not found!" << std::endl;
}
} else {
std::cerr << "Fabricport property not found!" << std::endl;
}
Expand All @@ -859,35 +878,39 @@ static void lttng_ust_ze_sampling_memStats_callback(

// Previous bandwidth data
auto &[prev_bandwidth, prev_ts] = it->second;

if (pMemBandwidth_val->timestamp == prev_bandwidth.timestamp)
double time_delta = static_cast<double>(
calculate_delta(pMemBandwidth_val->timestamp, prev_bandwidth.timestamp));
// if no progress on timestamp, return (can happen when sampling frequency is higher than the
// timestamp counter update frequency)
if (time_delta == 0)
return;

// Calculate the RD and WT bandwidth
// https://spec.oneapi.io/level-zero/1.9.3/sysman/api.html#zes-mem-bandwidth-t

double allocation = static_cast<double>(pMemState_val->size - pMemState_val->free) * 100.0 /
static_cast<double>(pMemState_val->size);
double time_diff = static_cast<double>(pMemBandwidth_val->timestamp - prev_bandwidth.timestamp);
// percentage bandwidth based on the manual
double pBandwidth =
static_cast<double>(1e6 *
((pMemBandwidth_val->readCounter - prev_bandwidth.readCounter) +
(pMemBandwidth_val->writeCounter - prev_bandwidth.writeCounter))) /
(time_diff * pMemBandwidth_val->maxBandwidth);
uint64_t rdBytes_delta =
calculate_delta(pMemBandwidth_val->readCounter, prev_bandwidth.readCounter);
uint64_t wtBytes_delta =
calculate_delta(pMemBandwidth_val->writeCounter, prev_bandwidth.writeCounter);
double pBandwidth = static_cast<double>(1e6 * (rdBytes_delta + wtBytes_delta) /
(time_delta * pMemBandwidth_val->maxBandwidth));
// rd and wt bandwidth if further drilling needed (place holder for now)
double rdBandwidth =
static_cast<double>(pMemBandwidth_val->readCounter - prev_bandwidth.readCounter) * 1e6 /
(time_diff);
double wtBandwidth =
static_cast<double>(pMemBandwidth_val->writeCounter - prev_bandwidth.writeCounter) * 1e6 /
(time_diff);
DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice);
btx_push_message_sampling_memModule(btx_handle, hostname, prev_ts, uuid_idx.hash,
uuid_idx.deviceIdx, (uint64_t)hMemModule, subDevice,
pBandwidth, rdBandwidth, wtBandwidth, allocation);
// Update the stored values
it->second = {*pMemBandwidth_val, ts};
double rdBandwidth = static_cast<double>(rdBytes_delta) * 1e6 / time_delta;
double wtBandwidth = static_cast<double>(wtBytes_delta) * 1e6 / time_delta;
auto uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice);
if (uuid_idx) {
auto [hash, deviceIdx] = *uuid_idx;
btx_push_message_sampling_memModule(btx_handle, hostname, prev_ts, hash, deviceIdx,
(uint64_t)hMemModule, subDevice, pBandwidth, rdBandwidth,
wtBandwidth, allocation);
// Update the stored values
it->second = {*pMemBandwidth_val, ts};
} else {
std::cerr << "Device property not found!" << std::endl;
}
} else {
std::cerr << "Memory property not found!" << std::endl;
}
Expand All @@ -912,33 +935,29 @@ static void lttng_ust_ze_sampling_engineStats_callback(void *btx_handle, void *u
if (inserted)
return;
auto &[prev_engineStats, prev_ts] = it->second;
if (pEngineStats_val->timestamp == prev_engineStats.timestamp)
double time_delta = static_cast<double>(
calculate_delta(pEngineStats_val->timestamp, prev_engineStats.timestamp));
// if no progress on timestamp, return (can happen when sampling frequency is higher than the
// timestamp counter update frequency)
if (time_delta == 0)
return;
double time_diff =
pEngineStats_val->timestamp >= prev_engineStats.timestamp
? static_cast<double>(pEngineStats_val->timestamp - prev_engineStats.timestamp)
: static_cast<double>(pEngineStats_val->timestamp +
(UINT64_MAX - prev_engineStats.timestamp) + 1);
double activeTime = 0;
if (pEngineStats_val->activeTime > prev_engineStats.activeTime)
activeTime =
static_cast<double>(pEngineStats_val->activeTime - prev_engineStats.activeTime) * 100 /
time_diff;
else
activeTime = static_cast<double>((UINT64_MAX - prev_engineStats.activeTime) +
pEngineStats_val->activeTime + 1) *
100 / time_diff;
DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice);
if (engineProps.type == ZES_ENGINE_GROUP_COMPUTE_ALL) {
btx_push_message_sampling_computeEU(btx_handle, hostname, prev_ts, uuid_idx.hash,
uuid_idx.deviceIdx, (uint64_t)hEngine, subDevice,
int(activeTime));
} else if (engineProps.type == ZES_ENGINE_GROUP_COPY_ALL) {
btx_push_message_sampling_copyEU(btx_handle, hostname, prev_ts, uuid_idx.hash,
uuid_idx.deviceIdx, (uint64_t)hEngine, subDevice,
int(activeTime));
double activeTime = static_cast<double>(calculate_delta(pEngineStats_val->activeTime,
prev_engineStats.activeTime)) *
100 / time_delta;
auto uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice);
if (uuid_idx) {
auto [hash, deviceIdx] = *uuid_idx;
if (engineProps.type == ZES_ENGINE_GROUP_COMPUTE_ALL) {
btx_push_message_sampling_computeEU(btx_handle, hostname, prev_ts, hash, deviceIdx,
(uint64_t)hEngine, subDevice, activeTime);
} else if (engineProps.type == ZES_ENGINE_GROUP_COPY_ALL) {
btx_push_message_sampling_copyEU(btx_handle, hostname, prev_ts, hash, deviceIdx,
(uint64_t)hEngine, subDevice, activeTime);
}
it->second = {*pEngineStats_val, ts};
} else {
std::cerr << "Device property not found! " << std::endl;
}
it->second = {*pEngineStats_val, ts};
}
} else {
std::cerr << "Engine property not found for device: " << hDevice << std::endl;
Expand All @@ -956,26 +975,39 @@ static void lttng_ust_ze_sampling_gpu_energy_callback(
return;

auto &[prev_EnergyCounter, prev_ts] = it->second;
if (pEnergyCounter_val->timestamp == prev_EnergyCounter.timestamp)
double time_delta = static_cast<double>(
calculate_delta(pEnergyCounter_val->timestamp, prev_EnergyCounter.timestamp));
// if no progress on timestamp, return (can happen when sampling frequency is higher than the
// timestamp counter update frequency)
if (time_delta == 0)
return;

double time_diff =
static_cast<double>(pEnergyCounter_val->timestamp - prev_EnergyCounter.timestamp);
double power =
static_cast<double>(pEnergyCounter_val->energy - prev_EnergyCounter.energy) / time_diff;
DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice);
btx_push_message_sampling_power(btx_handle, hostname, prev_ts, uuid_idx.hash, uuid_idx.deviceIdx,
(uint64_t)hPower, (thapi_domain_idx)domainIdx, power);
it->second = {*pEnergyCounter_val, ts};
static_cast<double>(calculate_delta(pEnergyCounter_val->energy, prev_EnergyCounter.energy)) /
time_delta;
auto uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice);
if (uuid_idx) {
auto [hash, deviceIdx] = *uuid_idx;
btx_push_message_sampling_power(btx_handle, hostname, prev_ts, hash, deviceIdx,
(uint64_t)hPower, (thapi_domain_idx)domainIdx, power);
it->second = {*pEnergyCounter_val, ts};
} else {
std::cerr << "Device property not found! " << std::endl;
}
}

/*
 * Frequency sampling callback: forwards the `actual` field of the sampled
 * frequency state as a sampling_frequency message, tagged with the hash and
 * index of the device it belongs to.
 *
 * Nothing is pushed when get_device_hash() has no entry for
 * (hostname, vpid, hDevice); a diagnostic is printed to stderr instead.
 */
static void lttng_ust_ze_sampling_gpu_frequency_callback(
    void *btx_handle, void *usr_data, int64_t ts, const char *hostname, int64_t vpid, uint64_t vtid,
    ze_device_handle_t hDevice, zes_freq_handle_t hFrequency, uint32_t domainIdx,
    size_t _pFreqState_val_length, zes_freq_state_t *pFreqState_val) {
  // get_device_hash() returns an empty optional when the device is unknown.
  const auto uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice);
  if (!uuid_idx) {
    std::cerr << "Device property not found! " << std::endl;
    return;
  }

  const auto [hash, deviceIdx] = *uuid_idx;
  btx_push_message_sampling_frequency(btx_handle, hostname, ts, hash, deviceIdx,
                                      (uint64_t)hFrequency, domainIdx, pFreqState_val->actual);
}

// Properties
Expand All @@ -1002,44 +1034,39 @@ static void lttng_ust_ze_sampling_fabricPortProperties_callback(
size_t _pFabricPortProperties_val_length,
zes_fabric_port_properties_t *pFabricPortProperties_val) {
auto *data = static_cast<data_t *>(usr_data);
data->fabricPort_property[{hostname, vpid, (ze_device_handle_t)hDevice,
(zes_fabric_port_handle_t)hFabricPort}] = *pFabricPortProperties_val;
data->fabricPort_property[{hostname, vpid, hDevice, hFabricPort}] = *pFabricPortProperties_val;
}

/*
 * Stores the memory-module properties carried by the event in
 * data->memModule_property, keyed by (hostname, vpid, hDevice, hMemModule).
 */
static void lttng_ust_ze_sampling_memoryProperties_callback(
    void *btx_handle, void *usr_data, int64_t ts, const char *hostname, int64_t vpid, uint64_t vtid,
    ze_device_handle_t hDevice, zes_mem_handle_t hMemModule,
    size_t _pMemModuleProperties_val_length, zes_mem_properties_t *pMemModuleProperties_val) {
  auto *data = static_cast<data_t *>(usr_data);
  // The handle parameters already have the handle types used in the key, so the
  // entry is written once and without casts.
  data->memModule_property[{hostname, vpid, hDevice, hMemModule}] = *pMemModuleProperties_val;
}

/*
 * Stores the power-domain properties carried by the event in
 * data->power_property, keyed by (hostname, vpid, hDevice, hPower).
 */
static void lttng_ust_ze_sampling_powerProperties_callback(
    void *btx_handle, void *usr_data, int64_t ts, const char *hostname, int64_t vpid, uint64_t vtid,
    ze_device_handle_t hDevice, zes_pwr_handle_t hPower, size_t _pPowerProperties_val_length,
    zes_power_properties_t *pPowerProperties_val) {
  auto *data = static_cast<data_t *>(usr_data);
  // The handle parameters already have the handle types used in the key, so the
  // entry is written once and without casts.
  data->power_property[{hostname, vpid, hDevice, hPower}] = *pPowerProperties_val;
}

/*
 * Stores the frequency-domain properties carried by the event in
 * data->frequency_property, keyed by (hostname, vpid, hDevice, hFrequency).
 */
static void lttng_ust_ze_sampling_freqProperties_callback(
    void *btx_handle, void *usr_data, int64_t ts, const char *hostname, int64_t vpid, uint64_t vtid,
    ze_device_handle_t hDevice, zes_freq_handle_t hFrequency, size_t _pfreqProperties_val_length,
    zes_freq_properties_t *pFreqProperties_val) {
  auto *data = static_cast<data_t *>(usr_data);
  // The handle parameters already have the handle types used in the key, so the
  // entry is written once and without casts.
  data->frequency_property[{hostname, vpid, hDevice, hFrequency}] = *pFreqProperties_val;
}

/*
 * Stores the engine-group properties carried by the event in
 * data->engine_property, keyed by (hostname, vpid, hDevice, hEngine).
 */
static void lttng_ust_ze_sampling_engineProperties_callback(
    void *btx_handle, void *usr_data, int64_t ts, const char *hostname, int64_t vpid, uint64_t vtid,
    ze_device_handle_t hDevice, zes_engine_handle_t hEngine, size_t _pEngineProperties_val_length,
    zes_engine_properties_t *pEngineProperties_val) {
  auto *data = static_cast<data_t *>(usr_data);
  // The handle parameters already have the handle types used in the key, so the
  // entry is written once and without casts.
  data->engine_property[{hostname, vpid, hDevice, hEngine}] = *pEngineProperties_val;
}

/*
Expand Down Expand Up @@ -1068,7 +1095,7 @@ void btx_register_usr_callbacks(void *btx_handle) {
/* Device and Subdevice property */
btx_register_callbacks_lttng_ust_ze_properties_device(btx_handle, &property_device_callback);
btx_register_callbacks_lttng_ust_ze_properties_subdevice(btx_handle,
&property_subdevice_callback);
&property_subdevice_callback);

/* Map command list to device and to command queue desc */
btx_register_callbacks_lttng_ust_ze_zeCommandListCreateImmediate_entry(
Expand Down Expand Up @@ -1098,7 +1125,7 @@ void btx_register_usr_callbacks(void *btx_handle) {

/* Drift */
btx_register_callbacks_lttng_ust_ze_properties_device_timer(btx_handle,
&property_device_timer_callback);
&property_device_timer_callback);

/* Profiling commands (everything that signals an event on completion)
*/
Expand Down
6 changes: 1 addition & 5 deletions ze/btx_zeinterval_callbacks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,7 @@ using btx_event_desct_t =
using btx_command_list_desc_t =
std::tuple<ze_command_queue_desc_t, ze_device_handle_t, bool /*hCommandListIsImmediate*/>;

/* Device identity attached to sampling messages.
 * Element 0 is the device hash, element 1 is the device index; unpack with
 * `auto [hash, deviceIdx] = value;`. */
using DeviceHash = std::tuple<uint64_t, uint32_t>;

struct data_s {
/* Host */
Expand Down

0 comments on commit 320a81e

Please sign in to comment.