diff --git a/ze/btx_zeinterval_callbacks.cpp b/ze/btx_zeinterval_callbacks.cpp index 6e855a51..21831d5e 100644 --- a/ze/btx_zeinterval_callbacks.cpp +++ b/ze/btx_zeinterval_callbacks.cpp @@ -765,8 +765,8 @@ static void zeEventDestroy_exit_callback(void *btx_handle, void *usr_data, int64 * Sampling */ -DeviceHash get_device_hash(void *usr_data, const char *hostname, int64_t vpid, - ze_device_handle_t hDevice) { +std::optional get_device_hash(void *usr_data, const char *hostname, int64_t vpid, + ze_device_handle_t hDevice) { auto *data = static_cast(usr_data); const auto it0 = data->sampling_device_property.find({hostname, vpid, hDevice}); if (it0 != data->sampling_device_property.cend()) { @@ -778,9 +778,23 @@ DeviceHash get_device_hash(void *usr_data, const char *hostname, int64_t vpid, hash *= 0x100000001b3; // FNV prime } - return {hash, deviceIdx}; + return DeviceHash{hash, deviceIdx}; // Return tuple optional + } + return std::nullopt; // Return empty optional if not found +} + +uint64_t calculate_delta(uint64_t current_val, uint64_t prev_val) { + // return zero if equal + if (current_val == prev_val) { + return 0; + } + + // calculate the delta, considering overflow + if (current_val >= prev_val) { + return current_val - prev_val; + } else { + return current_val + (UINT64_MAX - prev_val) + 1; } - return {0, 0}; // Return 0 values if not found } static void lttng_ust_ze_sampling_fabricPort_callback( @@ -817,25 +831,30 @@ static void lttng_ust_ze_sampling_fabricPort_callback( /* Per doc: When taking the delta, the difference between timestamp samples * could be 0, if the frequency of sampling the snapshots is higher than the * frequency of the timestamp update. 
*/ - if (pFabricPortThroughput_val->timestamp == prev_throughput.timestamp) + double time_delta = static_cast( + calculate_delta(pFabricPortThroughput_val->timestamp, prev_throughput.timestamp)); + if (time_delta == 0) return; - double time_diff = - static_cast(pFabricPortThroughput_val->timestamp - prev_throughput.timestamp); // Calculate the RX and TX throughput - double rxThroughput = - static_cast(pFabricPortThroughput_val->rxCounter - prev_throughput.rxCounter) / - time_diff; - double txThroughput = - static_cast(pFabricPortThroughput_val->txCounter - prev_throughput.txCounter) / - time_diff; - DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice); - if (rxThroughput != 0 || txThroughput != 0) - btx_push_message_sampling_fabricPort( - btx_handle, hostname, prev_ts, uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hFabricPort, - subDevice, fabricId, remotePortId, rxThroughput, txThroughput, rxSpeed, txSpeed); - // Update the stored values - it->second = {*pFabricPortThroughput_val, ts}; + double rxThroughput = static_cast(calculate_delta(pFabricPortThroughput_val->rxCounter, + prev_throughput.rxCounter)) / + time_delta; + double txThroughput = static_cast(calculate_delta(pFabricPortThroughput_val->txCounter, + prev_throughput.txCounter)) / + time_delta; + auto uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice); + if (uuid_idx) { + auto [hash, deviceIdx] = *uuid_idx; + if (rxThroughput != 0 || txThroughput != 0) + btx_push_message_sampling_fabricPort( + btx_handle, hostname, prev_ts, hash, deviceIdx, (uint64_t)hFabricPort, subDevice, + fabricId, remotePortId, rxThroughput, txThroughput, rxSpeed, txSpeed); + // Update the stored values + it->second = {*pFabricPortThroughput_val, ts}; + } else { + std::cerr << "Device property not found!" << std::endl; + } } else { std::cerr << "Fabricport property not found!" 
<< std::endl; } @@ -859,8 +878,11 @@ static void lttng_ust_ze_sampling_memStats_callback( // Previous bandwidth data auto &[prev_bandwidth, prev_ts] = it->second; - - if (pMemBandwidth_val->timestamp == prev_bandwidth.timestamp) + double time_delta = static_cast( + calculate_delta(pMemBandwidth_val->timestamp, prev_bandwidth.timestamp)); + // if no progress on timestamp, return (can happen when sampling frequency is higher than the + // timestamp counter update frequency) + if (time_delta == 0) return; // Calculate the RD and WT bandwidth @@ -868,26 +890,27 @@ static void lttng_ust_ze_sampling_memStats_callback( double allocation = static_cast(pMemState_val->size - pMemState_val->free) * 100.0 / static_cast(pMemState_val->size); - double time_diff = static_cast(pMemBandwidth_val->timestamp - prev_bandwidth.timestamp); // percentage bandwidth based on the manual - double pBandwidth = - static_cast(1e6 * - ((pMemBandwidth_val->readCounter - prev_bandwidth.readCounter) + - (pMemBandwidth_val->writeCounter - prev_bandwidth.writeCounter))) / - (time_diff * pMemBandwidth_val->maxBandwidth); + uint64_t rdBytes_delta = + calculate_delta(pMemBandwidth_val->readCounter, prev_bandwidth.readCounter); + uint64_t wtBytes_delta = + calculate_delta(pMemBandwidth_val->writeCounter, prev_bandwidth.writeCounter); + double pBandwidth = static_cast(1e6 * (rdBytes_delta + wtBytes_delta) / + (time_delta * pMemBandwidth_val->maxBandwidth)); // rd and wt bandwidth if further drilling needed (place holder for now) - double rdBandwidth = - static_cast(pMemBandwidth_val->readCounter - prev_bandwidth.readCounter) * 1e6 / - (time_diff); - double wtBandwidth = - static_cast(pMemBandwidth_val->writeCounter - prev_bandwidth.writeCounter) * 1e6 / - (time_diff); - DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice); - btx_push_message_sampling_memModule(btx_handle, hostname, prev_ts, uuid_idx.hash, - uuid_idx.deviceIdx, (uint64_t)hMemModule, subDevice, - pBandwidth, rdBandwidth, 
wtBandwidth, allocation); - // Update the stored values - it->second = {*pMemBandwidth_val, ts}; + double rdBandwidth = static_cast(rdBytes_delta) * 1e6 / time_delta; + double wtBandwidth = static_cast(wtBytes_delta) * 1e6 / time_delta; + auto uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice); + if (uuid_idx) { + auto [hash, deviceIdx] = *uuid_idx; + btx_push_message_sampling_memModule(btx_handle, hostname, prev_ts, hash, deviceIdx, + (uint64_t)hMemModule, subDevice, pBandwidth, rdBandwidth, + wtBandwidth, allocation); + // Update the stored values + it->second = {*pMemBandwidth_val, ts}; + } else { + std::cerr << "Device property not found!" << std::endl; + } } else { std::cerr << "Memory property not found!" << std::endl; } @@ -912,33 +935,29 @@ static void lttng_ust_ze_sampling_engineStats_callback(void *btx_handle, void *u if (inserted) return; auto &[prev_engineStats, prev_ts] = it->second; - if (pEngineStats_val->timestamp == prev_engineStats.timestamp) + double time_delta = static_cast( + calculate_delta(pEngineStats_val->timestamp, prev_engineStats.timestamp)); + // if no progress on timestamp, return (can happen when sampling frequency is higher than the + // timestamp counter update frequency) + if (time_delta == 0) return; - double time_diff = - pEngineStats_val->timestamp >= prev_engineStats.timestamp - ? 
static_cast(pEngineStats_val->timestamp - prev_engineStats.timestamp) - : static_cast(pEngineStats_val->timestamp + - (UINT64_MAX - prev_engineStats.timestamp) + 1); - double activeTime = 0; - if (pEngineStats_val->activeTime > prev_engineStats.activeTime) - activeTime = - static_cast(pEngineStats_val->activeTime - prev_engineStats.activeTime) * 100 / - time_diff; - else - activeTime = static_cast((UINT64_MAX - prev_engineStats.activeTime) + - pEngineStats_val->activeTime + 1) * - 100 / time_diff; - DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice); - if (engineProps.type == ZES_ENGINE_GROUP_COMPUTE_ALL) { - btx_push_message_sampling_computeEU(btx_handle, hostname, prev_ts, uuid_idx.hash, - uuid_idx.deviceIdx, (uint64_t)hEngine, subDevice, - int(activeTime)); - } else if (engineProps.type == ZES_ENGINE_GROUP_COPY_ALL) { - btx_push_message_sampling_copyEU(btx_handle, hostname, prev_ts, uuid_idx.hash, - uuid_idx.deviceIdx, (uint64_t)hEngine, subDevice, - int(activeTime)); + double activeTime = static_cast(calculate_delta(pEngineStats_val->activeTime, + prev_engineStats.activeTime)) * + 100 / time_delta; + auto uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice); + if (uuid_idx) { + auto [hash, deviceIdx] = *uuid_idx; + if (engineProps.type == ZES_ENGINE_GROUP_COMPUTE_ALL) { + btx_push_message_sampling_computeEU(btx_handle, hostname, prev_ts, hash, deviceIdx, + (uint64_t)hEngine, subDevice, activeTime); + } else if (engineProps.type == ZES_ENGINE_GROUP_COPY_ALL) { + btx_push_message_sampling_copyEU(btx_handle, hostname, prev_ts, hash, deviceIdx, + (uint64_t)hEngine, subDevice, activeTime); + } + it->second = {*pEngineStats_val, ts}; + } else { + std::cerr << "Device property not found! 
" << std::endl; } - it->second = {*pEngineStats_val, ts}; } } else { std::cerr << "Engine property not found for device: " << hDevice << std::endl; @@ -956,26 +975,39 @@ static void lttng_ust_ze_sampling_gpu_energy_callback( return; auto &[prev_EnergyCounter, prev_ts] = it->second; - if (pEnergyCounter_val->timestamp == prev_EnergyCounter.timestamp) + double time_delta = static_cast( + calculate_delta(pEnergyCounter_val->timestamp, prev_EnergyCounter.timestamp)); + // if no progress on timestamp, return (can happen when sampling frequency is higher than the + // timestamp counter update frequency) + if (time_delta == 0) return; - double time_diff = - static_cast(pEnergyCounter_val->timestamp - prev_EnergyCounter.timestamp); double power = - static_cast(pEnergyCounter_val->energy - prev_EnergyCounter.energy) / time_diff; - DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice); - btx_push_message_sampling_power(btx_handle, hostname, prev_ts, uuid_idx.hash, uuid_idx.deviceIdx, - (uint64_t)hPower, (thapi_domain_idx)domainIdx, power); - it->second = {*pEnergyCounter_val, ts}; + static_cast(calculate_delta(pEnergyCounter_val->energy, prev_EnergyCounter.energy)) / + time_delta; + auto uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice); + if (uuid_idx) { + auto [hash, deviceIdx] = *uuid_idx; + btx_push_message_sampling_power(btx_handle, hostname, prev_ts, hash, deviceIdx, + (uint64_t)hPower, (thapi_domain_idx)domainIdx, power); + it->second = {*pEnergyCounter_val, ts}; + } else { + std::cerr << "Device property not found! 
" << std::endl; + } } static void lttng_ust_ze_sampling_gpu_frequency_callback( void *btx_handle, void *usr_data, int64_t ts, const char *hostname, int64_t vpid, uint64_t vtid, ze_device_handle_t hDevice, zes_freq_handle_t hFrequency, uint32_t domainIdx, size_t _pFreqState_val_length, zes_freq_state_t *pFreqState_val) { - DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice); - btx_push_message_sampling_frequency(btx_handle, hostname, ts, uuid_idx.hash, uuid_idx.deviceIdx, - (uint64_t)hFrequency, domainIdx, pFreqState_val->actual); + auto uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice); + if (uuid_idx) { + auto [hash, deviceIdx] = *uuid_idx; + btx_push_message_sampling_frequency(btx_handle, hostname, ts, hash, deviceIdx, + (uint64_t)hFrequency, domainIdx, pFreqState_val->actual); + } else { + std::cerr << "Device property not found! " << std::endl; + } } // Properties @@ -1002,8 +1034,7 @@ static void lttng_ust_ze_sampling_fabricPortProperties_callback( size_t _pFabricPortProperties_val_length, zes_fabric_port_properties_t *pFabricPortProperties_val) { auto *data = static_cast(usr_data); - data->fabricPort_property[{hostname, vpid, (ze_device_handle_t)hDevice, - (zes_fabric_port_handle_t)hFabricPort}] = *pFabricPortProperties_val; + data->fabricPort_property[{hostname, vpid, hDevice, hFabricPort}] = *pFabricPortProperties_val; } static void lttng_ust_ze_sampling_memoryProperties_callback( @@ -1011,8 +1042,7 @@ static void lttng_ust_ze_sampling_memoryProperties_callback( ze_device_handle_t hDevice, zes_mem_handle_t hMemModule, size_t _pMemModuleProperties_val_length, zes_mem_properties_t *pMemModuleProperties_val) { auto *data = static_cast(usr_data); - data->memModule_property[{hostname, vpid, (ze_device_handle_t)hDevice, - (zes_mem_handle_t)hMemModule}] = *pMemModuleProperties_val; + data->memModule_property[{hostname, vpid, hDevice, hMemModule}] = *pMemModuleProperties_val; } static void 
lttng_ust_ze_sampling_powerProperties_callback( @@ -1020,8 +1050,7 @@ static void lttng_ust_ze_sampling_powerProperties_callback( ze_device_handle_t hDevice, zes_pwr_handle_t hPower, size_t _pPowerProperties_val_length, zes_power_properties_t *pPowerProperties_val) { auto *data = static_cast(usr_data); - data->power_property[{hostname, vpid, (ze_device_handle_t)hDevice, (zes_pwr_handle_t)hPower}] = - *pPowerProperties_val; + data->power_property[{hostname, vpid, hDevice, hPower}] = *pPowerProperties_val; } static void lttng_ust_ze_sampling_freqProperties_callback( @@ -1029,8 +1058,7 @@ static void lttng_ust_ze_sampling_freqProperties_callback( ze_device_handle_t hDevice, zes_freq_handle_t hFrequency, size_t _pfreqProperties_val_length, zes_freq_properties_t *pFreqProperties_val) { auto *data = static_cast(usr_data); - data->frequency_property[{hostname, vpid, (ze_device_handle_t)hDevice, - (zes_freq_handle_t)hFrequency}] = *pFreqProperties_val; + data->frequency_property[{hostname, vpid, hDevice, hFrequency}] = *pFreqProperties_val; } static void lttng_ust_ze_sampling_engineProperties_callback( @@ -1038,8 +1066,7 @@ static void lttng_ust_ze_sampling_engineProperties_callback( ze_device_handle_t hDevice, zes_engine_handle_t hEngine, size_t _pEngineProperties_val_length, zes_engine_properties_t *pEngineProperties_val) { auto *data = static_cast(usr_data); - data->engine_property[{hostname, vpid, (ze_device_handle_t)hDevice, - (zes_engine_handle_t)hEngine}] = *pEngineProperties_val; + data->engine_property[{hostname, vpid, hDevice, hEngine}] = *pEngineProperties_val; } /* @@ -1068,7 +1095,7 @@ void btx_register_usr_callbacks(void *btx_handle) { /* Device and Subdevice property */ btx_register_callbacks_lttng_ust_ze_properties_device(btx_handle, &property_device_callback); btx_register_callbacks_lttng_ust_ze_properties_subdevice(btx_handle, - &property_subdevice_callback); + &property_subdevice_callback); /* Map command list to device and to command queue dist*/ 
btx_register_callbacks_lttng_ust_ze_zeCommandListCreateImmediate_entry( @@ -1098,7 +1125,7 @@ void btx_register_usr_callbacks(void *btx_handle) { /* Drift */ btx_register_callbacks_lttng_ust_ze_properties_device_timer(btx_handle, - &property_device_timer_callback); + &property_device_timer_callback); /* Profiling Command (everything who signal an event on completion) */ diff --git a/ze/btx_zeinterval_callbacks.hpp b/ze/btx_zeinterval_callbacks.hpp index 920b6fb1..6a323b4e 100644 --- a/ze/btx_zeinterval_callbacks.hpp +++ b/ze/btx_zeinterval_callbacks.hpp @@ -69,11 +69,7 @@ using btx_event_desct_t = using btx_command_list_desc_t = std::tuple; -struct DeviceHash { - uint64_t hash; - uint32_t deviceIdx; -}; - +using DeviceHash = std::tuple; struct data_s { /* Host */