From b76ca86f628390f07a8c24f5ab43f893287e72c6 Mon Sep 17 00:00:00 2001 From: sbekele Date: Wed, 10 Jul 2024 16:51:38 +0000 Subject: [PATCH] uuid based timeline --- utils/xprof_utils.hpp | 6 +-- xprof/btx_interval_model.yaml | 25 ++++++++++ xprof/btx_timeline.cpp | 82 ++++++++++++++++----------------- ze/btx_zeinterval_callbacks.cpp | 67 +++++++++++++++++++++------ ze/btx_zeinterval_callbacks.hpp | 17 +++++-- ze/tracer_ze_helpers.include.c | 36 ++++++++++++++- ze/ze_events.yaml | 18 ++++++++ 7 files changed, 188 insertions(+), 63 deletions(-) diff --git a/utils/xprof_utils.hpp b/utils/xprof_utils.hpp index 6a91e9d5..fcfa3b6f 100644 --- a/utils/xprof_utils.hpp +++ b/utils/xprof_utils.hpp @@ -57,11 +57,9 @@ typedef std::tuple hp_device_t; typedef std::tuple h_device_t; -typedef std::tuple hpd_fabricPort_t; typedef std::tuple hp_dsd_t; -typedef std::tuple hp_ddomain_t; -typedef std::tuple hp_dsdev_t; -typedef std::tuple hp_dfsdev_t; +typedef std::tuple hp_ddomain_t; +typedef std::tuple hp_dfsdev_t; typedef std::tuple sd_t; typedef std::tuple tfn_ts_t; typedef std::tuple fn_ts_t; diff --git a/xprof/btx_interval_model.yaml b/xprof/btx_interval_model.yaml index 8cc6fb09..eec61cac 100644 --- a/xprof/btx_interval_model.yaml +++ b/xprof/btx_interval_model.yaml @@ -102,6 +102,11 @@ :type: integer_unsigned :field_value_range: 64 :cast_type: uint64_t + - :name: deviceIdx + :field_class: + :type: integer_unsigned + :field_value_range: 32 + :cast_type: uint32_t - :name: hFrequency :field_class: :type: integer_unsigned @@ -126,6 +131,11 @@ :type: integer_unsigned :field_value_range: 64 :cast_type: uint64_t + - :name: deviceIdx + :field_class: + :type: integer_unsigned + :field_value_range: 32 + :cast_type: uint32_t - :name: hPower :field_class: :type: integer_unsigned @@ -150,6 +160,11 @@ :type: integer_unsigned :field_value_range: 64 :cast_type: uint64_t + - :name: deviceIdx + :field_class: + :type: integer_unsigned + :field_value_range: 32 + :cast_type: uint32_t - :name: hEngine 
:field_class: :type: integer_unsigned @@ -173,6 +188,11 @@ :type: integer_unsigned :field_value_range: 64 :cast_type: uint64_t + - :name: deviceIdx + :field_class: + :type: integer_unsigned + :field_value_range: 32 + :cast_type: uint32_t - :name: hEngine :field_class: :type: integer_unsigned @@ -196,6 +216,11 @@ :type: integer_unsigned :field_value_range: 64 :cast_type: uint64_t + - :name: deviceIdx + :field_class: + :type: integer_unsigned + :field_value_range: 32 + :cast_type: uint32_t - :name: hFabricPort :field_class: :type: integer_unsigned diff --git a/xprof/btx_timeline.cpp b/xprof/btx_timeline.cpp index b9f1321f..cfde74ae 100644 --- a/xprof/btx_timeline.cpp +++ b/xprof/btx_timeline.cpp @@ -43,7 +43,7 @@ struct FabricDetails { }; using timeline_dispatch_t = struct timeline_dispatch_s; -using uuid_getter_t = perfetto_uuid_t (*)(timeline_dispatch_t *, const std::string &, uint64_t, uint64_t, uint64_t, +using uuid_getter_t = perfetto_uuid_t (*)(timeline_dispatch_t *, const std::string &, uint64_t, uint64_t, uint32_t, uint64_t, uint32_t, std::optional); static perfetto_uuid_t gen_perfetto_uuid() { @@ -54,7 +54,7 @@ static perfetto_uuid_t gen_perfetto_uuid() { static perfetto_uuid_t get_parent_counter_track_uuid(timeline_dispatch_t *dispatch, std::string hostname, uint64_t process_id, - thapi_device_id did) { + thapi_device_id did, uint32_t deviceIdx) { perfetto_uuid_t hp_uuid = 0; auto [it, inserted] = dispatch->hp_device2countertracks.insert({{hostname, did}, hp_uuid}); @@ -77,7 +77,7 @@ static perfetto_uuid_t get_parent_counter_track_uuid(timeline_dispatch_t *dispat auto *process = track_descriptor->mutable_process(); process->set_pid(hp_uuid); std::ostringstream oss; - oss << "Hostname " << hostname << " | Device " << did; + oss << "Hostname " << hostname << " | Device " << deviceIdx; // oss << " | " << track_name << " | uuid "; process->set_process_name(oss.str()); return hp_uuid; @@ -86,29 +86,29 @@ static perfetto_uuid_t 
get_parent_counter_track_uuid(timeline_dispatch_t *dispat static perfetto_uuid_t get_counter_track_uuuid(timeline_dispatch_t *dispatch, std::unordered_map &counter_tracks, const std::string &track_name, const std::string &hostname, uint64_t process_id, - thapi_device_id did, uint64_t tHandle, thapi_domain_idx domain, + thapi_device_id did, uint32_t deviceIdx, uint64_t tHandle, thapi_domain_idx domain, std::optional details = std::nullopt, std::unordered_map *counter_tracks_fp = nullptr) { perfetto_uuid_t hp_dev_uuid = 0; perfetto_uuid_t hp_uuid; if (details && counter_tracks_fp != nullptr) { - auto [it, inserted] = counter_tracks_fp->insert({{hostname, process_id, did, tHandle, details->RxTx}, + auto [it, inserted] = counter_tracks_fp->insert({{hostname, process_id, did, tHandle, domain, details->RxTx}, hp_dev_uuid}); auto &potential_uuid = it->second; if (!inserted) return potential_uuid; - hp_uuid = get_parent_counter_track_uuid(dispatch, hostname, process_id, did); + hp_uuid = get_parent_counter_track_uuid(dispatch, hostname, process_id, did, deviceIdx); hp_dev_uuid = gen_perfetto_uuid(); potential_uuid = hp_dev_uuid; } else { - auto [it, inserted] = counter_tracks.insert({{hostname, process_id, did, tHandle}, hp_dev_uuid}); + auto [it, inserted] = counter_tracks.insert({{hostname, process_id, did, domain, tHandle}, hp_dev_uuid}); auto &potential_uuid = it->second; if (!inserted) return potential_uuid; - hp_uuid = get_parent_counter_track_uuid(dispatch, hostname, process_id, did); + hp_uuid = get_parent_counter_track_uuid(dispatch, hostname, process_id, did, deviceIdx); hp_dev_uuid = gen_perfetto_uuid(); potential_uuid = hp_dev_uuid; } @@ -136,38 +136,38 @@ static perfetto_uuid_t get_counter_track_uuuid(timeline_dispatch_t *dispatch, } static perfetto_uuid_t get_copyEU_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, - uint64_t did, uint64_t hEngine, uint32_t subDevice, std::optional options) { - return 
get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, "CopyEngine (%)", hostname, process_id, did, hEngine, subDevice); + uint64_t did, uint32_t deviceIdx, uint64_t hEngine, uint32_t subDevice, std::optional options) { + return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, "CopyEngine (%)", hostname, process_id, did,deviceIdx, hEngine, subDevice); } static perfetto_uuid_t get_computeEU_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, - uint64_t did, uint64_t hEngine, uint32_t subDevice, std::optional options ) { - return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, "ComputeEngine (%)", hostname, process_id, did, hEngine, subDevice); + uint64_t did, uint32_t deviceIdx, uint64_t hEngine, uint32_t subDevice, std::optional options ) { + return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, "ComputeEngine (%)", hostname, process_id, did, deviceIdx, hEngine, subDevice); } static perfetto_uuid_t get_fpThroughput_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, - uint64_t did, uint64_t hFabricPort, uint32_t subDevice, std::optional options) { - return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, "FabricT", hostname, process_id, did, hFabricPort, subDevice, options, &dispatch->hp_dfsdev2fptracks); + uint64_t did, uint32_t deviceIdx, uint64_t hFabricPort, uint32_t subDevice, std::optional options) { + return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, "FabricT", hostname, process_id, did, deviceIdx, hFabricPort, subDevice, options, &dispatch->hp_dfsdev2fptracks); } static perfetto_uuid_t get_power_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, - uint64_t did, uint64_t hPower, uint32_t subDevice, std::optional options) { + uint64_t did, uint32_t deviceIdx, uint64_t hPower, uint32_t subDevice, std::optional options) { // Extra 
space to maintain track sequence in the timeline - return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " Power", hostname, process_id, did, hPower, subDevice); + return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " Power", hostname, process_id, did, deviceIdx, hPower, subDevice); } static perfetto_uuid_t get_frequency_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, - uint64_t did, uint64_t hFrequency, uint32_t subDevice, std::optional options) { - return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " Ferquency", hostname, process_id, did, hFrequency, subDevice); + uint64_t did, uint32_t deviceIdx, uint64_t hFrequency, uint32_t subDevice, std::optional options) { + return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " Ferquency", hostname, process_id, did, deviceIdx, hFrequency, subDevice); } static void add_event_DTelemetry(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, - uint64_t thread_id, uint64_t did, uint64_t tHandle, uint32_t subDevice, + uint64_t thread_id, uint64_t did, uint32_t deviceIdx, uint64_t tHandle, uint32_t subDevice, uint64_t timestamp, float value, uuid_getter_t uuid_getter, const std::string &eventName, std::optional options = std::nullopt) { perfetto_uuid_t track_uuid; - track_uuid = uuid_getter(dispatch, hostname, process_id, did, tHandle, subDevice, options); + track_uuid = uuid_getter(dispatch, hostname, process_id, did, deviceIdx, tHandle, subDevice, options); auto *packet = dispatch->trace.add_packet(); packet->set_trusted_packet_sequence_id(TRUSTED_PACKED_SEQUENCE_ID); @@ -179,44 +179,44 @@ static void add_event_DTelemetry(timeline_dispatch_t *dispatch, const std::strin } static void add_event_fabricPort( timeline_dispatch_t *dispatch, std::string hostname, - uint64_t process_id, uint64_t thread_id, uint64_t did, uintptr_t hFabricPort, + uint64_t process_id, uint64_t 
thread_id, uint64_t did, uint32_t deviceIdx, uintptr_t hFabricPort, uint32_t subDevice, uint64_t timestamp, uint32_t fabricId, uint32_t remotePortId, float rxThroughput, float txThroughput, float rxSpeed, float txSpeed) { // Define details for RX throughput. FabricDetails details = {false, fabricId, remotePortId}; - add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, hFabricPort, subDevice, timestamp, + add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, deviceIdx, hFabricPort, subDevice, timestamp, rxThroughput, get_fpThroughput_track_uuuid, "Fabric ThroughputRX", details); details.RxTx = true; - add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, hFabricPort, subDevice, timestamp, + add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, deviceIdx, hFabricPort, subDevice, timestamp, txThroughput, get_fpThroughput_track_uuuid, "Fabric ThroughputTX", details); } static void add_event_frequency(timeline_dispatch_t *dispatch, std::string hostname, - uint64_t process_id, uint64_t thread_id, uint64_t did, uint64_t hFrequency, + uint64_t process_id, uint64_t thread_id, uint64_t did, uint32_t deviceIdx, uint64_t hFrequency, uint32_t subDevice, uint64_t timestamp, float frequency) { - add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, hFrequency, subDevice, timestamp, + add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, deviceIdx, hFrequency, subDevice, timestamp, frequency, get_frequency_track_uuuid, "Frequency"); } static void add_event_power(timeline_dispatch_t *dispatch, std::string hostname, - uint64_t process_id, uint64_t thread_id, uint64_t did, uint64_t hPower, + uint64_t process_id, uint64_t thread_id, uint64_t did, uint32_t deviceIdx, uint64_t hPower, uint32_t subDevice, uint64_t timestamp, float power) { - add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, hPower, subDevice, timestamp, + add_event_DTelemetry(dispatch, hostname, process_id, 
thread_id, did, deviceIdx, hPower, subDevice, timestamp, power, get_power_track_uuuid, "Power"); } static void add_event_computeEU(timeline_dispatch_t *dispatch, std::string hostname, - uint64_t process_id, uint64_t thread_id, uint64_t did, uint64_t hEngine, + uint64_t process_id, uint64_t thread_id, uint64_t did, uint32_t deviceIdx, uint64_t hEngine, uint32_t subDevice, uint64_t timestamp, float activeTime) { - add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, hEngine, subDevice, timestamp, + add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, deviceIdx, hEngine, subDevice, timestamp, activeTime, get_computeEU_track_uuuid, "ComputeEngine"); } static void add_event_copyEU(timeline_dispatch_t *dispatch, std::string hostname, - uint64_t process_id, uint64_t thread_id, uint64_t did, uint64_t hEngine, + uint64_t process_id, uint64_t thread_id, uint64_t did, uint32_t deviceIdx, uint64_t hEngine, uint32_t subDevice, uint64_t timestamp, float activeTime) { - add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, hEngine, subDevice, timestamp, + add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, deviceIdx, hEngine, subDevice, timestamp, activeTime, get_copyEU_track_uuuid, "CopyEngine"); } @@ -431,39 +431,39 @@ static void device_usr_callback(void *btx_handle, void *usr_data, const char *ho static void frequency_usr_callback(void *btx_handle, void *usr_data, const char *hostname, int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend, - uint64_t did, uint64_t hFrequency, uint32_t domain, uint64_t frequency) { + uint64_t did, uint32_t deviceIdx, uint64_t hFrequency, uint32_t domain, uint64_t frequency) { auto *dispatch = static_cast(usr_data); - add_event_frequency(dispatch, hostname, vpid, vtid, did, hFrequency, domain, ts, frequency); + add_event_frequency(dispatch, hostname, vpid, vtid, did, deviceIdx, hFrequency, domain, ts, frequency); } static void power_usr_callback(void *btx_handle, void *usr_data, 
const char *hostname, int64_t vpid, - uint64_t vtid, int64_t ts, int64_t backend, uint64_t did, uint64_t hPower, + uint64_t vtid, int64_t ts, int64_t backend, uint64_t did, uint32_t deviceIdx, uint64_t hPower, uint32_t domain, uint64_t power) { auto *dispatch = static_cast(usr_data); - add_event_power(dispatch, hostname, vpid, vtid, did, hPower, domain, ts, power); + add_event_power(dispatch, hostname, vpid, vtid, did, deviceIdx, hPower, domain, ts, power); } static void computeEU_usr_callback(void *btx_handle, void *usr_data, const char *hostname, int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend, - uint64_t did, uint64_t hEngine, uint32_t subDevice, float activeTime) { + uint64_t did, uint32_t deviceIdx, uint64_t hEngine, uint32_t subDevice, float activeTime) { auto *dispatch = static_cast(usr_data); - add_event_computeEU(dispatch, hostname, vpid, vtid, did, hEngine, subDevice, ts, activeTime); + add_event_computeEU(dispatch, hostname, vpid, vtid, did, deviceIdx, hEngine, subDevice, ts, activeTime); } static void copyEU_usr_callback(void *btx_handle, void *usr_data, const char *hostname, int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend, - uint64_t did, uint64_t hEngine, uint32_t subDevice, float activeTime) { + uint64_t did, uint32_t deviceIdx, uint64_t hEngine, uint32_t subDevice, float activeTime) { auto *dispatch = static_cast(usr_data); - add_event_copyEU(dispatch, hostname, vpid, vtid, did, hEngine, subDevice, ts, activeTime); + add_event_copyEU(dispatch, hostname, vpid, vtid, did, deviceIdx, hEngine, subDevice, ts, activeTime); } static void fabricPort_usr_callback(void *btx_handle, void *usr_data, const char *hostname, int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend, - uint64_t did, uint64_t hFabricPort, uint32_t subDevice, + uint64_t did, uint32_t deviceIdx, uint64_t hFabricPort, uint32_t subDevice, uint32_t fabricId, uint32_t remotePortId, float rxThroughput, float txThroughput, float rxSpeed, float txSpeed) { auto *dispatch = 
static_cast(usr_data); - add_event_fabricPort(dispatch, hostname, vpid, vtid, did, hFabricPort, subDevice, ts, fabricId, + add_event_fabricPort(dispatch, hostname, vpid, vtid, did, deviceIdx, hFabricPort, subDevice, ts, fabricId, remotePortId, rxThroughput, txThroughput, rxSpeed, txSpeed); } diff --git a/ze/btx_zeinterval_callbacks.cpp b/ze/btx_zeinterval_callbacks.cpp index 3e25e2b2..37b56feb 100644 --- a/ze/btx_zeinterval_callbacks.cpp +++ b/ze/btx_zeinterval_callbacks.cpp @@ -765,6 +765,23 @@ static void zeEventDestroy_exit_callback(void *btx_handle, void *usr_data, int64 * Sampling */ +DeviceHash get_device_hash(void *usr_data, const char *hostname, int64_t vpid, ze_device_handle_t hDevice) { + auto *data = static_cast(usr_data); + const auto it0 = data->sampling_device_property.find({hostname, vpid, hDevice}); + if (it0 != data->sampling_device_property.cend()) { + const auto& [deviceProp, deviceIdx] = it0->second; + + uint64_t hash = 0xcbf29ce484222325; // FNV offset basis + for (int i = 0; i < ZE_MAX_DEVICE_UUID_SIZE; i++) { + hash ^= (uint64_t)deviceProp.uuid.id[i]; + hash *= 0x100000001b3; // FNV prime + } + + return {hash, deviceIdx}; + } + return {0, 0}; // Return 0 values if not found +} + static void lttng_ust_ze_sampling_fabricPort_callback(void *btx_handle, void *usr_data, int64_t ts, const char *hostname, int64_t vpid, uint64_t vtid, ze_device_handle_t hDevice, @@ -801,11 +818,11 @@ static void lttng_ust_ze_sampling_fabricPort_callback(void *btx_handle, void *us double time_diff = static_cast(pFabricPortThroughput_val->timestamp - prev_throughput.timestamp); // Calculate the RX and TX throughput - double rxThroughput = static_cast(pFabricPortThroughput_val->rxCounter - prev_throughput.rxCounter) / double(time_diff); - double txThroughput = static_cast(pFabricPortThroughput_val->txCounter - prev_throughput.txCounter) / double(time_diff); - + double rxThroughput = static_cast(pFabricPortThroughput_val->rxCounter - prev_throughput.rxCounter) / 
time_diff; + double txThroughput = static_cast(pFabricPortThroughput_val->txCounter - prev_throughput.txCounter) / time_diff; + DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice); btx_push_message_lttng_fabricPort(btx_handle, hostname, 0, 0, prev_ts, BACKEND_ZE, - (uint64_t)hDevice,(uint64_t)hFabricPort, subDevice, + uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hFabricPort, subDevice, fabricId, remotePortId, rxThroughput, txThroughput, rxSpeed, txSpeed); // Update the stored values @@ -825,7 +842,7 @@ static void lttng_ust_ze_sampling_engineStats_callback(void *btx_handle, void *u const auto it0 = data->engine_property.find({hostname, vpid, hDevice, hEngine}); if (it0 != data->engine_property.cend()) { const auto& engineProps = it0->second; - uint32_t subDevice = (engineProps.onSubdevice) ? engineProps.subdeviceId : 0; + uint32_t subDevice = engineProps.subdeviceId; // (engineProps.onSubdevice) ? engineProps.subdeviceId : 0; if (engineProps.type == ZES_ENGINE_GROUP_COMPUTE_ALL || engineProps.type == ZES_ENGINE_GROUP_COPY_ALL) { auto [it, inserted] = data->device_engines_ref.insert( @@ -838,13 +855,13 @@ static void lttng_ust_ze_sampling_engineStats_callback(void *btx_handle, void *u double time_diff = static_cast(pEngineStats_val->timestamp - prev_engineStats.timestamp); double activeTime = static_cast(pEngineStats_val->activeTime - prev_engineStats.activeTime) * 100 / time_diff; - + DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice); if (engineProps.type == ZES_ENGINE_GROUP_COMPUTE_ALL) { btx_push_message_lttng_computeEU(btx_handle, hostname, 0, 0, prev_ts, BACKEND_ZE, - (uint64_t)hDevice, (uint64_t)hEngine, subDevice, int(activeTime)); + uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hEngine, subDevice, int(activeTime)); } if (engineProps.type == ZES_ENGINE_GROUP_COPY_ALL) { btx_push_message_lttng_copyEU(btx_handle, hostname,0, 0, prev_ts, BACKEND_ZE, - (uint64_t)hDevice, (uint64_t)hEngine, subDevice, int(activeTime)); + 
uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hEngine, subDevice, int(activeTime)); } it->second = {*pEngineStats_val, ts}; } @@ -871,24 +888,40 @@ static void lttng_ust_ze_sampling_gpu_energy_callback(void *btx_handle, void *us double time_diff = static_cast(pEnergyCounter_val->timestamp - prev_EnergyCounter.timestamp); double power = static_cast(pEnergyCounter_val->energy - prev_EnergyCounter.energy) / time_diff; - btx_push_message_lttng_power(btx_handle, hostname, 0, 0, prev_ts, BACKEND_ZE, (uint64_t)hDevice, (uint64_t)hPower, + DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice); + btx_push_message_lttng_power(btx_handle, hostname, 0, 0, prev_ts, BACKEND_ZE, uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hPower, (thapi_domain_idx)domainIdx, power); it->second = {*pEnergyCounter_val, ts}; } - static void lttng_ust_ze_sampling_gpu_frequency_callback(void *btx_handle, void *usr_data, int64_t ts, const char *hostname, int64_t vpid, uint64_t vtid, ze_device_handle_t hDevice, zes_freq_handle_t hFrequency, uint32_t domainIdx, size_t _pFreqState_val_length, zes_freq_state_t *pFreqState_val) { - printf("frequency: %f\n", pFreqState_val->actual); - btx_push_message_lttng_frequency(btx_handle, hostname, 0, 0, ts, BACKEND_ZE, (uint64_t)hDevice, (uint64_t)hFrequency, - (thapi_domain_idx)domainIdx, pFreqState_val->actual); + DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice); + btx_push_message_lttng_frequency(btx_handle, hostname, 0, 0, ts, BACKEND_ZE, uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hFrequency, + domainIdx, pFreqState_val->actual); } // Properties +static void lttng_ust_ze_sampling_deviceProperties_callback(void *btx_handle, void *usr_data, int64_t ts, + const char *hostname, int64_t vpid, uint64_t vtid, + ze_device_handle_t hDevice, uint32_t deviceIdx,size_t _pDeviceProperties_val_length, + ze_device_properties_t *pDeviceProperties_val) { +auto *data = static_cast(usr_data); + data->sampling_device_property[{hostname, 
vpid, hDevice}] = {*pDeviceProperties_val, deviceIdx}; +} + +static void lttng_ust_ze_sampling_subDeviceProperties_callback(void *btx_handle, void *usr_data, int64_t ts, + const char *hostname, int64_t vpid, uint64_t vtid, + ze_device_handle_t hDevice, ze_device_handle_t hSubDevice, size_t _pSubDeviceProperties_val_length, + ze_device_properties_t *pSubDeviceProperties_val) { + auto *data = static_cast(usr_data); + data->sampling_sub_device_property[{hostname, vpid, (ze_device_handle_t)hSubDevice}] = *pSubDeviceProperties_val; +} + static void lttng_ust_ze_sampling_fabricPortProperties_callback(void *btx_handle, void *usr_data, int64_t ts, const char *hostname, int64_t vpid, uint64_t vtid, ze_device_handle_t hDevice, zes_fabric_port_handle_t hFabricPort, @@ -1023,6 +1056,12 @@ void btx_register_usr_callbacks(void *btx_handle) { &zeCommandListReset_exit_callback); /* Sampling */ + + //Properties + btx_register_callbacks_lttng_ust_ze_sampling_deviceProperties( + btx_handle, <tng_ust_ze_sampling_deviceProperties_callback); + btx_register_callbacks_lttng_ust_ze_sampling_subDeviceProperties( + btx_handle, <tng_ust_ze_sampling_subDeviceProperties_callback); btx_register_callbacks_lttng_ust_ze_sampling_fabricPortProperties( btx_handle, <tng_ust_ze_sampling_fabricPortProperties_callback); btx_register_callbacks_lttng_ust_ze_sampling_powerProperties( @@ -1031,6 +1070,8 @@ void btx_register_usr_callbacks(void *btx_handle) { btx_handle, <tng_ust_ze_sampling_engineProperties_callback); btx_register_callbacks_lttng_ust_ze_sampling_freqProperties( btx_handle, <tng_ust_ze_sampling_freqProperties_callback); + + // Telemetries btx_register_callbacks_lttng_ust_ze_sampling_fabricPort( btx_handle, <tng_ust_ze_sampling_fabricPort_callback); btx_register_callbacks_lttng_ust_ze_sampling_gpu_energy( diff --git a/ze/btx_zeinterval_callbacks.hpp b/ze/btx_zeinterval_callbacks.hpp index bf0a8ad9..53b49569 100644 --- a/ze/btx_zeinterval_callbacks.hpp +++ b/ze/btx_zeinterval_callbacks.hpp @@ 
-11,6 +11,8 @@ #include #include +#define ZE_MAX_DEVICE_UUID_SIZE 16 + typedef std::tuple hp_event_t; typedef std::tuple hp_kernel_t; typedef std::tuple hp_command_list_t; @@ -25,7 +27,9 @@ typedef std::tuple fabricPort_timestamp_ typedef std::tuple energy_timestamp_t; typedef std::tuple engines_timestamp_t; +typedef std::tuple deviceProperty_id_t; +typedef std::tuple hpd_t; typedef std::tuple hpdd_t; typedef std::tuple hpdsd_t; @@ -35,7 +39,7 @@ typedef std::tuple hpdeng_t; typedef std::tuple hpdfsd_t; -typedef std::tuple hpdengsd_t; +typedef std::tuple hpdesd_t; typedef std::tuple hpdpwrd_t; using btx_kernel_group_size_t = std::tuple; @@ -62,6 +66,12 @@ using btx_event_desct_t = using btx_command_list_desc_t = std::tuple; +struct DeviceHash { + uint64_t hash; + uint32_t deviceIdx; +}; + + struct data_s { /* Host */ EntryState entry_state; @@ -90,14 +100,15 @@ struct data_s { std::unordered_map device_timestamps_pair_ref; /* Sampling */ - + std::unordered_map sampling_device_property; + std::unordered_map sampling_sub_device_property; std::unordered_map fabricPort_property; std::unordered_map power_property; std::unordered_map frequency_property; std::unordered_map engine_property; std::unordered_map device_energy_ref; - std::unordered_map device_engines_ref; + std::unordered_map device_engines_ref; std::unordered_map device_fabricPort_ref; }; typedef struct data_s data_t; diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index d1c1d017..446970cf 100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -796,6 +796,7 @@ static int _sampling_engines_initialized = 0; // Static handles to stay throughout the execution static ze_driver_handle_t* _sampling_hDrivers = NULL; static ze_device_handle_t** _sampling_hDevices = NULL; +static ze_device_handle_t*** _sampling_hSubDevices = NULL; static zes_freq_handle_t*** _sampling_hFrequencies = NULL; static zes_pwr_handle_t*** _sampling_hPowers = NULL; static 
zes_engine_handle_t*** _sampling_engineHandles = NULL; @@ -807,6 +808,7 @@ static uint32_t** _sampling_freqDomainCounts = NULL; static uint32_t** _sampling_fabricPortCount = NULL; static uint32_t** _sampling_powerDomainCounts = NULL; static uint32_t** _sampling_engineCounts = NULL; + static void intializeFrequency() { ze_result_t res; _sampling_hFrequencies = (zes_freq_handle_t***) calloc(_sampling_driverCount, sizeof(zes_freq_handle_t**)); @@ -995,6 +997,7 @@ static int initializeHandles() { _sampling_deviceCount = (uint32_t*) calloc(_sampling_driverCount, sizeof(uint32_t)); _sampling_subDeviceCount = (uint32_t**) calloc(_sampling_driverCount, sizeof(uint32_t*)); _sampling_hDevices = (ze_device_handle_t**) calloc(_sampling_driverCount, sizeof(ze_device_handle_t*)); + _sampling_hSubDevices = (ze_device_handle_t***) calloc(_sampling_driverCount, sizeof(ze_device_handle_t**)); // Query device count for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { _sampling_deviceCount[driverIdx] = 0; @@ -1013,15 +1016,43 @@ static int initializeHandles() { } //Get no sub-devices _sampling_subDeviceCount[driverIdx] = (uint32_t*) calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); + _sampling_hSubDevices[driverIdx] = (ze_device_handle_t**) calloc(_sampling_deviceCount[driverIdx], sizeof(ze_device_handle_t*)); for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { + ze_device_properties_t deviceProps = {0}; + deviceProps.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; + deviceProps.pNext = NULL; + res = ZE_DEVICE_GET_PROPERTIES_PTR(_sampling_hDevices[driverIdx][deviceIdx], &deviceProps); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZE_DEVICE_GET_PROPERTIES_PTR", res); + } + do_tracepoint(lttng_ust_ze_sampling, deviceProperties, (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], deviceIdx, + &deviceProps ); + _sampling_subDeviceCount[driverIdx][deviceIdx] = 0; res = 
ZE_DEVICE_GET_SUB_DEVICES_PTR(_sampling_hDevices[driverIdx][deviceIdx], &_sampling_subDeviceCount[driverIdx][deviceIdx], NULL); if (res != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("ZE_DEVICE_GET_SUB_DEVICES_PTR", res); _sampling_subDeviceCount[driverIdx][deviceIdx] = 0; } - if (_sampling_subDeviceCount[driverIdx][deviceIdx] == 0) { - _sampling_subDeviceCount[driverIdx][deviceIdx] = 1; + if (_sampling_subDeviceCount[driverIdx][deviceIdx] > 0) { + _sampling_hSubDevices[driverIdx][deviceIdx] = (ze_device_handle_t*) calloc(_sampling_subDeviceCount[driverIdx][deviceIdx], sizeof(ze_device_handle_t)); + res = ZE_DEVICE_GET_SUB_DEVICES_PTR(_sampling_hDevices[driverIdx][deviceIdx], &_sampling_subDeviceCount[driverIdx][deviceIdx], _sampling_hSubDevices[driverIdx][deviceIdx]); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZE_DEVICE_GET_SUB_DEVICES_PTR", res); + free(_sampling_hSubDevices[driverIdx][deviceIdx]); + _sampling_hSubDevices[driverIdx][deviceIdx] = NULL; + _sampling_subDeviceCount[driverIdx][deviceIdx] = 0; + } + for (uint32_t subDeviceIdx = 0; subDeviceIdx < _sampling_subDeviceCount[driverIdx][deviceIdx]; subDeviceIdx++) { + ze_device_properties_t subDeviceProps = {0}; + subDeviceProps.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; + subDeviceProps.pNext = NULL; + res = ZE_DEVICE_GET_PROPERTIES_PTR(_sampling_hSubDevices[driverIdx][deviceIdx][subDeviceIdx], &subDeviceProps); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZE_DEVICE_GET_PROPERTIES_PTR", res); + } + do_tracepoint(lttng_ust_ze_sampling, subDeviceProperties, (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], (ze_device_handle_t)_sampling_hSubDevices[driverIdx][deviceIdx][subDeviceIdx], &subDeviceProps); + } } } } @@ -1081,6 +1112,7 @@ static void readEnergy_dump(uint32_t driverIdx, uint32_t deviceIdx) { do_tracepoint(lttng_ust_ze_sampling, gpu_energy, (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], (zes_pwr_handle_t)_sampling_hPowers[driverIdx][deviceIdx][domainIdx], domainIdx, 
&energyCounter); } } + static void readEngines_dump(uint32_t driverIdx, uint32_t deviceIdx){ if (!_sampling_engines_initialized) return; ze_result_t result; diff --git a/ze/ze_events.yaml b/ze/ze_events.yaml index 1ef212b4..c9109254 100644 --- a/ze/ze_events.yaml +++ b/ze/ze_events.yaml @@ -1,6 +1,24 @@ --- lttng_ust_ze_sampling: events: + - name: deviceProperties + args: + - [ ze_device_handle_t, hDevice ] + - [ uint32_t, deviceIdx ] + - [ ze_device_properties_t *, pDeviceProperties ] + fields: + - [ ctf_integer_hex, uintptr_t, hDevice, "(uintptr_t)hDevice" ] + - [ ctf_integer, uint32_t, deviceIdx, "deviceIdx" ] + - [ ctf_sequence_text, uint8_t, pDeviceProperties_val, pDeviceProperties, size_t, "sizeof(ze_device_properties_t)" ] + - name: subDeviceProperties + args: + - [ ze_device_handle_t, hDevice ] + - [ ze_device_handle_t, hSubDevice ] + - [ ze_device_properties_t *, pSubDeviceProperties ] + fields: + - [ ctf_integer_hex, uintptr_t, hDevice, "(uintptr_t)hDevice" ] + - [ ctf_integer_hex, uintptr_t, hSubDevice, "(uintptr_t)hSubDevice" ] + - [ ctf_sequence_text, uint8_t, pSubDeviceProperties_val, pSubDeviceProperties, size_t, "sizeof(ze_device_properties_t)" ] - name: engineProperties args: - [ ze_device_handle_t, hDevice ]