From 50a28e621f973c1703dd62908f3dfbb12591f40c Mon Sep 17 00:00:00 2001 From: Solomon Bekele Date: Wed, 16 Oct 2024 14:36:50 +0000 Subject: [PATCH] PR corrections --- xprof/btx_timeline.cpp | 108 +++++++++++++++----------------- ze/btx_zeinterval_callbacks.cpp | 9 ++- 2 files changed, 56 insertions(+), 61 deletions(-) diff --git a/xprof/btx_timeline.cpp b/xprof/btx_timeline.cpp index 6b9142f0..405a3df8 100644 --- a/xprof/btx_timeline.cpp +++ b/xprof/btx_timeline.cpp @@ -32,22 +32,17 @@ struct timeline_dispatch_s { std::unordered_map hp_device2countertracks; std::unordered_map hp_ddomain2telmtracks; - std::unordered_map hp_ddomain2cpytracks; + std::unordered_map hp_ddomain2cpyalloctracks; std::unordered_map hp_dfsdev2fptracks; perfetto_pruned::Trace trace; }; - -//Parameters that are unique to some of the telemetries that we need on the timeline -struct Details { - bool RxTx; - uint32_t fabricId; - uint32_t remotePortId; -}; +// Keeps extra parameters that does not fit the default getter +using Extras = std::tuple; using timeline_dispatch_t = struct timeline_dispatch_s; using uuid_getter_t = perfetto_uuid_t (*)(timeline_dispatch_t *, const std::string &, uint64_t, uint64_t, uint32_t, uint64_t, uint32_t, - std::optional
); + std::optional); static perfetto_uuid_t gen_perfetto_uuid() { // Start at one, Look like UUID 0 is special @@ -90,14 +85,15 @@ static perfetto_uuid_t get_counter_track_uuuid( std::unordered_map &counter_tracks, const std::string &track_name, const std::string &hostname, uint64_t process_id, thapi_device_id did, uint32_t deviceIdx, uint64_t tHandle, thapi_domain_idx domain, - std::optional
details = std::nullopt, + std::optional fabricExtras = std::nullopt, std::unordered_map *counter_tracks_fp = nullptr) { perfetto_uuid_t hp_dev_uuid = 0; perfetto_uuid_t hp_uuid; - if (details && counter_tracks_fp != nullptr) { + if (fabricExtras && counter_tracks_fp != nullptr) { + bool RxTx = std::get<0>(*fabricExtras); auto [it, inserted] = counter_tracks_fp->insert( - {{hostname, process_id, did, tHandle, domain, details->RxTx}, hp_dev_uuid}); + {{hostname, process_id, did, tHandle, domain, RxTx}, hp_dev_uuid}); auto &potential_uuid = it->second; if (!inserted) return potential_uuid; @@ -124,13 +120,15 @@ static perfetto_uuid_t get_counter_track_uuuid( track_descriptor->set_uuid(hp_dev_uuid); track_descriptor->set_parent_uuid(hp_uuid); std::ostringstream oss; - if (track_name == "FabricT" && details) { + if (track_name == "FabricT" && fabricExtras) { + bool RxTx = std::get<0>(*fabricExtras); + uint32_t fabricId = std::get<1>(*fabricExtras); + uint32_t remotePortId = std::get<2>(*fabricExtras); oss << track_name << " | SD " << domain; - oss << " | " << details->fabricId << "<->" << details->remotePortId << " | " - << (details->RxTx ? " TX" : " RX"); - } else if (track_name == " Memory" && details) { - oss << track_name << " Module " << domain; - oss << " | " << (details->RxTx ? "WR BW" : "RD BW"); + oss << " | " << fabricId << "<->" << remotePortId << " | " + << (RxTx ? " TX" : " RX"); + } else if (track_name == " Memory") { + oss << track_name << " BW (%) | Module " << domain; } else if (track_name == "Allocated Memory (%)") { oss << track_name << " Module " << domain; } else if (track_name == " CopyEngine (%)" || track_name == " ComputeEngine (%)") { @@ -150,8 +148,8 @@ static perfetto_uuid_t get_counter_track_uuuid( static perfetto_uuid_t get_copyEU_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, uint64_t did, uint32_t deviceIdx, uint64_t hEngine, - uint32_t subDevice, std::optional
options) { - return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2cpytracks, " CopyEngine (%)", + uint32_t subDevice, std::optional options) { + return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2cpyalloctracks, " CopyEngine (%) ", hostname, process_id, did, deviceIdx, hEngine, subDevice); } @@ -159,25 +157,25 @@ static perfetto_uuid_t get_computeEU_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, uint64_t did, uint32_t deviceIdx, uint64_t hEngine, uint32_t subDevice, - std::optional
options) { - return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " ComputeEngine (%)", + std::optional options) { + return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " ComputeEngine (%) ", hostname, process_id, did, deviceIdx, hEngine, subDevice); } static perfetto_uuid_t get_fpThroughput_track_uuuid( timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, uint64_t did, - uint32_t deviceIdx, uint64_t hFabricPort, uint32_t subDevice, std::optional
options) { + uint32_t deviceIdx, uint64_t hFabricPort, uint32_t subDevice, std::optional fabricExtras) { return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, "FabricT", hostname, - process_id, did, deviceIdx, hFabricPort, subDevice, options, + process_id, did, deviceIdx, hFabricPort, subDevice, fabricExtras, &dispatch->hp_dfsdev2fptracks); } static perfetto_uuid_t get_power_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, uint64_t did, uint32_t deviceIdx, uint64_t hPower, - uint32_t subDevice, std::optional
options) { + uint32_t subDevice, std::optional options) { // Extra space to maintain track sequence in the timeline - return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " Power", hostname, + return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " Power ", hostname, process_id, did, deviceIdx, hPower, subDevice); } @@ -185,28 +183,27 @@ static perfetto_uuid_t get_frequency_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, uint64_t did, uint32_t deviceIdx, uint64_t hFrequency, uint32_t subDevice, - std::optional
options) { - return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " Ferquency", hostname, + std::optional options) { + return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " Frequency ", hostname, process_id, did, deviceIdx, hFrequency, subDevice); } -static perfetto_uuid_t get_Bandwidth_track_uuuid(timeline_dispatch_t *dispatch, +static perfetto_uuid_t get_bandwidth_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, uint64_t did, uint32_t deviceIdx, uint64_t hMemModule, uint32_t subDevice, - std::optional
options) { - return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " Memory", hostname, - process_id, did, deviceIdx, hMemModule, subDevice, options, - &dispatch->hp_dfsdev2fptracks); + std::optional options) { + return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, "Memory", hostname, + process_id, did, deviceIdx, hMemModule, subDevice); } -static perfetto_uuid_t get_Occupancy_track_uuuid(timeline_dispatch_t *dispatch, - const std::string &hostname, uint64_t process_id, - uint64_t did, uint32_t deviceIdx, - uint64_t hMemModule, uint32_t subDevice, - std::optional
options) { - return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, "Allocated Memory (%)", - hostname, process_id, did, deviceIdx, hMemModule, subDevice); +static perfetto_uuid_t get_allocation_track_uuuid(timeline_dispatch_t *dispatch, + const std::string &hostname, uint64_t process_id, + uint64_t did, uint32_t deviceIdx, + uint64_t hMemModule, uint32_t subDevice, + std::optional options) { + return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2cpyalloctracks, "Allocated Memory (%)", + hostname, process_id, did, deviceIdx, hMemModule, subDevice); } static void add_event_DTelemetry(timeline_dispatch_t *dispatch, const std::string &hostname, @@ -214,7 +211,7 @@ static void add_event_DTelemetry(timeline_dispatch_t *dispatch, const std::strin uint32_t deviceIdx, uint64_t tHandle, uint32_t subDevice, uint64_t timestamp, float value, uuid_getter_t uuid_getter, const std::string &eventName, - std::optional
options = std::nullopt) { + std::optional options = std::nullopt) { perfetto_uuid_t track_uuid; track_uuid = uuid_getter(dispatch, hostname, process_id, did, deviceIdx, tHandle, subDevice, options); @@ -232,19 +229,14 @@ static void add_event_memModule(timeline_dispatch_t *dispatch, std::string hostn uint64_t process_id, uint64_t thread_id, uint64_t did, uint32_t deviceIdx, uintptr_t hMemModule, uint32_t subDevice, uint64_t timestamp, float pBandwidth, float rdBandwidth, - float wtBandwidth, float occupancy) { + float wtBandwidth, float allocation) { // Define details for RX throughput. - Details details = {false, 0, 0}; - add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, deviceIdx, hMemModule, - subDevice, timestamp, rdBandwidth, get_Bandwidth_track_uuuid, - "Memory Read BW", details); - details.RxTx = true; add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, deviceIdx, hMemModule, - subDevice, timestamp, wtBandwidth, get_Bandwidth_track_uuuid, - "Memory Write BW", details); + subDevice, timestamp, pBandwidth, get_bandwidth_track_uuuid, "Memory BW"); + add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, deviceIdx, hMemModule, - subDevice, timestamp, occupancy, get_Occupancy_track_uuuid, - "Memory Occupancy", details); + subDevice, timestamp, allocation, get_allocation_track_uuuid, + "Memory Allocation"); } static void add_event_fabricPort(timeline_dispatch_t *dispatch, std::string hostname, @@ -253,16 +245,16 @@ static void add_event_fabricPort(timeline_dispatch_t *dispatch, std::string host uint64_t timestamp, uint32_t fabricId, uint32_t remotePortId, float rxThroughput, float txThroughput, float rxSpeed, float txSpeed) { - // Define details for RX throughput. - Details details = {false, fabricId, remotePortId}; + // Define. + Extras fabricExtras = {false, fabricId, remotePortId}; add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, deviceIdx, hFabricPort, subDevice, timestamp, rxThroughput, get_fpThroughput_track_uuuid, - "Fabric ThroughputRX", details); + "Fabric ThroughputRX", fabricExtras); - details.RxTx = true; + fabricExtras = {true, fabricId, remotePortId}; add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, deviceIdx, hFabricPort, subDevice, timestamp, txThroughput, get_fpThroughput_track_uuuid, - "Fabric ThroughputTX", details); + "Fabric ThroughputTX", fabricExtras); } static void add_event_frequency(timeline_dispatch_t *dispatch, std::string hostname, @@ -557,10 +549,10 @@ static void memModule_usr_callback(void *btx_handle, void *usr_data, const char int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend, uint64_t did, uint32_t deviceIdx, uint64_t hMemModule, uint32_t subDevice, float pBandwidth, float rdBandwidth, - float wtBandwidth, float occupancy) { + float wtBandwidth, float allocation) { auto *dispatch = static_cast(usr_data); add_event_memModule(dispatch, hostname, vpid, vtid, did, deviceIdx, hMemModule, subDevice, ts, - pBandwidth, rdBandwidth, wtBandwidth, occupancy); + pBandwidth, rdBandwidth, wtBandwidth, allocation); } void btx_register_usr_callbacks(void *btx_handle) { diff --git a/ze/btx_zeinterval_callbacks.cpp b/ze/btx_zeinterval_callbacks.cpp index 556dca0e..3ee35c0e 100644 --- a/ze/btx_zeinterval_callbacks.cpp +++ b/ze/btx_zeinterval_callbacks.cpp @@ -796,7 +796,8 @@ static void lttng_ust_ze_sampling_fabricPort_callback( auto subDevice = it0->second.subdeviceId; auto fabricId = it0->second.portId.fabricId; auto remotePortId = pFabricPortState_val->remotePortId.fabricId; - // Current Speed (not used currently in the timeline) + // Current Speed (bytes/sec) place holder (not used currently in the timeline) + // https://spec.oneapi.io/level-zero/1.9.3/sysman/PROG.html#operations-on-fabric-ports double rxSpeed = static_cast(pFabricPortState_val->rxSpeed.bitRate * pFabricPortState_val->rxSpeed.width) / 8.0; @@ -869,10 +870,12 @@ static void lttng_ust_ze_sampling_memStats_callback( double allocation = static_cast(pMemState_val->size - pMemState_val->free) * 100.0 / static_cast(pMemState_val->size); double time_diff = static_cast(pMemBandwidth_val->timestamp - prev_bandwidth.timestamp); + //percentage bandwidth based on the manual double pBandwidth = - static_cast((pMemBandwidth_val->readCounter - prev_bandwidth.readCounter) + + 100.0 * static_cast((pMemBandwidth_val->readCounter - prev_bandwidth.readCounter) + (pMemBandwidth_val->writeCounter - prev_bandwidth.writeCounter)) * 1e6 / (time_diff * pMemBandwidth_val->maxBandwidth); + // rd and wt bandwidth if further drilling needed (place holder for now) double rdBandwidth = static_cast(pMemBandwidth_val->readCounter - prev_bandwidth.readCounter) * 1e6 / (time_diff); @@ -931,7 +934,7 @@ static void lttng_ust_ze_sampling_engineStats_callback(void *btx_handle, void *u uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hEngine, subDevice, int(activeTime)); } - if (engineProps.type == ZES_ENGINE_GROUP_COPY_ALL) { + else if (engineProps.type == ZES_ENGINE_GROUP_COPY_ALL) { btx_push_message_lttng_copyEU(btx_handle, hostname, 0, 0, prev_ts, BACKEND_ZE, uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hEngine, subDevice, int(activeTime));