Skip to content

Commit

Permalink
PR corrections
Browse files Browse the repository at this point in the history
  • Loading branch information
Solomon Bekele committed Oct 16, 2024
1 parent 6fb44fb commit 50a28e6
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 61 deletions.
108 changes: 50 additions & 58 deletions xprof/btx_timeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,22 +32,17 @@ struct timeline_dispatch_s {

std::unordered_map<h_device_t, perfetto_uuid_t> hp_device2countertracks;
std::unordered_map<hp_ddomain_t, perfetto_uuid_t> hp_ddomain2telmtracks;
std::unordered_map<hp_ddomain_t, perfetto_uuid_t> hp_ddomain2cpytracks;
std::unordered_map<hp_ddomain_t, perfetto_uuid_t> hp_ddomain2cpyalloctracks;
std::unordered_map<hp_dfsdev_t, perfetto_uuid_t> hp_dfsdev2fptracks;
perfetto_pruned::Trace trace;
};

//Parameters that are unique to some of the telemetries that we need on the timeline
struct Details {
bool RxTx;
uint32_t fabricId;
uint32_t remotePortId;
};
// Keeps extra parameters that does not fit the default getter
using Extras = std::tuple<bool, uint32_t, uint32_t>;

using timeline_dispatch_t = struct timeline_dispatch_s;
using uuid_getter_t = perfetto_uuid_t (*)(timeline_dispatch_t *, const std::string &, uint64_t,
uint64_t, uint32_t, uint64_t, uint32_t,
std::optional<Details>);
std::optional<Extras>);

static perfetto_uuid_t gen_perfetto_uuid() {
// Start at one, Look like UUID 0 is special
Expand Down Expand Up @@ -90,14 +85,15 @@ static perfetto_uuid_t get_counter_track_uuuid(
std::unordered_map<hp_ddomain_t, perfetto_uuid_t> &counter_tracks,
const std::string &track_name, const std::string &hostname, uint64_t process_id,
thapi_device_id did, uint32_t deviceIdx, uint64_t tHandle, thapi_domain_idx domain,
std::optional<Details> details = std::nullopt,
std::optional<Extras> fabricExtras = std::nullopt,
std::unordered_map<hp_dfsdev_t, perfetto_uuid_t> *counter_tracks_fp = nullptr) {
perfetto_uuid_t hp_dev_uuid = 0;
perfetto_uuid_t hp_uuid;

if (details && counter_tracks_fp != nullptr) {
if (fabricExtras && counter_tracks_fp != nullptr) {
bool RxTx = std::get<0>(*fabricExtras);
auto [it, inserted] = counter_tracks_fp->insert(
{{hostname, process_id, did, tHandle, domain, details->RxTx}, hp_dev_uuid});
{{hostname, process_id, did, tHandle, domain, RxTx}, hp_dev_uuid});
auto &potential_uuid = it->second;
if (!inserted)
return potential_uuid;
Expand All @@ -124,13 +120,15 @@ static perfetto_uuid_t get_counter_track_uuuid(
track_descriptor->set_uuid(hp_dev_uuid);
track_descriptor->set_parent_uuid(hp_uuid);
std::ostringstream oss;
if (track_name == "FabricT" && details) {
if (track_name == "FabricT" && fabricExtras) {
bool RxTx = std::get<0>(*fabricExtras);
uint32_t fabricId = std::get<1>(*fabricExtras);
uint32_t remotePortId = std::get<2>(*fabricExtras);
oss << track_name << " | SD " << domain;
oss << " | " << details->fabricId << "<->" << details->remotePortId << " | "
<< (details->RxTx ? " TX" : " RX");
} else if (track_name == " Memory" && details) {
oss << track_name << " Module " << domain;
oss << " | " << (details->RxTx ? "WR BW" : "RD BW");
oss << " | " << fabricId << "<->" << remotePortId << " | "
<< (RxTx ? " TX" : " RX");
} else if (track_name == " Memory") {
oss << track_name << " BW (%) | Module " << domain;
} else if (track_name == "Allocated Memory (%)") {
oss << track_name << " Module " << domain;
} else if (track_name == " CopyEngine (%)" || track_name == " ComputeEngine (%)") {
Expand All @@ -150,71 +148,70 @@ static perfetto_uuid_t get_counter_track_uuuid(
static perfetto_uuid_t get_copyEU_track_uuuid(timeline_dispatch_t *dispatch,
const std::string &hostname, uint64_t process_id,
uint64_t did, uint32_t deviceIdx, uint64_t hEngine,
uint32_t subDevice, std::optional<Details> options) {
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2cpytracks, " CopyEngine (%)",
uint32_t subDevice, std::optional<Extras> options) {
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2cpyalloctracks, " CopyEngine (%) ",
hostname, process_id, did, deviceIdx, hEngine, subDevice);
}

static perfetto_uuid_t get_computeEU_track_uuuid(timeline_dispatch_t *dispatch,
const std::string &hostname, uint64_t process_id,
uint64_t did, uint32_t deviceIdx, uint64_t hEngine,
uint32_t subDevice,
std::optional<Details> options) {
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " ComputeEngine (%)",
std::optional<Extras> options) {
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " ComputeEngine (%) ",
hostname, process_id, did, deviceIdx, hEngine, subDevice);
}

static perfetto_uuid_t get_fpThroughput_track_uuuid(
timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, uint64_t did,
uint32_t deviceIdx, uint64_t hFabricPort, uint32_t subDevice, std::optional<Details> options) {
uint32_t deviceIdx, uint64_t hFabricPort, uint32_t subDevice, std::optional<Extras> fabricExtras) {
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, "FabricT", hostname,
process_id, did, deviceIdx, hFabricPort, subDevice, options,
process_id, did, deviceIdx, hFabricPort, subDevice, fabricExtras,
&dispatch->hp_dfsdev2fptracks);
}

static perfetto_uuid_t get_power_track_uuuid(timeline_dispatch_t *dispatch,
const std::string &hostname, uint64_t process_id,
uint64_t did, uint32_t deviceIdx, uint64_t hPower,
uint32_t subDevice, std::optional<Details> options) {
uint32_t subDevice, std::optional<Extras> options) {
// Extra space to maintain track sequence in the timeline
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " Power", hostname,
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " Power ", hostname,
process_id, did, deviceIdx, hPower, subDevice);
}

static perfetto_uuid_t get_frequency_track_uuuid(timeline_dispatch_t *dispatch,
const std::string &hostname, uint64_t process_id,
uint64_t did, uint32_t deviceIdx,
uint64_t hFrequency, uint32_t subDevice,
std::optional<Details> options) {
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " Ferquency", hostname,
std::optional<Extras> options) {
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " Frequency ", hostname,
process_id, did, deviceIdx, hFrequency, subDevice);
}

static perfetto_uuid_t get_Bandwidth_track_uuuid(timeline_dispatch_t *dispatch,
static perfetto_uuid_t get_bandwidth_track_uuuid(timeline_dispatch_t *dispatch,
const std::string &hostname, uint64_t process_id,
uint64_t did, uint32_t deviceIdx,
uint64_t hMemModule, uint32_t subDevice,
std::optional<Details> options) {
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " Memory", hostname,
process_id, did, deviceIdx, hMemModule, subDevice, options,
&dispatch->hp_dfsdev2fptracks);
std::optional<Extras> options) {
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, "Memory", hostname,
process_id, did, deviceIdx, hMemModule, subDevice);
}

static perfetto_uuid_t get_Occupancy_track_uuuid(timeline_dispatch_t *dispatch,
const std::string &hostname, uint64_t process_id,
uint64_t did, uint32_t deviceIdx,
uint64_t hMemModule, uint32_t subDevice,
std::optional<Details> options) {
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, "Allocated Memory (%)",
hostname, process_id, did, deviceIdx, hMemModule, subDevice);
static perfetto_uuid_t get_allocation_track_uuuid(timeline_dispatch_t *dispatch,
const std::string &hostname, uint64_t process_id,
uint64_t did, uint32_t deviceIdx,
uint64_t hMemModule, uint32_t subDevice,
std::optional<Extras> options) {
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2cpyalloctracks, "Allocated Memory (%)",
hostname, process_id, did, deviceIdx, hMemModule, subDevice);
}

static void add_event_DTelemetry(timeline_dispatch_t *dispatch, const std::string &hostname,
uint64_t process_id, uint64_t thread_id, uint64_t did,
uint32_t deviceIdx, uint64_t tHandle, uint32_t subDevice,
uint64_t timestamp, float value, uuid_getter_t uuid_getter,
const std::string &eventName,
std::optional<Details> options = std::nullopt) {
std::optional<Extras> options = std::nullopt) {
perfetto_uuid_t track_uuid;
track_uuid =
uuid_getter(dispatch, hostname, process_id, did, deviceIdx, tHandle, subDevice, options);
Expand All @@ -232,19 +229,14 @@ static void add_event_memModule(timeline_dispatch_t *dispatch, std::string hostn
uint64_t process_id, uint64_t thread_id, uint64_t did,
uint32_t deviceIdx, uintptr_t hMemModule, uint32_t subDevice,
uint64_t timestamp, float pBandwidth, float rdBandwidth,
float wtBandwidth, float occupancy) {
float wtBandwidth, float allocation) {
// Define details for RX throughput.
Details details = {false, 0, 0};
add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, deviceIdx, hMemModule,
subDevice, timestamp, rdBandwidth, get_Bandwidth_track_uuuid,
"Memory Read BW", details);
details.RxTx = true;
add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, deviceIdx, hMemModule,
subDevice, timestamp, wtBandwidth, get_Bandwidth_track_uuuid,
"Memory Write BW", details);
subDevice, timestamp, pBandwidth, get_bandwidth_track_uuuid, "Memory BW");

add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, deviceIdx, hMemModule,
subDevice, timestamp, occupancy, get_Occupancy_track_uuuid,
"Memory Occupancy", details);
subDevice, timestamp, allocation, get_allocation_track_uuuid,
"Memory Allocation");
}

static void add_event_fabricPort(timeline_dispatch_t *dispatch, std::string hostname,
Expand All @@ -253,16 +245,16 @@ static void add_event_fabricPort(timeline_dispatch_t *dispatch, std::string host
uint64_t timestamp, uint32_t fabricId, uint32_t remotePortId,
float rxThroughput, float txThroughput, float rxSpeed,
float txSpeed) {
// Define details for RX throughput.
Details details = {false, fabricId, remotePortId};
// Define.
Extras fabricExtras = {false, fabricId, remotePortId};
add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, deviceIdx, hFabricPort,
subDevice, timestamp, rxThroughput, get_fpThroughput_track_uuuid,
"Fabric ThroughputRX", details);
"Fabric ThroughputRX", fabricExtras);

details.RxTx = true;
fabricExtras = {true, fabricId, remotePortId};
add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, deviceIdx, hFabricPort,
subDevice, timestamp, txThroughput, get_fpThroughput_track_uuuid,
"Fabric ThroughputTX", details);
"Fabric ThroughputTX", fabricExtras);
}

static void add_event_frequency(timeline_dispatch_t *dispatch, std::string hostname,
Expand Down Expand Up @@ -557,10 +549,10 @@ static void memModule_usr_callback(void *btx_handle, void *usr_data, const char
int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend,
uint64_t did, uint32_t deviceIdx, uint64_t hMemModule,
uint32_t subDevice, float pBandwidth, float rdBandwidth,
float wtBandwidth, float occupancy) {
float wtBandwidth, float allocation) {
auto *dispatch = static_cast<timeline_dispatch_t *>(usr_data);
add_event_memModule(dispatch, hostname, vpid, vtid, did, deviceIdx, hMemModule, subDevice, ts,
pBandwidth, rdBandwidth, wtBandwidth, occupancy);
pBandwidth, rdBandwidth, wtBandwidth, allocation);
}

void btx_register_usr_callbacks(void *btx_handle) {
Expand Down
9 changes: 6 additions & 3 deletions ze/btx_zeinterval_callbacks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -796,7 +796,8 @@ static void lttng_ust_ze_sampling_fabricPort_callback(
auto subDevice = it0->second.subdeviceId;
auto fabricId = it0->second.portId.fabricId;
auto remotePortId = pFabricPortState_val->remotePortId.fabricId;
// Current Speed (not used currently in the timeline)
// Current Speed (bytes/sec) place holder (not used currently in the timeline)
// https://spec.oneapi.io/level-zero/1.9.3/sysman/PROG.html#operations-on-fabric-ports
double rxSpeed = static_cast<double>(pFabricPortState_val->rxSpeed.bitRate *
pFabricPortState_val->rxSpeed.width) /
8.0;
Expand Down Expand Up @@ -869,10 +870,12 @@ static void lttng_ust_ze_sampling_memStats_callback(
double allocation = static_cast<double>(pMemState_val->size - pMemState_val->free) * 100.0 /
static_cast<double>(pMemState_val->size);
double time_diff = static_cast<double>(pMemBandwidth_val->timestamp - prev_bandwidth.timestamp);
//percentage bandwidth based on the manual
double pBandwidth =
static_cast<double>((pMemBandwidth_val->readCounter - prev_bandwidth.readCounter) +
100.0 * static_cast<double>((pMemBandwidth_val->readCounter - prev_bandwidth.readCounter) +
(pMemBandwidth_val->writeCounter - prev_bandwidth.writeCounter)) *
1e6 / (time_diff * pMemBandwidth_val->maxBandwidth);
// rd and wt bandwidth if further drilling needed (place holder for now)
double rdBandwidth =
static_cast<double>(pMemBandwidth_val->readCounter - prev_bandwidth.readCounter) * 1e6 /
(time_diff);
Expand Down Expand Up @@ -931,7 +934,7 @@ static void lttng_ust_ze_sampling_engineStats_callback(void *btx_handle, void *u
uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hEngine,
subDevice, int(activeTime));
}
if (engineProps.type == ZES_ENGINE_GROUP_COPY_ALL) {
else if (engineProps.type == ZES_ENGINE_GROUP_COPY_ALL) {
btx_push_message_lttng_copyEU(btx_handle, hostname, 0, 0, prev_ts, BACKEND_ZE,
uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hEngine,
subDevice, int(activeTime));
Expand Down

0 comments on commit 50a28e6

Please sign in to comment.