Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Intel GPU Energy APIs #563

Draft
wants to merge 3 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions src/variorum/Intel_GPU/GPU.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,43 @@ int intel_gpu_get_power_limit(int long_ver)
}
return 0;
}

/*
 * Print the energy readings of every Intel GPU socket to stdout.
 *
 * If the VARIORUM_LOG environment variable is set and parses to 1, the
 * function name is printed first. The socket count is queried from the
 * topology only when VARIORUM_WITH_INTEL_GPU is defined; otherwise it stays
 * zero and nothing is printed.
 *
 * long_ver  forwarded to get_energy_data() as its verbosity flag.
 *
 * Always returns 0.
 */
int intel_gpu_get_energy(int long_ver)
{
    const char *log_env = getenv("VARIORUM_LOG");
    unsigned socket_count = 0;
    unsigned socket_idx;

    if (log_env != NULL && atoi(log_env) == 1)
    {
        printf("Running %s\n", __FUNCTION__);
    }

#ifdef VARIORUM_WITH_INTEL_GPU
    variorum_get_topology(&socket_count, NULL, NULL, P_INTEL_GPU_IDX);
#endif

    /* One report per socket, in ascending socket order. */
    socket_idx = 0;
    while (socket_idx < socket_count)
    {
        get_energy_data(socket_idx, long_ver, stdout);
        socket_idx++;
    }

    return 0;
}

/*
 * Add the energy readings of every Intel GPU socket to a JSON object.
 *
 * If the VARIORUM_LOG environment variable is set and parses to 1, the
 * function name is printed first. For each socket reported by the topology,
 * get_energy_json() is called to fill in get_energy_obj.
 *
 * get_energy_obj  JSON object that receives the per-socket energy entries.
 *
 * Always returns 0.
 */
int intel_gpu_get_energy_json(json_t *get_energy_obj)
{
    char *val = getenv("VARIORUM_LOG");
    if (val != NULL && atoi(val) == 1)
    {
        printf("Running %s\n", __FUNCTION__);
    }

    unsigned iter = 0;
    /* Start at zero so the loop below is skipped, rather than running on an
     * indeterminate count, whenever the topology query does not fill it in. */
    unsigned nsockets = 0;
#ifdef VARIORUM_WITH_INTEL_GPU
    /* Same guard as intel_gpu_get_energy(). */
    variorum_get_topology(&nsockets, NULL, NULL, P_INTEL_GPU_IDX);
#endif

    for (iter = 0; iter < nsockets; iter++)
    {
        get_energy_json(iter, get_energy_obj);
    }

    return 0;
}
11 changes: 11 additions & 0 deletions src/variorum/Intel_GPU/GPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#ifndef INTEL_GPU_H_INCLUDE
#define INTEL_GPU_H_INCLUDE

#include <jansson.h>

/* Declaration of intel_gpu_get_power(); the definition is not part of this
 * excerpt.
 * NOTE(review): long_ver presumably selects the verbose output format, as it
 * does for intel_gpu_get_energy() -- confirm against the definition. */
extern int intel_gpu_get_power(
int long_ver
);
Expand All @@ -26,4 +28,13 @@ extern int intel_gpu_get_power_limit(
int long_ver
);


/* Print the energy readings of every Intel GPU socket to stdout.
 *
 * long_ver  passed through to get_energy_data() as its verbosity flag;
 *           non-zero selects the labelled one-line-per-device format.
 *
 * Returns 0. */
extern int intel_gpu_get_energy(
int long_ver
);

/* Add the energy readings of every Intel GPU socket to a JSON object.
 *
 * get_energy_obj  jansson object (not a string) that receives the
 *                 per-socket energy entries; named to match the definition.
 *
 * Returns 0. */
extern int intel_gpu_get_energy_json(
    json_t *get_energy_obj
);

#endif
2 changes: 2 additions & 0 deletions src/variorum/Intel_GPU/config_intel_gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ int set_intel_gpu_func_ptrs(int idx)
g_platform[idx].variorum_cap_each_gpu_power_limit =
intel_gpu_cap_each_gpu_power_limit;
g_platform[idx].variorum_print_power_limit = intel_gpu_get_power_limit;
g_platform[idx].variorum_print_energy = intel_gpu_get_energy;
g_platform[idx].variorum_get_energy_json = intel_gpu_get_energy_json;
}
else
{
Expand Down
136 changes: 136 additions & 0 deletions src/variorum/Intel_GPU/intel_gpu_power_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@
// Device count; used to size m_initial_energy_for_gpu and to derive
// m_gpus_per_socket.
static unsigned m_total_unit_devices;
// Devices per socket, computed in initAPMIDG() as
// m_total_unit_devices / m_num_package.
static unsigned m_gpus_per_socket;
// Hostname captured with gethostname() in initAPMIDG(); printed with readings.
static char m_hostname[1024];
// Baseline energy in joules, one entry per device, recorded on the first read
// of each socket; later readings are reported relative to this baseline.
static double *m_initial_energy_for_gpu;
// One flag per package, zero-initialized by calloc(); entry [chipid] is set
// to 1 once the baseline for that socket's devices has been captured.
static int *m_init_energy;

/*
 * Release the energy-baseline buffers allocated in initAPMIDG().
 *
 * Registered with atexit() by initAPMIDG(). The pointers are reset to NULL
 * after being freed so that a repeated call is harmless and any later access
 * fails on a null pointer instead of touching freed memory.
 */
void releaseInitialEnergyForGPU(void)
{
    free(m_initial_energy_for_gpu);
    m_initial_energy_for_gpu = NULL;

    free(m_init_energy);
    m_init_energy = NULL;
}

void initAPMIDG(void)
{
Expand All @@ -35,6 +43,16 @@ void initAPMIDG(void)
#endif
m_gpus_per_socket = m_total_unit_devices / m_num_package;

static int init = 0;
if (!init)
{
m_initial_energy_for_gpu = (double *) malloc(sizeof(double) *
m_total_unit_devices);
m_init_energy = (int *) calloc(m_num_package, sizeof(int));
atexit(releaseInitialEnergyForGPU);
init = 1;
}

/* Save hostname */
gethostname(m_hostname, sizeof(m_hostname));
}
Expand Down Expand Up @@ -288,3 +306,121 @@ void get_power_limit_data(int chipid, int verbose, FILE *output)
cflush();
#endif
}

/*
 * Print the energy used by each GPU device that belongs to one socket.
 *
 * The first call for a given socket records each device's current reading as
 * its baseline and reports 0; subsequent calls report the reading minus that
 * baseline. Readings are treated as microjoules and reported in joules.
 *
 * chipid   socket index; devices [chipid * m_gpus_per_socket,
 *          (chipid + 1) * m_gpus_per_socket) are reported.
 * verbose  non-zero prints one labelled line per device; zero prints a
 *          column layout whose header is emitted only once per process.
 * output   stream that receives the text.
 */
void get_energy_data(int chipid, int verbose, FILE *output)
{
    static int init_output = 0;
    int d;

    // Iterate over all GPU device handles for this socket and print energy
    for (d = chipid * (int)m_gpus_per_socket;
         d < (chipid + 1) * (int)m_gpus_per_socket; ++d)
    {
        int pi = 0; // only report the global power domain
        // Zeroed so a read that leaves it untouched yields a defined value.
        uint64_t energy_uj = 0;
        double value = 0.0;

        apmidg_readenergy(d, pi, &energy_uj, NULL);
        if (!m_init_energy[chipid])
        {
            // First read for this socket: remember the baseline, report 0.
            m_initial_energy_for_gpu[d] = (double)energy_uj * 1.e-6;
            value = 0;
        }
        else
        {
            value = (double)energy_uj * 1.e-6;
            value -= m_initial_energy_for_gpu[d];
        }

        if (verbose)
        {
            fprintf(output, "%s: %s, %s: %d, %s: %d, %s: %lf J\n",
                    "_INTEL_GPU_ENERGY_USAGE Host", m_hostname,
                    "Socket", chipid,
                    "DeviceID", d, "Energy", value);
        }
        else
        {
            if (!init_output)
            {
#ifdef LIBJUSTIFY_FOUND
                cfprintf(output, "%s %s %s %s %s\n",
                         "_INTEL_GPU_ENERGY_USAGE", "Host",
                         "Socket", "DeviceID", "Energy");
#else
                fprintf(output, "%s %s %s %s %s\n",
                        "_INTEL_GPU_ENERGY_USAGE", "Host",
                        "Socket", "DeviceID", "Energy");
#endif
                init_output = 1;
            }
#ifdef LIBJUSTIFY_FOUND
            cfprintf(output, "%s %s %d %d %lf\n",
                     "_INTEL_GPU_ENERGY_USAGE", m_hostname, chipid, d, value);
#else
            fprintf(output, "%s %s %d %d %lf\n",
                    "_INTEL_GPU_ENERGY_USAGE", m_hostname, chipid, d, value);
#endif
        }
    }

    // Mark the baseline as captured only after every device was visited.
    m_init_energy[chipid] = 1;

#ifdef LIBJUSTIFY_FOUND
    // Flush the cfprintf() output, as get_power_limit_data() does on exit.
    // NOTE(review): assumes cflush() is harmless when nothing is pending
    // (verbose mode) -- confirm against libjustify.
    cflush();
#endif
}

/*
 * Add the energy used by each GPU device of one socket to a JSON object.
 *
 * Writes into get_energy_obj:
 *   "num_gpus_per_socket"                  -> integer
 *   "socket_<chipid>"."energy_gpu_joules"  -> { "GPU_<d>": joules, ... }
 * If get_energy_obj already has "energy_node_joules", the sum of this
 * socket's GPU energies is added to it.
 *
 * The first call for a given socket records each device's current reading as
 * its baseline and reports 0; subsequent calls report the reading minus that
 * baseline. Readings are treated as microjoules and reported in joules.
 *
 * chipid          socket index; devices [chipid * m_gpus_per_socket,
 *                 (chipid + 1) * m_gpus_per_socket) are reported.
 * get_energy_obj  JSON object to update.
 */
void get_energy_json(int chipid, json_t *get_energy_obj)
{
    // Compile-time sizes (no VLA); long enough to avoid format truncation
    // for any int value.
    enum { DEV_ID_LEN = 24, SOCKET_ID_LEN = 24 };

    double total_energy_gpu = 0.0;
    int d;
    char devID[DEV_ID_LEN];
    char socket_id[SOCKET_ID_LEN];

    snprintf(socket_id, sizeof socket_id, "socket_%d", chipid);

    json_object_set_new(get_energy_obj, "num_gpus_per_socket",
                        json_integer(m_gpus_per_socket));

    // try to find socket object in node object, set new object if not found
    json_t *socket_obj = json_object_get(get_energy_obj, socket_id);
    if (socket_obj == NULL)
    {
        socket_obj = json_object();
        json_object_set_new(get_energy_obj, socket_id, socket_obj);
    }

    // create new json object for GPU
    json_t *gpu_obj = json_object();
    json_object_set_new(socket_obj, "energy_gpu_joules", gpu_obj);

    for (d = chipid * (int)m_gpus_per_socket;
         d < (chipid + 1) * (int)m_gpus_per_socket; ++d)
    {
        int pi = 0; // only report the global power domain
        // Zeroed so a read that leaves it untouched yields a defined value.
        uint64_t energy_uj = 0;
        double value = 0.0;

        apmidg_readenergy(d, pi, &energy_uj, NULL);
        if (!m_init_energy[chipid])
        {
            // First read for this socket: remember the baseline, report 0.
            m_initial_energy_for_gpu[d] = (double)energy_uj * 1.e-6;
            value = 0;
        }
        else
        {
            value = (double)energy_uj * 1.e-6;
            value -= m_initial_energy_for_gpu[d];
        }
        snprintf(devID, sizeof devID, "GPU_%d", d);
        json_object_set_new(gpu_obj, devID, json_real(value));
        total_energy_gpu += value;
    }

    // Mark the baseline as captured only after every device was visited.
    m_init_energy[chipid] = 1;

    // If the object already carries energy_node_joules, add the GPU total.
    json_t *node_energy = json_object_get(get_energy_obj,
                                          "energy_node_joules");
    if (node_energy != NULL)
    {
        // Read the old value before it is replaced (and released) below.
        double energy_node = json_real_value(node_energy);

        // The *_set_new variant takes over the fresh json_real reference;
        // plain json_object_set() would leak it on every call.
        json_object_set_new(get_energy_obj, "energy_node_joules",
                            json_real(energy_node + total_energy_gpu));
    }
}
13 changes: 13 additions & 0 deletions src/variorum/Intel_GPU/intel_gpu_power_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#include <stdint.h>
#include <stdio.h>

#include <jansson.h>

#include <libapmidg.h>

void initAPMIDG(
Expand Down Expand Up @@ -48,4 +50,15 @@ void get_power_limit_data(
FILE *output
);

/* Print the energy used by each GPU device that belongs to socket `chipid`.
 * The first call for a socket records a baseline and reports 0; later calls
 * report joules relative to that baseline.
 *
 * chipid   socket index.
 * verbose  non-zero selects labelled per-device lines; zero selects a
 *          column layout with a one-time header.
 * output   stream that receives the text. */
void get_energy_data(
int chipid,
int verbose,
FILE *output
);

/* Add the energy used by each GPU device of socket `chipid` to a JSON object.
 *
 * chipid  socket index.
 * output  jansson object to update (called get_energy_obj in the
 *         definition); receives "num_gpus_per_socket" and a
 *         "socket_<chipid>" entry holding "energy_gpu_joules". */
void get_energy_json(
int chipid,
json_t *output
);

#endif
Loading
Loading