Skip to content

Commit

Permalink
replace powmon with var_monitor throughout
Browse files Browse the repository at this point in the history
  • Loading branch information
slabasan committed Mar 12, 2024
1 parent 82fddb3 commit 02f166a
Show file tree
Hide file tree
Showing 12 changed files with 33 additions and 33 deletions.
6 changes: 3 additions & 3 deletions scripts/license.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@
r"^src/examples/using-with-cmake/c/.*\.c$",
r"^src/examples/using-with-cmake/c\+\+/.*\.c$",
r"^src/examples/using-with-make/c/.*\.c$",
# variorum powmon
r"^src/powmon/.*CMakeLists.txt$",
r"^src/powmon/.*\.[ch]$",
# variorum monitoring utility
r"^src/var_monitor/.*CMakeLists.txt$",
r"^src/var_monitor/.*\.[ch]$",
# variorum tests
r"^src/tests/.*CMakeLists.txt$",
r"^src/tests/.*\.cpp$",
Expand Down
6 changes: 3 additions & 3 deletions src/docs/sphinx/VarMonitor.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,16 @@ The resulting data is written to two files:

.. code:: bash
hostname.power.dat
hostname.power.summary
hostname.var_monitor.dat
hostname.var_monitor.summary
Here, ``hostname`` will change based on the node where the monitoring is
occurring. The ``summary`` file contains global information such as execution
time. The ``dat`` file contains the time sampled data, such as power, thermals,
and performance counters in a column-delimited format. The output differs on
each platform based on available counters.

``Powmon`` also supports profiling across multiple nodes with the help of
``var_monitor`` also supports profiling across multiple nodes with the help of
resource manager commands (such as ``srun`` or ``jsrun``) or MPI commands (such
as ``mpirun``). As shown in the example below, the user can specify the number
of nodes through ``mpirun`` and utilize ``var_monitor`` with their application.
Expand Down
8 changes: 4 additions & 4 deletions src/var_monitor/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ VAR_MONITOR
===========
This directory contains three Variorum-based power monitors. The resulting
data is written to two files:
* hostname.power.dat
* hostname.power.summary
* hostname.var_monitor.dat
* hostname.var_monitor.summary

`hostname` will change based on the node where the monitoring is occurring. The
`summary` file contains global information such as execution time. The `dat`
Expand All @@ -26,10 +26,10 @@ seconds:

$ var_monitor -a "sleep 10"

Powmon also allows sampling of utilization. The example below will sample
`var_monitor` also allows sampling of utilization. The example below will sample
utilization metrics as well as power while executing a sleep for 10 seconds:

$ powmon -u -a "sleep 10"
$ var_monitor -u -a "sleep 10"

power_wrapper_static
--------------------
Expand Down
2 changes: 1 addition & 1 deletion src/var_monitor/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ void parse_json_power_obj(char *s, int num_sockets)
}

// If we're on a CPU-only build, we don't have num_gpus_per_socket.
// Powmon doesn't need to print this, but needs to know this value.
// var_monitor doesn't need to print this, but needs to know this value.
if (json_object_get(node_obj, "num_gpus_per_socket") != NULL)
{
num_gpus_per_socket = json_integer_value(json_object_get(node_obj,
Expand Down
2 changes: 1 addition & 1 deletion src/var_monitor/power_wrapper_dynamic.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ int main(int argc, char **argv)
char hostname[64];
gethostname(hostname, 64);

rc = asprintf(&fname_dat, "%s.power.dat", hostname);
rc = asprintf(&fname_dat, "%s.var_monitor.dat", hostname);
if (rc == -1)
{
fprintf(stderr,
Expand Down
2 changes: 1 addition & 1 deletion src/var_monitor/power_wrapper_static.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ int main(int argc, char **argv)
char hostname[64];
gethostname(hostname, 64);

rc = asprintf(&fname_dat, "%s.power.dat", hostname);
rc = asprintf(&fname_dat, "%s.var_monitor.dat", hostname);
if (rc == -1)
{
fprintf(stderr,
Expand Down
10 changes: 5 additions & 5 deletions src/var_monitor/scripts/var_monitor-ibm-post-process.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@ if [ $# -eq 0 ]; then
echo "$0 <path-to-var_monitor-dat>"
exit 1
fi
POWMON_DAT_FILE=$1
VAR_MONITOR_DAT_FILE=$1

NAME=$(ls ${POWMON_DAT_FILE} | cut -d "." -f 1)
END=$(ls ${POWMON_DAT_FILE} | cut -d "." -f 2-3)
NAME=$(ls ${VAR_MONITOR_DAT_FILE} | cut -d "." -f 1)
END=$(ls ${VAR_MONITOR_DAT_FILE} | cut -d "." -f 2-3)

NEW_F1=${NAME}-socket0.${END}
NEW_F2=${NAME}-socket1.${END}

grep _IBMPOWER0 ${POWMON_DAT_FILE} > ${NEW_F1}
grep _IBMPOWER1 ${POWMON_DAT_FILE} > ${NEW_F2}
grep _IBMPOWER0 ${VAR_MONITOR_DAT_FILE} > ${NEW_F1}
grep _IBMPOWER1 ${VAR_MONITOR_DAT_FILE} > ${NEW_F2}

Rscript --vanilla var_monitor-ibm-plot.R ${NEW_F1} ${NEW_F2}

Expand Down
6 changes: 3 additions & 3 deletions src/var_monitor/scripts/var_monitor-plot.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

# This script plots power data colloect by 'var_monitor'. It reads the CSV files of a single
# This script plots power data collected by 'var_monitor'. It reads the CSV files of a single
# run, where each file holds the power data of one node that ran the tasks. It plots power data per
# node, where there is a figure for each node. It also plots the descriptive stats of
# the power data of all nodes.
Expand All @@ -22,7 +22,7 @@
# --type or -t: optional to select what to plot.
# per-node: plot power data per node only. If the app were run on 4 nodes, 4 plots
# are created.
# aggregate: plot discriptive stats of the power data of all nodes. There should be
# aggregate: plot descriptive stats of the power data of all nodes. There should be
# four plots, mean, max, min, median.
# --description or -d: required to add a title of the figure.
#
Expand Down Expand Up @@ -116,7 +116,7 @@ def plotAggregatedData(aggData, outputPath, dStat):
# ---------------------------------------------------------------------
def plotPowData(df, host, outputPath, desc):
plt.figure(figsize=(11, 7))
# compute diff of timstamps
# compute diff of timestamps
# make first row 1 instead of nan
# then compute the cumulative sum of timestamps
# TODO: redo this in a simpler way
Expand Down
4 changes: 2 additions & 2 deletions src/var_monitor/var_monitor.c
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ int main(int argc, char **argv)
if (logpath)
{
/* Output trace data into the specified location. */
rc = asprintf(&fname_dat, "%s/%s.power.dat", logpath, hostname);
rc = asprintf(&fname_dat, "%s/%s.var_monitor.dat", logpath, hostname);
if (rc == -1)
{
fprintf(stderr,
Expand All @@ -229,7 +229,7 @@ int main(int argc, char **argv)
else
{
/* Output trace data into the default location. */
rc = asprintf(&fname_dat, "%s.power.dat", hostname);
rc = asprintf(&fname_dat, "%s.var_monitor.dat", hostname);
if (rc == -1)
{
fprintf(stderr,
Expand Down
2 changes: 1 addition & 1 deletion src/variorum/ARM/juno_r2_power_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ int arm_cpu_juno_r2_json_get_power_data(json_t *get_power_obj)
json_object_set_new(get_power_obj, "power_node_watts",
json_real((double)(sys_power_val) / 1000000.0f));
// While number of GPUs is 1, it is only resident on the first socket.
// Powmon won't print GPU power as a result,
// var_monitor won't print GPU power as a result,
// but GPU power is available in the JSON object.
json_object_set_new(get_power_obj, "num_gpus_per_socket",
json_integer(-1));
Expand Down
8 changes: 4 additions & 4 deletions src/variorum/Intel/counters_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -1339,10 +1339,10 @@ void get_all_power_data_fixed(FILE *writedest, off_t msr_pkg_power_limit,
snprintf(thread_strs[i][4], max_str_len, "TSC%d", i);
}

cfprintf(writedest, "%-s %s ", "_POWMON", "time");
cfprintf(writedest, "%-s %s ", "_VAR_MONITOR", "time");
#else

fprintf(writedest, "_POWMON time");
fprintf(writedest, "_VAR_MONITOR time");
#endif

for (i = 0; i < nsockets; i++)
Expand Down Expand Up @@ -1411,9 +1411,9 @@ void get_all_power_data_fixed(FILE *writedest, off_t msr_pkg_power_limit,
rlim_idx = 0;

#ifdef LIBJUSTIFY_FOUND
cfprintf(writedest, "%-s %ld ", "_POWMON", now_ms());
cfprintf(writedest, "%-s %ld ", "_VAR_MONITOR", now_ms());
#else
fprintf(writedest, "%s %ld", "_POWMON", now_ms());
fprintf(writedest, "%s %ld", "_VAR_MONITOR", now_ms());
#endif

for (i = 0; i < nsockets; i++)
Expand Down
10 changes: 5 additions & 5 deletions src/variorum/Intel/intel_power_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -1500,12 +1500,12 @@ void get_all_power_data(FILE *writedest, off_t msr_pkg_power_limit,

rapl_storage(&rapl);
#ifdef LIBJUSTIFY_FOUND
cfprintf(writedest, "%s %s ", "_POWMON", "time");
cfprintf(writedest, "%s %s ", "_VAR_MONITOR", "time");
int pkglabels = 5;
int max_str_len = 128;
char pkg_strs[nsockets][pkglabels][max_str_len];
#else
fprintf(writedest, "_POWMON time");
fprintf(writedest, "_VAR_MONITOR time");
#endif

for (i = 0; i < nsockets; i++)
Expand Down Expand Up @@ -1548,11 +1548,11 @@ void get_all_power_data(FILE *writedest, off_t msr_pkg_power_limit,
#ifdef LIBJUSTIFY_FOUND
//cfprintf(writedest, "\n");
cfprintf(writedest, "\n");
cfprintf(writedest, "%s %lf ", "_POWMON", now_ms());
cfprintf(writedest, "%s %lf ", "_VAR_MONITOR", now_ms());
//cflush();
#else
fprintf(writedest, "\n");
fprintf(writedest, "%s %ld", "_POWMON", now_ms());
fprintf(writedest, "%s %ld", "_VAR_MONITOR", now_ms());

#endif
}
Expand All @@ -1574,7 +1574,7 @@ void get_all_power_data(FILE *writedest, off_t msr_pkg_power_limit,
}
#ifdef LIBJUSTIFY_FOUND
cfprintf(writedest, "\n");
cfprintf(writedest, "%s %lf ", "_POWMON", now_ms());
cfprintf(writedest, "%s %lf ", "_VAR_MONITOR", now_ms());
//cflush();
#else
fprintf(writedest, "\n");
Expand Down

0 comments on commit 02f166a

Please sign in to comment.