From 8f537984a6cee1d929fc29f21893093248979aec Mon Sep 17 00:00:00 2001
From: Chris Vasiladiotis
Date: Wed, 6 Nov 2024 16:53:10 +0000
Subject: [PATCH 01/20] Add scripts from paper repo

---
 plots-cgo2025-ae/__init__.py | 0
 .../config/cycles/all_barchart.json | 36 +++
 .../config/cycles/all_barchart.mplstyle | 74 +++++
 .../config/cycles/xdsl_barchart.json | 36 +++
 .../config/cycles/xdsl_barchart.mplstyle | 74 +++++
 plots-cgo2025-ae/config/gridplot.mplstyle | 70 +++++
 plots-cgo2025-ae/cycles.py | 47 ++++
 plots-cgo2025-ae/data.py | 258 ++++++++++++++++++
 plots-cgo2025-ae/fp_throughput.py | 68 +++++
 plots-cgo2025-ae/fpu.py | 74 +++++
 plots-cgo2025-ae/heatmap.py | 141 ++++++++++
 plots-cgo2025-ae/low_level_representation.py | 83 ++++++
 plots-cgo2025-ae/max_util.py | 23 ++
 plots-cgo2025-ae/opt_pipeline.py | 115 ++++++++
 plots-cgo2025-ae/pass_improvements.py | 70 +++++
 plots-cgo2025-ae/pass_improvements_stacked.py | 133 +++++++++
 plots-cgo2025-ae/plot.py | 50 ++++
 plots-cgo2025-ae/plot_utils.py | 241 ++++++++++++++++
 plots-cgo2025-ae/regalloc.py | 63 +++++
 plots-cgo2025-ae/throughput.py | 57 ++++
 20 files changed, 1713 insertions(+)
 create mode 100644 plots-cgo2025-ae/__init__.py
 create mode 100644 plots-cgo2025-ae/config/cycles/all_barchart.json
 create mode 100644 plots-cgo2025-ae/config/cycles/all_barchart.mplstyle
 create mode 100644 plots-cgo2025-ae/config/cycles/xdsl_barchart.json
 create mode 100644 plots-cgo2025-ae/config/cycles/xdsl_barchart.mplstyle
 create mode 100644 plots-cgo2025-ae/config/gridplot.mplstyle
 create mode 100644 plots-cgo2025-ae/cycles.py
 create mode 100644 plots-cgo2025-ae/data.py
 create mode 100644 plots-cgo2025-ae/fp_throughput.py
 create mode 100644 plots-cgo2025-ae/fpu.py
 create mode 100644 plots-cgo2025-ae/heatmap.py
 create mode 100644 plots-cgo2025-ae/low_level_representation.py
 create mode 100644 plots-cgo2025-ae/max_util.py
 create mode 100644 plots-cgo2025-ae/opt_pipeline.py
 create mode 100644 plots-cgo2025-ae/pass_improvements.py
 create mode 100644 plots-cgo2025-ae/pass_improvements_stacked.py
 create mode 100644 plots-cgo2025-ae/plot.py
 create mode 100644 plots-cgo2025-ae/plot_utils.py
 create mode 100644 plots-cgo2025-ae/regalloc.py
 create mode 100644 plots-cgo2025-ae/throughput.py

diff --git a/plots-cgo2025-ae/__init__.py b/plots-cgo2025-ae/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/plots-cgo2025-ae/config/cycles/all_barchart.json b/plots-cgo2025-ae/config/cycles/all_barchart.json
new file mode 100644
index 00000000..44762471
--- /dev/null
+++ b/plots-cgo2025-ae/config/cycles/all_barchart.json
@@ -0,0 +1,36 @@
+{
+    "axis": {
+        "xlabel": {
+            "label": ""
+        },
+        "ylabel": {
+            "label": "Cycles",
+            "rotation": "horizontal",
+            "position": [1.0, 1.05],
+            "horizontalalignment": "left",
+            "verticalalignment": "bottom"
+        },
+        "ylim": [
+            0,
+            5000
+        ],
+        "xticks": {
+            "labelrotation": 45
+        }
+    },
+    "legend": {
+        "ncol": 50,
+        "bbox_to_anchor": [0, 1, 1, 0],
+        "loc": "lower right"
+    },
+    "spines" : {
+        "left": {
+            "color": "black",
+            "linewidth": 0.4
+        },
+        "bottom": {
+            "color": "black",
+            "linewidth": 0.4
+        }
+    }
+}
diff --git a/plots-cgo2025-ae/config/cycles/all_barchart.mplstyle b/plots-cgo2025-ae/config/cycles/all_barchart.mplstyle
new file mode 100644
index 00000000..980b425c
--- /dev/null
+++ b/plots-cgo2025-ae/config/cycles/all_barchart.mplstyle
@@ -0,0 +1,74 @@
+# vim: ft=config
+
+## Use TrueType fonts instead of Type 3 fonts
+#
+# Type 3 fonts embed bitmaps and are not allowed in camera-ready submissions
+# for many conferences.
TrueType fonts look better and are accepted. +# This follows: https://www.conference-publishing.com/Help.php +pdf.fonttype: 42 +ps.fonttype: 42 + +font.size: 12 +#font.family: serif +font.family: sans-serif +#font.sans-serif: ["Helvetica"] + +text.usetex: True + +## Enable tight_layout by default +# +# This ensures the plot has always sufficient space for legends, ... +# Without this sometimes parts of the figure would be cut off. +figure.autolayout: True + +#figure.figsize: 3, 2.5 +figure.figsize: 4, 2.5 + +legend.frameon: False +legend.fontsize: 8 +legend.loc: upper right +legend.fancybox: False +legend.framealpha: 1.0 +legend.shadow: False +legend.borderaxespad: 0 +legend.edgecolor: gray +legend.handleheight: 1 +legend.labelspacing: 0.2 +legend.columnspacing: 0.4 +legend.handletextpad: 0.2 + +patch.edgecolor: black +patch.force_edgecolor: False +patch.linewidth: 0.4 + +xtick.top: False +xtick.bottom: True +xtick.major.size: 3 +xtick.major.width: 0.4 + +ytick.left: True +ytick.right: False +ytick.direction: out +ytick.major.size: 3 +ytick.major.width: 0.4 + +axes.grid.axis: y +axes.grid: True + +# Hide the right and top spines +# +# This reduces the number of lines in the plot. Lines typically catch +# a readers attention and distract the reader from the actual content. +# By removing unnecessary spines, we help the reader to focus on +# the figures in the graph. +axes.spines.right: False +axes.spines.top: False + +grid.color: black +grid.alpha: 0.2 +grid.linewidth: 0.4 +grid.linestyle: dotted + + +savefig.bbox: tight +savefig.pad_inches: 0.05 diff --git a/plots-cgo2025-ae/config/cycles/xdsl_barchart.json b/plots-cgo2025-ae/config/cycles/xdsl_barchart.json new file mode 100644 index 00000000..918e08fe --- /dev/null +++ b/plots-cgo2025-ae/config/cycles/xdsl_barchart.json @@ -0,0 +1,36 @@ +{ + "axis": { + "xlabel": { + "label": "" + }, + "ylabel": { + "label": "Cycles", + "rotation": "horizontal", + "position": [1.0, 1.05], + "horizontalalignment": "left", + "verticalalignment": "bottom" + }, + "ylim": [ + 0, + 5000 + ], + "xticks": { + "labelrotation": 0 + } + }, + "legend": { + "ncol": 50, + "bbox_to_anchor": [0, 1, 1, 0], + "loc": "lower right" + }, + "spines" : { + "left": { + "color": "black", + "linewidth": 0.4 + }, + "bottom": { + "color": "black", + "linewidth": 0.4 + } + } +} diff --git a/plots-cgo2025-ae/config/cycles/xdsl_barchart.mplstyle b/plots-cgo2025-ae/config/cycles/xdsl_barchart.mplstyle new file mode 100644 index 00000000..163c89d5 --- /dev/null +++ b/plots-cgo2025-ae/config/cycles/xdsl_barchart.mplstyle @@ -0,0 +1,74 @@ +# vim: ft=config + +## Use TrueType fonts instead of Type 3 fonts +# +# Type 3 fonts embed bitmaps and are not allowed in camera-ready submissions +# for many conferences. TrueType fonts look better and are accepted. +# This follows: https://www.conference-publishing.com/Help.php +pdf.fonttype: 42 +ps.fonttype: 42 + +font.size: 12 +#font.family: serif +font.family: sans-serif +#font.sans-serif: ["Helvetica"] + +text.usetex: True + +## Enable tight_layout by default +# +# This ensures the plot has always sufficient space for legends, ... +# Without this sometimes parts of the figure would be cut off. 
+figure.autolayout: True + +#figure.figsize: 3, 2.5 +figure.figsize: 3, 2 + +legend.frameon: False +legend.fontsize: 8 +legend.loc: upper right +legend.fancybox: False +legend.framealpha: 1.0 +legend.shadow: False +legend.borderaxespad: 0 +legend.edgecolor: gray +legend.handleheight: 1 +legend.labelspacing: 0.2 +legend.columnspacing: 0.4 +legend.handletextpad: 0.2 + +patch.edgecolor: black +patch.force_edgecolor: False +patch.linewidth: 0.4 + +xtick.top: False +xtick.bottom: True +xtick.major.size: 3 +xtick.major.width: 0.4 + +ytick.left: True +ytick.right: False +ytick.direction: out +ytick.major.size: 3 +ytick.major.width: 0.4 + +axes.grid.axis: y +axes.grid: True + +# Hide the right and top spines +# +# This reduces the number of lines in the plot. Lines typically catch +# a readers attention and distract the reader from the actual content. +# By removing unnecessary spines, we help the reader to focus on +# the figures in the graph. +axes.spines.right: False +axes.spines.top: False + +grid.color: black +grid.alpha: 0.2 +grid.linewidth: 0.4 +grid.linestyle: dotted + + +savefig.bbox: tight +savefig.pad_inches: 0.05 diff --git a/plots-cgo2025-ae/config/gridplot.mplstyle b/plots-cgo2025-ae/config/gridplot.mplstyle new file mode 100644 index 00000000..1ace4cb8 --- /dev/null +++ b/plots-cgo2025-ae/config/gridplot.mplstyle @@ -0,0 +1,70 @@ +# vim: ft=config + +## Use TrueType fonts instead of Type 3 fonts +# +# Type 3 fonts embed bitmaps and are not allowed in camera-ready submissions +# for many conferences. TrueType fonts look better and are accepted. +# This follows: https://www.conference-publishing.com/Help.php +pdf.fonttype: 42 +ps.fonttype: 42 + +font.size: 12 +#font.family: serif +font.family: sans-serif +#font.sans-serif: ["Helvetica"] + +text.usetex: True + +## Enable tight_layout by default +# +# This ensures the plot has always sufficient space for legends, ... +# Without this sometimes parts of the figure would be cut off. +figure.autolayout: True + +#figure.figsize: 3, 2.5 +#figure.figsize: 4, 2.5 + +legend.frameon: False +legend.fontsize: 14 +legend.loc: upper center +legend.fancybox: False +legend.framealpha: 1.0 +legend.shadow: False +legend.borderaxespad: 0 +legend.edgecolor: gray +legend.handleheight: 1 +legend.labelspacing: 0.1 +legend.columnspacing: 0.8 +legend.handletextpad: 0.1 + +xtick.top: False +xtick.bottom: True +xtick.major.size: 3 +xtick.major.width: 0.4 + +ytick.left: True +ytick.right: False +ytick.direction: out +ytick.major.size: 3 +ytick.major.width: 0.4 + +axes.grid.axis: y +axes.grid: True + +# Hide the right and top spines +# +# This reduces the number of lines in the plot. Lines typically catch +# a readers attention and distract the reader from the actual content. +# By removing unnecessary spines, we help the reader to focus on +# the figures in the graph. 
+axes.spines.right: False +axes.spines.top: False + +grid.color: black +grid.alpha: 0.2 +grid.linewidth: 0.4 +grid.linestyle: dashed + + +savefig.bbox: tight +savefig.pad_inches: 0.02 diff --git a/plots-cgo2025-ae/cycles.py b/plots-cgo2025-ae/cycles.py new file mode 100644 index 00000000..36899dcd --- /dev/null +++ b/plots-cgo2025-ae/cycles.py @@ -0,0 +1,47 @@ +from typing import Iterable, Sequence, cast +import pandas as pd +import numpy as np +import numpy.typing as npt +from plot_utils import IMPL_COLORS, IMPL_MARKERS, plot_combined, GridPlotRow +from matplotlib.axes import Axes + +from math import log10, ceil, floor + + +class CyclesGridPlotRow(GridPlotRow): + ylabel = "Cycles" + + @classmethod + def yrange(cls, dfs: Sequence[pd.DataFrame]) -> npt.NDArray[np.float64]: + max_value = cast(float, max(_df.max().iloc[0] for _df in dfs)) + magnitude: float = 10 ** floor(log10(max_value)) + greater_round_number = ceil(max_value / magnitude) * magnitude + yrange = np.arange(0, greater_round_number + 1, greater_round_number // 10) + return yrange + + @classmethod + def plot_grid_cell( + cls, + ax: Axes, + df: pd.DataFrame, + *, + hide_xlabel: bool, + ) -> None: + for col in df: + ax.scatter( + x=df.index, + y=df[col], + color=IMPL_COLORS[col], + marker=IMPL_MARKERS[col], + ) + ax.set_xticks(df.index) + if not hide_xlabel: + ax.set_xlabel(df.index.name, fontsize=12) + + +def plot_cycles(cycles_dfs: tuple[pd.DataFrame, ...]): + return plot_combined( + CyclesGridPlotRow.get_rows(cycles_dfs, 4), + legend_cols=3, + rcparams_cfg_file="config/gridplot.mplstyle", + ) diff --git a/plots-cgo2025-ae/data.py b/plots-cgo2025-ae/data.py new file mode 100644 index 00000000..58104850 --- /dev/null +++ b/plots-cgo2025-ae/data.py @@ -0,0 +1,258 @@ +from collections.abc import Iterable +import pandas as pd +import numpy as np + +from enum import StrEnum + + +class Impl(StrEnum): + OURS = "Ours" + CLANG = "Clang" + MLIR = "MLIR" + + +class Operator(StrEnum): + CONV = "Conv 3x3" + FILL = "Fill" + MATMUL = "MatMul" + MATMUL_TRANSB = "MatMulT" + MAX_POOL = "Max Pool 3x3" + RELU = "ReLU" + SUM = "Sum" + SUM_POOL = "Sum Pool 3x3" + + +OPERATOR_BY_TEST = { + "conv2d_d1_s1_3x3": Operator.CONV, + "fill": Operator.FILL, + "matmul": Operator.MATMUL, + "matmul_transb": Operator.MATMUL_TRANSB, + "pooling_nchw_max_d1_s2_3x3": Operator.MAX_POOL, + "relu": Operator.RELU, + "dsum": Operator.SUM, + "sum": Operator.SUM, + "pooling_nchw_sum_d1_s2_3x3": Operator.SUM_POOL, +} + +PARAMS_BY_OPERATOR = { + Operator.CONV: ("M", "N"), + Operator.FILL: ("M", "N"), + Operator.MATMUL: ("M", "K", "N"), + Operator.MATMUL_TRANSB: ("M", "K", "N"), + Operator.MAX_POOL: ("M", "N"), + Operator.RELU: ("M", "N"), + Operator.SUM: ("M", "N"), + Operator.SUM_POOL: ("M", "N"), +} + +FLOPS_BY_OPERATOR = { + Operator.CONV: lambda m, n: 2 * 9 * n * m, + Operator.FILL: lambda m, n: n * m, + Operator.MATMUL: lambda m, k, n: 2 * n * m * k, + Operator.MATMUL_TRANSB: lambda m, k, n: 2 * n * m * k, + Operator.MAX_POOL: lambda m, n: 9 * n * m, + Operator.RELU: lambda m, n: n * m, + Operator.SUM: lambda m, n: n * m, + Operator.SUM_POOL: lambda m, n: 9 * n * m, +} +""" +FLOPS adjusted for whether the operation can benefit from the fmadd instruction. 
+""" + +OPERAND_SHAPES_BY_OPERATOR = { + Operator.CONV: lambda m, n: ((m, n),), + Operator.FILL: lambda m, n: ((m, n),), + Operator.MATMUL: lambda m, k, n: ((m, k), (k, n)), + Operator.MATMUL_TRANSB: lambda m, k, n: ((m, k), (n, k)), + Operator.MAX_POOL: lambda m, n: ((m, n),), + Operator.RELU: lambda m, n: ((m, n),), + Operator.SUM: lambda m, n: ((m, n), (m, n)), + Operator.SUM_POOL: lambda m, n: ((m, n),), +} + +FMA_OPERATORS = {Operator.CONV, Operator.MATMUL, Operator.MATMUL_TRANSB} + + +def _get_kernels(filename: str) -> pd.DataFrame: + df = pd.read_csv(filename) + df.replace( + { + "linalg_xdsl": Impl.OURS, + "snitch_stream": Impl.OURS, + "baseline": Impl.CLANG, + "linalg": Impl.MLIR, + **OPERATOR_BY_TEST, + }, + inplace=True, + ) + df = df[df.impl.isin(set(Impl))] + df.set_index(["test", "params"], inplace=True) + # Get the result of adding_overhead for each operator and concatenate the dataframes + df_with_overhead = pd.concat([ + adding_overhead(df[df.index.get_level_values(0) == operator], operator) + for operator in Operator + if operator in df.index.get_level_values(0) + ]) + return df_with_overhead + + +def get_kernels(cleaned: bool = True) -> pd.DataFrame: + df = _get_kernels("results/kernels.csv") + # Drop unknown operators + df = df[df.index.get_level_values(0).isin(tuple(Operator))] + if cleaned: + # exclude K=400 matmul entries + df = df[df.index.get_level_values(1) != "1x400x25xf64"] + return df + + +def get_low_level_representation() -> pd.DataFrame: + return _get_kernels("results/kernels.low_level_representation.csv") + + +def get_pivoted_all(kernels_df: pd.DataFrame) -> pd.DataFrame: + return kernels_df.pivot(columns="impl") + + +def get_pivoted_fpu(pivoted_all_df: pd.DataFrame) -> pd.DataFrame: + return pivoted_all_df["fpss_fpu_occupancy"] + + +def get_pivoted_cycles(pivoted_all_df: pd.DataFrame) -> pd.DataFrame: + return pivoted_all_df[["cycles", "Min Cycles", "Overhead", "FLOPs", "Throughput", "Max Throughput"]] + + +def get_flops(operator_df: pd.DataFrame, operator: Operator) -> pd.Series: + operator_series: list[pd.Series] = [ + operator_df[param] for param in PARAMS_BY_OPERATOR[operator] + ] + return FLOPS_BY_OPERATOR[operator](*operator_series) + + +def get_overhead( + kernels_operator_df: pd.DataFrame, + operator: Operator, +) -> pd.DataFrame: + cols = PARAMS_BY_OPERATOR[operator] + col_vals: pd.DataFrame = kernels_operator_df.index.get_level_values(1).str.extract( + "x".join((r"(\d+)" for _ in range(len(cols)))) + r"xf(\d+)" + ) + col_vals.columns = cols + ("bitwidth",) + col_vals.index = kernels_operator_df.index + col_vals = col_vals.apply(pd.to_numeric) + flops = get_flops(col_vals, operator) + assert (64 % col_vals.bitwidth == 0).all() + throughput = flops / kernels_operator_df["cycles"] + max_throughput = (2 if operator in FMA_OPERATORS else 1) * 64 // col_vals["bitwidth"] + rel_throughput = throughput / max_throughput + min_cycles = np.ceil((flops / max_throughput)) + overhead = kernels_operator_df["cycles"] - min_cycles + res = pd.DataFrame({ + "Min Cycles": min_cycles, + "Overhead": overhead, + "FLOPs": flops, + "Throughput": throughput, + "Max Throughput": max_throughput, + "Rel Throughput": rel_throughput, + "bitwidth": col_vals["bitwidth"] + }) + return res + +def adding_overhead( + operator_df: pd.DataFrame, + operator: Operator, +) -> pd.DataFrame: + return pd.concat( + ( + operator_df, + get_overhead( + operator_df, + operator + ), + ), + axis=1, + ) + +def get_operator_df( + pivoted_df: pd.DataFrame, operator: Operator, *, bitwidth: int +) -> 
pd.DataFrame: + cols = PARAMS_BY_OPERATOR[operator] + operator_df = pivoted_df.loc[operator.value] + col_vals = operator_df.index.str.extract( + "x".join((r"(\d+)" for _ in range(len(cols)))) + r"xf(\d+)" + ) + col_vals.columns = cols + ("bitwidth",) + col_vals.index = operator_df.index + df = pd.concat( + ( + operator_df, + col_vals.apply(pd.to_numeric), + ), + axis=1, + ) + df = df[df.bitwidth == bitwidth] + df.index.name = operator.value + return df + + +def get_params_dfs(operator_df: pd.DataFrame) -> Iterable[pd.DataFrame]: + name = operator_df.index.name + cols = PARAMS_BY_OPERATOR[name] + maxs = tuple(operator_df[col].max() for col in cols) + operand_shapes_map = OPERAND_SHAPES_BY_OPERATOR[name] + for i, col in enumerate(cols): + my_df = operator_df + name_components: list[str] = [] + for j, other_col in enumerate(cols): + if i != j: + name_components.append(str(int(maxs[j]))) + my_df = my_df[my_df[other_col] == maxs[j]] + else: + name_components.append(col) + other_cols = list(cols) + ["bitwidth"] + del other_cols[i] + shape_string = " ".join("x".join(t) for t in operand_shapes_map(*name_components)) + new_name = f"{name} {shape_string}" + my_df = my_df.rename(columns={col: new_name}).set_index(new_name).sort_index() + my_df.drop(other_cols, axis=1, inplace=True) + yield my_df + + +def get_regalloc() -> pd.DataFrame: + regalloc_df = pd.read_csv("results/regalloc.csv") + regalloc_df = regalloc_df[regalloc_df["impl"].isin(OPERATOR_BY_TEST)] + regalloc_df.replace(OPERATOR_BY_TEST, inplace=True) + regalloc_df = regalloc_df[~regalloc_df["params"].str.contains("f16")] + regalloc_df.reset_index(drop=True, inplace=True) + param_components = tuple(param.split("x") for param in regalloc_df["params"]) + bitwidths = tuple({"Bits": param[-1][1:]} for param in param_components) + params = tuple( + {p: v for p, v in zip("MNK", param[:-1])} for param in param_components + ) + params_df = pd.DataFrame(params).fillna("{--}") + regalloc_df = pd.concat((regalloc_df, pd.DataFrame(bitwidths), params_df), axis=1) + del regalloc_df["params"] + + # Reorder columns to move Cycles and Occupancy to the end + cols = regalloc_df.columns.tolist() + cols = [col for col in cols if col not in ['allocated_float', 'allocated_int']] + [ + 'allocated_float', + 'allocated_int', + ] + regalloc_df = regalloc_df[cols] + + return regalloc_df + + +def get_opt_pipeline() -> pd.DataFrame: + opt_pipeline_df = pd.read_csv("results/pipeline.csv") + opt_pipeline_df = opt_pipeline_df.rename( + columns={ + "FPU Occupancy [%]": "Occupancy", + "FMAdd Issues": "FMAdd", + "FRep Count": "FRep", + "variant": "Optimizations", + } + ) + + return opt_pipeline_df diff --git a/plots-cgo2025-ae/fp_throughput.py b/plots-cgo2025-ae/fp_throughput.py new file mode 100644 index 00000000..db341377 --- /dev/null +++ b/plots-cgo2025-ae/fp_throughput.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 + +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt +import argparse +import numpy as np + +LABELS = { + "conv2d_d1_s1_3x3": "2D Convolution", + "ddot": "Inner Product", + "dense": "Dense ReLU Layer", + "dsum": "Vector\nElement-wise Add", + "matmul": "Matrix Multiplication", + "pooling_nchw_max_d1_s2_3x3": "Max Pooling Layer", + "pooling_nchw_sum_d1_s2_3x3": "Sum Pooling Layer", +} + +def add_metrics(data: pd.DataFrame) -> pd.DataFrame: + if "fp_inst_throughput" not in data: + data["fp_inst_throughput"] = data["fpss_fpu_issues"] / data["cycles"] + if "fp_flop_throughput" not in data: + data["fp_flop_throughput"] = ( + 
data["fpss_fpu_issues"] + data["fpss_fpu_fmadd_issues"] + ) / data["cycles"] + return data + +def generate_throughput(data): + df = data[data["impl"].isin(["snitch_stream", "linalg_xdsl"])] + # If multiple rows (experiments) are present for a single 'test', + # just pick the best one: + df = df.loc[df.groupby("test")["fp_flop_throughput"].idxmax()] + # Use meaningful labels: + df["test"] = df["test"].map(LABELS) + fig, ax = plt.subplots(figsize=(8, 9)) + sns.histplot( + ax=ax, + data=df, + x="test", + weights="fp_flop_throughput", + legend=False, + edgecolor=None, + shrink=0.9, + ) + plt.axhline(y=2, color="grey", linestyle="--", linewidth=1) + plt.axhline(y=1, color="grey", linestyle="--", linewidth=0.5) + plt.xticks(rotation=45, fontsize=8) + ax.set_title("Kernels FP sustained throughput @ f64") + ax.set_xlabel("") + ax.set_ylabel("FLOP/cycle") + ax.set_yticks(np.arange(0, 2.1, 0.1)) + return fig + + +def main(): + parser = argparse.ArgumentParser( + description="Generate kernels FP throughput from CSV data." + ) + parser.add_argument("csv_file", help="Path to the CSV file") + args = parser.parse_args() + data = pd.read_csv(args.csv_file) + data = add_metrics(data) + fig = generate_throughput(data) + fig.savefig(f"fp_throughput.pdf", format="pdf") + + +if __name__ == "__main__": + main() diff --git a/plots-cgo2025-ae/fpu.py b/plots-cgo2025-ae/fpu.py new file mode 100644 index 00000000..911222be --- /dev/null +++ b/plots-cgo2025-ae/fpu.py @@ -0,0 +1,74 @@ +from typing import Sequence +from matplotlib.axes import Axes +import pandas as pd +import numpy as np +import numpy.typing as npt +from data import Impl, Operator, get_operator_df, get_params_dfs +from plot_utils import IMPL_COLORS, IMPL_MARKERS, GridPlotRow, plot_combined + + +def all_plot_dfs( + pivoted_df: pd.DataFrame, operators: tuple[Operator, ...] 
+) -> tuple[pd.DataFrame, ...]: + return tuple( + param_df + for operator in operators + for param_df in get_params_dfs( + get_operator_df(pivoted_df, operator, bitwidth=64) + ) + ) + + +def get_fpu(pivoted_fpu_df: pd.DataFrame) -> tuple[pd.DataFrame, ...]: + return all_plot_dfs( + pivoted_fpu_df.filter([Impl.OURS, Impl.CLANG, Impl.MLIR]), + ( + Operator.SUM, + Operator.FILL, + Operator.RELU, + Operator.CONV, + Operator.MAX_POOL, + Operator.SUM_POOL, + # Operator.MATMUL, # Matmul included in other plot + ), + ) + + +class FPUGridPlotRow(GridPlotRow): + ylabel = "FPU Utilization" + + @classmethod + def yrange(cls, dfs: Sequence[pd.DataFrame]) -> npt.NDArray[np.float64]: + return np.arange(0.0, 1.1, 0.1) + + @classmethod + def plot_grid_cell( + cls, + ax: Axes, + df: pd.DataFrame, + *, + hide_xlabel: bool, + ) -> None: + for col in df: + ax.scatter( + x=df.index, + y=df[col], + color=IMPL_COLORS[col], + marker=IMPL_MARKERS[col], + ) + ax.set_xticks(df.index) + if not hide_xlabel: + ax.set_xlabel(df.index.name) + + @classmethod + def get_roofline(cls, df: pd.DataFrame) -> float | None: + df['Performance Roofline'] = 1.0 + return 1.0 + + +def plot_fpu(fpu_dfs: tuple[pd.DataFrame, ...]): + return plot_combined( + FPUGridPlotRow.get_rows(fpu_dfs, 6, hide_xtick_labels=[True, False]), + legend_cols=4, + rcparams_cfg_file="config/gridplot.mplstyle", + ) diff --git a/plots-cgo2025-ae/heatmap.py b/plots-cgo2025-ae/heatmap.py new file mode 100644 index 00000000..1b120432 --- /dev/null +++ b/plots-cgo2025-ae/heatmap.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 + +import numpy as np +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt +import argparse + + +def add_metrics(data: pd.DataFrame) -> pd.DataFrame: + if "fp_inst_throughput" not in data: + data["fp_inst_throughput"] = data["fpss_fpu_issues"] / data["cycles"] + if "fp_flop_throughput" not in data: + data["fp_flop_throughput"] = ( + data["fpss_fpu_issues"] + data["fpss_fpu_fmadd_issues"] + ) / data["cycles"] + return data + + +def highlight_1_8_cells(ax: plt.axes, df: pd.DataFrame): + Ks = df["K"].unique() + Ns = df["N"].unique() + threshold = 1.8 + + # Find the first index in every row with a value above 90 + for i, k in enumerate(Ks): + row = df[df["K"] == k] + left_index = row[row["fp_flop_throughput"] >= threshold]["N"].min() + if pd.notna(left_index): + ax.axvline( + x=(left_index - 4) / 4, + ymin=(k - 4) / 64, + ymax=k / 64, + color='white', + linewidth=1, + alpha=1, + ) + + # Find the last index in every column with a value above 90 + for j, n in enumerate(Ns): + col = df[df["N"] == n] + bottom_index = col[col["fp_flop_throughput"] >= threshold]["K"].min() + if pd.notna(bottom_index): + ax.axhline( + xmin=(n - 4) / 64, + xmax=n / 64, + y=(bottom_index - 4) / 4, + color='white', + linewidth=1, + alpha=1, + ) + + +def generate_heatmaps(data: pd.DataFrame): + data[["M", "K", "N", "bitwidth"]] = data["params"].str.extract( + r"(\d+)x(\d+)x(\d+)xf(\d+)" + ) + data[["M", "K", "N", "bitwidth"]] = data[["M", "K", "N", "bitwidth"]].astype(int) + + # filter out K values unrelated to the heatmap experimental runs + data = data[(data["K"] <= 65)] + + sns.set(rc={'text.usetex': True}) + + for m_value, m_group in data.groupby("M"): + selection = m_group[["K", "N", "fp_flop_throughput"]] + pivot = ( + selection.pivot(index="K", columns="N", values="fp_flop_throughput") + / 2 + * 100 + ).apply(np.floor) + + # find the min value to use in colorbar + min_val = (pivot.agg('min').agg('min') // 10) * 10 + + fig, ax = 
plt.subplots(figsize=(5, 4)) + sns.heatmap( + pivot, + ax=ax, + annot=True, + fmt=".0f", + cmap="YlGnBu", + vmin=min_val, + vmax=100, + annot_kws={"fontsize": 9}, + cbar_kws={ + "orientation": "horizontal", + "aspect": 35, + "shrink": 0.85, + "pad": 0.07, + }, + ) + # ax.set_title( + # "Matrix multiplication, $C_{{M \\times N}} = A_{{M \\times K}} B_{{K \\times N}}$ with $M={}$ @ f64".format( + # m_value + # ) + # ) + ax.tick_params( + axis="both", + which="major", + labelbottom=True, + bottom=False, + top=False, + labeltop=False, + pad=0.0, + length=4, + ) + ax.invert_yaxis() # make sure bottom-left corner is origin for both dimensions + ax.set_xlabel("$N$") + ax.set_ylabel("$K$", rotation=0) + plt.yticks(rotation=0) + ax.yaxis.set_label_coords(-0.05, 0.95, transform=None) + ax.xaxis.set_label_coords(1, -0.05, transform=None) + + # Skip every second x-axis label + for label in ax.xaxis.get_ticklabels()[1::2]: + label.set_visible(False) + + cbar = ax.collections[0].colorbar + cbar.set_label("\% of FLOP/cycle Roofline", labelpad=2, fontsize=10) + cbar.ax.tick_params(size=0) + + plt.tight_layout() + highlight_1_8_cells(ax, selection) + yield m_value, fig + + +def main(): + parser = argparse.ArgumentParser(description="Generate heatmaps from CSV data.") + parser.add_argument("csv_file", help="Path to the CSV file") + args = parser.parse_args() + data = pd.read_csv(args.csv_file) + data = add_metrics(data) + # FIXME we are able to generate snitch_stream matmul only at the moment + data = data.loc[(data["test"] == "matmul") & (data["impl"] == "linalg_xdsl")] + for m, fig in generate_heatmaps(data): + fig.savefig(f"matmul_heatmap_M_{m}.pdf", format="pdf", bbox_inches="tight") + + +if __name__ == "__main__": + main() diff --git a/plots-cgo2025-ae/low_level_representation.py b/plots-cgo2025-ae/low_level_representation.py new file mode 100644 index 00000000..a986e082 --- /dev/null +++ b/plots-cgo2025-ae/low_level_representation.py @@ -0,0 +1,83 @@ +from matplotlib.figure import Figure + +import pandas as pd +from typing import Sequence, NamedTuple +from plot_utils import plot_combined + +from fpu import FPUGridPlotRow +from data import ( + get_pivoted_all, + get_pivoted_fpu, + get_pivoted_cycles, + Operator, + get_params_dfs, + get_operator_df, + Impl, +) +from throughput import ThroughputGridPlotRow +from cycles import CyclesGridPlotRow + + +class LLRDataFrames(NamedTuple): + fpu_dfs: Sequence[pd.DataFrame] + throughput_dfs: Sequence[pd.DataFrame] + cycles_dfs: Sequence[pd.DataFrame] + + +def get_llr_dfs(llr_kernels_df: pd.DataFrame) -> LLRDataFrames: + llr_pivoted_all_df = ( + get_pivoted_all(llr_kernels_df) + .loc[:, (slice(None), 'Ours')] + .droplevel('impl', axis=1) + ) + llr_pivoted_fpu_df = pd.DataFrame( + get_pivoted_fpu(llr_pivoted_all_df).rename(Impl.OURS) + ) + llr_pivoted_cycles_df = get_pivoted_cycles(llr_pivoted_all_df).rename( + columns={"cycles": Impl.OURS} + ) + operators = (Operator.SUM, Operator.RELU, Operator.MATMUL_TRANSB) + llr_fpu_dfs = tuple( + param_df + for operator in operators + for param_df in get_params_dfs( + get_operator_df(llr_pivoted_fpu_df, operator, bitwidth=32) + ) + ) + # Remove the matmul_t 1 fpu df + llr_fpu_dfs = llr_fpu_dfs[:4] + llr_fpu_dfs[5:] + llr_throughput_dfs: list[pd.DataFrame] = [] + llr_cycles_dfs: list[pd.DataFrame] = [] + for operator in operators: + llr_operator_df = get_operator_df(llr_pivoted_cycles_df, operator, bitwidth=32) + llr_operator_params_dfs = tuple(get_params_dfs(llr_operator_df)) + # Remove the matmul_t 1 params df + if operator 
== Operator.MATMUL_TRANSB: + llr_operator_params_dfs = llr_operator_params_dfs[1:] + llr_operator_throughput_dfs = tuple( + pd.DataFrame( + {"Ours": df["Throughput"], "Performance Roofline": df["Max Throughput"]} + ) + for df in llr_operator_params_dfs + ) + llr_operator_cycles_dfs = tuple( + df[[Impl.OURS, "Min Cycles", "Overhead"]] for df in llr_operator_params_dfs + ) + llr_throughput_dfs.extend(llr_operator_throughput_dfs) + llr_cycles_dfs.extend(llr_operator_cycles_dfs) + + return LLRDataFrames(llr_fpu_dfs, llr_throughput_dfs, llr_cycles_dfs) + + +def plot_llr(llr_dfs: LLRDataFrames) -> Figure: + return plot_combined( + FPUGridPlotRow.get_rows( + llr_dfs.fpu_dfs, 6, hide_xlabel=True, hide_xtick_labels=[True] + ) + + ThroughputGridPlotRow.get_rows( + llr_dfs.throughput_dfs, 6, hide_xlabel=True, hide_xtick_labels=[True] + ) + + CyclesGridPlotRow.get_rows(llr_dfs.cycles_dfs, 6), + legend_cols=4, + rcparams_cfg_file="config/gridplot.mplstyle", + ) diff --git a/plots-cgo2025-ae/max_util.py b/plots-cgo2025-ae/max_util.py new file mode 100644 index 00000000..188706ea --- /dev/null +++ b/plots-cgo2025-ae/max_util.py @@ -0,0 +1,23 @@ + +from typing import Sequence +import pandas as pd + +from data import Impl, Operator + +def get_max_util(llr_kernels_df: pd.DataFrame, fpu_dfs: Sequence[pd.DataFrame]) -> str: + llr_max_occupancy = llr_kernels_df["fpss_fpu_occupancy"].max() + llr_max_throughput = llr_kernels_df["Rel Throughput"].max() + llr_matmult_throughput = llr_kernels_df["Throughput"][Operator.MATMUL_TRANSB, :].max() + llr_matmult_max_occupancy = llr_kernels_df["fpss_fpu_occupancy"][Operator.MATMUL_TRANSB, :].max() + proto_comp_max_occupancy = max(fpu_df[Impl.OURS].max() for fpu_df in fpu_dfs) + proto_comp_min_max_occupancy = min_max = min(_df[Impl.OURS].max() for _df in fpu_dfs) + clang_max_occupancy = max(fpu_df[Impl.CLANG].max() for fpu_df in fpu_dfs) + return f"""\ +\\newdelimitedcommand{{maxutilprotocomp}}{{{proto_comp_max_occupancy*100:.0f}\\%}} +\\newdelimitedcommand{{minmaxutilprotocomp}}{{{proto_comp_min_max_occupancy*100:.0f}\\%}} +\\newdelimitedcommand{{maxutilclang}}{{{clang_max_occupancy*100:.0f}\\%}} +\\newdelimitedcommand{{maxutilllr}}{{{llr_max_occupancy*100:.0f}\\%}} +\\newdelimitedcommand{{maxutilllrmatmult}}{{{llr_matmult_max_occupancy*100:.0f}\\%}} +\\newdelimitedcommand{{maxrelthroughputllr}}{{{llr_max_throughput*100:.0f}\\%}} +\\newdelimitedcommand{{maxabsthroughputllrmatmult}}{{{llr_matmult_throughput:.2f}}} +""" diff --git a/plots-cgo2025-ae/opt_pipeline.py b/plots-cgo2025-ae/opt_pipeline.py new file mode 100644 index 00000000..53f41c67 --- /dev/null +++ b/plots-cgo2025-ae/opt_pipeline.py @@ -0,0 +1,115 @@ +import pandas as pd + +col_names = { + 'Optimizations': '', + 'F Registers': 'FP', + 'X Registers': 'Integer', + 'F Loads': 'Loads', + 'F Stores': 'Stores', + 'FMAdd': 'FMAdd', + 'FRep': 'FRep', + 'Cycles': 'Cycles (\#)', + 'Occupancy': 'Occupancy (\%)', +} + +col_alignment = { + 'Optimizations': 'l', + 'F Registers': 'S[table-format=2.0]', + 'X Registers': 'S[table-format=2.0]', + 'F Loads': 'S[table-format=4.0]', + 'F Stores': 'S[table-format=4.0]', + 'FMAdd': 'S[table-format=4.0]', + 'FRep': 'S[table-format=1.0]', + 'Cycles': 'S[table-format=5.0]', + 'Occupancy': 'S[table-format=2.2]', +} + + +def get_opt_pipeline_table(opt_pipeline_df: pd.DataFrame) -> str: + # Reorder columns to move Cycles and Occupancy to the end + cols = opt_pipeline_df.columns.tolist() + cols = [col for col in cols if col not in ['Cycles', 'Occupancy']] + [ + 'Cycles', + 'Occupancy', + ] 
+ opt_pipeline_df = opt_pipeline_df[cols] + del opt_pipeline_df["params"] + latex_table = "\\begin{table*}[h]\n" + + latex_table += "\\sisetup{group-separator = {\ },group-minimum-digits=3}\n" + + latex_table += ( + "\\setlength\\tabcolsep{0pt} % let LaTeX compute intercolumn whitespace\n" + ) + + latex_table += "\\caption{" + latex_table += ( + "Our compilation pipeline leverages custom " + "\\ac{isa} extensions and knowledge of \\ac{fpu} design in order to achieve " + "over 90\\% \\ac{fpu} occupancy for the MatMul kernel, " + "operating on 1$\\times$200 and 200$\\times$5 64-bit inputs. " + "Incrementally adding each optimization minimizes and, " + "eventually eliminates, explicit memory operations, while reducing " + "execution time (cycles) and maximizing \\ac{fpu} utilization." + ) + latex_table += "}\n\\label{tab:opt_pipeline}\n" + + latex_table += ( + "\\centering\n\\begin{tabular*}{\\textwidth}{@{\\extracolsep{\\fill}}" + + " ".join(f"{col_alignment[col]}" for col in opt_pipeline_df.columns) + + "}\n\\toprule\n" + ) + + latex_table += "\\textbf{Optimizations} & \\multicolumn{2}{r}{\\textbf{Allocated Registers (\\#)}} & \\multicolumn{4}{c}{\\textbf{Assembly Operations (\\#)}} & \\multicolumn{2}{c}{\\textbf{Performance}}\\\\\n" + + latex_table += "\\cmidrule{2-3}\\cmidrule{4-7}\\cmidrule{8-9}\n" + + latex_table += ( + " & ".join(f"\\textbf{{{col_names[col]}}}" for col in opt_pipeline_df.columns) + + " \\\\\n\\midrule\n" + ) + + string_table = [] + + # Add the rest of the rows + for _, row in opt_pipeline_df.iterrows(): + string_table.append([str(x) for x in row]) + + # replace text for last row in the first column + string_table[-1][0] = "+ Unroll-and-Jam" + + # change text style for first column + for row in string_table: + row[0] = f"\\texttt{{{row[0]}}}" + + # replace text for baseline which should be at the first row and column + string_table[0][0] = "Baseline (for MatMul)" + + # add max register count for fp registers + for row in string_table: + row[1] = row[1] + "\\textcolor{lightgray}{/20}" + + # add max register count for int registers + for row in string_table: + row[2] = row[2] + "\\textcolor{lightgray}{/15}" + + # replace text for baseline which should be at the first row and column + string_table[0][0] = "Baseline (for MatMul)" + + # gray out baseline which should be the first line + for idx, val in enumerate(string_table[0]): + string_table[0][idx] = "\\color{gray} " + val + + # highlight rightmost entry which should be the max FPU util achieved + string_table[-1][-1] = "\\textbf{" + string_table[-1][-1] + "}" + + for row in string_table: + latex_table += " & ".join(val for val in row) + " \\\\\n" + + latex_table += "\\bottomrule\n" + + latex_table += "\\end{tabular*}\n" + + latex_table += "\\end{table*}\n" + + return latex_table diff --git a/plots-cgo2025-ae/pass_improvements.py b/plots-cgo2025-ae/pass_improvements.py new file mode 100644 index 00000000..90b70f2f --- /dev/null +++ b/plots-cgo2025-ae/pass_improvements.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +import pandas as pd +import argparse + + +def extract(df): + mask = df["impl"].str.contains(r"linalg_\d_xdsl|linalg_full_xdsl", regex=True) + return df[mask] + + +def pass_order_mapping(n_passes=10): + custom_order = [f"linalg_{i}_xdsl" for i in range(n_passes)] + ["linalg_full_xdsl"] + return {name: index for index, name in enumerate(custom_order)} + + +def add_linalg_passes(data): + df = data.copy() + # Pass order + df["linalg_pass_order"] = df["impl"].map(pass_order_mapping()) + df = df.sort_values(by=["test", 
"params", "linalg_pass_order"]) + # fpss_fpu_occupancy relative improvement + df["linalg_pass_relative_improvement"] = ( + df.groupby(["test", "params"])["fpss_fpu_occupancy"].diff().fillna(0) + ) + # group min/max/delta + group_max = df.groupby(["test", "params"])["fpss_fpu_occupancy"].max().reset_index() + group_max = group_max.rename( + columns={"fpss_fpu_occupancy": "linalg_pass_group_max"} + ) + group_min = df.groupby(["test", "params"])["fpss_fpu_occupancy"].min().reset_index() + group_min = group_min.rename( + columns={"fpss_fpu_occupancy": "linalg_pass_group_min"} + ) + group_delta = pd.merge(group_min, group_max, on=["test", "params"]) + group_delta["linalg_pass_group_delta"] = ( + group_delta["linalg_pass_group_max"] - group_delta["linalg_pass_group_min"] + ) + # overall pass % contribution + df = pd.merge(df, group_delta, on=["test", "params"]) + df["linalg_pass_%_contribution"] = ( + df["linalg_pass_relative_improvement"].abs() + / df["linalg_pass_group_delta"].abs() + ) + return df + + +def get_pass_contributions_table(df): + pivot_df = df.pivot( + index=["test", "params"], columns="impl", values="linalg_pass_%_contribution" + ) + pivot_df.columns.name = None + pivot_df.reset_index(inplace=True) + return pivot_df + + +def main(): + parser = argparse.ArgumentParser( + description="Generate pass improvements CSV table." + ) + parser.add_argument("csv_file", help="Path to the input CSV file") + args = parser.parse_args() + data = pd.read_csv(args.csv_file) + df = extract(data) + df = add_linalg_passes(df) + df = get_pass_contributions_table(df) + print(df.to_csv(index=False)) + + +if __name__ == "__main__": + main() diff --git a/plots-cgo2025-ae/pass_improvements_stacked.py b/plots-cgo2025-ae/pass_improvements_stacked.py new file mode 100644 index 00000000..714d765c --- /dev/null +++ b/plots-cgo2025-ae/pass_improvements_stacked.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3 + +import matplotlib.pyplot as plt +import numpy as np +import argparse +import pandas as pd + + +def generate_stacked_bars(labels, passes, values): + + n_bars = values.shape[0] + n_values = values.shape[1] + + cmap = plt.get_cmap("viridis", n_values) + colors = [cmap(i) for i in range(n_values)] + + # Create the stacked bar plot + fig, ax = plt.subplots(figsize=(10, 6)) + + bar_width = 0.1 + bar_tick = 0.15 + x = np.array([bar_tick * v for v in range(n_bars)]) + bottom = np.zeros(n_bars) + + for i in range(n_values): + segments = ax.bar( + x, + values[:, i], + bottom=bottom, + width=bar_width, + color=colors[i], + label=passes[i], + ) + bottom += values[:, i] + + # Add segment values inside each segment + if i == 0 or i == n_values - 1: + continue + for bar in segments: + height = bar.get_height() + # Calculate vertical position for the text within each segment + text_y = bar.get_y() + height / 2.0 + ax.text( + bar.get_x() + bar.get_width() / 2, + text_y, + f"{height:.2f}", + ha="center", + va="center", + color="white", + fontsize=10, + fontweight="bold", + ) + + # Add much thicker white zigzag line with 3 less sloped segments across the full width of each bar + zigzag_height = ( + values[:, -1] * 0.1 + ) # Reduced to 30% of the top segment height for less slope + + for i in range(n_bars): + bar_top = np.sum(values[i]) + zigzag_center = bar_top - values[i, -1] / 2 # Center of the top segment + zigzag_top = zigzag_center + zigzag_height[i] / 2 + zigzag_bottom = zigzag_center - zigzag_height[i] / 2 + + # Create 4 points for 3 straight segments with less slope + zigzag_x = [ + x[i] - bar_width / 2, + x[i] - 
bar_width / 4, + x[i] + bar_width / 4, + x[i] + bar_width / 2, + ] + zigzag_y = [zigzag_bottom, zigzag_top, zigzag_bottom, zigzag_top] + + ax.plot( + zigzag_x, zigzag_y, color="white", linewidth=16, solid_capstyle="round" + ) # Doubled linewidth to 16 + + ax.axhline(y=100, color="black", linewidth=2, zorder=3) + + ax.set_xlim(-0.1, n_bars * bar_tick) + + # Add labels to the right of the last bar + fontdict = {"family": "Arial", "size": 12, "weight": "bold"} + for i in range(1, n_values - 1): + y_position = sum(values[0, :i]) + values[0, i] / 2 + ax.text( + x[-1] + bar_width / 2 + 0.01, + y_position, + passes[i], + va="center", + color=colors[i], + fontdict=fontdict, + ) + + ax.set_xticks(x) + ax.set_xticklabels(labels) + ax.get_yaxis().set_visible(False) + for key, spine in ax.spines.items(): + spine.set_visible(False) + + plt.tight_layout() + return fig + + +def main(): + parser = argparse.ArgumentParser( + description="Generate pass improvements stacked bars plot from pass improvements CSV table." + ) + parser.add_argument("csv_file", help="Path to the input pass improvements CSV file") + args = parser.parse_args() + df = pd.read_csv(args.csv_file) + df["label"] = df["test"] + " " + df["params"] + df.set_index("label", inplace=True) + df.drop(["test", "params"], axis=1, inplace=True) + df *= 100.0 + print(df) + column_to_pass = { + "linalg_0_xdsl": "baseline", + "linalg_1_xdsl": "memref-stream-tile-outer-loops", + "linalg_2_xdsl": "memref-stream-unnest-out-parameters", + "linalg_3_xdsl": "memref-stream-interleave", + "linalg_4_xdsl": "memref-streamify", + "linalg_full_xdsl": "convert-riscv-scf-for-to-frep", + } + passes = [column_to_pass[column] for column in df.columns] + df["remainder"] = 40.0 + passes.append("remainder") + fig = generate_stacked_bars(df.index, passes=passes, values=df.values) + fig.savefig(f"pass_improvements.pdf", format="pdf", bbox_inches="tight") + + +if __name__ == "__main__": + main() diff --git a/plots-cgo2025-ae/plot.py b/plots-cgo2025-ae/plot.py new file mode 100644 index 00000000..2e7fcae4 --- /dev/null +++ b/plots-cgo2025-ae/plot.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +from data import ( + get_low_level_representation, + get_opt_pipeline, + get_pivoted_all, + get_pivoted_fpu, + get_kernels, +) +from fpu import get_fpu, plot_fpu +from low_level_representation import get_llr_dfs, plot_llr +from max_util import get_max_util +from regalloc import get_regalloc, print_regalloc +from plot_utils import savefig +from opt_pipeline import get_opt_pipeline_table + + +def main(): + kernels_df = get_kernels() + pivoted_all_df = get_pivoted_all(kernels_df) + pivoted_fpu_df = get_pivoted_fpu(pivoted_all_df) + + # Plot FPU utilization + fpu_dfs = get_fpu(pivoted_fpu_df) + fpu_fig = plot_fpu(fpu_dfs) + savefig(fpu_fig, "fpu.pdf") + + # Print the regalloc stats + regalloc_df = get_regalloc() + print_regalloc(regalloc_df, filename="regalloc.tex") + + # Plot low-level representation + llr_kernels_df = get_low_level_representation() + llr_dfs = get_llr_dfs(llr_kernels_df) + llr_fig = plot_llr(llr_dfs) + savefig(llr_fig, "low_level_representation.pdf") + + # Print opt pipeline table + opt_pipeline_df = get_opt_pipeline() + opt_pipeline_table = get_opt_pipeline_table(opt_pipeline_df) + with open("opt_pipeline.tex", "w") as f: + f.write(opt_pipeline_table) + + # Print max utilization stats + max_util_macros = get_max_util(llr_kernels_df, fpu_dfs) + with open("max_util.tex", "w") as f: + f.write(max_util_macros) + + +if __name__ == "__main__": + main() diff --git 
a/plots-cgo2025-ae/plot_utils.py b/plots-cgo2025-ae/plot_utils.py new file mode 100644 index 00000000..e03e8d43 --- /dev/null +++ b/plots-cgo2025-ae/plot_utils.py @@ -0,0 +1,241 @@ +from collections.abc import Iterable +from typing import ClassVar, Sequence, cast +import os +import pandas as pd +import matplotlib.pyplot as plt +from matplotlib.axes import Axes +from matplotlib.figure import Figure +import numpy as np +import numpy.typing as npt +import seaborn as sns +from matplotlib.lines import Line2D +from data import Impl +from abc import ABC, abstractmethod + +# Color palette +light_gray = "#cacaca" +dark_gray = "#827b7b" +light_blue = "#a6cee3" +dark_blue = "#1f78b4" +light_green = "#b2df8a" +dark_green = "#33a02c" +light_red = "#fb9a99" +dark_red = "#e31a1c" +black = "#000000" +white = "#ffffff" + +COLORS = [ + light_gray, + dark_gray, + light_blue, + dark_blue, + light_green, + dark_green, + light_red, + dark_red, +] + + +IMPL_COLORS = { + Impl.OURS.value: dark_green, + Impl.CLANG.value: light_blue, + Impl.MLIR.value: dark_blue, + "Min Cycles": dark_gray, + "Overhead": dark_red, + "Performance Roofline": dark_gray, +} + +IMPL_MARKERS = { + Impl.OURS.value: 'o', + Impl.CLANG.value: 's', + Impl.MLIR.value: 'v', + "Min Cycles": '^', + "Overhead": 'x', + "Performance Roofline": "", +} + +IMPL_LINESTYLES = { + Impl.OURS.value: '', + Impl.CLANG.value: '', + Impl.MLIR.value: '', + "Min Cycles": '', + "Overhead": '', + "Performance Roofline": "--", +} + + +class GridPlotRow(ABC): + ylabel: ClassVar[str] + + dfs: Sequence[pd.DataFrame] + hide_xlabel: bool + hide_xtick_labels: bool + + def __init__( + self, + dfs: Sequence[pd.DataFrame], + *, + hide_xlabel: bool = False, + hide_xtick_labels: bool = False, + ) -> None: + self.dfs = dfs + self.hide_xlabel = hide_xlabel + self.hide_xtick_labels = hide_xtick_labels + + @classmethod + @abstractmethod + def yrange(cls, dfs: Sequence[pd.DataFrame]) -> npt.NDArray[np.float64]: + raise NotImplementedError + + @classmethod + @abstractmethod + def plot_grid_cell(cls, ax: Axes, df: pd.DataFrame, *, hide_xlabel: bool) -> None: + raise NotImplementedError + + @classmethod + def get_roofline(cls, df: pd.DataFrame) -> float | None: + return None + + def plot_grid_row(self, axs: Sequence[Axes]): + yrange = self.yrange(self.dfs) + + for i, (_ax, _d) in enumerate(zip(axs, self.dfs)): + self.plot_grid_cell( + _ax, + _d, + hide_xlabel=self.hide_xlabel, + ) + + if (roofline := self.get_roofline(_d)) is not None: + _ax.axhline( + y=roofline, + color=IMPL_COLORS["Performance Roofline"], + linestyle=IMPL_LINESTYLES["Performance Roofline"], + ) + + _ax.set_yticks(yrange) + ytick_distance = (yrange[-1] - yrange[0]) / len(yrange) + # add extra distance for the yaxis to avoid graph truncation and + # misaligned y-axis ticks in subplots + _ax.set_ylim(yrange[0], yrange[-1] + ytick_distance / 10) + + _ax.yaxis.grid(True) + _ax.tick_params(axis="both", which="both", left=True) + + yticks = _ax.yaxis.get_major_ticks() + for _j, ytick in enumerate(yticks): + if _j % 2: + ytick.label1.set_visible(False) + ytick.tick1line.set_visible(False) + if i: + ytick.label1.set_visible(False) + + # Improve readability when we have too many xticks + # (e.g.: matmul with lots of data points) + xtick_labels = _ax.get_xticklabels() + if len(xtick_labels) > 10: + for label in xtick_labels: + label.set_rotation(90) + + sns.despine(top=True, right=True) + + if self.hide_xtick_labels: + _ax.set_xticklabels([]) + + # y axis label on first column only + axs[0].set_ylabel(self.ylabel) + + 
@classmethod + def get_rows( + cls, + dfs: Sequence[pd.DataFrame], + ncols: int, + *, + hide_xlabel: bool = False, + hide_xtick_labels: list[bool] = [], + ) -> tuple["GridPlotRow", ...]: + num_plots = len(dfs) + rng = range(0, num_plots, ncols) + + if hide_xtick_labels == []: + hide_xtick_labels = [False] * len(rng) + + assert len(rng) == len(hide_xtick_labels) + + return tuple( + cls( + dfs[offset : offset + ncols], + hide_xlabel=hide_xlabel, + hide_xtick_labels=hide_xtick_labels[i], + ) + for i, offset in enumerate(rng) + ) + + +def get_legend_entries(dfs: Sequence[pd.DataFrame]) -> dict[str, tuple[str, str, str]]: + return { + col: (IMPL_COLORS[col], IMPL_MARKERS[col], IMPL_LINESTYLES[col]) + for _d in dfs + for col in cast(Iterable[str], _d) + } + + +def savefig(fig: Figure, filename: str): + fig.savefig(filename) + + +def subplots(nrows: int, ncols: int, figsize: tuple[float, float]): + fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, sharey=False) + + # plt.subplots returns different types depending on nrows and ncols + # These normalise the type to always be doubly-nested sequence + if nrows == 1: + axs = [axs] + if ncols == 1: + axs = [[ax] for ax in axs] + + return fig, cast(Sequence[Sequence[Axes]], axs) + + +def plot_combined( + rows: Sequence["GridPlotRow"], rcparams_cfg_file: str = "", *, legend_cols: int +): + nrows = len(rows) + ncols = max(len(row.dfs) for row in rows) + if os.path.exists(rcparams_cfg_file): + plt.style.use(rcparams_cfg_file) + + fig, axs = subplots(nrows, ncols, (ncols * 2.5, nrows * 1.8)) + + fig.align_labels() + fig.subplots_adjust(hspace=0.5, wspace=0.3) + + for plot_row, axs_row in zip(rows, axs): + plot_row.plot_grid_row(axs_row) + + # Remove empty subplots + empty_axs = ncols - len(plot_row.dfs) + if empty_axs: + for ax in axs_row[-empty_axs:]: + fig.delaxes(ax) + + # Shared legend + legend_entries = get_legend_entries(tuple(df for row in rows for df in row.dfs)) + lines = [ + Line2D( + [], + [], + color=color, + marker=marker, + label=entry, + linestyle=linestyle, + markersize=6, + ) + for entry, (color, marker, linestyle) in legend_entries.items() + ] + labels = list(legend_entries.keys()) + fig.legend(lines, labels, ncols=legend_cols, bbox_to_anchor=(0.5, 1.03)) + + fig.tight_layout() + + return fig diff --git a/plots-cgo2025-ae/regalloc.py b/plots-cgo2025-ae/regalloc.py new file mode 100644 index 00000000..06d2ecc8 --- /dev/null +++ b/plots-cgo2025-ae/regalloc.py @@ -0,0 +1,63 @@ +import re +import pandas as pd +from data import get_regalloc as _get_regalloc + + +def get_regalloc() -> pd.DataFrame: + regalloc_df = _get_regalloc() + + return regalloc_df + + +def color(color: str, text: str) -> str: + return r"\textcolor{" + color + "}{" + text + "}" + + +def print_regalloc(regalloc_df: pd.DataFrame, *, filename: str | None = None): + stream = None if filename is None else open(filename, "w") + colors = (color("lightgray", "/20"), color("lightgray", "/15")) + + # Sort the DataFrame + regalloc_df = regalloc_df.sort_values( + [ + "Bits", + "allocated_float", + "allocated_int", + ], + ascending=[False, True, True], + ) + + string_table = [] + + for row in regalloc_df.iterrows(): + items = tuple(row[1]) + params = items[:5] + regs = items[5:] + + reg_cells = tuple(f"{reg}{col}" for reg, col in zip(regs, colors)) + + string_table.append([str(p) for p in params + reg_cells]) + + current_precision = None + + for row in string_table: + line = "" + + # replace NxM where N and M are integers with N$\times$M in kernel names + pattern = 
r"(\d+)x(\d+)" + row[0] = re.sub(pattern, r"\1$\\times$\2", row[0]) + + # add short row space to separate precision groups + if current_precision is None: + current_precision = row[1] + + if current_precision != row[1]: + current_precision = row[1] + line = "\\addlinespace[0.5em]\n" + + line += " & ".join(val for val in row) + print(line, end=" \\\\\n", file=stream) + + print(r"\bottomrule", file=stream) + if stream is not None: + stream.close() diff --git a/plots-cgo2025-ae/throughput.py b/plots-cgo2025-ae/throughput.py new file mode 100644 index 00000000..2a6f7a7a --- /dev/null +++ b/plots-cgo2025-ae/throughput.py @@ -0,0 +1,57 @@ +from typing import Sequence, cast +import pandas as pd +import numpy as np +import numpy.typing as npt +from plot_utils import IMPL_COLORS, IMPL_MARKERS, plot_combined, GridPlotRow +from matplotlib.axes import Axes + +from math import log2, ceil, floor + + +class ThroughputGridPlotRow(GridPlotRow): + ylabel = "Throughput" + + @classmethod + def yrange(cls, dfs: Sequence[pd.DataFrame]) -> npt.NDArray[np.float64]: + max_value = cast(float, max(_df.max().iloc[0] for _df in dfs)) + magnitude: float = 2 ** floor(log2(max_value)) + greater_round_number = ceil(max_value / magnitude) * magnitude + yrange = np.arange(0, greater_round_number + 1) + return yrange + + @classmethod + def plot_grid_cell( + cls, + ax: Axes, + df: pd.DataFrame, + *, + hide_xlabel: bool, + ) -> None: + for col in df: + if col == "Performance Roofline": + continue + ax.scatter( + x=df.index, + y=df[col], + color=IMPL_COLORS[col], + marker=IMPL_MARKERS[col], + ) + ax.set_xticks(df.index) + if not hide_xlabel: + ax.set_xlabel(df.index.name, fontsize=12) + + @classmethod + def get_roofline(cls, df: pd.DataFrame) -> float | None: + max_throughputs = df["Performance Roofline"] + max_throughput = max_throughputs.max() + min_throughput = max_throughputs.min() + assert max_throughput == min_throughput + return max_throughput + + +def plot_throughput(throughput_dfs: tuple[pd.DataFrame, ...]): + return plot_combined( + ThroughputGridPlotRow.get_rows(throughput_dfs, 4), + legend_cols=3, + rcparams_cfg_file="config/gridplot.mplstyle", + ) From 3046490344d5639034d48a54af8f6ab47f3f1087 Mon Sep 17 00:00:00 2001 From: Chris Vasiladiotis Date: Wed, 6 Nov 2024 16:53:35 +0000 Subject: [PATCH 02/20] Update requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 7ad2a3a0..a9add958 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ pip pandas==2.1.1 +seaborn numpy==1.26.4 snakemake==8.14.0 -e /src/xdsl From 7ed06fe1454078186baf5ab83aceeea84ff6beb1 Mon Sep 17 00:00:00 2001 From: Chris Vasiladiotis Date: Wed, 6 Nov 2024 17:12:59 +0000 Subject: [PATCH 03/20] Update requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index a9add958..760a4e59 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ pip pandas==2.1.1 -seaborn +seaborn==0.13.2 numpy==1.26.4 snakemake==8.14.0 -e /src/xdsl From 99c0ad7e1afee8557db2ae39fac2897b3882bb6e Mon Sep 17 00:00:00 2001 From: Chris Vasiladiotis Date: Wed, 6 Nov 2024 17:33:56 +0000 Subject: [PATCH 04/20] Disable TeX matplotlib backend --- plots-cgo2025-ae/config/gridplot.mplstyle | 2 +- plots-cgo2025-ae/heatmap.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/plots-cgo2025-ae/config/gridplot.mplstyle b/plots-cgo2025-ae/config/gridplot.mplstyle index 1ace4cb8..0114f979 100644 
--- a/plots-cgo2025-ae/config/gridplot.mplstyle
+++ b/plots-cgo2025-ae/config/gridplot.mplstyle
@@ -13,7 +13,7 @@ font.size: 12
 font.family: sans-serif
 #font.sans-serif: ["Helvetica"]

-text.usetex: True
+text.usetex: False

 ## Enable tight_layout by default
 #
diff --git a/plots-cgo2025-ae/heatmap.py b/plots-cgo2025-ae/heatmap.py
index 1b120432..20a8a8d1 100644
--- a/plots-cgo2025-ae/heatmap.py
+++ b/plots-cgo2025-ae/heatmap.py
@@ -60,7 +60,7 @@ def generate_heatmaps(data: pd.DataFrame):
     # filter out K values unrelated to the heatmap experimental runs
     data = data[(data["K"] <= 65)]

-    sns.set(rc={'text.usetex': True})
+    # sns.set(rc={'text.usetex': True})

     for m_value, m_group in data.groupby("M"):
         selection = m_group[["K", "N", "fp_flop_throughput"]]

From 541d7a4f4cd7ede7e97151569acbef7ec027a23c Mon Sep 17 00:00:00 2001
From: Chris Vasiladiotis
Date: Thu, 7 Nov 2024 10:04:13 +0000
Subject: [PATCH 05/20] Add symlink to results dir

---
 plots-cgo2025-ae/results | 1 +
 1 file changed, 1 insertion(+)
 create mode 120000 plots-cgo2025-ae/results

diff --git a/plots-cgo2025-ae/results b/plots-cgo2025-ae/results
new file mode 120000
index 00000000..f42d2767
--- /dev/null
+++ b/plots-cgo2025-ae/results
@@ -0,0 +1 @@
+../results/
\ No newline at end of file

From 874633842d3059d07706d84db8d66ade9f7ab2fa Mon Sep 17 00:00:00 2001
From: Chris Vasiladiotis
Date: Thu, 7 Nov 2024 10:12:16 +0000
Subject: [PATCH 06/20] Disable TeX backend in matplotlib config

---
 plots-cgo2025-ae/config/cycles/all_barchart.mplstyle  | 2 +-
 plots-cgo2025-ae/config/cycles/xdsl_barchart.mplstyle | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/plots-cgo2025-ae/config/cycles/all_barchart.mplstyle b/plots-cgo2025-ae/config/cycles/all_barchart.mplstyle
index 980b425c..647d6061 100644
--- a/plots-cgo2025-ae/config/cycles/all_barchart.mplstyle
+++ b/plots-cgo2025-ae/config/cycles/all_barchart.mplstyle
@@ -13,7 +13,7 @@ font.size: 12
 font.family: sans-serif
 #font.sans-serif: ["Helvetica"]

-text.usetex: True
+text.usetex: False

 ## Enable tight_layout by default
 #
diff --git a/plots-cgo2025-ae/config/cycles/xdsl_barchart.mplstyle b/plots-cgo2025-ae/config/cycles/xdsl_barchart.mplstyle
index 163c89d5..0ef75540 100644
--- a/plots-cgo2025-ae/config/cycles/xdsl_barchart.mplstyle
+++ b/plots-cgo2025-ae/config/cycles/xdsl_barchart.mplstyle
@@ -13,7 +13,7 @@ font.size: 12
 font.family: sans-serif
 #font.sans-serif: ["Helvetica"]

-text.usetex: True
+text.usetex: False

 ## Enable tight_layout by default
 #

From 2a308be1a22f1cf324b42595e470a7de96813d7c Mon Sep 17 00:00:00 2001
From: Chris Vasiladiotis
Date: Thu, 7 Nov 2024 10:12:40 +0000
Subject: [PATCH 07/20] Use specific CSV file for regalloc

---
 plots-cgo2025-ae/data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plots-cgo2025-ae/data.py b/plots-cgo2025-ae/data.py
index 58104850..d3a4a443 100644
--- a/plots-cgo2025-ae/data.py
+++ b/plots-cgo2025-ae/data.py
@@ -219,7 +219,7 @@ def get_params_dfs(operator_df: pd.DataFrame) -> Iterable[pd.DataFrame]:


 def get_regalloc() -> pd.DataFrame:
-    regalloc_df = pd.read_csv("results/regalloc.csv")
+    regalloc_df = pd.read_csv("results/regalloc.fast.csv")
     regalloc_df = regalloc_df[regalloc_df["impl"].isin(OPERATOR_BY_TEST)]
     regalloc_df.replace(OPERATOR_BY_TEST, inplace=True)
     regalloc_df = regalloc_df[~regalloc_df["params"].str.contains("f16")]

From f63720e71076fb4d02f367abd5a1939c7a0547ca Mon Sep 17 00:00:00 2001
From: Chris Vasiladiotis
Date: Thu, 7 Nov 2024 10:41:56 +0000
Subject: [PATCH 08/20] Allow relative input and output paths based on script
 location

---
 plots-cgo2025-ae/data.py     | 27 +++++++++++++++++----------
 plots-cgo2025-ae/max_util.py |  2 +-
 plots-cgo2025-ae/plot.py     | 28 +++++++++++++++++++--------
 3 files changed, 37 insertions(+), 20 deletions(-)

diff --git a/plots-cgo2025-ae/data.py b/plots-cgo2025-ae/data.py
index d3a4a443..26abe7cf 100644
--- a/plots-cgo2025-ae/data.py
+++ b/plots-cgo2025-ae/data.py
@@ -3,6 +3,7 @@
 import numpy as np

 from enum import StrEnum
+from pathlib import Path


 class Impl(StrEnum):
@@ -96,8 +97,8 @@ def _get_kernels(filename: str) -> pd.DataFrame:
     return df_with_overhead


-def get_kernels(cleaned: bool = True) -> pd.DataFrame:
-    df = _get_kernels("results/kernels.csv")
+def get_kernels(dir: Path = Path("."), cleaned: bool = True) -> pd.DataFrame:
+    df = _get_kernels(f"{dir}/kernels.csv")
     # Drop unknown operators
     df = df[df.index.get_level_values(0).isin(tuple(Operator))]
     if cleaned:
@@ -106,8 +107,8 @@ def get_kernels(cleaned: bool = True) -> pd.DataFrame:
     return df


-def get_low_level_representation() -> pd.DataFrame:
-    return _get_kernels("results/kernels.low_level_representation.csv")
+def get_low_level_representation(dir: Path = Path(".")) -> pd.DataFrame:
+    return _get_kernels(f"{dir}/kernels.low_level_representation.csv")


 def get_pivoted_all(kernels_df: pd.DataFrame) -> pd.DataFrame:
@@ -119,7 +120,9 @@ def get_pivoted_fpu(pivoted_all_df: pd.DataFrame) -> pd.DataFrame:


 def get_pivoted_cycles(pivoted_all_df: pd.DataFrame) -> pd.DataFrame:
-    return pivoted_all_df[["cycles", "Min Cycles", "Overhead", "FLOPs", "Throughput", "Max Throughput"]]
+    return pivoted_all_df[
+        ["cycles", "Min Cycles", "Overhead", "FLOPs", "Throughput", "Max Throughput"]
+    ]


 def get_flops(operator_df: pd.DataFrame, operator: Operator) -> pd.Series:
@@ -158,6 +161,7 @@ def get_overhead(
     })
     return res

+
 def adding_overhead(
     operator_df: pd.DataFrame,
     operator: Operator,
@@ -173,6 +177,7 @@ def adding_overhead(
         axis=1,
     )

+
 def get_operator_df(
     pivoted_df: pd.DataFrame, operator: Operator, *, bitwidth: int
 ) -> pd.DataFrame:
@@ -211,15 +216,17 @@ def get_params_dfs(operator_df: pd.DataFrame) -> Iterable[pd.DataFrame]:
             name_components.append(col)
         other_cols = list(cols) + ["bitwidth"]
         del other_cols[i]
-        shape_string = " ".join("x".join(t) for t in operand_shapes_map(*name_components))
+        shape_string = " ".join(
+            "x".join(t) for t in operand_shapes_map(*name_components)
+        )
         new_name = f"{name} {shape_string}"
         my_df = my_df.rename(columns={col: new_name}).set_index(new_name).sort_index()
         my_df.drop(other_cols, axis=1, inplace=True)
         yield my_df


-def get_regalloc() -> pd.DataFrame:
-    regalloc_df = pd.read_csv("results/regalloc.fast.csv")
+def get_regalloc(dir: Path = Path(".")) -> pd.DataFrame:
+    regalloc_df = pd.read_csv(f"{dir}/regalloc.fast.csv")
     regalloc_df = regalloc_df[regalloc_df["impl"].isin(OPERATOR_BY_TEST)]
     regalloc_df.replace(OPERATOR_BY_TEST, inplace=True)
     regalloc_df = regalloc_df[~regalloc_df["params"].str.contains("f16")]
@@ -244,8 +251,8 @@ def get_regalloc() -> pd.DataFrame:
     return regalloc_df


-def get_opt_pipeline() -> pd.DataFrame:
-    opt_pipeline_df = pd.read_csv("results/pipeline.csv")
+def get_opt_pipeline(dir: Path = Path(".")) -> pd.DataFrame:
+    opt_pipeline_df = pd.read_csv(f"{dir}/pipeline.csv")
     opt_pipeline_df = opt_pipeline_df.rename(
         columns={
             "FPU Occupancy [%]": "Occupancy",
diff --git a/plots-cgo2025-ae/max_util.py b/plots-cgo2025-ae/max_util.py
index 188706ea..c397ba50 100644
--- a/plots-cgo2025-ae/max_util.py
+++ b/plots-cgo2025-ae/max_util.py
@@ -1,9 +1,9 @@
-
 from typing import Sequence

 import pandas as pd

 from data import Impl, Operator
+
 def get_max_util(llr_kernels_df: pd.DataFrame, fpu_dfs: Sequence[pd.DataFrame]) -> str:
     llr_max_occupancy = llr_kernels_df["fpss_fpu_occupancy"].max()
     llr_max_throughput = llr_kernels_df["Rel Throughput"].max()
diff --git a/plots-cgo2025-ae/plot.py b/plots-cgo2025-ae/plot.py
index 2e7fcae4..4e3425a9 100644
--- a/plots-cgo2025-ae/plot.py
+++ b/plots-cgo2025-ae/plot.py
@@ -13,36 +13,46 @@ from plot_utils import savefig
 from opt_pipeline import get_opt_pipeline_table

+from pathlib import Path
+
+import os
+
+SCRIPT_DIR = Path(os.path.dirname(os.path.abspath(__file__)))
+RESULTS_DIR = SCRIPT_DIR / "results"
+

 def main():
-    kernels_df = get_kernels()
+    output_dir = SCRIPT_DIR / "output"
+    output_dir.mkdir(exist_ok=True)
+
+    kernels_df = get_kernels(RESULTS_DIR)
     pivoted_all_df = get_pivoted_all(kernels_df)
     pivoted_fpu_df = get_pivoted_fpu(pivoted_all_df)

     # Plot FPU utilization
     fpu_dfs = get_fpu(pivoted_fpu_df)
     fpu_fig = plot_fpu(fpu_dfs)
-    savefig(fpu_fig, "fpu.pdf")
+    savefig(fpu_fig, output_dir / "fpu.pdf")

     # Print the regalloc stats
-    regalloc_df = get_regalloc()
-    print_regalloc(regalloc_df, filename="regalloc.tex")
+    regalloc_df = get_regalloc(RESULTS_DIR)
+    print_regalloc(regalloc_df, filename=output_dir / "regalloc.tex")

     # Plot low-level representation
-    llr_kernels_df = get_low_level_representation()
+    llr_kernels_df = get_low_level_representation(RESULTS_DIR)
     llr_dfs = get_llr_dfs(llr_kernels_df)
     llr_fig = plot_llr(llr_dfs)
-    savefig(llr_fig, "low_level_representation.pdf")
+    savefig(llr_fig, output_dir / "low_level_representation.pdf")

     # Print opt pipeline table
-    opt_pipeline_df = get_opt_pipeline()
+    opt_pipeline_df = get_opt_pipeline(RESULTS_DIR)
     opt_pipeline_table = get_opt_pipeline_table(opt_pipeline_df)
-    with open("opt_pipeline.tex", "w") as f:
+    with open(output_dir / "opt_pipeline.tex", "w") as f:
         f.write(opt_pipeline_table)

     # Print max utilization stats
     max_util_macros = get_max_util(llr_kernels_df, fpu_dfs)
-    with open("max_util.tex", "w") as f:
+    with open(output_dir / "max_util.tex", "w") as f:
         f.write(max_util_macros)

From e1adf6da868b72dcc18c102f4a7e3353ecbca1b4 Mon Sep 17 00:00:00 2001
From: Chris Vasiladiotis
Date: Thu, 7 Nov 2024 10:46:44 +0000
Subject: [PATCH 09/20] Fix data fetching method

---
 plots-cgo2025-ae/regalloc.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/plots-cgo2025-ae/regalloc.py b/plots-cgo2025-ae/regalloc.py
index 06d2ecc8..f7b1366f 100644
--- a/plots-cgo2025-ae/regalloc.py
+++ b/plots-cgo2025-ae/regalloc.py
@@ -2,9 +2,11 @@
 import pandas as pd

 from data import get_regalloc as _get_regalloc
+from pathlib import Path


-def get_regalloc() -> pd.DataFrame:
-    regalloc_df = _get_regalloc()
+
+def get_regalloc(dir: Path = Path(".")) -> pd.DataFrame:
+    regalloc_df = _get_regalloc(dir)
     return regalloc_df
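With patches 08 and 09 (and patch 10 below for heatmap.py), inputs are resolved against a results/ directory next to the scripts (the symlink from patch 05) and outputs land in an output/ directory created beside them, so the entry point no longer depends on the caller's working directory. A minimal sketch of the intended invocation, assuming the repository is checked out at /src as hard-coded in plot.sh later in this series:

    # Sketch: run the plotting entry point from an arbitrary working directory.
    cd /tmp
    python3 /src/plots-cgo2025-ae/plot.py    # reads  <script dir>/results/*.csv
    ls /src/plots-cgo2025-ae/output/         # figures and tables are written here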
From fcb04e1f7f33e0570827b81683d4780557e3a13b Mon Sep 17 00:00:00 2001
From: Chris Vasiladiotis
Date: Thu, 7 Nov 2024 11:49:12 +0000
Subject: [PATCH 10/20] Allow relative input and output paths based on script
 location

---
 plots-cgo2025-ae/heatmap.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/plots-cgo2025-ae/heatmap.py b/plots-cgo2025-ae/heatmap.py
index 20a8a8d1..33d148dd 100644
--- a/plots-cgo2025-ae/heatmap.py
+++ b/plots-cgo2025-ae/heatmap.py
@@ -1,10 +1,16 @@
 #!/usr/bin/env python3
+from pathlib import Path
+
 import numpy as np
 import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt
 import argparse
+import os
+
+SCRIPT_DIR = Path(os.path.dirname(os.path.abspath(__file__)))
+RESULTS_DIR = SCRIPT_DIR / "results"


 def add_metrics(data: pd.DataFrame) -> pd.DataFrame:
@@ -126,15 +132,26 @@ def generate_heatmaps(data: pd.DataFrame):


 def main():
+    output_dir = SCRIPT_DIR / "output"
+    output_dir.mkdir(exist_ok=True)
+
     parser = argparse.ArgumentParser(description="Generate heatmaps from CSV data.")
     parser.add_argument("csv_file", help="Path to the CSV file")
     args = parser.parse_args()
-    data = pd.read_csv(args.csv_file)
+
+    csv_path = Path(args.csv_file)
+
+    if not csv_path.is_absolute():
+        csv_path = SCRIPT_DIR / csv_path
+
+    data = pd.read_csv(csv_path)
     data = add_metrics(data)
     # FIXME we are able to generate snitch_stream matmul only at the moment
     data = data.loc[(data["test"] == "matmul") & (data["impl"] == "linalg_xdsl")]
     for m, fig in generate_heatmaps(data):
-        fig.savefig(f"matmul_heatmap_M_{m}.pdf", format="pdf", bbox_inches="tight")
+        fig.savefig(
+            output_dir / f"matmul_heatmap_M_{m}.pdf", format="pdf", bbox_inches="tight"
+        )


 if __name__ == "__main__":

From cb813eba68f7a26f164a025bb57e0084d6d7fd64 Mon Sep 17 00:00:00 2001
From: Chris Vasiladiotis
Date: Thu, 7 Nov 2024 12:12:32 +0000
Subject: [PATCH 11/20] Delatexify

---
 plots-cgo2025-ae/heatmap.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plots-cgo2025-ae/heatmap.py b/plots-cgo2025-ae/heatmap.py
index 33d148dd..ab0426c3 100644
--- a/plots-cgo2025-ae/heatmap.py
+++ b/plots-cgo2025-ae/heatmap.py
@@ -123,7 +123,7 @@ def generate_heatmaps(data: pd.DataFrame):
             label.set_visible(False)

         cbar = ax.collections[0].colorbar
-        cbar.set_label("\% of FLOP/cycle Roofline", labelpad=2, fontsize=10)
+        cbar.set_label("% of FLOP/cycle Roofline", labelpad=2, fontsize=10)
         cbar.ax.tick_params(size=0)

         plt.tight_layout()

From b96c78737c88bfd4ea387966b52e0e6f001bce6f Mon Sep 17 00:00:00 2001
From: Chris Vasiladiotis
Date: Thu, 7 Nov 2024 12:15:39 +0000
Subject: [PATCH 12/20] Add script for all plot commands

---
 plots-cgo2025-ae/plot.sh | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 plots-cgo2025-ae/plot.sh

diff --git a/plots-cgo2025-ae/plot.sh b/plots-cgo2025-ae/plot.sh
new file mode 100644
index 00000000..8a17b282
--- /dev/null
+++ b/plots-cgo2025-ae/plot.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+python3 /src/plots-cgo2025-ae/plot.py
+python3 /src/plots-cgo2025-ae/heatmap.py results/kernels.all.csv
+

From c1312a4dba5c747e5b9877757affdc988fb697a2 Mon Sep 17 00:00:00 2001
From: Chris Vasiladiotis
Date: Thu, 7 Nov 2024 12:54:13 +0000
Subject: [PATCH 13/20] Make scripts executable

---
 plots-cgo2025-ae/plot.py | 0
 plots-cgo2025-ae/plot.sh | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 plots-cgo2025-ae/plot.py
 mode change 100644 => 100755 plots-cgo2025-ae/plot.sh

diff --git a/plots-cgo2025-ae/plot.py b/plots-cgo2025-ae/plot.py
old mode 100644
new mode 100755
diff --git a/plots-cgo2025-ae/plot.sh b/plots-cgo2025-ae/plot.sh
old mode 100644
new mode 100755

From 94ff97fa4c084cce20a76f57b253ec802e036e0f Mon Sep 17 00:00:00 2001
From: Chris Vasiladiotis
Date: Thu, 7 Nov 2024 13:42:02 +0000
Subject: [PATCH 14/20] Produce csv instead of tex files

---
 plots-cgo2025-ae/opt_pipeline.py | 65 +++++---------------------------
 plots-cgo2025-ae/plot.py         |  4 +-
 plots-cgo2025-ae/regalloc.py     | 21 ++---------
 3 files changed, 14 insertions(+), 76 deletions(-)

diff --git a/plots-cgo2025-ae/opt_pipeline.py b/plots-cgo2025-ae/opt_pipeline.py
index 53f41c67..4fc53e68 100644
--- a/plots-cgo2025-ae/opt_pipeline.py
+++ b/plots-cgo2025-ae/opt_pipeline.py
@@ -8,8 +8,8 @@
     'F Stores': 'Stores',
     'FMAdd': 'FMAdd',
     'FRep': 'FRep',
-    'Cycles': 'Cycles (\#)',
-    'Occupancy': 'Occupancy (\%)',
+    'Cycles': 'Cycles (#)',
+    'Occupancy': 'Occupancy (%)',
 }

 col_alignment = {
@@ -34,40 +34,11 @@ def get_opt_pipeline_table(opt_pipeline_df: pd.DataFrame) -> str:
     ]
     opt_pipeline_df = opt_pipeline_df[cols]
     del opt_pipeline_df["params"]
-    latex_table = "\\begin{table*}[h]\n"
+    csv_table = ""

-    latex_table += "\\sisetup{group-separator = {\ },group-minimum-digits=3}\n"
+    csv_table += "Optimizations, Allocated Registers (#), Assembly Operations (#) , Performance\\n"

-    latex_table += (
-        "\\setlength\\tabcolsep{0pt} % let LaTeX compute intercolumn whitespace\n"
-    )
-
-    latex_table += "\\caption{"
-    latex_table += (
-        "Our compilation pipeline leverages custom "
-        "\\ac{isa} extensions and knowledge of \\ac{fpu} design in order to achieve "
-        "over 90\\% \\ac{fpu} occupancy for the MatMul kernel, "
-        "operating on 1$\\times$200 and 200$\\times$5 64-bit inputs. "
-        "Incrementally adding each optimization minimizes and, "
-        "eventually eliminates, explicit memory operations, while reducing "
-        "execution time (cycles) and maximizing \\ac{fpu} utilization."
-    )
-    latex_table += "}\n\\label{tab:opt_pipeline}\n"
-
-    latex_table += (
-        "\\centering\n\\begin{tabular*}{\\textwidth}{@{\\extracolsep{\\fill}}"
-        + " ".join(f"{col_alignment[col]}" for col in opt_pipeline_df.columns)
-        + "}\n\\toprule\n"
-    )
-
-    latex_table += "\\textbf{Optimizations} & \\multicolumn{2}{r}{\\textbf{Allocated Registers (\\#)}} & \\multicolumn{4}{c}{\\textbf{Assembly Operations (\\#)}} & \\multicolumn{2}{c}{\\textbf{Performance}}\\\\\n"
-
-    latex_table += "\\cmidrule{2-3}\\cmidrule{4-7}\\cmidrule{8-9}\n"
-
-    latex_table += (
-        " & ".join(f"\\textbf{{{col_names[col]}}}" for col in opt_pipeline_df.columns)
-        + " \\\\\n\\midrule\n"
-    )
+    csv_table += " , ".join(f"{col_names[col]}" for col in opt_pipeline_df.columns)

     string_table = []

@@ -80,36 +51,18 @@ def get_opt_pipeline_table(opt_pipeline_df: pd.DataFrame) -> str:
     # change text style for first column
     for row in string_table:
-        row[0] = f"\\texttt{{{row[0]}}}"
-
     # replace text for baseline which should be at the first row and column
     string_table[0][0] = "Baseline (for MatMul)"

     # add max register count for fp registers
     for row in string_table:
-        row[1] = row[1] + "\\textcolor{lightgray}{/20}"
+        row[1] = row[1] + "/20"

     # add max register count for int registers
     for row in string_table:
-        row[2] = row[2] + "\\textcolor{lightgray}{/15}"
-
-    # replace text for baseline which should be at the first row and column
-    string_table[0][0] = "Baseline (for MatMul)"
-
-    # gray out baseline which should be the first line
-    for idx, val in enumerate(string_table[0]):
-        string_table[0][idx] = "\\color{gray} " + val
-
-    # highlight rightmost entry which should be the max FPU util achieved
-    string_table[-1][-1] = "\\textbf{" + string_table[-1][-1] + "}"
+        row[2] = row[2] + "/15"

     for row in string_table:
-        latex_table += " & ".join(val for val in row) + " \\\\\n"
-
-    latex_table += "\\bottomrule\n"
-
-    latex_table += "\\end{tabular*}\n"
-
-    latex_table += "\\end{table*}\n"
+        csv_table += " , ".join(val for val in row) + " \\n"

-    return latex_table
+    return csv_table
diff --git a/plots-cgo2025-ae/plot.py b/plots-cgo2025-ae/plot.py
index 4e3425a9..c7e5a9a9 100755
--- a/plots-cgo2025-ae/plot.py
+++ b/plots-cgo2025-ae/plot.py
@@ -36,7 +36,7 @@ def main():
     # Print the regalloc stats
     regalloc_df = get_regalloc(RESULTS_DIR)
-    print_regalloc(regalloc_df, filename=output_dir / "regalloc.tex")
+    print_regalloc(regalloc_df, filename=output_dir / "regalloc.csv")

     # Plot low-level representation
     llr_kernels_df = get_low_level_representation(RESULTS_DIR)
     llr_dfs = get_llr_dfs(llr_kernels_df)
@@ -47,7 +47,7 @@ def main():
     # Print opt pipeline table
     opt_pipeline_df = get_opt_pipeline(RESULTS_DIR)
     opt_pipeline_table = get_opt_pipeline_table(opt_pipeline_df)
-    with open(output_dir / "opt_pipeline.tex", "w") as f:
+    with open(output_dir / "opt_pipeline.csv", "w") as f:
         f.write(opt_pipeline_table)

     # Print max utilization stats
diff --git a/plots-cgo2025-ae/regalloc.py b/plots-cgo2025-ae/regalloc.py
index f7b1366f..3b2e5793 100644
--- a/plots-cgo2025-ae/regalloc.py
+++ b/plots-cgo2025-ae/regalloc.py
@@ -1,4 +1,3 @@
-import re
 import pandas as pd

 from data import get_regalloc as _get_regalloc
@@ -11,13 +10,8 @@ def get_regalloc(dir: Path = Path(".")) -> pd.DataFrame:
     return regalloc_df


-def color(color: str, text: str) -> str:
-    return r"\textcolor{" + color + "}{" + text + "}"
-
-
 def print_regalloc(regalloc_df: pd.DataFrame, *, filename: str | None = None):
     stream = None if filename is None else open(filename, "w")
-    colors = (color("lightgray", "/20"), color("lightgray", "/15"))

     # Sort the DataFrame
     regalloc_df = regalloc_df.sort_values(
@@ -36,7 +30,7 @@ def print_regalloc(regalloc_df: pd.DataFrame, *, filename: str | None = None):
         params = items[:5]
         regs = items[5:]

-        reg_cells = tuple(f"{reg}{col}" for reg, col in zip(regs, colors))
+        reg_cells = tuple(f"{reg}" for reg in regs)

         string_table.append([str(p) for p in params + reg_cells])

@@ -45,21 +39,12 @@ def print_regalloc(regalloc_df: pd.DataFrame, *, filename: str | None = None):
     for row in string_table:
         line = ""

-        # replace NxM where N and M are integers with N$\times$M in kernel names
-        pattern = r"(\d+)x(\d+)"
-        row[0] = re.sub(pattern, r"\1$\\times$\2", row[0])
-
         # add short row space to separate precision groups
         if current_precision is None:
             current_precision = row[1]
-        if current_precision != row[1]:
-            current_precision = row[1]
-            line = "\\addlinespace[0.5em]\n"
-
-        line += " & ".join(val for val in row)
-        print(line, end=" \\\\\n", file=stream)
+        line += " , ".join(val for val in row)
+        print(line, end="\\n", file=stream)

-    print(r"\bottomrule", file=stream)
     if stream is not None:
         stream.close()

From e4ee515ef9538d01feea128fa0d851e011e00eea Mon Sep 17 00:00:00 2001
From: Chris Vasiladiotis
Date: Thu, 7 Nov 2024 14:31:47 +0000
Subject: [PATCH 15/20] Fix csv output|

---
 plots-cgo2025-ae/data.py         |  2 +-
 plots-cgo2025-ae/max_util.py     | 27 +++++++++++++++++----------
 plots-cgo2025-ae/opt_pipeline.py |  8 +++++---
 plots-cgo2025-ae/plot.py         |  2 +-
 plots-cgo2025-ae/regalloc.py     |  7 ++++---
 5 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/plots-cgo2025-ae/data.py b/plots-cgo2025-ae/data.py
index 26abe7cf..a41f0b9e 100644
--- a/plots-cgo2025-ae/data.py
+++ b/plots-cgo2025-ae/data.py
@@ -236,7 +236,7 @@ def get_regalloc(dir: Path = Path(".")) -> pd.DataFrame:
     params = tuple(
         {p: v for p, v in zip("MNK", param[:-1])} for param in param_components
     )
-    params_df = pd.DataFrame(params).fillna("{--}")
+    params_df = pd.DataFrame(params).fillna("-")
     regalloc_df = pd.concat((regalloc_df, pd.DataFrame(bitwidths), params_df), axis=1)

     del regalloc_df["params"]
diff --git a/plots-cgo2025-ae/max_util.py b/plots-cgo2025-ae/max_util.py
index c397ba50..1998fbf7 100644
--- a/plots-cgo2025-ae/max_util.py
+++ b/plots-cgo2025-ae/max_util.py
@@ -7,17 +7,24 @@ def get_max_util(llr_kernels_df: pd.DataFrame, fpu_dfs: Sequence[pd.DataFrame]) -> str:
     llr_max_occupancy = llr_kernels_df["fpss_fpu_occupancy"].max()
     llr_max_throughput = llr_kernels_df["Rel Throughput"].max()
-    llr_matmult_throughput = llr_kernels_df["Throughput"][Operator.MATMUL_TRANSB, :].max()
-    llr_matmult_max_occupancy = llr_kernels_df["fpss_fpu_occupancy"][Operator.MATMUL_TRANSB, :].max()
+    llr_matmult_throughput = llr_kernels_df["Throughput"][
+        Operator.MATMUL_TRANSB, :
+    ].max()
+    llr_matmult_max_occupancy = llr_kernels_df["fpss_fpu_occupancy"][
+        Operator.MATMUL_TRANSB, :
+    ].max()
     proto_comp_max_occupancy = max(fpu_df[Impl.OURS].max() for fpu_df in fpu_dfs)
-    proto_comp_min_max_occupancy = min_max = min(_df[Impl.OURS].max() for _df in fpu_dfs)
+    proto_comp_min_max_occupancy = min_max = min(
+        _df[Impl.OURS].max() for _df in fpu_dfs
+    )
     clang_max_occupancy = max(fpu_df[Impl.CLANG].max() for fpu_df in fpu_dfs)
+
     return f"""\
-\\newdelimitedcommand{{maxutilprotocomp}}{{{proto_comp_max_occupancy*100:.0f}\\%}}
-\\newdelimitedcommand{{minmaxutilprotocomp}}{{{proto_comp_min_max_occupancy*100:.0f}\\%}}
-\\newdelimitedcommand{{maxutilclang}}{{{clang_max_occupancy*100:.0f}\\%}}
-\\newdelimitedcommand{{maxutilllr}}{{{llr_max_occupancy*100:.0f}\\%}}
-\\newdelimitedcommand{{maxutilllrmatmult}}{{{llr_matmult_max_occupancy*100:.0f}\\%}}
-\\newdelimitedcommand{{maxrelthroughputllr}}{{{llr_max_throughput*100:.0f}\\%}}
-\\newdelimitedcommand{{maxabsthroughputllrmatmult}}{{{llr_matmult_throughput:.2f}}}
+Section 4.2 Maximum FPU utilization for low-level representations, {llr_max_occupancy*100:.0f}%
+Section 4.2 Maximum FPU utilization for low-level representation MatMulT, {llr_matmult_max_occupancy*100:.0f}%
+Section 4.2 Percent of maximum theoretical throughput achieved for low-level representations, {llr_max_throughput*100:.0f}%
+Section 4.2 Maximum throughput achieved for low-level representation MatMulT, {llr_matmult_throughput:.2f}
+Section 4.4 Maximum FPU utilization for prototype micro-kernel compiler, {proto_comp_max_occupancy*100:.0f}%
+Section 4.4 Minimum FPU utilization for prototype micro-kernel compiler, {proto_comp_min_max_occupancy*100:.0f}%
+Section 4.4 Maximum FPU utilization for Clang, {clang_max_occupancy*100:.0f}%
 """
diff --git a/plots-cgo2025-ae/opt_pipeline.py b/plots-cgo2025-ae/opt_pipeline.py
index 4fc53e68..67352d50 100644
--- a/plots-cgo2025-ae/opt_pipeline.py
+++ b/plots-cgo2025-ae/opt_pipeline.py
@@ -36,9 +36,11 @@ def get_opt_pipeline_table(opt_pipeline_df: pd.DataFrame) -> str:
     del opt_pipeline_df["params"]
     csv_table = ""

-    csv_table += "Optimizations, Allocated Registers (#), Assembly Operations (#) , Performance\\n"
+    csv_table += "Optimizations, Allocated Registers (#), , Assembly Operations (#), , , , Performance\n"

-    csv_table += " , ".join(f"{col_names[col]}" for col in opt_pipeline_df.columns)
+    csv_table += ", ".join(f"{col_names[col]}" for col in opt_pipeline_df.columns)
+
+    csv_table += "\n"

     string_table = []
diff --git a/plots-cgo2025-ae/plot.py b/plots-cgo2025-ae/plot.py
index c7e5a9a9..feb2c1f6 100755
--- a/plots-cgo2025-ae/plot.py
+++ b/plots-cgo2025-ae/plot.py
@@ -52,7 +52,7 @@ def main():
     # Print max utilization stats
     max_util_macros = get_max_util(llr_kernels_df, fpu_dfs)
-    with open(output_dir / "max_util.tex", "w") as f:
+    with open(output_dir / "max_util.csv", "w") as f:
         f.write(max_util_macros)
diff --git a/plots-cgo2025-ae/regalloc.py b/plots-cgo2025-ae/regalloc.py
index 3b2e5793..2eaa93e3 100644
--- a/plots-cgo2025-ae/regalloc.py
+++ b/plots-cgo2025-ae/regalloc.py
@@ -29,8 +29,9 @@ def print_regalloc(regalloc_df: pd.DataFrame, *, filename: str | None = None):
         items = tuple(row[1])
         params = items[:5]
         regs = items[5:]
+        max_regs = ("/20", "/15")

-        reg_cells = tuple(f"{reg}" for reg in regs)
+        reg_cells = tuple(f"{reg}{max_reg}" for reg, max_reg in zip(regs, max_regs))

         string_table.append([str(p) for p in params + reg_cells])

@@ -43,8 +44,8 @@ def print_regalloc(regalloc_df: pd.DataFrame, *, filename: str | None = None):
         if current_precision is None:
             current_precision = row[1]

-        line += " , ".join(val for val in row)
-        print(line, end="\\n", file=stream)
+        line += ", ".join(val for val in row)
+        print(line, file=stream)

     if stream is not None:
         stream.close()

From b433e2597f9d632f54a88c607886f1811b2063b0 Mon Sep 17 00:00:00 2001
From: Chris Vasiladiotis
Date: Thu, 7 Nov 2024 14:35:48 +0000
Subject: [PATCH 16/20] Add CSV column titles

---
 plots-cgo2025-ae/regalloc.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/plots-cgo2025-ae/regalloc.py b/plots-cgo2025-ae/regalloc.py
index 2eaa93e3..54b3c8a7 100644
--- a/plots-cgo2025-ae/regalloc.py
+++ b/plots-cgo2025-ae/regalloc.py
@@ -23,7 +23,17 @@ def print_regalloc(regalloc_df: pd.DataFrame, *, filename: str | None = None):
         ascending=[False, True, True],
     )

-    string_table = []
+    string_table = [
+        [
+            "Kernel",
+            "Precision bits",
+            "N",
+            "M",
+            "K",
+            "Allocated FP registers",
+            "Allocated Integer registers",
+        ]
+    ]

     for row in regalloc_df.iterrows():
         items = tuple(row[1])

From 058358b23db625da810e455bf872401f68064f31 Mon Sep 17 00:00:00 2001
From: Chris Vasiladiotis
Date: Thu, 7 Nov 2024 14:41:39 +0000
Subject: [PATCH 17/20] Make scripts executable

---
 plots-cgo2025-ae/heatmap.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 plots-cgo2025-ae/heatmap.py

diff --git a/plots-cgo2025-ae/heatmap.py b/plots-cgo2025-ae/heatmap.py
old mode 100644
new mode 100755

From 83e464cc87f3d1a0f7fab5a644957bd24fc3e852 Mon Sep 17 00:00:00 2001
From: Chris Vasiladiotis
Date: Thu, 7 Nov 2024 15:06:16 +0000
Subject: [PATCH 18/20] Fix kernels csv source

---
 plots-cgo2025-ae/data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plots-cgo2025-ae/data.py b/plots-cgo2025-ae/data.py
index a41f0b9e..88536c13 100644
--- a/plots-cgo2025-ae/data.py
+++ b/plots-cgo2025-ae/data.py
@@ -98,7 +98,7 @@ def _get_kernels(filename: str) -> pd.DataFrame:


 def get_kernels(dir: Path = Path("."), cleaned: bool = True) -> pd.DataFrame:
-    df = _get_kernels(f"{dir}/kernels.csv")
+    df = _get_kernels(f"{dir}/kernels.all.csv")
     # Drop unknown operators
     df = df[df.index.get_level_values(0).isin(tuple(Operator))]
     if cleaned:
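After patches 14 through 16, the tables are emitted as plain CSV instead of LaTeX fragments. A quick way to eyeball the regenerated files is sketched below, using the file names plot.py writes at this point in the series (patch 20 below renames them to table2.csv and table3.csv and drops max_util.csv); the /src prefix is assumed from plot.sh:

    # Sketch: peek at the regenerated CSV tables after running plot.py.
    column -s, -t /src/plots-cgo2025-ae/output/regalloc.csv | head
    column -s, -t /src/plots-cgo2025-ae/output/opt_pipeline.csv
    cat /src/plots-cgo2025-ae/output/max_util.csv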
From 11f09f4d95d54a4e88b47c25b0d82e065ca39f09 Mon Sep 17 00:00:00 2001
From: Chris Vasiladiotis
Date: Thu, 7 Nov 2024 15:10:08 +0000
Subject: [PATCH 19/20] Add artifact Make target

---
 Makefile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index d4e88280..1264800d 100644
--- a/Makefile
+++ b/Makefile
@@ -2,10 +2,12 @@ JOBS ?= all

 THIS := $(dir $(realpath $(lastword $(MAKEFILE_LIST))))

-.PHONY: default fast all clean
+.PHONY: default fast all clean artifact

 default: fast

+artifact: fast all low_level_representation pipeline
+
 fast: maybe_update_xdsl_commit
 	snakemake --cores $(JOBS) --rerun-incomplete fast

From 16fe0a7660b2c51436e5a2a8c61138588685f0e4 Mon Sep 17 00:00:00 2001
From: Chris Vasiladiotis
Date: Thu, 7 Nov 2024 16:02:15 +0000
Subject: [PATCH 20/20] Name outputs as referenced in text

---
 plots-cgo2025-ae/heatmap.py |  9 ++++++---
 plots-cgo2025-ae/plot.py    | 14 ++++----------
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/plots-cgo2025-ae/heatmap.py b/plots-cgo2025-ae/heatmap.py
index ab0426c3..d4858a9c 100755
--- a/plots-cgo2025-ae/heatmap.py
+++ b/plots-cgo2025-ae/heatmap.py
@@ -149,9 +149,12 @@ def main():
     # FIXME we are able to generate snitch_stream matmul only at the moment
     data = data.loc[(data["test"] == "matmul") & (data["impl"] == "linalg_xdsl")]
     for m, fig in generate_heatmaps(data):
-        fig.savefig(
-            output_dir / f"matmul_heatmap_M_{m}.pdf", format="pdf", bbox_inches="tight"
-        )
+        if m == 1:
+            fig.savefig(
+                output_dir / f"figure9_matmul_heatmap_M_{m}.pdf",
+                format="pdf",
+                bbox_inches="tight",
+            )


 if __name__ == "__main__":
diff --git a/plots-cgo2025-ae/plot.py b/plots-cgo2025-ae/plot.py
index feb2c1f6..5cc2435d 100755
--- a/plots-cgo2025-ae/plot.py
+++ b/plots-cgo2025-ae/plot.py
@@ -8,7 +8,6 @@
 )
 from fpu import get_fpu, plot_fpu
 from low_level_representation import get_llr_dfs, plot_llr
-from max_util import get_max_util
 from regalloc import get_regalloc, print_regalloc
 from plot_utils import savefig
 from opt_pipeline import get_opt_pipeline_table
@@ -32,29 +31,24 @@ def main():
     # Plot FPU utilization
     fpu_dfs = get_fpu(pivoted_fpu_df)
     fpu_fig = plot_fpu(fpu_dfs)
-    savefig(fpu_fig, output_dir / "fpu.pdf")
+    savefig(fpu_fig, output_dir / "figure10.pdf")

     # Print the regalloc stats
     regalloc_df = get_regalloc(RESULTS_DIR)
-    print_regalloc(regalloc_df, filename=output_dir / "regalloc.csv")
+    print_regalloc(regalloc_df, filename=output_dir / "table2.csv")

     # Plot low-level representation
     llr_kernels_df = get_low_level_representation(RESULTS_DIR)
     llr_dfs = get_llr_dfs(llr_kernels_df)
     llr_fig = plot_llr(llr_dfs)
-    savefig(llr_fig, output_dir / "low_level_representation.pdf")
+    savefig(llr_fig, output_dir / "figure8.pdf")

     # Print opt pipeline table
     opt_pipeline_df = get_opt_pipeline(RESULTS_DIR)
     opt_pipeline_table = get_opt_pipeline_table(opt_pipeline_df)
-    with open(output_dir / "opt_pipeline.csv", "w") as f:
+    with open(output_dir / "table3.csv", "w") as f:
         f.write(opt_pipeline_table)

-    # Print max utilization stats
-    max_util_macros = get_max_util(llr_kernels_df, fpu_dfs)
-    with open(output_dir / "max_util.csv", "w") as f:
-        f.write(max_util_macros)
-

 if __name__ == "__main__":
     main()
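With the full series applied, the end-to-end artifact flow is roughly the following (a sketch based on the artifact target from patch 19, plot.sh from patch 12, and the output names from patch 20; the /src prefix is the path hard-coded in plot.sh and may differ outside the artifact container):

    # Sketch: regenerate the measurement data, then every figure and table.
    make artifact                    # fast, all, low_level_representation, pipeline
    /src/plots-cgo2025-ae/plot.sh    # plot.py + heatmap.py results/kernels.all.csv
    ls /src/plots-cgo2025-ae/output/
    # expected: figure8.pdf  figure9_matmul_heatmap_M_1.pdf  figure10.pdf  table2.csv  table3.csv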