diff --git a/frontend/app/components/main_page/BUILD b/frontend/app/components/main_page/BUILD
index 2763afba..38de3ba0 100644
--- a/frontend/app/components/main_page/BUILD
+++ b/frontend/app/components/main_page/BUILD
@@ -38,6 +38,7 @@ xprof_ng_module(
"@org_xprof//frontend/app/components/op_profile",
"@org_xprof//frontend/app/components/overview_page",
"@org_xprof//frontend/app/components/pod_viewer",
+ "@org_xprof//frontend/app/components/roofline_model",
"@org_xprof//frontend/app/components/sidenav",
"@org_xprof//frontend/app/components/tf_data_bottleneck_analysis",
"@org_xprof//frontend/app/components/trace_viewer",
diff --git a/frontend/app/components/main_page/main_page_module.ts b/frontend/app/components/main_page/main_page_module.ts
index d1d20eb5..9ccc094a 100644
--- a/frontend/app/components/main_page/main_page_module.ts
+++ b/frontend/app/components/main_page/main_page_module.ts
@@ -29,6 +29,8 @@ import {OverviewPage} from 'org_xprof/frontend/app/components/overview_page/over
import {OverviewPageModule} from 'org_xprof/frontend/app/components/overview_page/overview_page_module';
import {PodViewer} from 'org_xprof/frontend/app/components/pod_viewer/pod_viewer';
import {PodViewerModule} from 'org_xprof/frontend/app/components/pod_viewer/pod_viewer_module';
+import {RooflineModel} from 'org_xprof/frontend/app/components/roofline_model/roofline_model';
+import {RooflineModelModule} from 'org_xprof/frontend/app/components/roofline_model/roofline_model_module';
import {SideNavModule} from 'org_xprof/frontend/app/components/sidenav/sidenav_module';
import {TfDataBottleneckAnalysis} from 'org_xprof/frontend/app/components/tf_data_bottleneck_analysis/tf_data_bottleneck_analysis';
import {TfDataBottleneckAnalysisModule} from 'org_xprof/frontend/app/components/tf_data_bottleneck_analysis/tf_data_bottleneck_analysis_module';
@@ -69,6 +71,8 @@ export const routes: Routes = [
{path: 'inference_profile', component: InferenceProfile},
{path: 'hlo_stats', component: HloStats},
{path: 'hlo_stats^', component: HloStats},
+ {path: 'roofline_model', component: RooflineModel},
+ {path: 'roofline_model^', component: RooflineModel},
{path: '**', component: EmptyPage},
];
@@ -96,6 +100,7 @@ export const routes: Routes = [
FrameworkOpStatsAdapterModule,
DcnCollectiveStatsModule,
HloStatsModule,
+ RooflineModelModule,
InferenceProfileModule,
RouterModule.forRoot(routes),
],
diff --git a/frontend/app/components/roofline_model/BUILD b/frontend/app/components/roofline_model/BUILD
new file mode 100644
index 00000000..1e8f0f90
--- /dev/null
+++ b/frontend/app/components/roofline_model/BUILD
@@ -0,0 +1,40 @@
+load("@io_bazel_rules_sass//:defs.bzl", "sass_binary")
+load("//defs:defs.bzl", "xprof_ng_module")
+
+package(default_visibility = ["//frontend:internal"])
+
+xprof_ng_module(
+ name = "roofline_model",
+ srcs = [
+ "roofline_model.ts",
+ "roofline_model_module.ts",
+ ],
+ assets = [
+ ":roofline_model_css",
+ "roofline_model.ng.html",
+ ],
+ deps = [
+ "@npm//@angular/core",
+ "@npm//@angular/router",
+ "@npm//@ngrx/store",
+ "@npm//@types/google.visualization",
+ "@npm//rxjs",
+ "@org_xprof//frontend/app/common/constants:roofline_model_constants",
+ "@org_xprof//frontend/app/common/interfaces",
+ "@org_xprof//frontend/app/common/utils",
+ "@org_xprof//frontend/app/components/chart/table",
+ "@org_xprof//frontend/app/components/controls/category_filter",
+ "@org_xprof//frontend/app/components/controls/string_filter",
+ "@org_xprof//frontend/app/components/roofline_model/operation_level_analysis",
+ "@org_xprof//frontend/app/components/roofline_model/program_level_analysis",
+ "@org_xprof//frontend/app/services/data_service",
+ "@org_xprof//frontend/app/store",
+ ],
+)
+
+sass_binary(
+ name = "roofline_model_css",
+ src = "roofline_model.scss",
+ # stack = True,
+ sourcemap = False,
+)
diff --git a/frontend/app/components/roofline_model/roofline_model.ng.html b/frontend/app/components/roofline_model/roofline_model.ng.html
new file mode 100644
index 00000000..dabd72b8
--- /dev/null
+++ b/frontend/app/components/roofline_model/roofline_model.ng.html
@@ -0,0 +1,56 @@
+
+
+
+
+
{{info.label}}: {{info.value}} {{info.unit}} {{info.context}}
+
+
+
+
+
+
+
Section1: Program-Level Analysis
+
+
+
+ Note:
+ (1) This section provides program-level analysis.
+ (2) A tooltip with extra information will show up if you mouse over a point in the roofline chart.
+ (3) You can choose whether to include infeed and outfeed ops for the analysis.
+ (4) "Total" aggregates all operations throughout the entire profiling session. It includes incomplete steps.
+ (5) "Total (HW)" is based on hardware performance counters, while the others are based on XLA's cost analysis.
+ It is always calculated including infeed and outfeed ops regardless of the option.
+ The gap between "Total" and "Total (HW)" is due to hardware limitation (e.g., padding).
+ (6) "Average" shows the average step information by aggregating the operations in the complete steps only.
+
+
+
+
+
+
+
+
Section2: Operation-Level Analysis
+
+
+
+ Note:
+ (1) This section provides operation-level analysis.
+ (2) A tooltip with extra information will show up if you mouse over a point in the roofline chart.
+ (3) To avoid sluggishness, only the 1000 most time-consuming operations are shown.
+ (4) You can choose whether to include infeed and outfeed ops for the analysis.
+ (5) You can filter data by HLO category, bottleneck resource or HLO name.
+ (6) "IDLE" represents the portion of the total execution time on device that is idle.
+ (7) Ops with zero FLOP (e.g., data formatting ops like reshape, IDLE, etc.) do not show up in the roofline chart.
+
+
+
+
+
+
diff --git a/frontend/app/components/roofline_model/roofline_model.scss b/frontend/app/components/roofline_model/roofline_model.scss
new file mode 100644
index 00000000..da57955c
--- /dev/null
+++ b/frontend/app/components/roofline_model/roofline_model.scss
@@ -0,0 +1,39 @@
+.section-container {
+ margin: 20px 20px 0px;
+}
+
+.block-content {
+ padding: 5px;
+}
+
+.row {
+ display: flex;
+}
+
+.flex-space {
+ flex: 1;
+}
+
+.description {
+ font-size: 14px;
+}
+
+.tableHeaderCell {
+ word-wrap: break-word;
+ background-color: azure; //!to hide the scrolled-up text.
+}
+
+.tableTableCell {
+ word-break: break-all;
+}
+
+.opColumnClass {
+ max-height: 200px;
+ overflow-y: auto;
+}
+
+.errorMessage {
+ border: 2px solid;
+ background-color: #ffcccb;
+ color: red;
+}
diff --git a/frontend/app/components/roofline_model/roofline_model.ts b/frontend/app/components/roofline_model/roofline_model.ts
new file mode 100644
index 00000000..8dd2148f
--- /dev/null
+++ b/frontend/app/components/roofline_model/roofline_model.ts
@@ -0,0 +1,1101 @@
+import {Component, OnDestroy} from '@angular/core';
+import {ActivatedRoute} from '@angular/router';
+import {Store} from '@ngrx/store';
+import {DEVICE_INFO, NUMERIC_DATA_FORMAT, PIE_CHART_PALETTE, ROOFLINE_STYLES, SCATTER_CHART_AXIS, SCATTER_CHART_OPTIONS,} from 'org_xprof/frontend/app/common/constants/roofline_model_constants';
+import {NavigationEvent} from 'org_xprof/frontend/app/common/interfaces/navigation_event';
+import {RooflineModelData} from 'org_xprof/frontend/app/common/interfaces/roofline_model';
+import {setLoadingState} from 'org_xprof/frontend/app/common/utils/utils';
+import {DataService} from 'org_xprof/frontend/app/services/data_service/data_service';
+import {setCurrentToolStateAction} from 'org_xprof/frontend/app/store/actions';
+import {ReplaySubject} from 'rxjs';
+import {takeUntil} from 'rxjs/operators';
+
+/** One entry of the device information list rendered at the top of the page. */
+interface DeviceInfoData {
+ // Key of the DataTable table property the value is read from.
+ id: string;
+ // Text rendered in front of the value.
+ label: string;
+ // When 'number', the raw table property is converted with Number().
+ type?: string;
+ value?: string | number;
+ // Rendered right after the value.
+ unit?: string;
+ // Free-form explanation rendered after the unit.
+ context?: string;
+ // Set to false for entries that do not apply to the profiled device.
+ // NOTE(review): presumably read by the template to hide the entry - confirm.
+ display?: boolean;
+}
+/**
+ * Device capability flags derived from the raw DataTable's table properties.
+ * They decide which device info entries and which roofline series are used.
+ */
+declare interface DeviceIndicators {
+ hasMergedVmem: boolean;
+ hasCmem: boolean;
+ hasMegacore: boolean;
+ // True when the 'device_type' table property starts with 'Nvidia GPU'.
+ isGpu: boolean;
+}
+/** Column indexes of a DataTable, as returned by `getColumnIndex`. */
+type ColumnIdxArr = number[];
+
+/** Describes one labelled line of a scatter point tooltip. */
+interface TooltipRow {
+ // Column id of the DataTable cell this line refers to.
+ id: string;
+ // Human readable caption of the line.
+ label: string;
+ // Optional converter from the raw cell value to display text.
+ // NOTE(review): presumably the raw value is shown when omitted - confirm.
+ operation?: (val: string | number) => string;
+}
+
+const NVIDIA_GPU_TYPE_PREFIX = 'Nvidia GPU';
+
+/** A roofline model component. */
+@Component({
+ standalone: false,
+ selector: 'roofline-model',
+ templateUrl: './roofline_model.ng.html',
+ styleUrls: ['./roofline_model.scss'],
+})
+export class RooflineModel implements OnDestroy {
+ readonly tool = 'roofline_model';
+
+ /** Handles on-destroy Subject, used to unsubscribe. */
+ private readonly destroyed = new ReplaySubject(1);
+
+ currentRun = '';
+ // Device Information section data
+ deviceInfoArray: DeviceInfoData[] = [];
+ // Some critical indicators
+ deviceIndicators: DeviceIndicators = {
+ hasMergedVmem: false,
+ hasCmem: false,
+ hasMegacore: false,
+ isGpu: false,
+ };
+
+ // dataTableRaw from the raw roofline model data
+ // DataTable data format makes a lot data manipulation easier
+ dataTableRaw: google.visualization.DataTable | null = null;
+
+ /** Program level section variables */
+ // DataTable data for underlying table chart filtered on category for program
+ dataTableProgram: google.visualization.DataTable | null = null;
+ // visible columns for the table chart view, if empty all columns are shown
+ columnsIdxProgram: ColumnIdxArr = [];
+ // preprocessed data for underlying roofline scatter chart
+ scatterDataProgram: google.visualization.DataTable | null = null;
+ readonly scatterChartOptionsProgram = {
+ ...SCATTER_CHART_OPTIONS,
+ series: [],
+ } as google.visualization.ScatterChartOptions;
+ readonly programLevelAgg = ['Total', 'Total (HW)', 'Average', 'Step'];
+
+ /** Operation level section variables */
+ dataTableOp?: google.visualization.DataTable | null = null;
+ columnsIdxOp: ColumnIdxArr = [];
+ scatterDataOp?: google.visualization.DataTable | null = null;
+ readonly scatterChartOptionsOp = {
+ ...SCATTER_CHART_OPTIONS,
+ series: [],
+ } as google.visualization.ScatterChartOptions;
+ // Prepopulated op name from url
+ selectedOpName = '';
+
+ constructor(
+ route: ActivatedRoute,
+ private readonly dataService: DataService,
+ private readonly store: Store<{}>,
+ ) {
+ route.params.pipe(takeUntil(this.destroyed)).subscribe((params) => {
+ this.update(params as NavigationEvent);
+ });
+ this.store.dispatch(setCurrentToolStateAction({currentTool: this.tool}));
+ }
+
+ /** Reads the op name to preselect from the `roofline_op_name` URL search param. */
+ parseUrlParams() {
+   const opNameFromUrl =
+     this.dataService.searchParams?.get('roofline_op_name');
+   this.selectedOpName = opNameFromUrl || '';
+ }
+
+ /**
+  * Loads the tool data for the navigated run / tag / host and, once it
+  * arrives, parses it and picks up the URL parameters.
+  */
+ update(event: NavigationEvent) {
+   setLoadingState(true, this.store, 'Loading roofline model data');
+
+   this.currentRun = event.run || '';
+   const toolData$ = this.dataService.getData(
+     this.currentRun,
+     event.tag || 'roofline_model',
+     event.host || '',
+   );
+   toolData$.pipe(takeUntil(this.destroyed)).subscribe((data) => {
+     setLoadingState(false, this.store);
+     this.parseData(data as RooflineModelData[]);
+     this.parseUrlParams();
+   });
+ }
+
+ /**
+  * Builds the raw DataTable from the first element of the response and
+  * derives everything both sections need from it. Does nothing when the
+  * response is not a non-empty array.
+  */
+ parseData(data?: RooflineModelData[]) {
+   if (!Array.isArray(data) || data.length === 0) {
+     return;
+   }
+   const rawTable = new google.visualization.DataTable(data[0]);
+   this.dataTableRaw = rawTable;
+
+   this.parseDeviceInfoData(rawTable);
+   this.parseBaseOpAndProgramTableData();
+
+   // Section 1: program level.
+   this.setColumnsIdxProgram();
+   this.processScatterDataProgram();
+
+   // Section 2: operation level.
+   this.setColumnsIdxOp();
+   this.processScatterDataOp();
+ }
+
+ /**
+  * Parses the device information from the table properties of the raw
+  * dataset: refreshes `deviceIndicators` and rebuilds `deviceInfoArray` from
+  * the DEVICE_INFO templates.
+  *
+  * @param dataTableRaw table whose table properties carry the device info.
+  */
+ parseDeviceInfoData(dataTableRaw: google.visualization.DataTable) {
+   // A missing device type is treated as "not a GPU" instead of throwing.
+   const deviceType = String(dataTableRaw.getTableProperty('device_type') ?? '');
+   this.deviceIndicators = {
+     // Each flag is only off when the property is explicitly '0'.
+     hasMergedVmem: dataTableRaw.getTableProperty('has_merged_vmem') !== '0',
+     hasCmem: dataTableRaw.getTableProperty('has_cmem') !== '0',
+     hasMegacore: dataTableRaw.getTableProperty('megacore') !== '0',
+     isGpu: deviceType.startsWith(NVIDIA_GPU_TYPE_PREFIX),
+   };
+   const {isGpu, hasMergedVmem, hasCmem, hasMegacore} = this.deviceIndicators;
+
+   this.deviceInfoArray = DEVICE_INFO.map((cur: DeviceInfoData) => {
+     // Copy the template so switching between GPU and TPU runs never
+     // mutates the shared constant.
+     const curInfo: DeviceInfoData = {...cur};
+     if (isGpu) {
+       if (cur.id === 'peak_flop_rate') {
+         curInfo.label = 'Peak FLOP Rate per GPU';
+       } else if (cur.id === 'peak_hbm_bw') {
+         curInfo.label = 'Peak HBM Bandwidth per GPU';
+       } else if (cur.id.startsWith('peak_cmem')) {
+         curInfo.display = false;
+       } else if (cur.id === 'megacore') {
+         curInfo.display = false;
+       } else if (cur.id === 'peak_vmem_read_bw') {
+         // TODO(b/374835204): Better refactor proto for GPU roofline
+         // model and refine related code. including ids like this
+         // peak_vmem_read_bw, and peak_vmem_write_bw, megacore, etc.
+         curInfo.label = 'Peak L2 cache Bandwidth per GPU';
+         curInfo.display = false;
+       } else if (cur.id === 'peak_vmem_write_bw') {
+         curInfo.label = 'Peak Shared Memory / L1 Cache Bandwidth per GPU';
+       }
+     } else if (cur.id.startsWith('peak_vmem')) {
+       if (!hasMergedVmem) {
+         curInfo.display = false;
+       }
+     } else if (cur.id.startsWith('peak_cmem')) {
+       if (!hasCmem) {
+         curInfo.display = false;
+       }
+     } else if (cur.id === 'megacore') {
+       // Start from '' so an entry without context does not render "undefined".
+       curInfo.context =
+         (curInfo.context ?? '') +
+         '(if yes, the analysis assumes Megacore where an HLO runs on both TensorCores utilizing the full chip\'s resources so that the rooflines are twice higher)';
+       curInfo.value = hasMegacore ? 'Yes' : 'No';
+     }
+     // Read from the table passed in, not from component state.
+     const value = dataTableRaw.getTableProperty(cur.id);
+     return {
+       // Numeric entries become numbers because some ridge points are used
+       // as axis values in the chart.
+       value: cur.type === 'number' ? Number(value) : value,
+       // Spread last so a preprocessed value (e.g. megacore Yes/No) wins.
+       ...curInfo,
+     };
+   });
+ }
+
+ /**
+  * Splits the raw DataTable into the base tables of the two sections:
+  * program rows (category == 'Program') and operation rows (step == 'Total').
+  */
+ parseBaseOpAndProgramTableData() {
+   const rawTable = this.dataTableRaw;
+   if (!rawTable) {
+     return;
+   }
+   // Copies the rows whose `columnId` cell equals `value` into a new table.
+   const extractRows = (columnId: string, value: string) => {
+     const view = new google.visualization.DataView(rawTable);
+     const rowFilter = {column: rawTable.getColumnIndex(columnId), value};
+     view.setRows(rawTable.getFilteredRows([rowFilter]));
+     return view.toDataTable();
+   };
+
+   this.dataTableProgram = extractRows('category', 'Program');
+   this.formatTableData(this.dataTableProgram);
+
+   // TODO(b/359276801) Enable injecting Graph Viewer crosslink after
+   // dispatching host list to global store, so we can infer module name from
+   // program_id given the module list (aka host list in graph viewer)
+   this.dataTableOp = extractRows('step', 'Total');
+   this.formatTableData(this.dataTableOp);
+ }
+
+ /**
+  * Resolves the indexes of the columns visible in a table view: the given
+  * base columns, the CMEM columns when the device has CMEM, and the
+  * efficiency / utilization columns.
+  */
+ getColumnIdx(baseColumnsIds: string[]) {
+   const visibleColumnIds = [...baseColumnsIds];
+   if (this.deviceIndicators.hasCmem) {
+     visibleColumnIds.push(
+       'measured_memory_bw',
+       'cmem_read_bw',
+       'cmem_write_bw',
+     );
+   }
+   visibleColumnIds.push(
+     'roofline_efficiency',
+     'compute_efficiency',
+     'max_mem_bw_utilization',
+   );
+
+   const indexes: ColumnIdxArr = visibleColumnIds.map(
+     (columnId: string) => this.dataTableRaw!.getColumnIndex(columnId),
+   );
+   return indexes;
+ }
+
+ /** Selects the columns shown in the program level table. */
+ setColumnsIdxProgram() {
+   this.columnsIdxProgram = this.getColumnIdx([
+     'step',
+     'total_time_per_core',
+     'measured_flop_rate',
+     'bound_by',
+     'hbm_bw',
+   ]);
+ }
+
+ /** Selects the columns shown in the operation level table. */
+ setColumnsIdxOp() {
+   this.columnsIdxOp = this.getColumnIdx([
+     'step',
+     'rank',
+     'hlo_module_id',
+     'category',
+     'operation',
+     'occurrences',
+     'total_time',
+     'measured_flop_rate',
+     'model_flop_rate',
+     'bound_by',
+     'hbm_bw',
+   ]);
+ }
+
+ /**
+  * Applies the number formats configured in NUMERIC_DATA_FORMAT to the
+  * matching columns of `data`, in place. Columns without a configured format
+  * are left untouched; a null table is ignored.
+  */
+ formatTableData(data: google.visualization.DataTable | null) {
+   if (!data) return;
+   // Loop-invariant: ids of the columns that have a format configured.
+   const formattedColumnIds = Object.keys(NUMERIC_DATA_FORMAT);
+   const numColumns = data.getNumberOfColumns();
+   for (let columnIdx = 0; columnIdx < numColumns; ++columnIdx) {
+     const id = data.getColumnId(columnIdx);
+     if (!formattedColumnIds.includes(id)) {
+       continue;
+     }
+     const formatConfig = NUMERIC_DATA_FORMAT[id];
+     switch (formatConfig.type) {
+       case 'decimal': {
+         const decimalFormatter = new google.visualization.NumberFormat({
+           fractionDigits: formatConfig.digit,
+         });
+         decimalFormatter.format(data, columnIdx);
+         break;
+       }
+       case 'percent': {
+         // A missing (or zero) digit setting falls back to 2 fraction digits.
+         const pattern = `##.${'#'.repeat(formatConfig.digit || 2)}%`;
+         const percentFormatter = new google.visualization.NumberFormat({
+           pattern,
+         });
+         percentFormatter.format(data, columnIdx);
+         break;
+       }
+       default:
+         console.log(`Cannot identify format config for column ${id}`);
+     }
+   }
+ }
+
+ /**
+  * Helper function to get the (truncated) operation name from an op graph
+  * viewer link, e.g. the text between the opening and closing anchor tags
+  * of `<a href="...">op_name</a>`.
+  *
+  * A value that carries no anchor markup is treated as the plain operation
+  * name, so the tooltip never ends up with an empty name.
+  */
+ getOpName(opGraphLinkStr: string) {
+   const anchorMatch = opGraphLinkStr.match(/<a\b[^>]*>(.*?)<\/a>/);
+   const opName = anchorMatch ? anchorMatch[1] : opGraphLinkStr;
+   return this.truncateOperationName(opName);
+ }
+
+ /** Shortens names longer than 30 chars to their first 30 chars plus '...'. */
+ truncateOperationName(operationName: string) {
+   const fitsAsIs = operationName.length <= 30;
+   return fitsAsIs ? operationName : operationName.substring(0, 30) + '...';
+ }
+
+ /**
+  * Declares the columns of a scatter chart DataTable, for both the program
+  * and the operation level chart: one x axis column followed by a
+  * (value, HTML tooltip) column pair per series.
+  */
+ addScatterDataColumns(
+   seriesNames: string[],
+   scatterData: google.visualization.DataTable,
+ ) {
+   // Column 0 is the shared x axis.
+   scatterData.addColumn('number', 'Bottleneck Operational Intensity');
+   for (const seriesName of seriesNames) {
+     // Value column of the series ...
+     scatterData.addColumn('number', seriesName);
+     // ... immediately followed by its tooltip column.
+     scatterData.addColumn({
+       type: 'string',
+       role: 'tooltip',
+       'p': {'html': true},
+     });
+   }
+ }
+
+ /**
+  * Builds one scatter chart row: every cell is null except the x value, the
+  * y value of one series and the tooltip cell right after that y value.
+  */
+ makeScatterRow(
+   numColumns: number,
+   xIndex: number,
+   yIndex: number,
+   xVal: number,
+   yVal: number,
+   tooltip: string,
+ ) {
+   const row: Array<number | string | null> = Array.from(
+     {length: numColumns},
+     () => null,
+   );
+   row[xIndex] = xVal;
+   row[yIndex] = yVal;
+   row[yIndex + 1] = tooltip;
+   return row;
+ }
+
+ /**
+  * Appends the point of one source row to the scatter chart data: x is the
+  * bottleneck operational intensity, y the measured FLOP rate, placed in the
+  * series value column `columnIndex`. Negative indexes are ignored.
+  */
+ addSeriesRow(
+   sourceDataTable: google.visualization.DataTable,
+   scatterDataTable: google.visualization.DataTable,
+   rowIndex: number,
+   columnIndex: number,
+ ) {
+   const isValidTarget = rowIndex >= 0 && columnIndex >= 0;
+   if (!isValidTarget) {
+     return;
+   }
+   const totalColumns = scatterDataTable.getNumberOfColumns();
+   const intensityColumn = sourceDataTable.getColumnIndex(
+     'bottleneck_operational_intensity',
+   );
+   const flopRateColumn = sourceDataTable.getColumnIndex('measured_flop_rate');
+   const xValue = sourceDataTable.getValue(rowIndex, intensityColumn);
+   const yValue = sourceDataTable.getValue(rowIndex, flopRateColumn);
+   // x always lives in column 0, y in the column of the given series.
+   const scatterRow = this.makeScatterRow(
+     totalColumns,
+     0,
+     columnIndex,
+     xValue,
+     yValue,
+     this.makeTooltip(sourceDataTable, rowIndex),
+   );
+   scatterDataTable.addRow(scatterRow);
+ }
+
+ /**
+  * Appends the three points that draw one roofline: the left end at the
+  * minimum x of the axis, the ridge point, and the right end at the maximum
+  * x of the axis. A negative series index is ignored.
+  */
+ addRoofline(
+   rooflineName: string,
+   seriesIndex: number,
+   peakFlopRate: number,
+   peakMemoryBw: number,
+   ridgePoint: number,
+   scatterData: google.visualization.DataTable,
+ ) {
+   if (seriesIndex < 0) {
+     return;
+   }
+   const numColumns = scatterData.getNumberOfColumns();
+   const {minX, maxX} = SCATTER_CHART_AXIS;
+   const points = [
+     // Memory-bound slope, left of the ridge point.
+     {label: 'Roofline', x: minX, y: minX * peakMemoryBw},
+     // Ridge point.
+     {label: rooflineName + ' Ridge Point', x: ridgePoint, y: peakFlopRate},
+     // Compute-bound plateau, right of the ridge point.
+     {label: 'Roofline', x: maxX, y: peakFlopRate},
+   ];
+   for (const {label, x, y} of points) {
+     scatterData.addRow(
+       this.makeScatterRow(
+         numColumns,
+         0,
+         seriesIndex,
+         x,
+         y,
+         this.makeRooflineTooltip(label, x, y),
+       ),
+     );
+   }
+ }
+
+ /**
+  * Handler for the filter-updated event of the operation level child:
+  * rebuilds the operation scatter data with the new filters.
+  */
+ updateDataTableOp(newFilters: google.visualization.DataTableCellFilter[]) {
+   const activeFilters = newFilters;
+   this.processScatterDataOp(activeFilters);
+ }
+
+ /**
+  * Handler for the filter-updated event of the program level child:
+  * rebuilds the program scatter data with the new filters.
+  */
+ updateDataTableProgram(
+   newFilters: google.visualization.DataTableCellFilter[],
+ ) {
+   const activeFilters = newFilters;
+   this.processScatterDataProgram(activeFilters);
+ }
+
+ /**
+  * Rebuilds the data of the program level roofline scatter chart:
+  * roofline (line) series plus the program level step (scatter) series.
+  * Does nothing until the program table is available.
+  */
+ processScatterDataProgram(
+   filters?: google.visualization.DataTableCellFilter[],
+ ) {
+   const programTable = this.dataTableProgram;
+   if (!programTable) {
+     return;
+   }
+   const visibleRows = this.getFilteredDataTable(programTable, filters);
+   // TODO: update the programSeries based on data received
+   const seriesNames = this.getProgramSeries();
+   // Start over with a fresh scatter table.
+   const scatterData = new google.visualization.DataTable();
+   this.scatterDataProgram = scatterData;
+   this.addScatterDataColumns(seriesNames, scatterData);
+   this.addRooflinesSeriesRows(scatterData);
+   this.addProgramSeriesRows(seriesNames, visibleRows);
+   this.updateProgramScatterStyles(seriesNames.length);
+ }
+
+ /**
+  * Rebuilds the data of the operation level roofline scatter chart:
+  * roofline (line) series plus one scatter series per op category.
+  * Does nothing until the operation table is available.
+  */
+ processScatterDataOp(filters?: google.visualization.DataTableCellFilter[]) {
+   const opTable = this.dataTableOp;
+   if (!opTable) {
+     return;
+   }
+   const visibleRows = this.getFilteredDataTable(opTable, filters);
+   const seriesNames = this.getOpSeries(this.getOpCategories(visibleRows));
+
+   // Start over with a fresh scatter table.
+   const scatterData = new google.visualization.DataTable();
+   this.scatterDataOp = scatterData;
+   this.addScatterDataColumns(seriesNames, scatterData);
+   this.addRooflinesSeriesRows(scatterData);
+   this.addOpSeriesRows(seriesNames, visibleRows);
+   this.updateOpScatterStyles(seriesNames.length);
+ }
+
+ /**
+  * Returns the rows of a base op/program DataTable that pass the filters
+  * emitted by the child component. The filtering happens here because the
+  * scatter chart DataTable is structured differently from the table chart
+  * DataTable, so the child cannot apply the filters to it directly.
+  * Without filters the given table itself is returned.
+  */
+ getFilteredDataTable(
+   dataTable: google.visualization.DataTable,
+   filters?: google.visualization.DataTableCellFilter[],
+ ) {
+   const hasFilters = !!filters && filters.length > 0;
+   if (!hasFilters) {
+     return dataTable;
+   }
+   const matchingRowsView = new google.visualization.DataView(dataTable);
+   matchingRowsView.setRows(dataTable.getFilteredRows(filters!));
+   return matchingRowsView.toDataTable();
+ }
+
+ /**
+  * Lists the operation categories of the given table, ordered by their
+  * summed total_self_time (largest first) so that the scatter chart styling
+  * stays consistent with the pie chart. 'Program' is left out because it is
+  * added to the series separately.
+  */
+ getOpCategories(filteredDataTableOp: google.visualization.DataTable) {
+   const categoryColumn = filteredDataTableOp.getColumnIndex('category');
+   const selfTimeColumn = filteredDataTableOp.getColumnIndex('total_self_time');
+   // One row per category with the sum of its total_self_time.
+   const timePerCategory = google.visualization.data.group(
+     filteredDataTableOp,
+     [categoryColumn],
+     [
+       {
+         'column': selfTimeColumn,
+         'aggregation': google.visualization.data.sum,
+         'type': 'number',
+       },
+     ],
+   );
+   timePerCategory.sort({column: 1, desc: true});
+
+   const sortedOpCategories: string[] = [];
+   const numCategories = timePerCategory.getNumberOfRows();
+   for (let row = 0; row < numCategories; ++row) {
+     const category = timePerCategory.getValue(row, 0);
+     if (category === 'Program') {
+       continue;
+     }
+     sortedOpCategories.push(category);
+   }
+   return sortedOpCategories;
+ }
+
+ /**
+  * The roofline chart consists of a series of data (roofline series +
+  * aggregation series). Returns the names of the roofline series: the
+  * device specific memory rooflines followed by the HBM roofline.
+  */
+ getRooflineBaseSeries() {
+   const {isGpu, hasMergedVmem, hasCmem} = this.deviceIndicators;
+   const memorySeries: string[] = [];
+   if (isGpu) {
+     memorySeries.push('Shared Mem / L1 Roofline');
+   } else if (hasMergedVmem) {
+     memorySeries.push('VMEM Read Roofline', 'VMEM Write Roofline');
+   } else if (hasCmem) {
+     memorySeries.push('CMEM Read Roofline', 'CMEM Write Roofline');
+   }
+   memorySeries.push('HBM Roofline');
+   return memorySeries;
+ }
+
+ /**
+  * Series of the program level chart:
+  * the roofline (line) series followed by the program level aggregations.
+  */
+ getProgramSeries() {
+   const rooflineSeries: string[] = this.getRooflineBaseSeries();
+   return [...rooflineSeries, ...this.programLevelAgg];
+ }
+
+ /**
+  * Series of the operation level chart: the roofline (line) series, the
+  * operation categories and two 'Program' entries.
+  * The series list decides the style of the scatter chart.
+  */
+ getOpSeries(opCategories: string[]) {
+   const rooflineSeries: string[] = this.getRooflineBaseSeries();
+   // The leading 'Program' puts its legend on top; the trailing 'Program'
+   // draws its marker on the top layer of the chart.
+   return [...rooflineSeries, 'Program', ...opCategories, 'Program'];
+ }
+
+ /**
+  * Adds the data rows of the roofline (line) series - VMEM or CMEM
+  * read/write for TPU, Shared Mem / L1 for GPU, then HBM. Shared by the
+  * program and the operation level chart.
+  *
+  * The value column of the i-th series is `1 + 2 * i`, so the rooflines
+  * drawn here must match, in number and order, the series names returned by
+  * getRooflineBaseSeries(); otherwise later rooflines land in the columns of
+  * other series.
+  */
+ addRooflinesSeriesRows(scatterData: google.visualization.DataTable) {
+   // Numeric lookup of the device info values, keyed by entry id.
+   const rooflineInfo: {[key: string]: number} = {};
+   for (const item of this.deviceInfoArray) {
+     rooflineInfo[item.id] = Number(item.value || 0);
+   }
+   const peakFlopRate = rooflineInfo['peak_flop_rate'];
+   let columnIndex = 1;
+
+   if (this.deviceIndicators.isGpu) {
+     // The GPU shared memory / L1 numbers are carried in the vmem_write fields.
+     this.addRoofline(
+       'Shared Mem / L1',
+       columnIndex,
+       peakFlopRate,
+       rooflineInfo['peak_vmem_write_bw'],
+       rooflineInfo['vmem_write_ridge_point'],
+       scatterData,
+     );
+     columnIndex += 2; // value col + tooltip col
+   } else {
+     const addRooflinePairs = (memType: 'cmem' | 'vmem') => {
+       ['read', 'write'].forEach((opType) => {
+         this.addRoofline(
+           `${memType.toUpperCase()} ${opType.charAt(0).toUpperCase() + opType.slice(1)}`,
+           columnIndex,
+           peakFlopRate,
+           rooflineInfo[`peak_${memType}_${opType}_bw`],
+           rooflineInfo[`${memType}_${opType}_ridge_point`],
+           scatterData,
+         );
+         columnIndex += 2; // value col + tooltip col
+       });
+     };
+     // Only one pair has columns: VMEM wins over CMEM, exactly as in
+     // getRooflineBaseSeries().
+     if (this.deviceIndicators.hasMergedVmem) {
+       addRooflinePairs('vmem');
+     } else if (this.deviceIndicators.hasCmem) {
+       addRooflinePairs('cmem');
+     }
+   }
+
+   this.addRoofline(
+     'HBM',
+     columnIndex,
+     peakFlopRate,
+     rooflineInfo['peak_hbm_bw'],
+     rooflineInfo['hbm_ridge_point'],
+     scatterData,
+   );
+ }
+
+ /**
+  * Populates the program level scatter chart with one point per row of the
+  * filtered program DataTable, placed in the series of its step.
+  */
+ addProgramSeriesRows(
+   programSeries: string[],
+   filteredDataTableProgram: google.visualization.DataTable,
+ ) {
+   const stepColumn = filteredDataTableProgram.getColumnIndex('step');
+   const numRows = filteredDataTableProgram.getNumberOfRows();
+   for (let rowIndex = 0; rowIndex < numRows; ++rowIndex) {
+     const rawStep = filteredDataTableProgram.getValue(rowIndex, stepColumn);
+     // Any step that is not one of the named aggregations (e.g. a numeric
+     // step id) belongs to the generic 'Step' series.
+     const seriesName = this.programLevelAgg.includes(rawStep)
+       ? rawStep
+       : 'Step';
+     const seriesPosition = programSeries.lastIndexOf(seriesName);
+     this.addSeriesRow(
+       filteredDataTableProgram,
+       this.scatterDataProgram!,
+       rowIndex,
+       1 + 2 * seriesPosition,
+     );
+   }
+ }
+
+ /**
+  * Populates the operation level scatter chart with one point per row of
+  * the filtered operation DataTable, placed in the series of its category.
+  * Rows of unknown categories and rows bound by 'Unknown' are left out.
+  */
+ addOpSeriesRows(
+   opSeries: string[],
+   filteredDataTableOp: google.visualization.DataTable,
+ ) {
+   const categoryColumn = filteredDataTableOp.getColumnIndex('category');
+   const numRows = filteredDataTableOp.getNumberOfRows();
+   for (let rowIndex = 0; rowIndex < numRows; ++rowIndex) {
+     const category = filteredDataTableOp.getValue(rowIndex, categoryColumn);
+     const columnIndex = 1 + 2 * opSeries.lastIndexOf(category);
+     if (columnIndex <= 0) {
+       // The category has no series.
+       continue;
+     }
+     const boundBy = filteredDataTableOp.getValue(
+       rowIndex,
+       filteredDataTableOp.getColumnIndex('bound_by'),
+     );
+     if (boundBy === 'Unknown') {
+       continue;
+     }
+     this.addSeriesRow(
+       filteredDataTableOp,
+       this.scatterDataOp!,
+       rowIndex,
+       columnIndex,
+     );
+   }
+ }
+
+ /**
+  * Makes the HTML tooltip of a roofline series point in the scatter chart:
+  * the roofline name followed by its operational intensity and FLOP rate,
+  * each with at most 2 fraction digits.
+  *
+  * The tooltip columns are declared as HTML, so lines are separated with
+  * <br/> tags rather than newline characters.
+  * NOTE(review): the wrapper markup is plain <div>/<b>; confirm against the
+  * intended tooltip styling.
+  */
+ makeRooflineTooltip(
+   rooflineName: string,
+   operationIntensity: number,
+   flopRate: number,
+ ) {
+   const formatNumber = (value: number) =>
+     value.toLocaleString(undefined, {maximumFractionDigits: 2});
+   return (
+     '<div>' +
+     '<b>' +
+     rooflineName +
+     '</b><br/>' +
+     'Operational Intensity (FLOP/Byte): ' +
+     formatNumber(operationIntensity) +
+     '<br/>' +
+     'Flop Rate (GFLOP/s): ' +
+     formatNumber(flopRate) +
+     '<br/>' +
+     '</div>'
+   );
+ }
+
+ /** Make tooltip for the clustered series (points) in the scatter chart */
+ makeTooltip(dataTable: google.visualization.DataTable, rowIndex: number) {
+ // Prepare column index to make easier access
+ const columns: {[columnKey: string]: number} = {};
+ for (let i = 0; i < dataTable.getNumberOfColumns(); i++) {
+ columns[dataTable.getColumnId(i)] = i;
+ }
+ // TODO(jihochoi): fix the utilization numbers for TPU V4.
+ // ' - Percent relative to optimal: '
+ // + (100 * dataTable.getValue(rowIndex,
+ // columns.roofline_efficiency)).toLocaleString(undefined, {maximumFractionDigits:2})
+ // + '%
' +
+ // ' - Percent relative to HW limit: '
+ // + (100 * dataTable.getValue(rowIndex,
+ // columns.compute_efficiency)).toLocaleString(undefined, {maximumFractionDigits:2})
+ // + '%
' +
+ // ' - Percent relative to HW limit: '
+ // + (100 * dataTable.getValue(rowIndex,
+ // columns.hbm_bw_utilization)).toLocaleString(undefined, {maximumFractionDigits:2})
+ // + '%
' +
+ const tooltipRows: TooltipRow[] = [
+ {
+ id: 'step',
+ label: 'Step',
+ },
+ {
+ id: 'rank',
+ label: 'Rank',
+ },
+ {
+ id: 'hlo_module_id',
+ label: 'Program ID',
+ },
+ {
+ id: 'category',
+ label: 'Category',
+ },
+ {
+ id: 'operation',
+ label: 'Operation',
+ operation: (val) => this.getOpName(val as string),
+ },
+ {
+ id: 'occurrences',
+ label: '# of Occurrences',
+ },
+ {
+ id: 'total_time_per_core',
+ label: 'Total Time per core (us)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 2}),
+ },
+ {
+ id: 'total_time_in_percentage',
+ label: 'Total Time / Program',
+ operation: (val) => `${100 * Number(Number(val).toFixed(4))}%`,
+ },
+ {
+ id: 'measured_flop_rate',
+ label: 'Normalized FLOP Rate (GFLOP/s)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'model_flop_rate',
+ label: 'Model FLOP Rate (GFLOP/s)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'hbm_bw',
+ label: 'HBM BW (GiB/s)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'cmem_read_bw',
+ label: 'CMEM Read BW (GiB/s)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'cmem_write_bw',
+ label: 'CMEM Write BW (GiB/s)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'vmem_read_bw',
+ label: 'VMEM Read BW (GiB/s)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'vmem_write_bw',
+ label: 'VMEM Write BW (GiB/s)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'operational_intensity',
+ label: 'Operational Intensity (FLOP/Byte)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'hbm_operational_intensity',
+ label: 'HBM Operational Intensity (FLOP/Byte)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'cmem_read_operational_intensity',
+ label: 'CMEM Read Operational Intensity (FLOP/Byte)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'cmem_write_operational_intensity',
+ label: 'CMEM Write Operational Intensity (FLOP/Byte)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'vmem_read_operational_intensity',
+ label: 'VMEM Read Operational Intensity (FLOP/Byte)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'vmem_write_operational_intensity',
+ label: 'VMEM Write Operational Intensity (FLOP/Byte)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'bottleneck_operational_intensity',
+ label: 'Bottleneck Operational Intensity (FLOP/Byte)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ // The id must match the backend's 'bound_by' column id; a mismatched id is
+ // silently skipped by the hasOwnProperty guard when the tooltip is built.
+ {id: 'bound_by', label: 'Bound By'},
+ ];
+ const gpuTooltipRows: TooltipRow[] = [
+ {
+ id: 'step',
+ label: 'Step',
+ },
+ {
+ id: 'rank',
+ label: 'Rank',
+ },
+ {
+ id: 'hlo_module_id',
+ label: 'Program ID',
+ },
+ {
+ id: 'category',
+ label: 'Category',
+ },
+ {
+ id: 'operation',
+ label: 'Operation',
+ operation: (val) => this.getOpName(val as string),
+ },
+ {
+ id: 'occurrences',
+ label: '# of Occurrences',
+ },
+ {
+ id: 'total_time_per_core',
+ label: 'Total Time per gpu (us)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 2}),
+ },
+ {
+ id: 'total_time_in_percentage',
+ label: 'Total Time / Program',
+ operation: (val) => `${100 * Number(Number(val).toFixed(4))}%`,
+ },
+ {
+ id: 'measured_flop_rate',
+ label: 'Normalized FLOP Rate (GFLOP/s)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'model_flop_rate',
+ label: 'Model FLOP Rate (GFLOP/s)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'hbm_bw',
+ label: 'HBM BW (GiB/s)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'vmem_write_bw',
+ label: 'Shm/L1 BW (GiB/s)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'operational_intensity',
+ label: 'Operational Intensity (FLOP/Byte)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'hbm_operational_intensity',
+ label: 'HBM Operational Intensity (FLOP/Byte)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ {
+ id: 'bottleneck_operational_intensity',
+ label: 'Bottleneck Operational Intensity (FLOP/Byte)',
+ operation: (val) =>
+ val.toLocaleString(undefined, {maximumFractionDigits: 4}),
+ },
+ // The id must match the backend's 'bound_by' column id; a mismatched id is
+ // silently skipped by the hasOwnProperty guard when the tooltip is built.
+ {id: 'bound_by', label: 'Bound By'},
+ ];
+
+ const tooltipBodyHtml = (
+ this.deviceIndicators.isGpu ? gpuTooltipRows : tooltipRows
+ ).reduce((acc: string, row: TooltipRow) => {
+ if (!columns.hasOwnProperty(row.id)) {
+ return acc;
+ }
+ const val: string | number = dataTable.getValue(
+ rowIndex,
+ columns[row.id],
+ );
+ acc += `${row.label}: ${
+ row.operation ? row.operation(val) : val
+ }
`;
+ return acc;
+ }, '');
+ return `${tooltipBodyHtml}
`;
+ }
+
+  // TODO(yinzz) remove the style updating dependency on the series order
+  // make it a k-v based format
+  /**
+   * Writes the roofline line styles into `chartOptions.series`, starting at
+   * `seriesIndex`, and returns the index just past the last slot written.
+   *
+   * On GPU a single write-style entry is emitted. Otherwise a read/write pair
+   * is emitted for merged VMEM and another for CMEM, each only when the
+   * matching device indicator is set. The HBM style is always emitted last.
+   */
+  formatRooflineSeriesStyle(
+    seriesIndex: number,
+    chartOptions: google.visualization.ScatterChartOptions,
+  ) {
+    const rooflineStyles = [];
+    if (this.deviceIndicators.isGpu) {
+      rooflineStyles.push(ROOFLINE_STYLES.write);
+    } else {
+      if (this.deviceIndicators.hasMergedVmem) {
+        rooflineStyles.push(ROOFLINE_STYLES.read, ROOFLINE_STYLES.write);
+      }
+      if (this.deviceIndicators.hasCmem) {
+        rooflineStyles.push(ROOFLINE_STYLES.read, ROOFLINE_STYLES.write);
+      }
+    }
+    rooflineStyles.push(ROOFLINE_STYLES.hbm);
+
+    let nextIndex = seriesIndex;
+    for (const style of rooflineStyles) {
+      chartOptions.series[nextIndex] = style;
+      nextIndex += 1;
+    }
+    return nextIndex;
+  }
+
+  /**
+   * Styles the first `numSeries` series of the program-level scatter chart:
+   * the roofline line styles come first, and every remaining series gets a
+   * plain point style with `pointSize: 4`.
+   */
+  updateProgramScatterStyles(numSeries: number) {
+    const firstPointSeries = this.formatRooflineSeriesStyle(
+      0,
+      this.scatterChartOptionsProgram,
+    );
+    let idx = firstPointSeries;
+    while (idx < numSeries) {
+      this.scatterChartOptionsProgram.series[idx] = {pointSize: 4};
+      idx += 1;
+    }
+  }
+
+  // TODO(yinzz) remove the style updating dependency on the series order
+  /**
+   * Styles the `numSeries` series of the operation-level scatter chart, in
+   * series order: the roofline line styles, a legend-only Program entry, one
+   * palette-colored entry per op series, and finally the plotted Program
+   * series, which is hidden from the legend.
+   */
+  updateOpScatterStyles(numSeries: number) {
+    let seriesIndex = this.formatRooflineSeriesStyle(
+      0,
+      this.scatterChartOptionsOp,
+    );
+    // extra series style record for the Program legend
+    this.scatterChartOptionsOp.series[seriesIndex++] = {
+      pointSize: 20,
+      color: '#FF0000',
+      pointShape: 'star',
+    };
+    // Other ops are colored in the same order as in the pie chart. The palette
+    // offset is taken from the running index rather than recomputed from the
+    // cmem/vmem flags: on GPU the roofline block holds an extra write-style
+    // series that the flag-based count ignored, which shifted every op color
+    // by one palette slot.
+    const numSeriesBeforeOps = seriesIndex;
+    for (; seriesIndex < numSeries - 1; ++seriesIndex) {
+      this.scatterChartOptionsOp.series[seriesIndex] = {
+        pointSize: 3,
+        // make sure the color of series matches the pie chart
+        color:
+          PIE_CHART_PALETTE[
+            (seriesIndex - numSeriesBeforeOps) % PIE_CHART_PALETTE.length
+          ],
+      };
+    }
+    // Real series for program which does not show in the legend.
+    // This is added at the end to make it plotted at the top and not buried by
+    // other op points.
+    this.scatterChartOptionsOp.series[numSeries - 1] = {
+      pointSize: 20,
+      color: '#FF0000',
+      pointShape: 'star',
+      visibleInLegend: false,
+    };
+  }
+
+ /**
+ * Teardown hook: calls setLoadingState(false, ...) on the store, then emits
+ * on and completes `destroyed`.
+ */
+ ngOnDestroy() {
+ setLoadingState(false, this.store);
+ // NOTE(review): presumably `destroyed` is what subscriptions are piped
+ // through for teardown - confirm against the subscriptions in this class.
+ this.destroyed.next();
+ this.destroyed.complete();
+ }
+}
diff --git a/frontend/app/components/roofline_model/roofline_model_module.ts b/frontend/app/components/roofline_model/roofline_model_module.ts
new file mode 100644
index 00000000..6e475789
--- /dev/null
+++ b/frontend/app/components/roofline_model/roofline_model_module.ts
@@ -0,0 +1,25 @@
+import {CommonModule} from '@angular/common';
+import {NgModule} from '@angular/core';
+import {TableModule} from 'org_xprof/frontend/app/components/chart/table/table_module';
+import {CategoryFilterModule} from 'org_xprof/frontend/app/components/controls/category_filter/category_filter_module';
+import {StringFilterModule} from 'org_xprof/frontend/app/components/controls/string_filter/string_filter_module';
+import {OperationLevelAnalysisModule} from 'org_xprof/frontend/app/components/roofline_model/operation_level_analysis/operation_level_analysis_module';
+import {ProgramLevelAnalysisModule} from 'org_xprof/frontend/app/components/roofline_model/program_level_analysis/program_level_analysis_module';
+
+import {RooflineModel} from './roofline_model';
+
+/**
+ * Angular module that declares and exports the RooflineModel component.
+ * It imports the table, category/string filter, and program-level and
+ * operation-level analysis modules.
+ */
+@NgModule({
+ declarations: [RooflineModel],
+ imports: [
+ CommonModule,
+ TableModule,
+ CategoryFilterModule,
+ StringFilterModule,
+ ProgramLevelAnalysisModule,
+ OperationLevelAnalysisModule,
+ ],
+ exports: [RooflineModel],
+})
+export class RooflineModelModule {
+}
diff --git a/plugin/tensorboard_plugin_profile/convert/BUILD b/plugin/tensorboard_plugin_profile/convert/BUILD
index 73de40fc..537617e6 100644
--- a/plugin/tensorboard_plugin_profile/convert/BUILD
+++ b/plugin/tensorboard_plugin_profile/convert/BUILD
@@ -97,6 +97,15 @@ py_library(
],
)
+py_library(
+ name = "roofline_model_proto_to_gviz",
+ srcs = ["roofline_model_proto_to_gviz.py"],
+ deps = [
+ requirement("gviz_api"),
+ "@org_xprof//plugin/tensorboard_plugin_profile/protobuf:protos_all_py_pb2",
+ ],
+)
+
py_test(
name = "overview_page_proto_to_gviz_test",
size = "small",
@@ -251,6 +260,7 @@ py_library(
":input_pipeline_proto_to_gviz",
":kernel_stats_proto_to_gviz",
":overview_page_proto_to_gviz",
+ ":roofline_model_proto_to_gviz",
":tf_data_stats_proto_to_gviz",
":tf_stats_proto_to_gviz",
":trace_events_json",
diff --git a/plugin/tensorboard_plugin_profile/convert/raw_to_tool_data.py b/plugin/tensorboard_plugin_profile/convert/raw_to_tool_data.py
index 57c0f790..836c407e 100644
--- a/plugin/tensorboard_plugin_profile/convert/raw_to_tool_data.py
+++ b/plugin/tensorboard_plugin_profile/convert/raw_to_tool_data.py
@@ -32,6 +32,7 @@
from tensorboard_plugin_profile.convert import input_pipeline_proto_to_gviz
from tensorboard_plugin_profile.convert import kernel_stats_proto_to_gviz
from tensorboard_plugin_profile.convert import overview_page_proto_to_gviz
+from tensorboard_plugin_profile.convert import roofline_model_proto_to_gviz
from tensorboard_plugin_profile.convert import tf_data_stats_proto_to_gviz
from tensorboard_plugin_profile.convert import tf_stats_proto_to_gviz
from tensorboard_plugin_profile.convert import trace_events_json
@@ -175,6 +176,10 @@ def xspace_to_tool_data(
raw_data, success = xspace_wrapper_func(xspace_paths, tool)
if success:
data = hlo_stats_proto_to_gviz.to_json(raw_data)
+ elif tool == 'roofline_model':
+ raw_data, success = xspace_wrapper_func(xspace_paths, tool)
+ if success:
+ data = roofline_model_proto_to_gviz.to_json(raw_data)
elif tool == 'graph_viewer':
options = params.get('graph_viewer_options', {})
raw_data, success = xspace_wrapper_func(xspace_paths, tool, options)
diff --git a/plugin/tensorboard_plugin_profile/convert/roofline_model_proto_to_gviz.py b/plugin/tensorboard_plugin_profile/convert/roofline_model_proto_to_gviz.py
new file mode 100644
index 00000000..91b7d1c5
--- /dev/null
+++ b/plugin/tensorboard_plugin_profile/convert/roofline_model_proto_to_gviz.py
@@ -0,0 +1,392 @@
+# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""For conversion of RooflineModel protos to GViz DataTables.
+
+Usage:
+ gviz_data_tables = generate_roofline_model_table(roofline_model_db)
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gviz_api
+
+from tensorboard_plugin_profile.protobuf import roofline_model_pb2
+
+
+def get_step_string(record_type, step_num):
+  """Returns the value shown in the "Step" column for a record.
+
+  Args:
+    record_type: A roofline_model_pb2.RecordType value.
+    step_num: The record's step number; only used for PER_STEP records.
+
+  Returns:
+    step_num (unchanged) for PER_STEP records, a fixed label for the other
+    known record types, and None for any other value.
+  """
+  record_types = roofline_model_pb2.RecordType
+  if record_type == record_types.PER_STEP:
+    return step_num
+  fixed_labels = {
+      record_types.INVALID_RECORD_TYPE: "Invalid",
+      record_types.ALL: "Total",
+      record_types.ALL_HW: "Total (HW)",
+      record_types.AVERAGE_STEP: "Average",
+  }
+  return fixed_labels.get(record_type)
+
+
+def gibi_to_giga(gibibytes):
+  """Rescales a value from gibi (2**30) units to giga (1e9) units."""
+  giga_per_gibi = (1 << 30) / 1.0e9
+  return gibibytes * giga_per_gibi
+
+
+def ridge_point(peak_gigaflops_per_second, peak_gibibytes_per_second):
+  """Returns the peak FLOP rate divided by the peak bandwidth.
+
+  The bandwidth is converted from GiB/s to GB/s before dividing. A zero
+  bandwidth yields 0.0 instead of a division error.
+  """
+  if peak_gibibytes_per_second != 0:
+    return peak_gigaflops_per_second / gibi_to_giga(peak_gibibytes_per_second)
+  return 0.0
+
+
+def get_roofline_model_table_args_for_gpu(roofline_model_db):
+  """Builds the GViz table arguments for a GPU roofline model proto.
+
+  Args:
+    roofline_model_db: A RooflineModelDatabase proto.
+
+  Returns:
+    A (table_description, data, custom_properties) tuple: the column
+    descriptions, one row per entry of roofline_model_record, and the
+    table-level custom properties.
+  """
+  # Each entry is (column id, column type, column label, record field name).
+  # The leading "step" column is derived rather than read from a single field,
+  # so it is handled separately below.
+  # For nvidia gpu there is currently no vmem_read_* field; the vmem_write_*
+  # fields carry the SHM/L1 values.
+  columns = [
+      ("rank", "number", "Rank", "rank"),
+      ("category", "string", "Category", "hlo_category"),
+      ("operation", "string", "Operation", "hlo_name"),
+      ("occurrences", "number", "# Occurrences", "occurrences"),
+      ("total_time", "number", "Total Time (us)", "total_time_in_us"),
+      ("avg_time", "number", "Avg. time (us)", "avg_time_in_us"),
+      (
+          "total_self_time",
+          "number",
+          "Total self time (us)",
+          "total_self_time_in_us",
+      ),
+      (
+          "avg_self_time",
+          "number",
+          "Avg. self time (us)",
+          "avg_self_time_in_us",
+      ),
+      (
+          "total_self_time_percent",
+          "number",
+          "Total self time (%)",
+          "total_self_time_as_fraction",
+      ),
+      (
+          "cumulative_total_self_time_percent",
+          "number",
+          "Cumulative total self time (%)",
+          "cumulative_total_self_time_as_fraction",
+      ),
+      (
+          "measured_flop_rate",
+          "number",
+          "Normalized FLOP Rate (GFLOP/s)",
+          "measured_flop_rate",
+      ),
+      (
+          "model_flop_rate",
+          "number",
+          "Model FLOP Rate (GFLOP/s)",
+          "model_flop_rate",
+      ),
+      (
+          "measured_memory_bw",
+          "number",
+          "Memory BW (GiB/s)",
+          "measured_memory_bw",
+      ),
+      ("hbm_bw", "number", "HBM BW (GiB/s)", "hbm_bw"),
+      ("vmem_write_bw", "number", "SHM/L1 BW (GiB/s)", "vmem_write_bw"),
+      (
+          "operational_intensity",
+          "number",
+          "Operational Intensity (FLOP/Byte)",
+          "operational_intensity",
+      ),
+      (
+          "hbm_operational_intensity",
+          "number",
+          "HBM Operational Intensity (FLOP/Byte)",
+          "hbm_operational_intensity",
+      ),
+      (
+          "vmem_write_operational_intensity",
+          "number",
+          "SHM/L1 Operational Intensity (FLOP/Byte)",
+          "vmem_write_operational_intensity",
+      ),
+      (
+          "bottleneck_operational_intensity",
+          "number",
+          "Bottleneck Operational Intensity (FLOP/Byte)",
+          "bottleneck_operational_intensity",
+      ),
+      ("bound_by", "string", "Bound by", "bound_by"),
+      (
+          "total_time_per_core",
+          "number",
+          "Total Time per core (us)",
+          "total_time_per_core_in_us",
+      ),
+      (
+          "total_time_in_percentage",
+          "number",
+          "Total Time (%)",
+          "total_time_in_percentage",
+      ),
+      (
+          "optimal_flop_rate",
+          "number",
+          "Optimal FLOP Rate (GFLOP/s)",
+          "optimal_flop_rate",
+      ),
+      (
+          "roofline_efficiency",
+          "number",
+          "Roofline efficiency (%)",
+          "roofline_efficiency",
+      ),
+      (
+          "compute_efficiency",
+          "number",
+          "FLOP Rate / Peak (%)",
+          "flop_rate_relative_to_hw_limit",
+      ),
+      (
+          "max_mem_bw_utilization",
+          "number",
+          "Max memory (cmem or hbm) bandwidth utilization (%)",
+          "memory_bw_relative_to_hw_limit",
+      ),
+      (
+          "include_infeed_outfeed",
+          "boolean",
+          "Include Infeed/Outfeed",
+          "include_infeed_outfeed",
+      ),
+      ("hlo_module_id", "string", "Program ID", "hlo_module_id"),
+  ]
+
+  table_description = [("step", "string", "Step")]
+  table_description.extend(
+      (column_id, column_type, label)
+      for column_id, column_type, label, _ in columns
+  )
+
+  data = [
+      [get_step_string(record.record_type, record.step_num)]
+      + [getattr(record, field) for _, _, _, field in columns]
+      for record in roofline_model_db.roofline_model_record
+  ]
+
+  # NOTE(review): these values are passed through as-is, whereas
+  # get_roofline_model_table_args stringifies its custom properties; confirm
+  # the frontend expects this difference.
+  peak_flop_rate = roofline_model_db.peak_flop_rate
+  peak_hbm_bw = roofline_model_db.peak_hbm_bw
+  peak_shml1_write_bw = roofline_model_db.peak_vmem_write_bw
+  custom_properties = {
+      "device_type": roofline_model_db.device_type,
+      "has_cmem": roofline_model_db.has_cmem,
+      "has_merged_vmem": roofline_model_db.has_merged_vmem,
+      "peak_flop_rate": peak_flop_rate,
+      "peak_hbm_bw": peak_hbm_bw,
+      "peak_shml1_write_bw": peak_shml1_write_bw,
+      "hbm_ridge_point": ridge_point(peak_flop_rate, peak_hbm_bw),
+      "shml1_write_ridge_point": ridge_point(
+          peak_flop_rate, peak_shml1_write_bw
+      ),
+  }
+  return (table_description, data, custom_properties)
+
+
+def get_roofline_model_table_args(roofline_model_db):
+  """Builds the GViz table arguments for a roofline model proto.
+
+  Args:
+    roofline_model_db: A RooflineModelDatabase proto.
+
+  Returns:
+    A (table_description, data, custom_properties) tuple: the column
+    descriptions, one row per entry of roofline_model_record, and the
+    table-level custom properties (all values except device_type are
+    converted to strings).
+  """
+  # Each entry is (column id, column type, column label, record field name).
+  # The leading "step" column is derived rather than read from a single field,
+  # so it is handled separately below.
+  columns = [
+      ("rank", "number", "Rank", "rank"),
+      ("category", "string", "Category", "hlo_category"),
+      ("operation", "string", "Operation", "hlo_name"),
+      ("occurrences", "number", "# Occurrences", "occurrences"),
+      ("total_time", "number", "Total Time (us)", "total_time_in_us"),
+      ("avg_time", "number", "Avg. time (us)", "avg_time_in_us"),
+      (
+          "total_self_time",
+          "number",
+          "Total self time (us)",
+          "total_self_time_in_us",
+      ),
+      (
+          "avg_self_time",
+          "number",
+          "Avg. self time (us)",
+          "avg_self_time_in_us",
+      ),
+      (
+          "total_self_time_percent",
+          "number",
+          "Total self time (%)",
+          "total_self_time_as_fraction",
+      ),
+      (
+          "cumulative_total_self_time_percent",
+          "number",
+          "Cumulative total self time (%)",
+          "cumulative_total_self_time_as_fraction",
+      ),
+      (
+          "dma_stall_percent",
+          "number",
+          "%time stalled by DMA",
+          "dma_stall_fraction",
+      ),
+      (
+          "measured_flop_rate",
+          "number",
+          "Normalized FLOP Rate (GFLOP/s)",
+          "measured_flop_rate",
+      ),
+      (
+          "model_flop_rate",
+          "number",
+          "Model FLOP Rate (GFLOP/s)",
+          "model_flop_rate",
+      ),
+      (
+          "measured_memory_bw",
+          "number",
+          "Memory BW (GiB/s)",
+          "measured_memory_bw",
+      ),
+      ("hbm_bw", "number", "HBM BW (GiB/s)", "hbm_bw"),
+      ("cmem_read_bw", "number", "CMEM Read BW (GiB/s)", "cmem_read_bw"),
+      ("cmem_write_bw", "number", "CMEM Write BW (GiB/s)", "cmem_write_bw"),
+      ("vmem_read_bw", "number", "VMEM Read BW (GiB/s)", "vmem_read_bw"),
+      ("vmem_write_bw", "number", "VMEM Write BW (GiB/s)", "vmem_write_bw"),
+      (
+          "operational_intensity",
+          "number",
+          "Operational Intensity (FLOP/Byte)",
+          "operational_intensity",
+      ),
+      (
+          "hbm_operational_intensity",
+          "number",
+          "HBM Operational Intensity (FLOP/Byte)",
+          "hbm_operational_intensity",
+      ),
+      (
+          "cmem_read_operational_intensity",
+          "number",
+          "CMEM Read Operational Intensity (FLOP/Byte)",
+          "cmem_read_operational_intensity",
+      ),
+      (
+          "cmem_write_operational_intensity",
+          "number",
+          "CMEM Write Operational Intensity (FLOP/Byte)",
+          "cmem_write_operational_intensity",
+      ),
+      (
+          "vmem_read_operational_intensity",
+          "number",
+          "VMEM Read Operational Intensity (FLOP/Byte)",
+          "vmem_read_operational_intensity",
+      ),
+      (
+          "vmem_write_operational_intensity",
+          "number",
+          "VMEM Write Operational Intensity (FLOP/Byte)",
+          "vmem_write_operational_intensity",
+      ),
+      (
+          "bottleneck_operational_intensity",
+          "number",
+          "Bottleneck Operational Intensity (FLOP/Byte)",
+          "bottleneck_operational_intensity",
+      ),
+      ("bound_by", "string", "Bound by", "bound_by"),
+      (
+          "total_time_per_core",
+          "number",
+          "Total Time per core (us)",
+          "total_time_per_core_in_us",
+      ),
+      (
+          "total_time_in_percentage",
+          "number",
+          "Total Time (%)",
+          "total_time_in_percentage",
+      ),
+      (
+          "optimal_flop_rate",
+          "number",
+          "Optimal FLOP Rate (GFLOP/s)",
+          "optimal_flop_rate",
+      ),
+      (
+          "roofline_efficiency",
+          "number",
+          "Roofline efficiency (%)",
+          "roofline_efficiency",
+      ),
+      (
+          "compute_efficiency",
+          "number",
+          "FLOP Rate / Peak (%)",
+          "flop_rate_relative_to_hw_limit",
+      ),
+      (
+          "max_mem_bw_utilization",
+          "number",
+          "Max memory (cmem or hbm) bandwidth utilization (%)",
+          "memory_bw_relative_to_hw_limit",
+      ),
+      (
+          "include_infeed_outfeed",
+          "boolean",
+          "Include Infeed/Outfeed",
+          "include_infeed_outfeed",
+      ),
+      ("hlo_module_id", "string", "Program ID", "hlo_module_id"),
+  ]
+
+  table_description = [("step", "string", "Step")]
+  table_description.extend(
+      (column_id, column_type, label)
+      for column_id, column_type, label, _ in columns
+  )
+
+  data = [
+      [get_step_string(record.record_type, record.step_num)]
+      + [getattr(record, field) for _, _, _, field in columns]
+      for record in roofline_model_db.roofline_model_record
+  ]
+
+  peak_flop_rate = roofline_model_db.peak_flop_rate
+
+  def ridge_point_str(peak_bw):
+    # Ridge point of the compute roof against one memory roof, as a string.
+    return str(ridge_point(peak_flop_rate, peak_bw))
+
+  custom_properties = {
+      "device_type": roofline_model_db.device_type,
+      "megacore": str(int(roofline_model_db.megacore)),
+      "has_cmem": str(int(roofline_model_db.has_cmem)),
+      "has_merged_vmem": str(int(roofline_model_db.has_merged_vmem)),
+      "peak_flop_rate": str(peak_flop_rate),
+      "peak_hbm_bw": str(roofline_model_db.peak_hbm_bw),
+      "peak_cmem_read_bw": str(roofline_model_db.peak_cmem_read_bw),
+      "peak_cmem_write_bw": str(roofline_model_db.peak_cmem_write_bw),
+      "peak_vmem_read_bw": str(roofline_model_db.peak_vmem_read_bw),
+      "peak_vmem_write_bw": str(roofline_model_db.peak_vmem_write_bw),
+      "hbm_ridge_point": ridge_point_str(roofline_model_db.peak_hbm_bw),
+      "cmem_read_ridge_point": ridge_point_str(
+          roofline_model_db.peak_cmem_read_bw
+      ),
+      "cmem_write_ridge_point": ridge_point_str(
+          roofline_model_db.peak_cmem_write_bw
+      ),
+      "vmem_read_ridge_point": ridge_point_str(
+          roofline_model_db.peak_vmem_read_bw
+      ),
+      "vmem_write_ridge_point": ridge_point_str(
+          roofline_model_db.peak_vmem_write_bw
+      ),
+  }
+
+  return (table_description, data, custom_properties)
+
+
+def generate_roofline_model_table(roofline_model_db):
+  """Creates the roofline model GViz table from a roofline model proto.
+
+  The GPU-specific column layout is used when the proto's device_type contains
+  "GPU"; otherwise the default layout is used.
+
+  Args:
+    roofline_model_db: a RooflineModelDatabase proto.
+
+  Returns:
+    Returns a gviz_api.DataTable
+  """
+  if "GPU" in roofline_model_db.device_type:
+    table_args = get_roofline_model_table_args_for_gpu(roofline_model_db)
+  else:
+    table_args = get_roofline_model_table_args(roofline_model_db)
+  # table_args is (table_description, data, custom_properties).
+  return gviz_api.DataTable(*table_args)
+
+
+def get_diagnostics_table_args(roofline_model_db):
+  """Builds the diagnostics table arguments from a roofline model proto.
+
+  Args:
+    roofline_model_db: A RooflineModelDatabase proto.
+
+  Returns:
+    A (table_description, data, custom_properties) tuple. Rows are
+    [severity, message] pairs: all INFO messages first, then WARNING, then
+    ERROR. The custom properties are empty.
+  """
+  table_description = [
+      ("severity", "string", "Severity"),
+      ("message", "string", "Message"),
+  ]
+  diagnostics = roofline_model_db.diagnostics
+  messages_by_severity = (
+      ("INFO", diagnostics.info),
+      ("WARNING", diagnostics.warnings),
+      ("ERROR", diagnostics.errors),
+  )
+  data = [
+      [severity, message]
+      for severity, messages in messages_by_severity
+      for message in messages
+  ]
+  return (table_description, data, {})
+
+
+def generate_diagnostics_table(roofline_model_db):
+  """Creates the diagnostics gviz_api.DataTable from a roofline model proto."""
+  # The args tuple is (table_description, data, custom_properties).
+  table_args = get_diagnostics_table_args(roofline_model_db)
+  return gviz_api.DataTable(*table_args)
+
+
+def to_json(raw_data):
+  """Converts a serialized RooflineModelDatabase proto to a JSON string.
+
+  Args:
+    raw_data: The serialized RooflineModelDatabase.
+
+  Returns:
+    A JSON array string holding the roofline model table followed by the
+    diagnostics table.
+  """
+  roofline_model_db = roofline_model_pb2.RooflineModelDatabase()
+  roofline_model_db.ParseFromString(raw_data)
+  tables = (
+      generate_roofline_model_table(roofline_model_db),
+      generate_diagnostics_table(roofline_model_db),
+  )
+  return "[" + ",".join(table.ToJSon() for table in tables) + "]"
diff --git a/plugin/tensorboard_plugin_profile/profile_plugin.py b/plugin/tensorboard_plugin_profile/profile_plugin.py
index 0516c038..409010f0 100644
--- a/plugin/tensorboard_plugin_profile/profile_plugin.py
+++ b/plugin/tensorboard_plugin_profile/profile_plugin.py
@@ -111,6 +111,7 @@
'tf_data_bottleneck_analysis^',
'op_profile^',
'hlo_stats^',
+ 'roofline_model^',
]
# XPlane generated tools that support all host mode.
diff --git a/plugin/tensorboard_plugin_profile/protobuf/BUILD b/plugin/tensorboard_plugin_profile/protobuf/BUILD
index c13a65a0..1c79823a 100644
--- a/plugin/tensorboard_plugin_profile/protobuf/BUILD
+++ b/plugin/tensorboard_plugin_profile/protobuf/BUILD
@@ -18,6 +18,7 @@ proto_library(
"kernel_stats.proto",
"overview_page.proto",
"power_metrics.proto",
+ "roofline_model.proto",
"tf_data_stats.proto",
"tf_stats.proto",
"tpu_input_pipeline.proto",
@@ -37,6 +38,7 @@ py_proto_library(
"kernel_stats.proto",
"overview_page.proto",
"power_metrics.proto",
+ "roofline_model.proto",
"tf_data_stats.proto",
"tf_stats.proto",
"tpu_input_pipeline.proto",
diff --git a/plugin/tensorboard_plugin_profile/protobuf/roofline_model.proto b/plugin/tensorboard_plugin_profile/protobuf/roofline_model.proto
new file mode 100644
index 00000000..6a7bfbe7
--- /dev/null
+++ b/plugin/tensorboard_plugin_profile/protobuf/roofline_model.proto
@@ -0,0 +1,196 @@
+// This proto describes the format of the output profile file from
+// the Roofline Model tool.
+syntax = "proto2";
+
+package tensorflow.profiler.roofline_model;
+
+import "plugin/tensorboard_plugin_profile/protobuf/diagnostics.proto";
+
+// The record type which describes the scope this record captures.
+enum RecordType {
+ INVALID_RECORD_TYPE = 0;
+
+ // Captures the entire profiling duration including incomplete steps.
+ ALL = 1;
+
+ // Captures the average of all complete steps.
+ AVERAGE_STEP = 2;
+
+ // Captures a single step.
+ PER_STEP = 3;
+
+ // Same as ALL but the performance metrics (FLOPS and memory bandwidth) are
+ // derived from the hardware performance counters.
+ ALL_HW = 4;
+}
+
+// A database of RooflineModel records.
+message RooflineModelDatabase {
+ // The device type.
+ optional string device_type = 1;
+
+ // Whether megacore is used.
+ optional bool megacore = 12;
+
+ // Whether the device has shared CMEM.
+ optional bool has_cmem = 8;
+
+ // Whether the device has merged VMEM.
+ optional bool has_merged_vmem = 15;
+
+ // Peak flop rate in GFLOP/s.
+ optional double peak_flop_rate = 2;
+
+ // Peak HBM bandwidth in GiB/s
+ optional double peak_hbm_bw = 9;
+
+ // Peak CMEM read bandwidth in GiB/s
+ optional double peak_cmem_read_bw = 10;
+
+ // Peak CMEM write bandwidth in GiB/s
+ optional double peak_cmem_write_bw = 11;
+
+ // Peak VMEM read bandwidth in GiB/s
+ optional double peak_vmem_read_bw = 13;
+
+ // Peak VMEM write bandwidth in GiB/s
+ optional double peak_vmem_write_bw = 14;
+
+ // All RooflineModel records, one for each HLO operation.
+ repeated RooflineModelRecord roofline_model_record = 5;
+
+ // Error and warning messages for diagnosing profiling issues.
+ optional tensorflow.profiler.Diagnostics diagnostics = 7;
+
+ reserved 3, 4, 6;
+}
+
+// There is one RooflineModelRecord for each HLO operation profiled.
+// Next ID: 43
+message RooflineModelRecord {
+ // The record type.
+ optional RecordType record_type = 18;
+
+ // Step number when record type is PER_STEP. Otherwise, invalid.
+ optional uint32 step_num = 19;
+
+ // The rank by self time
+ optional uint64 rank = 1;
+
+ // The hlo module id of the op
+ optional uint64 hlo_module_id = 35;
+
+ // The HLO category name.
+ optional string hlo_category = 17;
+
+ // The HLO operation name.
+ optional string hlo_name = 2;
+
+ // Number of occurrences of the operation.
+ optional int64 occurrences = 3;
+
+ // Total "accumulated" time in micro-seconds that the operation
+ // took. If this operation has any children operations,
+ // the "accumulated" time includes the time spent inside children.
+ optional double total_time_in_us = 4;
+
+ // Total time per core in micro-seconds.
+ optional double total_time_per_core_in_us = 20;
+
+ // Total time as fraction of the total program time.
+ optional double total_time_in_percentage = 21;
+
+ // Average "accumulated" time in micro-seconds that each
+ // occurrence of the operation took.
+ optional double avg_time_in_us = 5;
+
+ // Total "self" time in micro-seconds that the operation took.
+ // If this operation has any children operations, the "self" time
+ // doesn't include the time spent inside children.
+ optional double total_self_time_in_us = 6;
+
+ // Average "self" time in micro-seconds that the operation took.
+ optional double avg_self_time_in_us = 7;
+
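+ // Total "self" time of the operation expressed as a fraction.
+ optional double total_self_time_as_fraction = 8;
+
+ // Cumulative total "self" time expressed as a fraction.
+ optional double cumulative_total_self_time_as_fraction = 9;
+
+ // Fraction of the total "accumulated" time that was caused by
+ // DMA stall.
+ optional double dma_stall_fraction = 10;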
+
+ // Number of total floating-point operations (FLOPs) performed per second
+ // normalized to the bf16 peak performance.
+ optional double measured_flop_rate = 13;
+
+ // Number of total floating point operations (FLOPs) performed per second for
+ // the op.
+ optional double model_flop_rate = 38;
+
+ // Number of total bytes (including both read and write) accessed per
+ // second.
+ optional double measured_memory_bw = 14;
+
+ // HBM bandwidth in GiB/s (including both read and write).
+ optional double hbm_bw = 27;
+
+ // CMEM read bandwidth in GiB/s.
+ optional double cmem_read_bw = 28;
+
+ // CMEM write bandwidth in GiB/s.
+ optional double cmem_write_bw = 29;
+
+ // VMEM read bandwidth in GiB/s.
+ optional double vmem_read_bw = 39;
+
+ // VMEM write bandwidth in GiB/s.
+ optional double vmem_write_bw = 40;
+
+ // Overall operational intensity in FLOP/Byte.
+ optional double operational_intensity = 15;
+
+ // Operational intensity based on HBM in FLOP/Byte.
+ optional double hbm_operational_intensity = 30;
+
+ // Operational intensity based on CMEM read in FLOP/Byte.
+ optional double cmem_read_operational_intensity = 31;
+
+ // Operational intensity based on CMEM write in FLOP/Byte.
+ optional double cmem_write_operational_intensity = 32;
+
+ // Operational intensity based on VMEM read in FLOP/Byte.
+ optional double vmem_read_operational_intensity = 41;
+
+ // Operational intensity based on VMEM write in FLOP/Byte.
+ optional double vmem_write_operational_intensity = 42;
+
+ // Operational intensity based on the bottleneck resource in FLOP/Byte.
+ optional double bottleneck_operational_intensity = 33;
+
+ // Whether this operation is "Compute", "HBM", "CMEM Read", "CMEM Write"
+ // bound, according to the Roofline Model.
+ optional string bound_by = 16;
+
+ // The optimal flop rate calculated as
+ // (operational intensity) * (peak memory bw)
+ optional double optimal_flop_rate = 22;
+
+ // Roofline efficiency.
+ optional double roofline_efficiency = 34;
+
+ // Percentage of measured flop rate relative to the hardware limit.
+ optional double flop_rate_relative_to_hw_limit = 24;
+
+ // Percentage of measured memory bandwidth relative to the hardware limit.
+ optional double memory_bw_relative_to_hw_limit = 25;
+
+ // Whether the record is calculated including infeed and outfeed ops.
+ optional bool include_infeed_outfeed = 26;
+
+ // Flops for the record
+ optional uint64 flops = 36;
+
+ // Bytes accessed for the record
+ optional uint64 bytes_accessed = 37;
+
+ reserved 11, 12, 23;
+}