From 8c891373e95f5b40d07d30db6f0935d3d64980ef Mon Sep 17 00:00:00 2001 From: Jason Harper Date: Mon, 13 May 2024 06:41:10 -0700 Subject: [PATCH 01/23] check size of huge pages before changing number (#315) --- .../resources/collector_reports.yaml.tmpl | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/cmd/orchestrator/resources/collector_reports.yaml.tmpl b/cmd/orchestrator/resources/collector_reports.yaml.tmpl index dbd028a..6cd66f2 100644 --- a/cmd/orchestrator/resources/collector_reports.yaml.tmpl +++ b/cmd/orchestrator/resources/collector_reports.yaml.tmpl @@ -415,10 +415,15 @@ commands: - label: Memory MLC Loaded Latency Test command: |- # measure memory loaded latency + # need at least 2 GB (2,097,152 KB) of huge pages per NUMA node + min_kb=2097152 numa_nodes=$( lscpu | grep "NUMA node(s):" | awk '{print $3}' ) + size_huge_pages_kb=$( cat /proc/meminfo | grep Hugepagesize | awk '{print $2}' ) orig_num_huge_pages=$( cat /proc/sys/vm/nr_hugepages ) - new_num_huge_pages=$( echo "$numa_nodes * 1000" | bc ) - echo $new_num_huge_pages > /proc/sys/vm/nr_hugepages + needed_num_huge_pages=$( echo "$numa_nodes * $min_kb / $size_huge_pages_kb" | bc ) + if [ $needed_num_huge_pages -gt $orig_num_huge_pages ]; then + echo $needed_num_huge_pages > /proc/sys/vm/nr_hugepages + fi mlc --loaded_latency echo $orig_num_huge_pages > /proc/sys/vm/nr_hugepages modprobe: msr @@ -426,10 +431,15 @@ commands: - label: Memory MLC Bandwidth command: |- # measure memory bandwidth matrix + # need at least 2 GB (2,097,152 KB) of huge pages per NUMA node + min_kb=2097152 numa_nodes=$( lscpu | grep "NUMA node(s):" | awk '{print $3}' ) + size_huge_pages_kb=$( cat /proc/meminfo | grep Hugepagesize | awk '{print $2}' ) orig_num_huge_pages=$( cat /proc/sys/vm/nr_hugepages ) - new_num_huge_pages=$( echo "$numa_nodes * 1000" | bc ) - echo $new_num_huge_pages > /proc/sys/vm/nr_hugepages + needed_num_huge_pages=$( echo "$numa_nodes * $min_kb / $size_huge_pages_kb" | 
bc ) + if [ $needed_num_huge_pages -gt $orig_num_huge_pages ]; then + echo $needed_num_huge_pages > /proc/sys/vm/nr_hugepages + fi mlc --bandwidth_matrix echo $orig_num_huge_pages > /proc/sys/vm/nr_hugepages modprobe: msr From e9f0d94f0167fb974de60343a40138ec9cf743d7 Mon Sep 17 00:00:00 2001 From: Jason Harper Date: Mon, 13 May 2024 06:41:26 -0700 Subject: [PATCH 02/23] Reduce open files required by turbostat by limiting output columns (#316) --- cmd/orchestrator/resources/collector_reports.yaml.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/orchestrator/resources/collector_reports.yaml.tmpl b/cmd/orchestrator/resources/collector_reports.yaml.tmpl index 6cd66f2..4e36018 100644 --- a/cmd/orchestrator/resources/collector_reports.yaml.tmpl +++ b/cmd/orchestrator/resources/collector_reports.yaml.tmpl @@ -461,7 +461,7 @@ commands: - label: CPU Turbo Test command: |- # measure tdp and all-core turbo frequency - ((turbostat -i 2 2>/dev/null &) ; stress-ng --cpu 1 -t 20s 2>&1 ; stress-ng --cpu 0 -t 60s 2>&1 ; pkill -9 -f turbostat) | awk '$0~"stress" {print $0} $1=="Package" || $1=="CPU" || $1=="Core" || $1=="Node" {if(f!=1) print $0;f=1} $1=="-" {print $0}' + ((turbostat --show 'Package','Core','Bzy_MHz','PkgWatt','PkgTmp' -i 2 &) ; stress-ng --cpu 1 -t 20s 2>&1 ; stress-ng --cpu 0 -t 60s 2>&1 ; pkill -9 -f turbostat) | awk '$0~"stress" {print $0} $1=="Package" || $1=="CPU" || $1=="Core" || $1=="Node" {if(f!=1) print $0;f=1} $1=="-" {print $0}' superuser: true modprobe: msr - label: CPU Idle From 1c320d15bf70cbfc3c20801a7ab19c8345ad0d65 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 May 2024 06:41:48 -0700 Subject: [PATCH 03/23] build(deps): bump golang.org/x/term in /internal/progress (#314) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.19.0 to 0.20.0. 
- [Commits](https://github.com/golang/term/compare/v0.19.0...v0.20.0) --- internal/progress/go.mod | 4 ++-- internal/progress/go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/progress/go.mod b/internal/progress/go.mod index 99b9e35..58239bb 100644 --- a/internal/progress/go.mod +++ b/internal/progress/go.mod @@ -2,6 +2,6 @@ module github.com/svr-info/internal/progress go 1.22 -require golang.org/x/term v0.19.0 +require golang.org/x/term v0.20.0 -require golang.org/x/sys v0.19.0 // indirect +require golang.org/x/sys v0.20.0 // indirect diff --git a/internal/progress/go.sum b/internal/progress/go.sum index 54492a6..eb6a443 100644 --- a/internal/progress/go.sum +++ b/internal/progress/go.sum @@ -1,4 +1,4 @@ -golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= -golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q= -golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= From b78e38773741199cc00a3a2a294d451401caaf02 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 May 2024 06:42:05 -0700 Subject: [PATCH 04/23] build(deps): bump golang.org/x/term from 0.19.0 to 0.20.0 (#313) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.19.0 to 0.20.0. 
- [Commits](https://github.com/golang/term/compare/v0.19.0...v0.20.0) --- go.mod | 4 ++-- go.sum | 11 ++++------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index 60ea59f..4fd9a18 100644 --- a/go.mod +++ b/go.mod @@ -30,7 +30,7 @@ require ( github.com/intel/svr-info/internal/util v0.0.0-00010101000000-000000000000 github.com/xuri/excelize/v2 v2.8.1 golang.org/x/exp v0.0.0-20240409090435-93d18d7e34b8 - golang.org/x/term v0.19.0 + golang.org/x/term v0.20.0 golang.org/x/text v0.14.0 gopkg.in/yaml.v2 v2.4.0 ) @@ -68,7 +68,7 @@ require ( golang.org/x/mod v0.17.0 // indirect golang.org/x/net v0.24.0 // indirect golang.org/x/sync v0.7.0 // indirect - golang.org/x/sys v0.19.0 // indirect + golang.org/x/sys v0.20.0 // indirect golang.org/x/tools v0.20.0 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect ) diff --git a/go.sum b/go.sum index 388c583..6c2762c 100644 --- a/go.sum +++ b/go.sum @@ -40,7 +40,6 @@ github.com/go-git/go-billy/v5 v5.5.0 h1:yEY4yhzCDuMGSv83oGxiBotRzhwhNr8VZyphhiu+ github.com/go-git/go-billy/v5 v5.5.0/go.mod h1:hmexnoNsr2SJU1Ju67OaNz5ASJY3+sHgFRpCtpDCKow= github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399 h1:eMje31YglSBqCdIqdhKBW8lokaMrL3uTkpGYlE2OOT4= github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399/go.mod h1:1OCfN199q1Jm3HZlxleg+Dw/mwps2Wbk9frAWm+4FII= -github.com/go-git/go-git/v5 v5.11.0/go.mod h1:6GFcX2P3NM7FPBfpePbpLd21XxsgdAt+lKqXmCUiUCY= github.com/go-git/go-git/v5 v5.12.0 h1:7Md+ndsjrzZxbddRDZjF14qK+NN56sy6wkqaVrjZtys= github.com/go-git/go-git/v5 v5.12.0/go.mod h1:FTM9VKtnI2m65hNI/TenDDDnUf2Q9FHnXYjuz9i5OEY= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= @@ -79,8 +78,6 @@ github.com/richardlehane/msoleps v1.0.3 h1:aznSZzrwYRl3rLKRT3gUk9am7T/mLNSnJINvN github.com/richardlehane/msoleps v1.0.3/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= github.com/rogpeppe/go-internal v1.12.0 
h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= -github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8= -github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= @@ -151,15 +148,15 @@ golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= -golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= -golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q= -golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= +golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= +golang.org/x/term v0.20.0/go.mod 
h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= From 13218028e9c89e25f50c1327d283594799dbd742 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 May 2024 06:43:38 -0700 Subject: [PATCH 05/23] build(deps): bump golang.org/x/text from 0.14.0 to 0.15.0 (#312) Bumps [golang.org/x/text](https://github.com/golang/text) from 0.14.0 to 0.15.0. - [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.14.0...v0.15.0) --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 4fd9a18..892031c 100644 --- a/go.mod +++ b/go.mod @@ -31,7 +31,7 @@ require ( github.com/xuri/excelize/v2 v2.8.1 golang.org/x/exp v0.0.0-20240409090435-93d18d7e34b8 golang.org/x/term v0.20.0 - golang.org/x/text v0.14.0 + golang.org/x/text v0.15.0 gopkg.in/yaml.v2 v2.4.0 ) diff --git a/go.sum b/go.sum index 6c2762c..13d5559 100644 --- a/go.sum +++ b/go.sum @@ -164,8 +164,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod 
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= From 1730774a8556ba0d9978ce78d16e60531447a281 Mon Sep 17 00:00:00 2001 From: Jason Harper Date: Wed, 15 May 2024 15:37:45 -0700 Subject: [PATCH 06/23] check for zero counter value when check for tma support (#317) --- cmd/pmu2metrics/metadata.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cmd/pmu2metrics/metadata.go b/cmd/pmu2metrics/metadata.go index 3cab212..07d4b09 100644 --- a/cmd/pmu2metrics/metadata.go +++ b/cmd/pmu2metrics/metadata.go @@ -307,7 +307,7 @@ func getTMASupported(perfPath string) (supported bool, output string, err error) err = nil return } - // event values being equal is 2nd indication that these events are not (properly) supported + // event values being zero or equal to each other is 2nd indication that these events are not (properly) supported output = errBuffer.String() vals := make(map[string]float64) lines := strings.Split(output, "\n") @@ -318,11 +318,14 @@ func getTMASupported(perfPath string) (supported bool, output string, err error) if match != nil { vals[match[2]], err = strconv.ParseFloat(match[1], 64) if err != nil { - return + // this should never happen + panic("failed to parse float") } } } - supported = !(vals["TOPDOWN.SLOTS"] == vals["PERF_METRICS.BAD_SPECULATION"]) + topDownSlots := vals["TOPDOWN.SLOTS"] + badSpeculation := vals["PERF_METRICS.BAD_SPECULATION"] + supported = topDownSlots != badSpeculation && topDownSlots != 0 && badSpeculation != 0 return } From b0e3190f4a050d6405f9bf2228364ec3ed77a626 Mon Sep 17 00:00:00 2001 From: Jason Harper Date: Wed, 15 May 2024 15:38:03 -0700 Subject: [PATCH 07/23] get PCIe address and NUMA node for NVME drives (#318) --- .../resources/collector_reports.yaml.tmpl | 26 ++++++++++++-- cmd/reporter/report_tables.go | 35 
+++++++++++-------- 2 files changed, 44 insertions(+), 17 deletions(-) diff --git a/cmd/orchestrator/resources/collector_reports.yaml.tmpl b/cmd/orchestrator/resources/collector_reports.yaml.tmpl index 4e36018..bec0d91 100644 --- a/cmd/orchestrator/resources/collector_reports.yaml.tmpl +++ b/cmd/orchestrator/resources/collector_reports.yaml.tmpl @@ -60,8 +60,30 @@ commands: - label: maximum frequency command: cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq parallel: true - - label: lsblk -r -o - command: lsblk -r -o NAME,MODEL,SIZE,MOUNTPOINT,FSTYPE,RQ-SIZE,MIN-IO -e7 -e1 + - label: disk info + command: |- + echo "NAME|MODEL|SIZE|MOUNTPOINT|FSTYPE|RQ-SIZE|MIN-IO|FIRMWARE|ADDR|NUMA" + lsblk -r -o NAME,MODEL,SIZE,MOUNTPOINT,FSTYPE,RQ-SIZE,MIN-IO -e7 -e1 \ + | cut -d' ' -f1,2,3,4,5,6,7 --output-delimiter='|' \ + | while IFS='|' read -r name model size mountpoint fstype rqsize minio ; + do + fw="" + numa="" + addr="" + if [ "$name" = "NAME" ] ; then + continue + fi + if [ -f /sys/block/"$name"/device/firmware_rev ] ; then + fw=$( cat /sys/block/"$name"/device/firmware_rev ) + fi + if [ -f /sys/block/"$name"/device/numa_node ] ; then + numa=$( cat /sys/block/"$name"/device/numa_node ) + fi + if [ -f /sys/block/"$name"/device/address ] ; then + addr=$( cat /sys/block/"$name"/device/address ) + fi + echo "$name|$model|$size|$mountpoint|$fstype|$rqsize|$minio|$fw|$addr|$numa" + done parallel: true - label: df -h command: df -h diff --git a/cmd/reporter/report_tables.go b/cmd/reporter/report_tables.go index d4b7f29..85b7d4d 100644 --- a/cmd/reporter/report_tables.go +++ b/cmd/reporter/report_tables.go @@ -1442,31 +1442,34 @@ func newDiskTable(sources []*Source, category TableCategory) (table *Table) { Category: category, AllHostValues: []HostValues{}, } + var infoFields = []string{"NAME", "MODEL", "SIZE", "MOUNTPOINT", "FSTYPE", "RQ-SIZE", "MIN-IO", "FIRMWARE", "ADDR", "NUMA"} for _, source := range sources { var hostValues = HostValues{ Name: 
source.getHostname(), ValueNames: []string{ - "NAME", - "MODEL", - "SIZE", - "MOUNTPOINT", - "FSTYPE", - "RQ-SIZE", - "MIN-IO", - "FwRev", + "Name", + "Model", + "Size", + "Mount Point", + "Type", + "Request Queue Size", + "Minimum I/O Size", + "Firmware Version", + "PCIe Address", + "NUMA Node", }, Values: [][]string{}, } - for i, line := range source.getCommandOutputLines("lsblk -r -o") { - fields := strings.Split(line, " ") - if len(fields) != len(hostValues.ValueNames)-1 { - log.Printf("lsblk field count mismatch: %s", strings.Join(fields, ",")) + for i, line := range source.getCommandOutputLines("disk info") { + fields := strings.Split(line, "|") + if len(fields) != len(infoFields) { + log.Printf("field count mismatch: %s", strings.Join(fields, ",")) continue } if i == 0 { // headers are in the first line for idx, field := range fields { - if field != hostValues.ValueNames[idx] { - log.Printf("lsblk field name mismatch: %s", strings.Join(fields, ",")) + if field != infoFields[idx] { + log.Printf("field name mismatch: %s", strings.Join(fields, ",")) break } } @@ -1475,7 +1478,9 @@ func newDiskTable(sources []*Source, category TableCategory) (table *Table) { // clean up the model name fields[1] = strings.ReplaceAll(fields[1], `\x20`, " ") fields[1] = strings.TrimSpace(fields[1]) - fields = append(fields, source.getDiskFwRev(fields[0])) + if fields[7] == "" { + fields[7] = source.getDiskFwRev(fields[0]) + } hostValues.Values = append(hostValues.Values, fields) } table.AllHostValues = append(table.AllHostValues, hostValues) From ceaec76ea47b498a8cd80d49d062318533aa7117 Mon Sep 17 00:00:00 2001 From: Jason Harper Date: Thu, 16 May 2024 13:27:05 -0700 Subject: [PATCH 08/23] bring build instructions up to date in README (#319) --- README.md | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 0e80229..a42a858 100644 --- a/README.md +++ b/README.md @@ -50,19 +50,13 @@ For example, Intel® Memory Latency Checker can 
be downloaded from here: [MLC](h We welcome bug reports, questions and feature requests. Please submit via Github Issues. ## Building svr-info Due to the large number of build dependencies required, a Docker container-based build environment is provided. Assuming your system has Docker installed (instructions not provided here), the following steps are required to build svr-info: -- `builder/build_docker_image` creates the docker image -- `builder/build` runs `make dist` in the container +- `builder/build` creates the necessary docker images and runs make in the container After a successful build, you will find the build output in the `dist` folder. -Other builder commands available: -- `builder/test` runs the automated tests in the container via `make test` -- `builder/shell` starts the container and provides a bash prompt useful for debugging build problems ### Incremental Builds -After a complete build using the build container, you can perform incremental builds directly on your host assuming dependencies are installed there. This can make the code/build/test cycle much quicker than rebuilding everything using the Docker container. You can look at the Dockerfile in the builder directory to get the build dependencies for everything or, more likely, you only need go(lang) so install the latest and get to work. +After a complete build using the build container, you can perform incremental builds directly on your host assuming dependencies are installed there. This can make the code/build/test cycle much quicker than rebuilding everything using the Docker container. -From the project's root directory, you can use the makefile. There are quite a few targets. Most useful may be `make apps`. This will build all the go-based apps. - -If you are working on a single go-based app. You can run `go build` in the app's source directory to build it. +If you are working on a single go-based app, you can run `go build` to build it.
### Including Additional Collection Tools In The Build Additional data collection tools can be built into the svr-info distribution by placing binaries in the bin directory before starting the build. From df0d1a7c980969ab75bb1b8e6e100acd1a072623 Mon Sep 17 00:00:00 2001 From: Jason Harper Date: Fri, 24 May 2024 11:33:54 -0700 Subject: [PATCH 09/23] add Processor TDP to Mktg. Claim Table (#321) --- cmd/reporter/report_tables.go | 12 ++++++++---- cmd/reporter/source.go | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/cmd/reporter/report_tables.go b/cmd/reporter/report_tables.go index 85b7d4d..236b2ff 100644 --- a/cmd/reporter/report_tables.go +++ b/cmd/reporter/report_tables.go @@ -40,9 +40,9 @@ func newMarketingClaimTable(fullReport *Report, tableNicSummary *Table, tableDis Category: category, AllHostValues: []HostValues{}, } - // BASELINE: 1-node, 2x Intel® Xeon® , xx cores, HT On/Off?, Turbo On/Off?, NUMA xxx, Integrated Accelerators Available [used]: xxx, Total Memory xxx GB (xx slots/ xx GB/ xxxx MHz [run @ xxxx MHz] ), , , , . Software: WORKLOAD+VERSION, COMPILER, LIBRARIES, OTHER_SW. Test by Intel as of . - template := "1-node, %sx %s, %s cores, HT %s, Turbo %s, NUMA %s, Integrated Accelerators Available [used]: %s, Total Memory %s, BIOS %s, microcode %s, %s, %s, %s, %s. Software: WORKLOAD+VERSION, COMPILER, LIBRARIES, OTHER_SW. Test by Intel as of %s." - var date, socketCount, cpuModel, coreCount, htOnOff, turboOnOff, numaNodes, installedMem, biosVersion, uCodeVersion, nics, disks, operatingSystem, kernelVersion string + // BASELINE: 1-node, 2x Intel® Xeon® , xx cores, 100W TDP, HT On/Off?, Turbo On/Off?, NUMA xxx, Integrated Accelerators Available [used]: xxx, Total Memory xxx GB (xx slots/ xx GB/ xxxx MHz [run @ xxxx MHz] ), , , , . Software: WORKLOAD+VERSION, COMPILER, LIBRARIES, OTHER_SW. Test by Intel as of . 
+ template := "1-node, %sx %s, %s cores, %s TDP, HT %s, Turbo %s, NUMA %s, Integrated Accelerators Available [used]: %s, Total Memory %s, BIOS %s, microcode %s, %s, %s, %s, %s. Software: WORKLOAD+VERSION, COMPILER, LIBRARIES, OTHER_SW. Test by Intel as of %s." + var date, socketCount, cpuModel, coreCount, tdp, htOnOff, turboOnOff, numaNodes, installedMem, biosVersion, uCodeVersion, nics, disks, operatingSystem, kernelVersion string for sourceIdx, source := range fullReport.Sources { var hostValues = HostValues{ @@ -56,6 +56,10 @@ func newMarketingClaimTable(fullReport *Report, tableNicSummary *Table, tableDis socketCount, _ = fullReport.findTable("CPU").getValue(sourceIdx, "Sockets") cpuModel, _ = fullReport.findTable("CPU").getValue(sourceIdx, "CPU Model") coreCount, _ = fullReport.findTable("CPU").getValue(sourceIdx, "Cores per Socket") + tdp, _ = fullReport.findTable("Power").getValue(sourceIdx, "TDP") + if tdp == "" { + tdp = "?" + } hyperthreading, _ := fullReport.findTable("CPU").getValue(sourceIdx, "Hyperthreading") if hyperthreading == "Enabled" { htOnOff = "On" @@ -83,7 +87,7 @@ func newMarketingClaimTable(fullReport *Report, tableNicSummary *Table, tableDis disks, _ = tableDiskSummary.getValue(sourceIdx, "Disk") operatingSystem, _ = fullReport.findTable("Operating System").getValue(sourceIdx, "OS") kernelVersion, _ = fullReport.findTable("Operating System").getValue(sourceIdx, "Kernel") - claim := fmt.Sprintf(template, socketCount, cpuModel, coreCount, htOnOff, turboOnOff, numaNodes, accelerators, installedMem, biosVersion, uCodeVersion, nics, disks, operatingSystem, kernelVersion, date) + claim := fmt.Sprintf(template, socketCount, cpuModel, coreCount, tdp, htOnOff, turboOnOff, numaNodes, accelerators, installedMem, biosVersion, uCodeVersion, nics, disks, operatingSystem, kernelVersion, date) hostValues.Values = append(hostValues.Values, []string{claim}) table.AllHostValues = append(table.AllHostValues, hostValues) } diff --git a/cmd/reporter/source.go 
b/cmd/reporter/source.go index cbadef2..c85e7df 100644 --- a/cmd/reporter/source.go +++ b/cmd/reporter/source.go @@ -1013,7 +1013,7 @@ func (s *Source) getTDP() (val string) { msrHex := s.getCommandOutputLine("rdmsr 0x610") msr, err := strconv.ParseInt(msrHex, 16, 0) if err == nil && msr != 0 { - val = fmt.Sprint(msr/8) + " watts" + val = fmt.Sprint(msr/8) + "W" } return } From 0827dcefd666110eb416b442fa506296caf4d161 Mon Sep 17 00:00:00 2001 From: jharper5 Date: Thu, 30 May 2024 09:19:55 -0700 Subject: [PATCH 10/23] fix make format --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 5627548..fac628a 100644 --- a/Makefile +++ b/Makefile @@ -121,11 +121,11 @@ test: format_check: @echo "Running gofmt -l to check for code formatting issues..." - @test -z $(shell gofmt -l -s internal/commandfile/ internal/core/ internal/cpu/ internal/progress/ internal/target/ cmd/orchestrator/ cmd/collector/ cmd/reporter/ cmd/pmu2metrics/ cmd/msrread/ cmd/msrwrite/) || { echo "[WARN] Formatting issues detected. Resolve with 'make format'"; exit 1; } + @test -z $(shell gofmt -l -s ./) || { echo "[WARN] Formatting issues detected. 
Resolve with 'make format'"; exit 1; } @echo "gofmt detected no issues" check: format_check format: - gofmt -l -w -s internal/commandfile/ internal/core/ internal/cpu/ internal/progress/ internal/target/ orchestrator/ collector/ reporter/ pmu2metrics/ rdmsr/ wrmsr/ + gofmt -l -w -s ./ From 6217eb05d348cc6b1bd5ab3c731b7390977fc047 Mon Sep 17 00:00:00 2001 From: jharper5 Date: Fri, 31 May 2024 15:53:37 -0700 Subject: [PATCH 11/23] move to Ubuntu 18.04 to build third-party tools --- cmd/pmu2metrics/metadata.go | 2 ++ third_party/build.Dockerfile | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cmd/pmu2metrics/metadata.go b/cmd/pmu2metrics/metadata.go index 07d4b09..7c55fe7 100644 --- a/cmd/pmu2metrics/metadata.go +++ b/cmd/pmu2metrics/metadata.go @@ -314,6 +314,8 @@ func getTMASupported(perfPath string) (supported bool, output string, err error) // example line: " 784333932 TOPDOWN.SLOTS (59.75%)" re := regexp.MustCompile(`\s+(\d+)\s+(\w*\.*\w*)\s+.*`) for _, line := range lines { + // count may include commas as thousands separators, remove them + line := strings.ReplaceAll(line, ",", "") match := re.FindStringSubmatch(line) if match != nil { vals[match[2]], err = strconv.ParseFloat(match[1], 64) diff --git a/third_party/build.Dockerfile b/third_party/build.Dockerfile index 7ad9c56..9abfd15 100644 --- a/third_party/build.Dockerfile +++ b/third_party/build.Dockerfile @@ -4,7 +4,7 @@ # build image (third_party directory): # $ GITHUB_ACCESS_TOKEN= # $ docker image build -f build.Dockerfile --tag svr-info-third-party:v1 . 
-FROM ubuntu:16.04 as builder +FROM ubuntu:18.04 as builder ENV LANG en_US.UTF-8 ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y apt-utils locales wget curl git netcat-openbsd software-properties-common jq zip unzip From d25a883fff4cd9a637eab1709f03f2359084af69 Mon Sep 17 00:00:00 2001 From: Jason Harper Date: Tue, 4 Jun 2024 08:54:17 -0700 Subject: [PATCH 12/23] add additional field for NVMe drives to disk table (#324) --- .../resources/collector_reports.yaml.tmpl | 48 ++++++++++++++----- cmd/reporter/report_tables.go | 8 +++- 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/cmd/orchestrator/resources/collector_reports.yaml.tmpl b/cmd/orchestrator/resources/collector_reports.yaml.tmpl index bec0d91..3579ce8 100644 --- a/cmd/orchestrator/resources/collector_reports.yaml.tmpl +++ b/cmd/orchestrator/resources/collector_reports.yaml.tmpl @@ -62,27 +62,49 @@ commands: parallel: true - label: disk info command: |- - echo "NAME|MODEL|SIZE|MOUNTPOINT|FSTYPE|RQ-SIZE|MIN-IO|FIRMWARE|ADDR|NUMA" + echo "NAME|MODEL|SIZE|MOUNTPOINT|FSTYPE|RQ-SIZE|MIN-IO|FIRMWARE|ADDR|NUMA|LINKSPEED|LINKWIDTH|MAXLINKSPEED|MAXLINKWIDTH" lsblk -r -o NAME,MODEL,SIZE,MOUNTPOINT,FSTYPE,RQ-SIZE,MIN-IO -e7 -e1 \ | cut -d' ' -f1,2,3,4,5,6,7 --output-delimiter='|' \ | while IFS='|' read -r name model size mountpoint fstype rqsize minio ; do - fw="" - numa="" - addr="" + # skip the lsblk output header if [ "$name" = "NAME" ] ; then continue fi - if [ -f /sys/block/"$name"/device/firmware_rev ] ; then - fw=$( cat /sys/block/"$name"/device/firmware_rev ) - fi - if [ -f /sys/block/"$name"/device/numa_node ] ; then - numa=$( cat /sys/block/"$name"/device/numa_node ) - fi - if [ -f /sys/block/"$name"/device/address ] ; then - addr=$( cat /sys/block/"$name"/device/address ) + fw="" + addr="" + numa="" + curlinkspeed="" + curlinkwidth="" + maxlinkspeed="" + maxlinkwidth="" + # replace \x20 with space in model + model=${model//\\x20/ } + # if name refers to an NVMe 
device e.g, nvme0n1 - nvme99n99 + if [[ $name =~ ^(nvme[0-9]+)n[0-9]+$ ]]; then + # get the name without the namespace + nvme=${BASH_REMATCH[1]} + if [ -f /sys/block/"$name"/device/firmware_rev ] ; then + fw=$( cat /sys/block/"$name"/device/firmware_rev ) + fi + if [ -f /sys/block/"$name"/device/address ] ; then + addr=$( cat /sys/block/"$name"/device/address ) + fi + if [ -d "/sys/block/$name/device/${nvme}" ]; then + numa=$( cat /sys/block/"$name"/device/"${nvme}"/numa_node ) + curlinkspeed=$( cat /sys/block/"$name"/device/"${nvme}"/device/current_link_speed ) + curlinkwidth=$( cat /sys/block/"$name"/device/"${nvme}"/device/current_link_width ) + maxlinkspeed=$( cat /sys/block/"$name"/device/"${nvme}"/device/max_link_speed ) + maxlinkwidth=$( cat /sys/block/"$name"/device/"${nvme}"/device/max_link_width ) + elif [ -d "/sys/block/$name/device/device" ]; then + numa=$( cat /sys/block/"$name"/device/device/numa_node ) + curlinkspeed=$( cat /sys/block/"$name"/device/device/current_link_speed ) + curlinkwidth=$( cat /sys/block/"$name"/device/device/current_link_width ) + maxlinkspeed=$( cat /sys/block/"$name"/device/device/max_link_speed ) + maxlinkwidth=$( cat /sys/block/"$name"/device/device/max_link_width ) + fi fi - echo "$name|$model|$size|$mountpoint|$fstype|$rqsize|$minio|$fw|$addr|$numa" + echo "$name|$model|$size|$mountpoint|$fstype|$rqsize|$minio|$fw|$addr|$numa|$curlinkspeed|$curlinkwidth|$maxlinkspeed|$maxlinkwidth" done parallel: true - label: df -h diff --git a/cmd/reporter/report_tables.go b/cmd/reporter/report_tables.go index 236b2ff..6abcf31 100644 --- a/cmd/reporter/report_tables.go +++ b/cmd/reporter/report_tables.go @@ -1446,7 +1446,7 @@ func newDiskTable(sources []*Source, category TableCategory) (table *Table) { Category: category, AllHostValues: []HostValues{}, } - var infoFields = []string{"NAME", "MODEL", "SIZE", "MOUNTPOINT", "FSTYPE", "RQ-SIZE", "MIN-IO", "FIRMWARE", "ADDR", "NUMA"} + var infoFields = []string{"NAME", "MODEL", "SIZE", 
"MOUNTPOINT", "FSTYPE", "RQ-SIZE", "MIN-IO", "FIRMWARE", "ADDR", "NUMA", "LINKSPEED", "LINKWIDTH", "MAXLINKSPEED", "MAXLINKWIDTH"} for _, source := range sources { var hostValues = HostValues{ Name: source.getHostname(), @@ -1461,6 +1461,10 @@ func newDiskTable(sources []*Source, category TableCategory) (table *Table) { "Firmware Version", "PCIe Address", "NUMA Node", + "Link Speed", + "Link Width", + "Max Link Speed", + "Max Link Width", }, Values: [][]string{}, } @@ -1480,8 +1484,8 @@ func newDiskTable(sources []*Source, category TableCategory) (table *Table) { continue } // clean up the model name - fields[1] = strings.ReplaceAll(fields[1], `\x20`, " ") fields[1] = strings.TrimSpace(fields[1]) + // if we don't have a firmware version, try to get it from another source if fields[7] == "" { fields[7] = source.getDiskFwRev(fields[0]) } From 99ac2de27e094ab53f7385278ca3b7a7cf85148e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 4 Jun 2024 08:54:37 -0700 Subject: [PATCH 13/23] build(deps): bump golang.org/x/text from 0.15.0 to 0.16.0 (#325) Bumps [golang.org/x/text](https://github.com/golang/text) from 0.15.0 to 0.16.0. 
- [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.15.0...v0.16.0) --- go.mod | 8 ++++---- go.sum | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/go.mod b/go.mod index 892031c..b57c604 100644 --- a/go.mod +++ b/go.mod @@ -31,7 +31,7 @@ require ( github.com/xuri/excelize/v2 v2.8.1 golang.org/x/exp v0.0.0-20240409090435-93d18d7e34b8 golang.org/x/term v0.20.0 - golang.org/x/text v0.15.0 + golang.org/x/text v0.16.0 gopkg.in/yaml.v2 v2.4.0 ) @@ -64,11 +64,11 @@ require ( github.com/xuri/nfp v0.0.0-20240318013403-ab9948c2c4a7 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - golang.org/x/crypto v0.22.0 // indirect + golang.org/x/crypto v0.23.0 // indirect golang.org/x/mod v0.17.0 // indirect - golang.org/x/net v0.24.0 // indirect + golang.org/x/net v0.25.0 // indirect golang.org/x/sync v0.7.0 // indirect golang.org/x/sys v0.20.0 // indirect - golang.org/x/tools v0.20.0 // indirect + golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect gopkg.in/warnings.v0 v0.1.2 // indirect ) diff --git a/go.sum b/go.sum index 13d5559..5eea1f1 100644 --- a/go.sum +++ b/go.sum @@ -111,8 +111,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.3.1-0.20221117191849-2c476679df9a/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/crypto v0.7.0/go.mod h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU= -golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= -golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= +golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/exp 
v0.0.0-20240409090435-93d18d7e34b8 h1:ESSUROHIBHg7USnszlcdmjBEwdMj9VUvU+OPk4yl2mc= golang.org/x/exp v0.0.0-20240409090435-93d18d7e34b8/go.mod h1:/lliqkxwWAhPjf5oSOIJup2XcqJaw8RGS6k3TGEc7GI= golang.org/x/image v0.14.0 h1:tNgSxAFe3jC4uYqvZdTr84SZoM1KfwdC9SKIFrLjFn4= @@ -128,8 +128,8 @@ golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= -golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= -golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -164,14 +164,14 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= -golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools 
v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.20.0 h1:hz/CVckiOxybQvFw6h7b/q80NTr9IUQb4s1IIzW7KNY= -golang.org/x/tools v0.20.0/go.mod h1:WvitBU7JJf6A4jOdg4S1tviW9bhUxkgeCui/0JHctQg= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= From 54bf9cb9697c438f77102c8bd0e9e611d75bc5f3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 6 Jun 2024 15:57:42 -0700 Subject: [PATCH 14/23] build(deps): bump golang.org/x/term in /internal/progress (#327) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.20.0 to 0.21.0. 
- [Commits](https://github.com/golang/term/compare/v0.20.0...v0.21.0) --- internal/progress/go.mod | 4 ++-- internal/progress/go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/progress/go.mod b/internal/progress/go.mod index 58239bb..9fd395c 100644 --- a/internal/progress/go.mod +++ b/internal/progress/go.mod @@ -2,6 +2,6 @@ module github.com/svr-info/internal/progress go 1.22 -require golang.org/x/term v0.20.0 +require golang.org/x/term v0.21.0 -require golang.org/x/sys v0.20.0 // indirect +require golang.org/x/sys v0.21.0 // indirect diff --git a/internal/progress/go.sum b/internal/progress/go.sum index eb6a443..6208220 100644 --- a/internal/progress/go.sum +++ b/internal/progress/go.sum @@ -1,4 +1,4 @@ -golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= -golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= -golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= +golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= +golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= From 45dd21535d4ca24ac532981c517a827586d8fe14 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 6 Jun 2024 15:58:02 -0700 Subject: [PATCH 15/23] build(deps): bump golang.org/x/term from 0.20.0 to 0.21.0 (#326) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.20.0 to 0.21.0. 
- [Commits](https://github.com/golang/term/compare/v0.20.0...v0.21.0) --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index b57c604..8c12606 100644 --- a/go.mod +++ b/go.mod @@ -30,7 +30,7 @@ require ( github.com/intel/svr-info/internal/util v0.0.0-00010101000000-000000000000 github.com/xuri/excelize/v2 v2.8.1 golang.org/x/exp v0.0.0-20240409090435-93d18d7e34b8 - golang.org/x/term v0.20.0 + golang.org/x/term v0.21.0 golang.org/x/text v0.16.0 gopkg.in/yaml.v2 v2.4.0 ) @@ -68,7 +68,7 @@ require ( golang.org/x/mod v0.17.0 // indirect golang.org/x/net v0.25.0 // indirect golang.org/x/sync v0.7.0 // indirect - golang.org/x/sys v0.20.0 // indirect + golang.org/x/sys v0.21.0 // indirect golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect gopkg.in/warnings.v0 v0.1.2 // indirect ) diff --git a/go.sum b/go.sum index 5eea1f1..b76bf38 100644 --- a/go.sum +++ b/go.sum @@ -148,15 +148,15 @@ golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= -golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= +golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term 
v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= -golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= -golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= +golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= From ad3693dbbe6c1468b7d886a5ad51f484f1ccf1aa Mon Sep 17 00:00:00 2001 From: Jason Harper Date: Fri, 7 Jun 2024 06:52:07 -0700 Subject: [PATCH 16/23] add PMU driver version to PMU table (#328) * add PMU driver version to PMU table * put PMU driver at top of table --- cmd/orchestrator/resources/collector_reports.yaml.tmpl | 4 ++++ cmd/reporter/report_tables.go | 8 +++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/cmd/orchestrator/resources/collector_reports.yaml.tmpl b/cmd/orchestrator/resources/collector_reports.yaml.tmpl index 3579ce8..52c0acb 100644 --- a/cmd/orchestrator/resources/collector_reports.yaml.tmpl +++ b/cmd/orchestrator/resources/collector_reports.yaml.tmpl @@ -286,6 +286,10 @@ commands: superuser: true modprobe: msr parallel: true + - label: pmu driver version + command: dmesg | grep -A 1 "Intel PMU driver" | tail -1 | awk '{print $NF}' + superuser: true + parallel: true - label: lspci -vmm command: lspci -vmm parallel: true diff --git a/cmd/reporter/report_tables.go b/cmd/reporter/report_tables.go index 6abcf31..006503b 100644 --- a/cmd/reporter/report_tables.go +++ b/cmd/reporter/report_tables.go @@ -1614,6 +1614,7 @@ func newPMUTable(sources []*Source, category TableCategory) (table *Table) { var hostValues = HostValues{ Name: source.getHostname(), ValueNames: []string{ + "PMU Driver Version", "cpu_cycles", "instructions", 
"ref_cycles", @@ -1629,12 +1630,13 @@ func newPMUTable(sources []*Source, category TableCategory) (table *Table) { }, Values: [][]string{}, } - lines := source.getCommandOutputLines("msrbusy") var vals []string + vals = append(vals, source.getCommandOutputLine("pmu driver version")) + lines := source.getCommandOutputLines("msrbusy") if len(lines) == 2 { - vals = strings.Split(lines[1], "|") + vals = append(vals, strings.Split(lines[1], "|")...) } else { - for range hostValues.ValueNames { + for i := 0; i < len(hostValues.ValueNames)-1; i++ { vals = append(vals, "") } } From a619518b70d027bbdcce469917dafb225310193f Mon Sep 17 00:00:00 2001 From: jharper5 Date: Wed, 19 Jun 2024 05:30:19 -0700 Subject: [PATCH 17/23] fix 404 in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a42a858..bcf84a4 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Data can be collected from a single remote target by providing the login credent ./svr-info -ip 10.100.222.123 -user fred -key ~/.ssh/id_rsa ``` ## Multiple Targets -Data can be collected from multiple remote targets by placing login credentials of the targets in a 'targets' file and then referencing that targets file on the svr-info command line. See the included [targets.example](src/orchestrator/targets.example) file for the required file format. +Data can be collected from multiple remote targets by placing login credentials of the targets in a 'targets' file and then referencing that targets file on the svr-info command line. See the included [targets.example](cmd/orchestrator/targets.example) file for the required file format. 
``` ./svr-info -targets ``` From 557341496779a3d09ddde12f76c5083102b8839f Mon Sep 17 00:00:00 2001 From: Jason Harper Date: Mon, 24 Jun 2024 10:55:33 -0700 Subject: [PATCH 18/23] add elc table (#320) * add elc table * add elc to brief excel report * add insights rule --- .../resources/collector_reports.yaml.tmpl | 80 ++++++++++++++++--- cmd/reporter/report.go | 3 + cmd/reporter/report_tables.go | 43 ++++++++++ cmd/reporter/resources/insights.grl | 15 +++- cmd/reporter/source.go | 37 +++++++++ third_party/build.Dockerfile | 2 +- 6 files changed, 167 insertions(+), 13 deletions(-) diff --git a/cmd/orchestrator/resources/collector_reports.yaml.tmpl b/cmd/orchestrator/resources/collector_reports.yaml.tmpl index 52c0acb..5b5f852 100644 --- a/cmd/orchestrator/resources/collector_reports.yaml.tmpl +++ b/cmd/orchestrator/resources/collector_reports.yaml.tmpl @@ -243,19 +243,77 @@ commands: command: pcm-tpmi 2 0x18 -d -b 15:21 superuser: true parallel: true - - label: active idle utilization point + - label: efficiency latency control command: |- - msrwrite 0xb0 0x80000694 # must write this value to this MSR before reading 0xb1 - msrread -f 15:8 0xb1 # ACTIVE IDLE - UTILIZATION POINT - superuser: true - modprobe: msr - parallel: true - - label: active idle mesh frequency - command: |- - msrwrite 0xb0 0x80000694 # must write this value to this MSR before reading 0xb1 - msrread -f 7:0 0xb1 # ACTIVE IDLE - MESH FREQUENCY + # Script derived from bhs-power-mode script in Intel PCM repository + # Run the pcm-tpmi command to determine I/O and compute dies + output=$(pcm-tpmi 2 0x10 -d -b 26:26) + + # Parse the output to build lists of I/O and compute dies + io_dies=() + compute_dies=() + declare -A die_types + while read -r line; do + if [[ $line == *"instance 0"* ]]; then + die=$(echo "$line" | grep -oP 'entry \K[0-9]+') + if [[ $line == *"value 1"* ]]; then + die_types[$die]="IO" + io_dies+=("$die") + elif [[ $line == *"value 0"* ]]; then + die_types[$die]="Compute" + 
compute_dies+=("$die") + fi + fi + done <<< "$output" + + # Function to extract and calculate metrics from the value + extract_and_print_metrics() { + local value=$1 + local socket_id=$2 + local die=$3 + local die_type=${die_types[$die]} + + # Extract bits and calculate metrics + local min_ratio=$(( (value >> 15) & 0x7F )) + local max_ratio=$(( (value >> 8) & 0x7F )) + local eff_latency_ctrl_ratio=$(( (value >> 22) & 0x7F )) + local eff_latency_ctrl_low_threshold=$(( (value >> 32) & 0x7F )) + local eff_latency_ctrl_high_threshold=$(( (value >> 40) & 0x7F )) + local eff_latency_ctrl_high_threshold_enable=$(( (value >> 39) & 0x1 )) + + # Convert to MHz or percentage + min_ratio=$(( min_ratio * 100 )) + max_ratio=$(( max_ratio * 100 )) + eff_latency_ctrl_ratio=$(( eff_latency_ctrl_ratio * 100 )) + eff_latency_ctrl_low_threshold=$(( (eff_latency_ctrl_low_threshold * 100) / 127 )) + eff_latency_ctrl_high_threshold=$(( (eff_latency_ctrl_high_threshold * 100) / 127 )) + + # Print metrics + echo -n "$socket_id,$die,$die_type,$min_ratio,$max_ratio,$eff_latency_ctrl_ratio," + if [ $die_type == "IO" ] ; then + echo "$eff_latency_ctrl_low_threshold,$eff_latency_ctrl_high_threshold,$eff_latency_ctrl_high_threshold_enable" + else + echo ",," + fi + } + + # Print CSV header + echo "Socket,Die,Type,MIN_RATIO (MHz),MAX_RATIO (MHz),ELC_RATIO (MHz),ELC_LOW_THRESHOLD (%),ELC_HIGH_THRESHOLD (%),ELC_HIGH_THRESHOLD_ENABLE" + + # Iterate over all dies and run pcm-tpmi for each to get the metrics + for die in "${!die_types[@]}"; do + output=$(pcm-tpmi 2 0x18 -d -e "$die") + + # Parse the output and extract metrics for each socket + while read -r line; do + if [[ $line == *"Read value"* ]]; then + value=$(echo "$line" | grep -oP 'value \K[0-9]+') + socket_id=$(echo "$line" | grep -oP 'instance \K[0-9]+') + extract_and_print_metrics "$value" "$socket_id" "$die" + fi + done <<< "$output" + done superuser: true - modprobe: msr parallel: true - label: ipmitool sel time get command: LC_ALL=C 
ipmitool sel time get diff --git a/cmd/reporter/report.go b/cmd/reporter/report.go index 7dc954e..e6ebdeb 100644 --- a/cmd/reporter/report.go +++ b/cmd/reporter/report.go @@ -45,6 +45,7 @@ func NewConfigurationReport(sources []*Source, CPUdb cpudb.CPUDB) (report *Repor newPowerTable(sources, Power), newUncoreTable(sources, CPUdb, Power), + newEfficiencyLatencyControlTable(sources, Power), }..., ) @@ -94,6 +95,7 @@ func NewBriefReport(sources []*Source, fullReport *Report, CPUdb cpudb.CPUDB) (r tableDiskSummary := newDiskSummaryTable(fullReport.findTable("Disk"), Storage) tableNicSummary := newNICSummaryTable(fullReport.findTable("NIC"), Network) tableAcceleratorSummary := newAcceleratorSummaryTable(fullReport.findTable("Accelerator"), CPUCategory) + tableEfficiencyLatencyControlSummary := newEfficiencyLatencyControlSummaryTable(fullReport.findTable("Efficiency Latency Control"), Power) report.Tables = append(report.Tables, []*Table{ fullReport.findTable("Host"), @@ -108,6 +110,7 @@ func NewBriefReport(sources []*Source, fullReport *Report, CPUdb cpudb.CPUDB) (r newBIOSSummaryTable(fullReport.findTable("BIOS"), Software), newOperatingSystemBriefTable(fullReport.findTable("Operating System"), Software), fullReport.findTable("Power"), + tableEfficiencyLatencyControlSummary, newVulnerabilitySummaryTable(fullReport.findTable("Vulnerability"), Security), newMarketingClaimTable(fullReport, tableNicSummary, tableDiskSummary, tableAcceleratorSummary, NoCategory), }..., diff --git a/cmd/reporter/report_tables.go b/cmd/reporter/report_tables.go index 006503b..32f9231 100644 --- a/cmd/reporter/report_tables.go +++ b/cmd/reporter/report_tables.go @@ -1018,6 +1018,49 @@ func newPowerTable(sources []*Source, category TableCategory) (table *Table) { return } +func newEfficiencyLatencyControlTable(sources []*Source, category TableCategory) (table *Table) { + table = &Table{ + Name: "Efficiency Latency Control", + Category: category, + AllHostValues: []HostValues{}, + } + for _, 
source := range sources { + var hostValues = HostValues{ + Name: source.getHostname(), + } + hostValues.ValueNames, hostValues.Values = source.getEfficiencyLatencyControl() + table.AllHostValues = append(table.AllHostValues, hostValues) + } + return +} + +func newEfficiencyLatencyControlSummaryTable(tableELC *Table, category TableCategory) (table *Table) { + table = &Table{ + Name: "Efficiency Latency Control", + Category: category, + AllHostValues: []HostValues{}, + } + for _, srcHv := range tableELC.AllHostValues { + var hostValues = HostValues{ + Name: srcHv.Name, + ValueNames: []string{"ELC Mode"}, + } + var modes []string + for _, row := range srcHv.Values { + if row[9] != "" { + modes = append(modes, row[9]) + } + } + hostValues.Values = make([][]string, 1) + hostValues.Values[0] = append(hostValues.Values[0], strings.Join(modes, ", ")) + if hostValues.Values[0][0] == "" { + hostValues.Values[0][0] = "N/A" + } + table.AllHostValues = append(table.AllHostValues, hostValues) + } + return +} + func newGPUTable(sources []*Source, category TableCategory) (table *Table) { table = &Table{ Name: "GPU", diff --git a/cmd/reporter/resources/insights.grl b/cmd/reporter/resources/insights.grl index 508ad62..eec6e68 100644 --- a/cmd/reporter/resources/insights.grl +++ b/cmd/reporter/resources/insights.grl @@ -22,7 +22,8 @@ rule DIMMSpeed { (Report.GetValue("Configuration", "CPU", "Microarchitecture") == "CLX" && Report.GetValueAsInt("Configuration", "DIMM", "Speed") < 2933) || (Report.GetValue("Configuration", "CPU", "Microarchitecture") == "ICX" && Report.GetValueAsInt("Configuration", "DIMM", "Speed") < 3200) || (Report.GetValue("Configuration", "CPU", "Microarchitecture").Contains("SPR") && Report.GetValueAsInt("Configuration", "DIMM", "Speed") < 4800) || - (Report.GetValue("Configuration", "CPU", "Microarchitecture").Contains("EMR") && Report.GetValueAsInt("Configuration", "DIMM", "Speed") < 5600) + (Report.GetValue("Configuration", "CPU", 
"Microarchitecture").Contains("EMR") && Report.GetValueAsInt("Configuration", "DIMM", "Speed") < 5600) || + (Report.GetValue("Configuration", "CPU", "Microarchitecture").Contains("SRF") && Report.GetValueAsInt("Configuration", "DIMM", "Speed") < 6400) ) then Report.AddInsight( @@ -120,6 +121,18 @@ rule FrequencyGovernor { Retract("FrequencyGovernor"); } +rule ELCMode { + when + Report.GetValuesFromColumn("Configuration", "Efficiency Latency Control", 9).Count("Default") != 0 || + Report.GetValuesFromColumn("Configuration", "Efficiency Latency Control", 9).Count("Custom") != 0 + then + Report.AddInsight( + "Efficiency Latency Control mode is not set to 'Latency Optimized' on all IO dies.", + "Consider setting the Efficiency Latency Control mode to 'Latency Optimized'." + ); + Retract("ELCMode"); +} + rule TurboBoost { when Report.GetValue("Configuration", "CPU", "Intel Turbo Boost") != "" && diff --git a/cmd/reporter/source.go b/cmd/reporter/source.go index c85e7df..a971335 100644 --- a/cmd/reporter/source.go +++ b/cmd/reporter/source.go @@ -1246,3 +1246,40 @@ func (s *Source) getPMUMetrics() (orderedMetricNames []string, timeStamps []floa } return } + +func (s *Source) getEfficiencyLatencyControl() (valueNames []string, values [][]string) { + output := strings.Join(s.getCommandOutputLines("efficiency latency control"), "\n") + if output == "" { + return + } + r := csv.NewReader(strings.NewReader(output)) + rows, err := r.ReadAll() + if err != nil { + log.Printf("failed to read ELC CSV") + return + } + if len(rows) < 2 { + log.Printf("no ELC data found") + return + } + // first row is headers / valueNames + valueNames = rows[0] + // 2nd-nth rows are values + values = rows[1:] + // let's add an interpretation of the values in an additional column + valueNames = append(valueNames, "Mode") + for i, row := range values { + var mode string + if row[2] == "IO" { + if row[5] == "0" && row[6] == "0" && row[7] == "0" { + mode = "Latency Optimized" + } else if row[5] == "800" 
&& row[6] == "10" && row[7] == "94" { + mode = "Default" + } else { + mode = "Custom" + } + } + values[i] = append(values[i], mode) + } + return +} diff --git a/third_party/build.Dockerfile b/third_party/build.Dockerfile index 9abfd15..0c72d59 100644 --- a/third_party/build.Dockerfile +++ b/third_party/build.Dockerfile @@ -12,7 +12,7 @@ RUN locale-gen en_US.UTF-8 && echo "LANG=en_US.UTF-8" > /etc/default/locale RUN add-apt-repository ppa:git-core/ppa -y RUN apt-get update && apt-get install -y git build-essential autotools-dev automake \ gawk zlib1g-dev libtool libaio-dev libaio1 pandoc pkgconf libcap-dev docbook-utils \ - libreadline-dev default-jre default-jdk cmake flex bison + libreadline-dev default-jre default-jdk cmake flex bison libssl-dev ENV JAVA_HOME=/usr/lib/jvm/java-1.11.0-openjdk-amd64 From d1ec91cb1dd765e339b03034da5dacab8f8a5be8 Mon Sep 17 00:00:00 2001 From: jharper5 Date: Mon, 24 Jun 2024 16:01:24 -0700 Subject: [PATCH 19/23] update code of conduct file extension to match GitHub expectations --- CODE_OF_CONDUCT.MD => CODE_OF_CONDUCT.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename CODE_OF_CONDUCT.MD => CODE_OF_CONDUCT.md (100%) diff --git a/CODE_OF_CONDUCT.MD b/CODE_OF_CONDUCT.md similarity index 100% rename from CODE_OF_CONDUCT.MD rename to CODE_OF_CONDUCT.md From b574798ffc693be5b72de1b6ba7f560b70e3ea19 Mon Sep 17 00:00:00 2001 From: Jason Harper Date: Tue, 25 Jun 2024 14:42:04 -0700 Subject: [PATCH 20/23] support pmu profiling on AWS ICX and SPR VMs...and SRF systems (#323) * support version 2 PMU driver on AWS ICX and SPR VMs * support version 2 PMU driver on AWS ICX and SPR VMs * add additional test targets on AWS and update image IDs * support version 2 PMU driver on AWS ICX and SPR VMs * change to nofixedtma * revert some image ids * support version 2 PMU driver on AWS ICX and SPR VMs * change to nofixedtma * revert ci * update with latest * add srf events & metrics * fix getPMUDriverVersion func * fix pcm build --- 
cmd/pmu2metrics/event_defs.go | 80 ++-- cmd/pmu2metrics/metadata.go | 62 +++- cmd/pmu2metrics/metric_defs.go | 11 +- .../resources/emr_nofixedtma_events.txt | 138 +++++++ .../resources/emr_nofixedtma_metrics.json | 349 ++++++++++++++++++ .../resources/icx_nofixedtma_events.txt | 148 ++++++++ .../resources/icx_nofixedtma_metrics.json | 329 +++++++++++++++++ .../resources/spr_nofixedtma_events.txt | 138 +++++++ .../resources/spr_nofixedtma_metrics.json | 349 ++++++++++++++++++ cmd/pmu2metrics/resources/srf_events.txt | 102 ++++- cmd/pmu2metrics/resources/srf_metrics.json | 267 +++++++++++++- 11 files changed, 1904 insertions(+), 69 deletions(-) create mode 100644 cmd/pmu2metrics/resources/emr_nofixedtma_events.txt create mode 100644 cmd/pmu2metrics/resources/emr_nofixedtma_metrics.json create mode 100644 cmd/pmu2metrics/resources/icx_nofixedtma_events.txt create mode 100644 cmd/pmu2metrics/resources/icx_nofixedtma_metrics.json create mode 100644 cmd/pmu2metrics/resources/spr_nofixedtma_events.txt create mode 100644 cmd/pmu2metrics/resources/spr_nofixedtma_metrics.json diff --git a/cmd/pmu2metrics/event_defs.go b/cmd/pmu2metrics/event_defs.go index 70b78e1..818559c 100644 --- a/cmd/pmu2metrics/event_defs.go +++ b/cmd/pmu2metrics/event_defs.go @@ -39,7 +39,14 @@ func LoadEventGroups(eventDefinitionOverridePath string, metadata Metadata) (gro return } } else { - if file, err = resources.Open(filepath.Join("resources", fmt.Sprintf("%s_events.txt", strings.ToLower(metadata.Microarchitecture)[:3]))); err != nil { + uarch := strings.ToLower(metadata.Microarchitecture)[:3] + // use alternate events/metrics when TMA fixed counters are not supported + alternate := "" + if (uarch == "icx" || uarch == "spr" || uarch == "emr") && !metadata.FixedCounterTMASupported { + alternate = "_nofixedtma" + } + eventFileName := fmt.Sprintf("%s%s_events.txt", uarch, alternate) + if file, err = resources.Open(filepath.Join("resources", eventFileName)); err != nil { return } } @@ -56,11 +63,7 
@@ func LoadEventGroups(eventDefinitionOverridePath string, metadata Metadata) (gro if event, err = parseEventDefinition(line[:len(line)-1]); err != nil { return } - var collectable bool - if collectable, err = isCollectableEvent(event, metadata); err != nil { - return - } - if collectable { + if isCollectableEvent(event, metadata) { group = append(group, event) } else { uncollectableEvents.Add(event.Name) @@ -80,20 +83,20 @@ func LoadEventGroups(eventDefinitionOverridePath string, metadata Metadata) (gro } // expand uncore groups for all uncore devices groups, err = expandUncoreGroups(groups, metadata) - // "fixed" PMU counters are not supported on (most) IaaS VMs, so we add a separate group - if !isUncoreSupported(metadata) { - group = GroupDefinition{EventDefinition{Raw: "cpu-cycles"}, EventDefinition{Raw: "instructions"}} - if metadata.RefCyclesSupported { - group = append(group, EventDefinition{Raw: "ref-cycles"}) - } - groups = append(groups, group) - group = GroupDefinition{EventDefinition{Raw: "cpu-cycles:k"}, EventDefinition{Raw: "instructions"}} - if metadata.RefCyclesSupported { - group = append(group, EventDefinition{Raw: "ref-cycles:k"}) - } - groups = append(groups, group) + // // "fixed" PMU counters are not supported on (most) IaaS VMs, so we add a separate group + // if !isUncoreSupported(metadata) { + // group = GroupDefinition{EventDefinition{Raw: "cpu-cycles"}, EventDefinition{Raw: "instructions"}} + // if metadata.RefCyclesSupported { + // group = append(group, EventDefinition{Raw: "ref-cycles"}) + // } + // groups = append(groups, group) + // group = GroupDefinition{EventDefinition{Raw: "cpu-cycles:k"}, EventDefinition{Raw: "instructions"}} + // if metadata.RefCyclesSupported { + // group = append(group, EventDefinition{Raw: "ref-cycles:k"}) + // } + // groups = append(groups, group) - } + // } if uncollectableEvents.Cardinality() != 0 && gCmdLineArgs.verbose { log.Printf("Uncollectable events: %s", uncollectableEvents) } @@ -113,16 +116,14 @@ 
func isUncoreSupported(metadata Metadata) (supported bool) { } // isCollectableEvent confirms if given event can be collected on the platform -func isCollectableEvent(event EventDefinition, metadata Metadata) (collectable bool, err error) { - collectable = true - // TMA - if !metadata.TMASupported && (event.Name == "TOPDOWN.SLOTS" || strings.HasPrefix(event.Name, "PERF_METRICS.")) { - collectable = false - return +func isCollectableEvent(event EventDefinition, metadata Metadata) bool { + // fixed-counter TMA + if !metadata.FixedCounterTMASupported && (event.Name == "TOPDOWN.SLOTS" || strings.HasPrefix(event.Name, "PERF_METRICS.")) { + return false } // short-circuit for cpu events if event.Device == "cpu" && !strings.HasPrefix(event.Name, "OCR") { - return + return true } // short-circuit off-core response events if event.Device == "cpu" && @@ -130,15 +131,14 @@ func isCollectableEvent(event EventDefinition, metadata Metadata) (collectable b isUncoreSupported(metadata) && !(gCmdLineArgs.scope == ScopeProcess) && !(gCmdLineArgs.scope == ScopeCgroup) { - return + return true } // exclude uncore events when // - their corresponding device is not found // - not in system-wide collection scope if event.Device != "cpu" && event.Device != "" { if gCmdLineArgs.scope == ScopeProcess || gCmdLineArgs.scope == ScopeCgroup { - collectable = false - return + return false } deviceExists := false for uncoreDeviceName := range metadata.DeviceIDs { @@ -148,33 +148,29 @@ func isCollectableEvent(event EventDefinition, metadata Metadata) (collectable b } } if !deviceExists { - collectable = false + return false } else if !strings.Contains(event.Raw, "umask") && !strings.Contains(event.Raw, "event") { - collectable = false + return false } - return + return true } // if we got this far, event.Device is empty // is ref-cycles supported? 
if !metadata.RefCyclesSupported && strings.Contains(event.Name, "ref-cycles") { - collectable = false - return + return false } // no uncore means we're on a VM where cpu fixed cycles are likely not supported - if strings.Contains(event.Name, "cpu-cycles") && !isUncoreSupported(metadata) { - collectable = false - return - } + // if strings.Contains(event.Name, "cpu-cycles") && !isUncoreSupported(metadata) { + // return false + // } // no cstate and power events when collecting at process or cgroup scope if (gCmdLineArgs.scope == ScopeProcess || gCmdLineArgs.scope == ScopeCgroup) && (strings.Contains(event.Name, "cstate_") || strings.Contains(event.Name, "power/energy")) { - collectable = false - return + return false } // finally, if it isn't in the perf list output, it isn't collectable name := strings.Split(event.Name, ":")[0] - collectable = strings.Contains(metadata.PerfSupportedEvents, name) - return + return strings.Contains(metadata.PerfSupportedEvents, name) } // parseEventDefinition parses one line from the event definition file into a representative structure diff --git a/cmd/pmu2metrics/metadata.go b/cmd/pmu2metrics/metadata.go index 7c55fe7..3323827 100644 --- a/cmd/pmu2metrics/metadata.go +++ b/cmd/pmu2metrics/metadata.go @@ -28,23 +28,30 @@ import ( // Metadata is the representation of the platform's state and capabilities type Metadata struct { - CoresPerSocket int `yaml:"CoresPerSocket"` - CPUSocketMap map[int]int - DeviceIDs map[string][]int `yaml:"DeviceIDs"` - Microarchitecture string `yaml:"Microarchitecture"` - ModelName string - PerfSupportedEvents string `yaml:"PerfSupportedEvents"` - RefCyclesSupported bool `yaml:"RefCyclesSupported"` - SocketCount int `yaml:"SocketCount"` - ThreadsPerCore int `yaml:"ThreadsPerCore"` - TMASupported bool `yaml:"TMASupported"` - TSC int `yaml:"TSC"` - TSCFrequencyHz int `yaml:"TSCFrequencyHz"` + CoresPerSocket int `yaml:"CoresPerSocket"` + CPUSocketMap map[int]int + DeviceIDs map[string][]int 
`yaml:"DeviceIDs"` + FixedCounterTMASupported bool `yaml:"FixedCounterTMASupported"` + Microarchitecture string `yaml:"Microarchitecture"` + ModelName string + PerfSupportedEvents string `yaml:"PerfSupportedEvents"` + PMUDriverVersion string `yaml:"PMUDriverVersion"` + RefCyclesSupported bool `yaml:"RefCyclesSupported"` + SocketCount int `yaml:"SocketCount"` + ThreadsPerCore int `yaml:"ThreadsPerCore"` + TSC int `yaml:"TSC"` + TSCFrequencyHz int `yaml:"TSCFrequencyHz"` } // LoadMetadata - populates and returns a Metadata structure containing state of the // system. func LoadMetadata(perfPath string) (metadata Metadata, err error) { + // PMU driver version + metadata.PMUDriverVersion, err = getPMUDriverVersion() + if err != nil { + err = fmt.Errorf("failed to retrieve PMU driver version: %v", err) + return + } // reduce startup time by running the three perf commands in their own threads while // the rest of the metadata is being collected slowFuncChannel := make(chan error) @@ -68,15 +75,15 @@ func LoadMetadata(perfPath string) (metadata Metadata, err error) { } slowFuncChannel <- err }() - // TMA + // Fixed-counter TMA events go func() { var err error var output string - if metadata.TMASupported, output, err = getTMASupported(perfPath); err != nil { + if metadata.FixedCounterTMASupported, output, err = getFixedCounterTMASupported(perfPath); err != nil { err = fmt.Errorf("failed to determine if TMA is supported: %v", err) } - if !metadata.TMASupported && gCmdLineArgs.verbose { - log.Printf("TMA not supported:\n%s\n", output) + if !metadata.FixedCounterTMASupported && gCmdLineArgs.verbose { + log.Printf("TMA fixed counter not supported:\n%s\n", output) } slowFuncChannel <- err }() @@ -178,7 +185,8 @@ func (md Metadata) String() string { "TSC Frequency (Hz): %d, "+ "TSC: %d, "+ "ref-cycles supported: %t, "+ - "TMA events supported: %t, ", + "Fixed Counter TMA events supported: %t, "+ + "PMU Driver version: %s, ", md.ModelName, md.Microarchitecture, md.SocketCount, @@ 
-187,7 +195,8 @@ func (md Metadata) String() string { md.TSCFrequencyHz, md.TSC, md.RefCyclesSupported, - md.TMASupported) + md.FixedCounterTMASupported, + md.PMUDriverVersion) for deviceName, deviceIds := range md.DeviceIDs { var ids []string for _, id := range deviceIds { @@ -294,8 +303,9 @@ func getRefCyclesSupported(perfPath string) (supported bool, output string, err return } -// getTMASupported - checks if the TMA events are supported by perf -func getTMASupported(perfPath string) (supported bool, output string, err error) { +// getFixedCounterTMASupported - checks if the fixed TMA counter events are supported by perf +// We check for the TOPDOWN.SLOTS and PERF_METRICS.BAD_SPECULATION events as an indicator of support for fixed TMA counter support +func getFixedCounterTMASupported(perfPath string) (supported bool, output string, err error) { cmd := exec.Command(perfPath, "stat", "-a", "-e", "'{cpu/event=0x00,umask=0x04,period=10000003,name='TOPDOWN.SLOTS'/,cpu/event=0x00,umask=0x81,period=10000003,name='PERF_METRICS.BAD_SPECULATION'/}'", "sleep", ".1") var outBuffer, errBuffer bytes.Buffer cmd.Stderr = &errBuffer @@ -331,6 +341,18 @@ func getTMASupported(perfPath string) (supported bool, output string, err error) return } +func getPMUDriverVersion() (version string, err error) { + cmd := exec.Command("sh", "-c", `dmesg | grep -A 1 "Intel PMU driver" | tail -1 | awk '{print $NF}'`) + var outBuffer, errBuffer bytes.Buffer + cmd.Stderr = &errBuffer + cmd.Stdout = &outBuffer + if err = cmd.Run(); err != nil { + return + } + version = strings.TrimSpace(outBuffer.String()) + return +} + // createCPUSocketMap creates a map from CPU number to socket number func createCPUSocketMap(coresPerSocket int, sockets int, hyperthreading bool) (cpuSocketMap map[int]int) { // Create an empty map diff --git a/cmd/pmu2metrics/metric_defs.go b/cmd/pmu2metrics/metric_defs.go index c733bad..434ea67 100644 --- a/cmd/pmu2metrics/metric_defs.go +++ b/cmd/pmu2metrics/metric_defs.go @@ 
-39,7 +39,14 @@ func LoadMetricDefinitions(metricDefinitionOverridePath string, selectedMetrics return } } else { - if bytes, err = resources.ReadFile(filepath.Join("resources", fmt.Sprintf("%s_metrics.json", strings.ToLower(metadata.Microarchitecture)[:3]))); err != nil { + uarch := strings.ToLower(metadata.Microarchitecture)[:3] + // use alternate events/metrics when TMA fixed counters are not supported + alternate := "" + if (uarch == "icx" || uarch == "spr" || uarch == "emr") && !metadata.FixedCounterTMASupported { + alternate = "_nofixedtma" + } + metricFileName := fmt.Sprintf("%s%s_metrics.json", uarch, alternate) + if bytes, err = resources.ReadFile(filepath.Join("resources", metricFileName)); err != nil { return } } @@ -112,7 +119,7 @@ func ConfigureMetrics(metrics []MetricDefinition, evaluatorFunctions map[string] metrics[metricIdx].Expression = strings.ReplaceAll(metrics[metricIdx].Expression, "[CHAS_PER_SOCKET]", chasPerSocket) metrics[metricIdx].Expression = strings.ReplaceAll(metrics[metricIdx].Expression, "[SOCKET_COUNT]", socketCount) metrics[metricIdx].Expression = strings.ReplaceAll(metrics[metricIdx].Expression, "[HYPERTHREADING_ON]", hyperThreadingOn) - metrics[metricIdx].Expression = strings.ReplaceAll(metrics[metricIdx].Expression, "[const_thread_count]", threadsPerCore) + metrics[metricIdx].Expression = strings.ReplaceAll(metrics[metricIdx].Expression, "[CONST_THREAD_COUNT]", threadsPerCore) // get a list of the variables in the expression metrics[metricIdx].Variables = make(map[string]int) expressionIdx := 0 diff --git a/cmd/pmu2metrics/resources/emr_nofixedtma_events.txt b/cmd/pmu2metrics/resources/emr_nofixedtma_events.txt new file mode 100644 index 0000000..d767656 --- /dev/null +++ b/cmd/pmu2metrics/resources/emr_nofixedtma_events.txt @@ -0,0 +1,138 @@ +########################################################################################################### +# Copyright (C) 2021-2023 Intel Corporation +# SPDX-License-Identifier: 
BSD-3-Clause +########################################################################################################### + +# Sapphire Rapids and Emerald Rapids event list for platforms that don't have support for the fixed counter +# TMA events, e.g., some AWS VMs. +# Note that there are no more than 10 events per group. On these same platforms, the cpu-cycles fixed +# counter is not supported so a general purpose counter will be used. + +cpu/event=0x51,umask=0x01,period=100003,name='L1D.REPLACEMENT'/, +cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, +cpu/event=0xd1,umask=0x01,period=1000003,name='MEM_LOAD_RETIRED.L1_HIT'/, +cpu/event=0x25,umask=0x1f,period=100003,name='L2_LINES_IN.ALL'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xd1,umask=0x10,period=100021,name='MEM_LOAD_RETIRED.L2_MISS'/, +cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/, +cpu/event=0x11,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, +cpu/event=0x47,umask=0x03,cmask=0x03,period=1000003,name='MEMORY_ACTIVITY.STALLS_L1D_MISS'/, +cpu/event=0xa6,umask=0x40,cmask=0x02,period=1000003,name='EXE_ACTIVITY.BOUND_ON_STORES'/, +cpu/event=0xa6,umask=0x21,cmask=0x05,period=2000003,name='EXE_ACTIVITY.BOUND_ON_LOADS'/, +cpu/event=0xad,umask=0x10,period=1000003,name='INT_MISC.UOP_DROPPING'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x12,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, +cpu/event=0x12,umask=0x04,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, +cpu/event=0x13,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, +cpu/event=0xd1,umask=0x02,period=200003,name='MEM_LOAD_RETIRED.L2_HIT'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x47,umask=0x09,cmask=0x09,period=1000003,name='MEMORY_ACTIVITY.STALLS_L3_MISS'/, +cpu/event=0x80,umask=0x04,period=500009,name='ICACHE_DATA.STALLS'/, +cpu/event=0x83,umask=0x04,period=200003,name='ICACHE_TAG.STALLS'/, +cpu-cycles, 
+ref-cycles, +instructions; + +# events for TMA metrics without fixed counter support (group 1) +cpu/event=0x9c,umask=0x01,name='IDQ_UOPS_NOT_DELIVERED.CORE'/, +cpu/event=0xa4,umask=0x01,name='TOPDOWN.SLOTS_P'/, +cpu/event=0x9c,umask=0x01,name='IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE'/, +cpu/event=0xc2,umask=0x02,name='UOPS_RETIRED.SLOTS'/, +cpu/event=0xae,umask=0x01,name='UOPS_ISSUED.ANY'/, +cpu/event=0x87,umask=0x01,name='DECODE.LCP'/, +cpu/event=0x61,umask=0x02,name='DSB2MITE_SWITCHES.PENALTY_CYCLES'/, +cpu-cycles, +ref-cycles, +instructions; + +# events for TMA metrics without fixed counter support (group 2) +cpu/event=0xa4,umask=0x02,name='TOPDOWN.BACKEND_BOUND_SLOTS'/, +cpu/event=0xa4,umask=0x08,name='TOPDOWN.BR_MISPREDICT_SLOTS'/, +cpu/event=0xa4,umask=0x10,name='TOPDOWN.MEMORY_BOUND_SLOTS'/, +cpu/event=0xc2,umask=0x01,name='UOPS_RETIRED.HEAVY'/, +cpu/event=0xe5,umask=0x03,name='MEM_UOP_RETIRED.ANY'/, +cpu/event=0xc0,umask=0x10,name='INST_RETIRED.MACRO_FUSED'/, +cpu/event=0xc4,umask=0x00,name='BR_INST_RETIRED.ALL_BRANCHES'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x47,umask=0x03,cmask=0x03,period=1000003,name='MEMORY_ACTIVITY.STALLS_L1D_MISS'/, +cpu/event=0x47,umask=0x05,cmask=0x05,period=1000003,name='MEMORY_ACTIVITY.STALLS_L2_MISS'/, +cpu/event=0xb0,umask=0x09,cmask=0x01,period=1000003,name='ARITH.DIV_ACTIVE'/, +cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/, +cpu/event=0xd0,umask=0x21,cmask=0x00,period=1000003,name='MEM_INST_RETIRED.LOCK_LOADS'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x79,umask=0x04,cmask=0x01,period=2000003,name='IDQ.MITE_CYCLES_ANY'/, +cpu/event=0x79,umask=0x04,cmask=0x06,period=2000003,name='IDQ.MITE_CYCLES_OK'/, +cpu/event=0x79,umask=0x08,cmask=0x01,period=2000003,name='IDQ.DSB_CYCLES_ANY'/, +cpu/event=0x79,umask=0x08,cmask=0x06,period=2000003,name='IDQ.DSB_CYCLES_OK'/, +cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/, 
+cpu/event=0xb7,umask=0x02,period=2000003,name='EXE.AMX_BUSY'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x79,umask=0x08,cmask=0x00,period=2000003,name='IDQ.DSB_UOPS'/, +cpu/event=0x79,umask=0x04,period=100003,name='IDQ.MITE_UOPS'/, +cpu/event=0x79,umask=0x20,period=100003,name='IDQ.MS_UOPS'/, +cpu/event=0xa8,umask=0x01,cmask=0x00,period=2000003,name='LSD.UOPS'/, +cpu-cycles:k, +ref-cycles:k, +instructions:k; + +#OCR +cpu/event=0x2a,umask=0x01,offcore_rsp=0x104004477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x730004477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x1030004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x830004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/; + +#C6 +cstate_core/c6-residency/; +cstate_pkg/c6-residency/; + +#UPI +upi/event=0x02,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/; + +#CHA (Cache) +cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/, +cha/event=0x35,umask=0xc8177e01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE'/, +cha/event=0x36,umask=0xc8177e01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE'/; + +cha/event=0x35,umask=0xC816FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL'/, +cha/event=0x36,umask=0xc816fe01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL'/, +cha/event=0x35,umask=0xC896FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL'/, +cha/event=0x35,umask=0xC8977E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE'/; + +cha/event=0x35,umask=0xccd7fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA'/, +cha/event=0x35,umask=0xc817fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD'/, +cha/event=0x35,umask=0xc897fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF'/, 
+cha/event=0x36,umask=0xC817fe01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD'/; + +#CHA (IO Bandwidth) +cha/event=0x35,umask=0xc8f3ff04,name='UNC_CHA_TOR_INSERTS.IO_PCIRDCUR'/, +cha/event=0x35,umask=0xCC43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOM'/, +cha/event=0x35,umask=0xCD43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR'/, +cha/event=0x01,umask=0x00,name='UNC_CHA_CLOCKTICKS'/; + +#IMC (memory read/writes) +imc/event=0x05,umask=0xcf,name='UNC_M_CAS_COUNT.RD'/, +imc/event=0x05,umask=0xf0,name='UNC_M_CAS_COUNT.WR'/; + +#power +power/energy-pkg/, +power/energy-ram/; diff --git a/cmd/pmu2metrics/resources/emr_nofixedtma_metrics.json b/cmd/pmu2metrics/resources/emr_nofixedtma_metrics.json new file mode 100644 index 0000000..587d6b1 --- /dev/null +++ b/cmd/pmu2metrics/resources/emr_nofixedtma_metrics.json @@ -0,0 +1,349 @@ +[ + { + "name": "metric_CPU operating frequency (in GHz)", + "expression": "(([cpu-cycles] / [ref-cycles] * [SYSTEM_TSC_FREQ]) / 1000000000)" + }, + { + "name": "metric_CPU utilization %", + "expression": "100 * [ref-cycles] / [TSC]" + }, + { + "name": "metric_CPU utilization% in kernel mode", + "expression": "100 * [ref-cycles:k] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_CPI", + "name-txn": "metric_cycles per txn", + "expression": "[cpu-cycles] / [instructions]", + "expression-txn": "[cpu-cycles] / [TXN]" + }, + { + "name": "metric_kernel_CPI", + "name-txn": "metric_kernel_cycles per txn", + "expression": "[cpu-cycles:k] / [instructions:k]", + "expression-txn": "[cpu-cycles:k] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_IPC", + "name-txn": "metric_txn per cycle", + "expression": "[instructions] / [cpu-cycles]", + "expression-txn": "[TXN] / [cpu-cycles]", + "origin": "perfspect" + }, + { + "name": "metric_giga_instructions_per_sec", + "expression": "[instructions] / 1000000000", + "origin": "perfspect" + }, + { + "name": "metric_locks retired per instr", + "name-txn": "metric_locks retired per txn", + "expression": 
"[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", + "origin": "perfmon website" + }, + { + "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "expression": "[L1D.REPLACEMENT] / [instructions]", + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" + }, + { + "name": "metric_L1D demand data read hits per instr", + "name-txn": "metric_L1D demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" + }, + { + "name": "metric_L1-I code read misses (w/ prefetches) per instr", + "name-txn": "metric_L1I code read misses (includes prefetches) per txn", + "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", + "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" + }, + { + "name": "metric_L2 demand data read hits per instr", + "name-txn": "metric_L2 demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" + }, + { + "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[L2_LINES_IN.ALL] / [instructions]", + "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" + }, + { + "name": "metric_L2 demand data read MPI", + "name-txn": "metric_L2 demand data read misses per txn", + "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" + }, + { + "name": "metric_L2 demand code MPI", + "name-txn": "metric_L2 demand code misses per txn", + "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", + "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" + }, + { + "name": "metric_LLC code read MPI (demand+prefetch)", + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": 
"[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [instructions]", + "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [TXN]" + }, + { + "name": "metric_LLC data read MPI (demand+prefetch)", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" + }, + { + "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_Average LLC demand data read miss latency (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)", + "expression": "( 1000000000 * 
([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_UPI Data transmit BW (MB/sec) (only data)", + "expression": "([UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9.0) / 1000000) / 1" + }, + { + "name": "metric_package power (watts)", + "expression": "[power/energy-pkg/]", + "origin": "perfspect" + }, + { + "name": "metric_DRAM power (watts)", + "expression": "[power/energy-ram/]", + "origin": "perfspect" + }, + { + "name": "metric_core c6 residency %", + "expression": "100 * [cstate_core/c6-residency/] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_package c6 residency %", + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_% Uops delivered from decoded Icache (DSB)", + "expression": "100 * ([IDQ.DSB_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )" + }, + { + "name": "metric_% Uops delivered from legacy decode pipeline (MITE)", + "expression": "100 * ([IDQ.MITE_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )" + }, + { + "name": "metric_core initiated local dram read bandwidth (MB/sec)", + "expression": "([OCR.READS_TO_CORE.LOCAL_DRAM] + [OCR.HWPF_L3.L3_MISS_LOCAL]) * 64 / 1000000", + "origin": "perfspect" + }, + { + "name": "metric_core initiated remote dram read bandwidth (MB/sec)", + "expression": "([OCR.READS_TO_CORE.REMOTE_DRAM] + [OCR.HWPF_L3.REMOTE]) * 64 / 1000000", + "origin": "perfspect" + }, + { + "name": "metric_memory bandwidth read (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.RD] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth write (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.WR] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth total (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000) / 1" + }, + { + "name": 
"metric_ITLB (2nd level) MPI", + "name-txn": "metric_ITLB (2nd level) misses per txn", + "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) load MPI", + "name-txn": "metric_DTLB (2nd level) load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) 2MB large page load MPI", + "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) store MPI", + "name-txn": "metric_DTLB (2nd level) store misses per txn", + "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_NUMA %_Reads addressed to local DRAM", + "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])" + }, + { + "name": "metric_NUMA %_Reads addressed to remote DRAM", + "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])" + }, + { + "name": "metric_uncore frequency GHz", + "expression": "([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_writes (MB/sec)", + "expression": 
"([UNC_CHA_TOR_INSERTS.IO_PCIRDCUR] * 64 / 1000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_reads (MB/sec)", + "expression": "(([UNC_CHA_TOR_INSERTS.IO_ITOM] + [UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_TMA_Frontend_Bound(%)", + "expression": "100 * ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Fetch_Latency(%)", + "expression": "100 * ( ( [IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] * ( 6 ) - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_....ICache_Misses(%)", + "expression": "100 * ( [ICACHE_DATA.STALLS] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....ITLB_Misses(%)", + "expression": "100 * ( [ICACHE_TAG.STALLS] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....MS_Switches(%)", + "expression": "100 * ( ( 3 ) * [UOPS_RETIRED.MS:c1:e1] / ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....LCP(%)", + "expression": "100 * ( [DECODE.LCP] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....DSB_Switches(%)", + "expression": "100 * ( [DSB2MITE_SWITCHES.PENALTY_CYCLES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Fetch_Bandwidth(%)", + "expression": "100 * ( max( 0 , ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) - ( ( [IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] * ( 6 ) - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....MITE(%)", + "expression": "100 * ( ( [IDQ.MITE_CYCLES_ANY] - [IDQ.MITE_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, + { + "name": "metric_TMA_....DSB(%)", + "expression": "100 * ( ( [IDQ.DSB_CYCLES_ANY] - [IDQ.DSB_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, + { + "name": "metric_TMA_Bad_Speculation(%)", + "expression": "100 * ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - 
[INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) )" + }, + { + "name": "metric_TMA_..Branch_Mispredicts(%)", + "expression": "100 * ( [TOPDOWN.BR_MISPREDICT_SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Machine_Clears(%)", + "expression": "100 * ( max( 0 , ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) ) - ( [TOPDOWN.BR_MISPREDICT_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_Backend_Bound(%)", + "expression": "100 * ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Memory_Bound(%)", + "expression": "100 * ( [TOPDOWN.MEMORY_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_....L1_Bound(%)", + "expression": "100 * ( max( ( [EXE_ACTIVITY.BOUND_ON_LOADS] - [MEMORY_ACTIVITY.STALLS_L1D_MISS] ) / ( [cpu-cycles] ) , 0 ) )" + }, + { + "name": "metric_TMA_....L2_Bound(%)", + "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L1D_MISS] - [MEMORY_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....L3_Bound(%)", + "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L2_MISS] - [MEMORY_ACTIVITY.STALLS_L3_MISS] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....DRAM_Bound(%)", + "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L3_MISS] / ( [cpu-cycles] ) ) )" + }, + { + "name": "metric_TMA_....Store_Bound(%)", + "expression": "100 * ( [EXE_ACTIVITY.BOUND_ON_STORES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Core_Bound(%)", + "expression": "100 * ( max( 0 , ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [TOPDOWN.MEMORY_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....Divider(%)", + "expression": 
"100 * ( [ARITH.DIV_ACTIVE] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....AMX_Busy(%)", + "expression": "100 * ( [EXE.AMX_BUSY] / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) )" + }, + { + "name": "metric_TMA_Retiring(%)", + "expression": "100 * ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Light_Operations(%)", + "expression": "100 * ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....Memory_Operations(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) ) * [MEM_UOP_RETIRED.ANY] / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_....Fused_Instructions(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) ) * [INST_RETIRED.MACRO_FUSED] / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_....Non_Fused_Branches(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) ) * ( [BR_INST_RETIRED.ALL_BRANCHES] - [INST_RETIRED.MACRO_FUSED] ) / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_..Heavy_Operations(%)", + "expression": "100 * ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_....Few_Uops_Instructions(%)", + "expression": "100 * ( max( 0 , ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.MS] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....Microcode_Sequencer(%)", + "expression": "100 * ( [UOPS_RETIRED.MS] / ( [TOPDOWN.SLOTS_P] ) )" + } +] \ No newline at end of file diff --git 
a/cmd/pmu2metrics/resources/icx_nofixedtma_events.txt b/cmd/pmu2metrics/resources/icx_nofixedtma_events.txt new file mode 100644 index 0000000..00c16cc --- /dev/null +++ b/cmd/pmu2metrics/resources/icx_nofixedtma_events.txt @@ -0,0 +1,148 @@ +########################################################################################################### +# Copyright (C) 2021-2023 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause +########################################################################################################### + +# Icelake event list for platforms that don't have support for the fixed counter TMA events, e.g., some AWS +# VMs. +# Note that there are no more than 10 events per group. On these same platforms, the cpu-cycles fixed +# counter is not supported so a general purpose counter will be used. + +cpu/event=0x51,umask=0x01,period=100003,name='L1D.REPLACEMENT'/, +cpu/event=0xd1,umask=0x01,period=1000003,name='MEM_LOAD_RETIRED.L1_HIT'/, +cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, +cpu/event=0xc3,umask=0x01,cmask=0x01,edge=0x01,period=100003,name='MACHINE_CLEARS.COUNT'/, +cpu/event=0xc5,umask=0x00,period=50021,name='BR_MISP_RETIRED.ALL_BRANCHES'/, +cpu/event=0xf1,umask=0x1f,period=100003,name='L2_LINES_IN.ALL'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xd1,umask=0x10,period=100021,name='MEM_LOAD_RETIRED.L2_MISS'/, +cpu/event=0x79,umask=0x08,cmask=0x00,period=2000003,name='IDQ.DSB_UOPS'/, +cpu/event=0xa8,umask=0x01,cmask=0x00,period=2000003,name='LSD.UOPS'/, +cpu/event=0x48,umask=0x02,period=1000003,name='L1D_PEND_MISS.FB_FULL_PERIODS'/, +cpu-cycles, +ref-cycles, +instructions; + +# events for TMA metrics without fixed counter support (group 1) +cpu/event=0x9c,umask=0x01,name='IDQ_UOPS_NOT_DELIVERED.CORE'/, +cpu/event=0xa4,umask=0x01,name='TOPDOWN.SLOTS_P'/, +cpu/event=0x80,umask=0x04,name='ICACHE_DATA.STALLS'/, +cpu/event=0x83,umask=0x04,name='ICACHE_TAG.STALLS'/, 
+cpu/event=0x79,umask=0x30,name='IDQ.MS_SWITCHES'/, +cpu/event=0x87,umask=0x01,name='DECODE.LCP'/, +cpu/event=0x0d,umask=0x10,period=1000003,name='INT_MISC.UOP_DROPPING'/, +cpu-cycles, +ref-cycles, +instructions; + +# events for TMA metrics without fixed counter support (group 2) +cpu/event=0xab,umask=0x02,name='DSB2MITE_SWITCHES.PENALTY_CYCLES'/, +cpu/event=0xa4,umask=0x02,name='TOPDOWN.BACKEND_BOUND_SLOTS'/, +cpu/event=0x0D,umask=0x01,name='INT_MISC.CLEARS_COUNT'/, +cpu/event=0xc2,umask=0x02,name='UOPS_RETIRED.SLOTS'/, +cpu/event=0xd0,umask=0x83,name='MEM_INST_RETIRED.ANY'/, +cpu/event=0xc4,umask=0x00,name='BR_INST_RETIRED.ALL_BRANCHES'/, +cpu/event=0x9c,umask=0x01,cmask=0x05,period=1000003,name='IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/, +cpu/event=0xa3,umask=0x0C,cmask=0x0C,period=1000003,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/, +cpu/event=0xa3,umask=0x14,cmask=0x14,period=2000003,name='CYCLE_ACTIVITY.STALLS_MEM_ANY'/, +cpu/event=0xa6,umask=0x40,cmask=0x02,period=1000003,name='EXE_ACTIVITY.BOUND_ON_STORES'/, +cpu/event=0xa3,umask=0x04,cmask=0x04,period=1000003,name='CYCLE_ACTIVITY.STALLS_TOTAL'/, +cpu/event=0xa6,umask=0x02,period=2000003,name='EXE_ACTIVITY.1_PORTS_UTIL'/, +cpu/event=0xa6,umask=0x04,period=2000003,name='EXE_ACTIVITY.2_PORTS_UTIL'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xd0,umask=0x21,cmask=0x00,period=100007,name='MEM_INST_RETIRED.LOCK_LOADS'/, +cpu/event=0xd1,umask=0x02,period=200003,name='MEM_LOAD_RETIRED.L2_HIT'/, +cpu/event=0xd1,umask=0x40,period=100007,name='MEM_LOAD_RETIRED.FB_HIT'/, +cpu/event=0xd1,umask=0x08,period=200003,name='MEM_LOAD_RETIRED.L1_MISS'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xa3,umask=0x05,cmask=0x05,period=1000003,name='CYCLE_ACTIVITY.STALLS_L2_MISS'/, +cpu/event=0xa3,umask=0x06,cmask=0x06,period=1000003,name='CYCLE_ACTIVITY.STALLS_L3_MISS'/, 
+cpu/event=0xa3,umask=0x0c,cmask=0x0c,period=1000003,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x79,umask=0x04,cmask=0x01,period=2000003,name='IDQ.MITE_CYCLES_ANY'/, +cpu/event=0x79,umask=0x04,cmask=0x05,period=2000003,name='IDQ.MITE_CYCLES_OK'/, +cpu/event=0x79,umask=0x08,cmask=0x01,period=2000003,name='IDQ.DSB_CYCLES_ANY'/, +cpu/event=0x79,umask=0x08,cmask=0x05,period=2000003,name='IDQ.DSB_CYCLES_OK'/, +cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/, +cpu/event=0x14,umask=0x09,cmask=0x01,period=1000003,name='ARITH.DIVIDER_ACTIVE'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x79,umask=0x04,period=100003,name='IDQ.MITE_UOPS'/, +cpu/event=0x79,umask=0x30,period=100003,name='IDQ.MS_UOPS'/, +cpu/event=0x56,umask=0x01,period=100003,name='UOPS_DECODED.DEC0'/, +cpu/event=0x56,umask=0x01,cmask=0x01,period=100003,name='UOPS_DECODED.DEC0:c1'/, +cpu/event=0x0e,umask=0x01,period=2000003,name='UOPS_ISSUED.ANY'/, +cpu-cycles:k, +ref-cycles:k, +instructions:k; + +# OCR +cpu/event=0xb7,umask=0x01,offcore_rsp=0x104000477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/, +cpu/event=0x85,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, +cpu/event=0x08,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xb7,umask=0x01,offcore_rsp=0x1030000477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x830000477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x730000477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/, +cpu/event=0x08,umask=0x04,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, +cpu/event=0x49,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, 
+cpu-cycles, +ref-cycles, +instructions; + +# C6 +cstate_core/c6-residency/; +cstate_pkg/c6-residency/; + +# UPI +upi/event=0x2,umask=0xf,name='UNC_UPI_TxL_FLITS.ALL_DATA'/; + +# CHA +cha/event=0x00,umask=0x00,name='UNC_CHA_CLOCKTICKS'/; + +cha/event=0x35,umask=0xC8177E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE'/, +cha/event=0x35,umask=0xC816FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL'/, +cha/event=0x35,umask=0xC896FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL'/, +cha/event=0x35,umask=0xC8977E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE'/; + +cha/event=0x36,umask=0xc8177e01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE'/; +cha/event=0x35,umask=0xc88ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF'/, +cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/, +cha/event=0x36,umask=0xC816FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL'/; + +cha/event=0x35,umask=0xccd7fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD'/, +cha/event=0x35,umask=0xc817fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD'/, +cha/event=0x35,umask=0xc897fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF'/, +cha/event=0x36,umask=0xC817FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD'/; + +# memory read/writes +imc/event=0x04,umask=0x0f,name='UNC_M_CAS_COUNT.RD'/, +imc/event=0x04,umask=0x30,name='UNC_M_CAS_COUNT.WR'/; + +# power +power/energy-pkg/, +power/energy-ram/; diff --git a/cmd/pmu2metrics/resources/icx_nofixedtma_metrics.json b/cmd/pmu2metrics/resources/icx_nofixedtma_metrics.json new file mode 100644 index 0000000..5325629 --- /dev/null +++ b/cmd/pmu2metrics/resources/icx_nofixedtma_metrics.json @@ -0,0 +1,329 @@ +[ + { + "name": "metric_CPU operating frequency (in GHz)", + "expression": "(([cpu-cycles] / [ref-cycles] * [SYSTEM_TSC_FREQ]) / 1000000000)" + }, + { + "name": "metric_CPU utilization %", + "expression": "100 * [ref-cycles] / [TSC]" + }, + { + "name": "metric_CPU utilization% in kernel mode", + "expression": "100 * [ref-cycles:k] / [TSC]", +
"origin": "perfspect" + }, + { + "name": "metric_CPI", + "name-txn": "metric_cycles per txn", + "expression": "[cpu-cycles] / [instructions]", + "expression-txn": "[cpu-cycles] / [TXN]" + }, + { + "name": "metric_kernel_CPI", + "name-txn": "metric_kernel_cycles per txn", + "expression": "[cpu-cycles:k] / [instructions:k]", + "expression-txn": "[cpu-cycles:k] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_IPC", + "name-txn": "metric_txn per cycle", + "expression": "[instructions] / [cpu-cycles]", + "expression-txn": "[TXN] / [cpu-cycles]", + "origin": "perfspect" + }, + { + "name": "metric_giga_instructions_per_sec", + "expression": "[instructions] / 1000000000", + "origin": "perfspect" + }, + { + "name": "metric_locks retired per instr", + "name-txn": "metric_locks retired per txn", + "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", + "origin": "perfmon website" + }, + { + "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "expression": "[L1D.REPLACEMENT] / [instructions]", + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" + }, + { + "name": "metric_L1D demand data read hits per instr", + "name-txn": "metric_L1D demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" + }, + { + "name": "metric_L1-I code read misses (w/ prefetches) per instr", + "name-txn": "metric_L1I code read misses (includes prefetches) per txn", + "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", + "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" + }, + { + "name": "metric_L2 demand data read hits per instr", + "name-txn": "metric_L2 demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" + }, + { + "name": "metric_L2 MPI
(includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[L2_LINES_IN.ALL] / [instructions]", + "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" + }, + { + "name": "metric_L2 demand data read MPI", + "name-txn": "metric_L2 demand data read misses per txn", + "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" + }, + { + "name": "metric_L2 demand code MPI", + "name-txn": "metric_L2 demand code misses per txn", + "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", + "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" + }, + { + "name": "metric_LLC code read MPI (demand+prefetch)", + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [TXN]" + }, + { + "name": "metric_LLC data read MPI (demand+prefetch)", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" + }, + { + "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]" + }, + { + "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "expression": 
"[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]" + }, + { + "name": "metric_Average LLC demand data read miss latency (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_UPI Data transmit BW (MB/sec) (only data)", + "expression": "([UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9.0) / 1000000) / 1" + }, + { + "name": "metric_package power (watts)", + "expression": "[power/energy-pkg/]", + "origin": "perfspect" + }, + { + "name": "metric_DRAM power (watts)", + "expression": "[power/energy-ram/]", + "origin": "perfspect" + }, + { + "name": "metric_core c6 residency %", + "expression": "100 * [cstate_core/c6-residency/] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_package c6 residency %", + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_% Uops delivered from decoded Icache (DSB)", + "expression": "100 * ([IDQ.DSB_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )" + }, + { + "name": "metric_% Uops delivered from legacy decode pipeline (MITE)", + "expression": "100 * ([IDQ.MITE_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + 
[IDQ.MS_UOPS] + [LSD.UOPS]) )" + }, + { + "name": "metric_core initiated local dram read bandwidth (MB/sec)", + "expression": "(([OCR.READS_TO_CORE.LOCAL_DRAM] + [OCR.HWPF_L3.L3_MISS_LOCAL]) * 64 / 1000000) / 1" + }, + { + "name": "metric_core initiated remote dram read bandwidth (MB/sec)", + "expression": "(([OCR.READS_TO_CORE.REMOTE_DRAM] + [OCR.HWPF_L3.REMOTE]) * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth read (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.RD] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth write (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.WR] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth total (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_ITLB (2nd level) MPI", + "name-txn": "metric_ITLB (2nd level) misses per txn", + "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) load MPI", + "name-txn": "metric_DTLB (2nd level) load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) 2MB large page load MPI", + "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) store MPI", + "name-txn": "metric_DTLB (2nd level) store misses per txn", + "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_NUMA %_Reads addressed to local DRAM", + "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL]) / 
([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])" + }, + { + "name": "metric_NUMA %_Reads addressed to remote DRAM", + "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])" + }, + { + "name": "metric_uncore frequency GHz", + "expression": "([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1" + }, + { + "name": "metric_TMA_Frontend_Bound(%)", + "expression": "100 * ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Fetch_Latency(%)", + "expression": "100 * ( ( ( 5 ) * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_....ICache_Misses(%)", + "expression": "100 * ( [ICACHE_DATA.STALLS] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....ITLB_Misses(%)", + "expression": "100 * ( [ICACHE_TAG.STALLS] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....MS_Switches(%)", + "expression": "100 * ( ( 3 ) * [IDQ.MS_SWITCHES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....LCP(%)", + "expression": "100 * ( [DECODE.LCP] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....DSB_Switches(%)", + "expression": "100 * ( [DSB2MITE_SWITCHES.PENALTY_CYCLES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Fetch_Bandwidth(%)", + "expression": "100 * ( max( 0 , ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) - ( ( ( 5 ) * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....MITE(%)", + 
"expression": "100 * ( ( [IDQ.MITE_CYCLES_ANY] - [IDQ.MITE_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, + { + "name": "metric_TMA_....DSB(%)", + "expression": "100 * ( ( [IDQ.DSB_CYCLES_ANY] - [IDQ.DSB_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, + { + "name": "metric_TMA_Bad_Speculation(%)", + "expression": "100 * ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) )" + }, + { + "name": "metric_TMA_..Branch_Mispredicts(%)", + "expression": "100 * ( ( [BR_MISP_RETIRED.ALL_BRANCHES] / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] ) ) * ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) ) )" + }, + { + "name": "metric_TMA_..Machine_Clears(%)", + "expression": "100 * ( max( 0 , ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) ) - ( ( [BR_MISP_RETIRED.ALL_BRANCHES] / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] ) ) * ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) ) ) ) )" + }, + { + "name": "metric_TMA_Backend_Bound(%)", + "expression": "100 * ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Memory_Bound(%)", + 
"expression": "100 * ( ( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES] ) / ( [CYCLE_ACTIVITY.STALLS_TOTAL] + ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) + [EXE_ACTIVITY.BOUND_ON_STORES] ) ) * ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_....L1_Bound(%)", + "expression": "100 * ( max( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] - [CYCLE_ACTIVITY.STALLS_L1D_MISS] ) / ( [cpu-cycles] ) , 0 ) )" + }, + { + "name": "metric_TMA_....L2_Bound(%)", + "expression": "100 * ( ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) / ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + [L1D_PEND_MISS.FB_FULL_PERIODS] ) ) * ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) ) )" + }, + { + "name": "metric_TMA_....L3_Bound(%)", + "expression": "100 * ( ( [CYCLE_ACTIVITY.STALLS_L2_MISS] - [CYCLE_ACTIVITY.STALLS_L3_MISS] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....DRAM_Bound(%)", + "expression": "100 * ( ( [CYCLE_ACTIVITY.STALLS_L3_MISS] / ( [cpu-cycles] ) + ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) ) - ( ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) / ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + [L1D_PEND_MISS.FB_FULL_PERIODS] ) ) * ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) ) ) ) )" + }, + { + "name": "metric_TMA_....Store_Bound(%)", + "expression": "100 * ( [EXE_ACTIVITY.BOUND_ON_STORES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Core_Bound(%)", + "expression": "100 * ( max( 0 , ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / 
( [TOPDOWN.SLOTS_P] ) ) - ( ( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES] ) / ( [CYCLE_ACTIVITY.STALLS_TOTAL] + ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) + [EXE_ACTIVITY.BOUND_ON_STORES] ) ) * ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) ) ) )" + }, + { + "name": "metric_TMA_....Divider(%)", + "expression": "100 * ( [ARITH.DIVIDER_ACTIVE] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_Retiring(%)", + "expression": "100 * ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Light_Operations(%)", + "expression": "100 * ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) ) )" + }, + { + "name": "metric_TMA_....Memory_Operations(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) ) ) * [MEM_INST_RETIRED.ANY] / [instructions] )" + }, + { + "name": "metric_TMA_....Branch_Instructions(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) ) ) * [BR_INST_RETIRED.ALL_BRANCHES] / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_..Heavy_Operations(%)", + "expression": "100 * ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * 
[IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] )" + }, + { + "name": "metric_TMA_....Few_Uops_Instructions(%)", + "expression": "100 * ( ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) - ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_....Microcode_Sequencer(%)", + "expression": "100 * ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) )" + } +] \ No newline at end of file diff --git a/cmd/pmu2metrics/resources/spr_nofixedtma_events.txt b/cmd/pmu2metrics/resources/spr_nofixedtma_events.txt new file mode 100644 index 0000000..d767656 --- /dev/null +++ b/cmd/pmu2metrics/resources/spr_nofixedtma_events.txt @@ -0,0 +1,138 @@ +########################################################################################################### +# Copyright (C) 2021-2023 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause +########################################################################################################### + +# Sapphire Rapids and Emerald Rapids event list for platforms that don't have support for the fixed counter +# TMA events, e.g., some AWS VMs. +# Note that there are no more than 10 events per group. On these same platforms, the cpu-cycles fixed +# counter is not supported so a general purpose counter will be used. 
+ +cpu/event=0x51,umask=0x01,period=100003,name='L1D.REPLACEMENT'/, +cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, +cpu/event=0xd1,umask=0x01,period=1000003,name='MEM_LOAD_RETIRED.L1_HIT'/, +cpu/event=0x25,umask=0x1f,period=100003,name='L2_LINES_IN.ALL'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xd1,umask=0x10,period=100021,name='MEM_LOAD_RETIRED.L2_MISS'/, +cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/, +cpu/event=0x11,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, +cpu/event=0x47,umask=0x03,cmask=0x03,period=1000003,name='MEMORY_ACTIVITY.STALLS_L1D_MISS'/, +cpu/event=0xa6,umask=0x40,cmask=0x02,period=1000003,name='EXE_ACTIVITY.BOUND_ON_STORES'/, +cpu/event=0xa6,umask=0x21,cmask=0x05,period=2000003,name='EXE_ACTIVITY.BOUND_ON_LOADS'/, +cpu/event=0xad,umask=0x10,period=1000003,name='INT_MISC.UOP_DROPPING'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x12,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, +cpu/event=0x12,umask=0x04,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, +cpu/event=0x13,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, +cpu/event=0xd1,umask=0x02,period=200003,name='MEM_LOAD_RETIRED.L2_HIT'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x47,umask=0x09,cmask=0x09,period=1000003,name='MEMORY_ACTIVITY.STALLS_L3_MISS'/, +cpu/event=0x80,umask=0x04,period=500009,name='ICACHE_DATA.STALLS'/, +cpu/event=0x83,umask=0x04,period=200003,name='ICACHE_TAG.STALLS'/, +cpu-cycles, +ref-cycles, +instructions; + +# events for TMA metrics without fixed counter support (group 1) +cpu/event=0x9c,umask=0x01,name='IDQ_UOPS_NOT_DELIVERED.CORE'/, +cpu/event=0xa4,umask=0x01,name='TOPDOWN.SLOTS_P'/, +cpu/event=0x9c,umask=0x01,cmask=0x06,name='IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE'/, +cpu/event=0xc2,umask=0x02,name='UOPS_RETIRED.SLOTS'/, +cpu/event=0xae,umask=0x01,name='UOPS_ISSUED.ANY'/, +cpu/event=0x87,umask=0x01,name='DECODE.LCP'/,
+cpu/event=0x61,umask=0x02,name='DSB2MITE_SWITCHES.PENALTY_CYCLES'/, +cpu-cycles, +ref-cycles, +instructions; + +# events for TMA metrics without fixed counter support (group 2) +cpu/event=0xa4,umask=0x02,name='TOPDOWN.BACKEND_BOUND_SLOTS'/, +cpu/event=0xa4,umask=0x08,name='TOPDOWN.BR_MISPREDICT_SLOTS'/, +cpu/event=0xa4,umask=0x10,name='TOPDOWN.MEMORY_BOUND_SLOTS'/, +cpu/event=0xc2,umask=0x01,name='UOPS_RETIRED.HEAVY'/, +cpu/event=0xe5,umask=0x03,name='MEM_UOP_RETIRED.ANY'/, +cpu/event=0xc0,umask=0x10,name='INST_RETIRED.MACRO_FUSED'/, +cpu/event=0xc4,umask=0x00,name='BR_INST_RETIRED.ALL_BRANCHES'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x47,umask=0x03,cmask=0x03,period=1000003,name='MEMORY_ACTIVITY.STALLS_L1D_MISS'/, +cpu/event=0x47,umask=0x05,cmask=0x05,period=1000003,name='MEMORY_ACTIVITY.STALLS_L2_MISS'/, +cpu/event=0xb0,umask=0x09,cmask=0x01,period=1000003,name='ARITH.DIV_ACTIVE'/, +cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/, +cpu/event=0xd0,umask=0x21,cmask=0x00,period=1000003,name='MEM_INST_RETIRED.LOCK_LOADS'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x79,umask=0x04,cmask=0x01,period=2000003,name='IDQ.MITE_CYCLES_ANY'/, +cpu/event=0x79,umask=0x04,cmask=0x06,period=2000003,name='IDQ.MITE_CYCLES_OK'/, +cpu/event=0x79,umask=0x08,cmask=0x01,period=2000003,name='IDQ.DSB_CYCLES_ANY'/, +cpu/event=0x79,umask=0x08,cmask=0x06,period=2000003,name='IDQ.DSB_CYCLES_OK'/, +cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/, +cpu/event=0xb7,umask=0x02,period=2000003,name='EXE.AMX_BUSY'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x79,umask=0x08,cmask=0x00,period=2000003,name='IDQ.DSB_UOPS'/, +cpu/event=0x79,umask=0x04,period=100003,name='IDQ.MITE_UOPS'/, +cpu/event=0x79,umask=0x20,period=100003,name='IDQ.MS_UOPS'/, +cpu/event=0xa8,umask=0x01,cmask=0x00,period=2000003,name='LSD.UOPS'/, +cpu-cycles:k, +ref-cycles:k, +instructions:k; + +#OCR 
+cpu/event=0x2a,umask=0x01,offcore_rsp=0x104004477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x730004477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x1030004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x830004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/; + +#C6 +cstate_core/c6-residency/; +cstate_pkg/c6-residency/; + +#UPI +upi/event=0x02,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/; + +#CHA (Cache) +cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/, +cha/event=0x35,umask=0xc8177e01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE'/, +cha/event=0x36,umask=0xc8177e01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE'/; + +cha/event=0x35,umask=0xC816FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL'/, +cha/event=0x36,umask=0xc816fe01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL'/, +cha/event=0x35,umask=0xC896FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL'/, +cha/event=0x35,umask=0xC8977E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE'/; + +cha/event=0x35,umask=0xccd7fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA'/, +cha/event=0x35,umask=0xc817fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD'/, +cha/event=0x35,umask=0xc897fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF'/, +cha/event=0x36,umask=0xC817fe01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD'/; + +#CHA (IO Bandwidth) +cha/event=0x35,umask=0xc8f3ff04,name='UNC_CHA_TOR_INSERTS.IO_PCIRDCUR'/, +cha/event=0x35,umask=0xCC43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOM'/, +cha/event=0x35,umask=0xCD43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR'/, +cha/event=0x01,umask=0x00,name='UNC_CHA_CLOCKTICKS'/; + +#IMC (memory read/writes) +imc/event=0x05,umask=0xcf,name='UNC_M_CAS_COUNT.RD'/, 
+imc/event=0x05,umask=0xf0,name='UNC_M_CAS_COUNT.WR'/; + +#power +power/energy-pkg/, +power/energy-ram/; diff --git a/cmd/pmu2metrics/resources/spr_nofixedtma_metrics.json b/cmd/pmu2metrics/resources/spr_nofixedtma_metrics.json new file mode 100644 index 0000000..587d6b1 --- /dev/null +++ b/cmd/pmu2metrics/resources/spr_nofixedtma_metrics.json @@ -0,0 +1,349 @@ +[ + { + "name": "metric_CPU operating frequency (in GHz)", + "expression": "(([cpu-cycles] / [ref-cycles] * [SYSTEM_TSC_FREQ]) / 1000000000)" + }, + { + "name": "metric_CPU utilization %", + "expression": "100 * [ref-cycles] / [TSC]" + }, + { + "name": "metric_CPU utilization% in kernel mode", + "expression": "100 * [ref-cycles:k] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_CPI", + "name-txn": "metric_cycles per txn", + "expression": "[cpu-cycles] / [instructions]", + "expression-txn": "[cpu-cycles] / [TXN]" + }, + { + "name": "metric_kernel_CPI", + "name-txn": "metric_kernel_cycles per txn", + "expression": "[cpu-cycles:k] / [instructions:k]", + "expression-txn": "[cpu-cycles:k] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_IPC", + "name-txn": "metric_txn per cycle", + "expression": "[instructions] / [cpu-cycles]", + "expression-txn": "[TXN] / [cpu-cycles]", + "origin": "perfspect" + }, + { + "name": "metric_giga_instructions_per_sec", + "expression": "[instructions] / 1000000000", + "origin": "perfspect" + }, + { + "name": "metric_locks retired per instr", + "name-txn": "metric_locks retired per txn", + "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", + "origin": "perfmon website" + }, + { + "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "expression": "[L1D.REPLACEMENT] / [instructions]", + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" + }, + { + "name": "metric_L1D demand data read hits per instr", + 
"name-txn": "metric_L1D demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" + }, + { + "name": "metric_L1-I code read misses (w/ prefetches) per instr", + "name-txn": "metric_L1I code read misses (includes prefetches) per txn", + "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", + "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" + }, + { + "name": "metric_L2 demand data read hits per instr", + "name-txn": "metric_L2 demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" + }, + { + "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[L2_LINES_IN.ALL] / [instructions]", + "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" + }, + { + "name": "metric_L2 demand data read MPI", + "name-txn": "metric_L2 demand data read misses per txn", + "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" + }, + { + "name": "metric_L2 demand code MPI", + "name-txn": "metric_L2 demand code misses per txn", + "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", + "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" + }, + { + "name": "metric_LLC code read MPI (demand+prefetch)", + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [instructions]", + "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [TXN]" + }, + { + "name": "metric_LLC data read MPI (demand+prefetch)", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", + "expression-txn": 
"([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" + }, + { + "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_Average LLC demand data read miss latency (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_UPI Data transmit BW (MB/sec) (only data)", + "expression": "([UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9.0) / 1000000) / 1" + }, + { + "name": "metric_package power (watts)", + "expression": "[power/energy-pkg/]", + "origin": "perfspect" + }, + { + "name": "metric_DRAM power (watts)", + 
"expression": "[power/energy-ram/]", + "origin": "perfspect" + }, + { + "name": "metric_core c6 residency %", + "expression": "100 * [cstate_core/c6-residency/] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_package c6 residency %", + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_% Uops delivered from decoded Icache (DSB)", + "expression": "100 * ([IDQ.DSB_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )" + }, + { + "name": "metric_% Uops delivered from legacy decode pipeline (MITE)", + "expression": "100 * ([IDQ.MITE_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )" + }, + { + "name": "metric_core initiated local dram read bandwidth (MB/sec)", + "expression": "([OCR.READS_TO_CORE.LOCAL_DRAM] + [OCR.HWPF_L3.L3_MISS_LOCAL]) * 64 / 1000000", + "origin": "perfspect" + }, + { + "name": "metric_core initiated remote dram read bandwidth (MB/sec)", + "expression": "([OCR.READS_TO_CORE.REMOTE_DRAM] + [OCR.HWPF_L3.REMOTE]) * 64 / 1000000", + "origin": "perfspect" + }, + { + "name": "metric_memory bandwidth read (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.RD] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth write (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.WR] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth total (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_ITLB (2nd level) MPI", + "name-txn": "metric_ITLB (2nd level) misses per txn", + "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) load MPI", + "name-txn": "metric_DTLB (2nd level) load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + 
"name": "metric_DTLB (2nd level) 2MB large page load MPI", + "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) store MPI", + "name-txn": "metric_DTLB (2nd level) store misses per txn", + "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_NUMA %_Reads addressed to local DRAM", + "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])" + }, + { + "name": "metric_NUMA %_Reads addressed to remote DRAM", + "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])" + }, + { + "name": "metric_uncore frequency GHz", + "expression": "([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_writes (MB/sec)", + "expression": "([UNC_CHA_TOR_INSERTS.IO_PCIRDCUR] * 64 / 1000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_reads (MB/sec)", + "expression": "(([UNC_CHA_TOR_INSERTS.IO_ITOM] + [UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_TMA_Frontend_Bound(%)", + "expression": "100 * ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Fetch_Latency(%)", + "expression": "100 * ( ( 
[IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] * ( 6 ) - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_....ICache_Misses(%)", + "expression": "100 * ( [ICACHE_DATA.STALLS] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....ITLB_Misses(%)", + "expression": "100 * ( [ICACHE_TAG.STALLS] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....MS_Switches(%)", + "expression": "100 * ( ( 3 ) * [UOPS_RETIRED.MS:c1:e1] / ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....LCP(%)", + "expression": "100 * ( [DECODE.LCP] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....DSB_Switches(%)", + "expression": "100 * ( [DSB2MITE_SWITCHES.PENALTY_CYCLES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Fetch_Bandwidth(%)", + "expression": "100 * ( max( 0 , ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) - ( ( [IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] * ( 6 ) - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....MITE(%)", + "expression": "100 * ( ( [IDQ.MITE_CYCLES_ANY] - [IDQ.MITE_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, + { + "name": "metric_TMA_....DSB(%)", + "expression": "100 * ( ( [IDQ.DSB_CYCLES_ANY] - [IDQ.DSB_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, + { + "name": "metric_TMA_Bad_Speculation(%)", + "expression": "100 * ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) )" + }, + { + "name": "metric_TMA_..Branch_Mispredicts(%)", + "expression": "100 * ( [TOPDOWN.BR_MISPREDICT_SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Machine_Clears(%)", + "expression": "100 * ( max( 0 , ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( 
[TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) ) - ( [TOPDOWN.BR_MISPREDICT_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_Backend_Bound(%)", + "expression": "100 * ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Memory_Bound(%)", + "expression": "100 * ( [TOPDOWN.MEMORY_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_....L1_Bound(%)", + "expression": "100 * ( max( ( [EXE_ACTIVITY.BOUND_ON_LOADS] - [MEMORY_ACTIVITY.STALLS_L1D_MISS] ) / ( [cpu-cycles] ) , 0 ) )" + }, + { + "name": "metric_TMA_....L2_Bound(%)", + "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L1D_MISS] - [MEMORY_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....L3_Bound(%)", + "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L2_MISS] - [MEMORY_ACTIVITY.STALLS_L3_MISS] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....DRAM_Bound(%)", + "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L3_MISS] / ( [cpu-cycles] ) ) )" + }, + { + "name": "metric_TMA_....Store_Bound(%)", + "expression": "100 * ( [EXE_ACTIVITY.BOUND_ON_STORES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Core_Bound(%)", + "expression": "100 * ( max( 0 , ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [TOPDOWN.MEMORY_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....Divider(%)", + "expression": "100 * ( [ARITH.DIV_ACTIVE] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....AMX_Busy(%)", + "expression": "100 * ( [EXE.AMX_BUSY] / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) )" + }, + { + "name": "metric_TMA_Retiring(%)", + "expression": "100 * ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Light_Operations(%)", + "expression": "100 * ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + 
"name": "metric_TMA_....Memory_Operations(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) ) * [MEM_UOP_RETIRED.ANY] / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_....Fused_Instructions(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) ) * [INST_RETIRED.MACRO_FUSED] / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_....Non_Fused_Branches(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) ) * ( [BR_INST_RETIRED.ALL_BRANCHES] - [INST_RETIRED.MACRO_FUSED] ) / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_..Heavy_Operations(%)", + "expression": "100 * ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_....Few_Uops_Instructions(%)", + "expression": "100 * ( max( 0 , ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.MS] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....Microcode_Sequencer(%)", + "expression": "100 * ( [UOPS_RETIRED.MS] / ( [TOPDOWN.SLOTS_P] ) )" + } +] \ No newline at end of file diff --git a/cmd/pmu2metrics/resources/srf_events.txt b/cmd/pmu2metrics/resources/srf_events.txt index b57637d..49b3fe1 100644 --- a/cmd/pmu2metrics/resources/srf_events.txt +++ b/cmd/pmu2metrics/resources/srf_events.txt @@ -5,13 +5,107 @@ # SierraForest event list +cpu-cycles:k, +ref-cycles:k, +instructions:k; + +cpu/event=0x08,umask=0x08,name='DTLB_LOAD_MISSES.WALK_COMPLETED_1G'/, +cpu/event=0x08,umask=0xe,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, +cpu/event=0x49,umask=0xe,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, 
+cpu/event=0x12,umask=0x02,name='DTLB_LOAD_MISSES.WALK_COMPLETED_4K'/, +cpu/event=0x12,umask=0x04,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, cpu-cycles, ref-cycles, instructions; -cpu-cycles:k, -ref-cycles:k, -instructions:k; +cpu/event=0x2e,umask=0x41,name='LONGEST_LAT_CACHE.MISS'/, +cpu/event=0x2e,umask=0x4f,name='LONGEST_LAT_CACHE.REFERENCE'/, +cpu/event=0x85,umask=0xe,name='ITLB_MISSES.WALK_COMPLETED'/, +cpu/event=0xd0,umask=0x21,name='MEM_UOPS_RETIRED.LOCK_LOADS'/, +cpu/event=0xd1,umask=0x02,name='MEM_LOAD_UOPS_RETIRED.L2_HIT'/, +cpu/event=0xd1,umask=0x40,name='MEM_LOAD_UOPS_RETIRED.L1_MISS'/, +cpu/event=0xd1,umask=0x1,name='MEM_LOAD_UOPS_RETIRED.L1_HIT'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x71,umask=0x00,name='TOPDOWN_FE_BOUND.ALL'/, +cpu/event=0x71,umask=0x20,name='TOPDOWN_FE_BOUND.ICACHE'/, +cpu/event=0x71,umask=0x10,name='TOPDOWN_FE_BOUND.ITLB_MISS'/, +cpu/event=0x71,umask=0x72,name='TOPDOWN_FE_BOUND.FRONTEND_LATENCY'/, +cpu/event=0x71,umask=0x40,name='TOPDOWN_FE_BOUND.BRANCH_RESTEER'/, +cpu/event=0x71,umask=0x8d,name='TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x80,umask=0x02,name='ICACHE.MISSES'/, +cpu/event=0x05,umask=0xf4,name='LD_HEAD.L1_BOUND_AT_RET'/, +cpu/event=0x72,umask=0x00,name='TOPDOWN_RETIRING.ALL'/, +cpu/event=0x73,umask=0x03,name='TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS'/, +cpu/event=0x73,umask=0x04,name='TOPDOWN_BAD_SPECULATION.MISPREDICT'/, +cpu/event=0x73,umask=0x00,name='TOPDOWN_BAD_SPECULATION.ALL'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x05,umask=0xff,name='LD_HEAD.ANY_AT_RET'/, +cpu/event=0x04,umask=0x07,name='MEM_SCHEDULER_BLOCK.ALL'/, +cpu/event=0x04,umask=0x01,name='MEM_SCHEDULER_BLOCK.ST_BUF'/, +cpu/event=0x74,umask=0x02,name='TOPDOWN_BE_BOUND.MEM_SCHEDULER'/, +cpu/event=0x74,umask=0x10,name='TOPDOWN_BE_BOUND.SERIALIZATION'/, +cpu/event=0x74,umask=0x00,name='TOPDOWN_BE_BOUND.ALL'/, +cpu-cycles, +ref-cycles, +instructions; + 
+cpu/event=0x05,umask=0x81,name='LD_HEAD.L1_MISS_AT_RET'/, +cpu/event=0x34,umask=0x6f,name='MEM_BOUND_STALLS_LOAD.ALL'/, +cpu/event=0x34,umask=0x01,name='MEM_BOUND_STALLS_LOAD.L2_HIT'/, +cpu/event=0x34,umask=0x06,name='MEM_BOUND_STALLS_LOAD.LLC_HIT'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xb7,umask=0x01,cmask=0x00,offcore_rsp=0x8000100000004477,name='OCR.READS_TO_CORE.OUTSTANDING'/, +cpu/event=0xb7,umask=0x02,cmask=0x00,offcore_rsp=0x100000014477,name='OCR.READS_TO_CORE.ANY_RESPONSE'/; + +cpu/event=0xB7,umask=0x01,offcore_rsp=0x101030004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/, +cpu/event=0xB7,umask=0x01,offcore_rsp=0x100830004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/; + +cpu/event=0xb7,umask=0x01,cmask=0x00,offcore_rsp=0x10244,name='OCR.L2_CODE_MISS'/, +cpu/event=0xb7,umask=0x02,cmask=0x00,offcore_rsp=0x10070,name='OCR.HWPF_L2.ANY_RESPONSE'/; + +cpu/event=0xb7,umask=0x01,cmask=0x00,offcore_rsp=0x1010003C4477,name='OCR.READS_TO_CORE.L3_HIT.SNOOP_HITM'/, +cpu/event=0xb7,umask=0x02,cmask=0x00,offcore_rsp=0x1008003C4477,name='OCR.READS_TO_CORE.L3_HIT.SNOOP_HIT_WITH_FWD'/; + +#CHA (Cache) +cha/event=0x01,umask=0x00,name='UNC_CHA_CLOCKTICKS'/; + +cha/event=0x35,umask=0x00C827FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT'/, +cha/event=0x35,umask=0x00C8A7FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF'/, +cha/event=0x35,umask=0x00C80FFE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/, +cha/event=0x35,umask=0x00C88FFE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF'/; + +cha/event=0x35,umask=0x00CCD7FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA'/, +cha/event=0x35,umask=0x00C807FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_RFO'/, +cha/event=0x35,umask=0x00C887FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF'/, +cha/event=0x35,umask=0x00CCC7FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO'/; + +#CHA (IO Bandwidth) +cha/event=0x35,umask=0x00C8F3FF04,name='UNC_CHA_TOR_INSERTS.IO_PCIRDCUR'/, 
+cha/event=0x35,umask=0x00CC43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOM'/, +cha/event=0x35,umask=0x00CD43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR'/; + +cha/event=0x36,umask=0x00C827FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT'/; + +cha/event=0x36,umask=0x00C807FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO'/; + +#IMC (memory read/writes) +imc/event=0x05,umask=0xCF,name='UNC_M_CAS_COUNT_SCH0.RD'/, +imc/event=0x05,umask=0xF0,name='UNC_M_CAS_COUNT_SCH0.WR'/, +imc/event=0x06,umask=0xCF,name='UNC_M_CAS_COUNT_SCH1.RD'/, +imc/event=0x06,umask=0xF0,name='UNC_M_CAS_COUNT_SCH1.WR'/; #C6 cstate_core/c6-residency/; @@ -19,4 +113,4 @@ cstate_pkg/c6-residency/; #power power/energy-pkg/, -power/energy-ram/; \ No newline at end of file +power/energy-ram/; diff --git a/cmd/pmu2metrics/resources/srf_metrics.json b/cmd/pmu2metrics/resources/srf_metrics.json index 1f38f6e..615d432 100644 --- a/cmd/pmu2metrics/resources/srf_metrics.json +++ b/cmd/pmu2metrics/resources/srf_metrics.json @@ -36,5 +36,270 @@ "name": "metric_giga_instructions_per_sec", "expression": "[instructions] / 1000000000", "origin": "perfspect" + }, + { + "name": "metric_locks retired per instr", + "name-txn": "metric_locks retired per txn", + "expression": "[MEM_UOPS_RETIRED.LOCK_LOADS] / [instructions]", + "expression-txn": "[MEM_UOPS_RETIRED.LOCK_LOADS] / [TXN]" + }, + { + "name": "metric_L1D demand data read MPI", + "name-txn": "metric_L1D demand data read misses per txn", + "expression": "[MEM_LOAD_UOPS_RETIRED.L1_MISS] / [instructions]", + "expression-txn": "[MEM_LOAD_UOPS_RETIRED.L1_MISS] / [TXN]" + }, + { + "name": "metric_L1D demand data read hits per instr", + "name-txn": "metric_L1D demand data read hits per txn", + "expression": "[MEM_LOAD_UOPS_RETIRED.L1_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_UOPS_RETIRED.L1_HIT] / [TXN]" + }, + { + "name": "metric_L1-I code read misses (w/ prefetches) per instr", + "name-txn": "metric_L1-I code read misses (w/ prefetches) per txn", + "expression":
"[ICACHE.MISSES] / [instructions]", + "expression-txn": "[ICACHE.MISSES] / [TXN]" + }, + { + "name": "metric_L2 demand data read hits per instr", + "name-txn": "metric_L2 demand data read hits per txn", + "expression": "[MEM_LOAD_UOPS_RETIRED.L2_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_UOPS_RETIRED.L2_HIT] / [TXN]" + }, + { + "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[LONGEST_LAT_CACHE.REFERENCE] / [instructions]", + "expression-txn": "[LONGEST_LAT_CACHE.REFERENCE] / [TXN]" + }, + { + "name": "metric_L2 code MPI", + "name-txn": "metric_L2 code misses per txn", + "expression": "[OCR.L2_CODE_MISS] / [instructions]", + "expression-txn": "[OCR.L2_CODE_MISS] / [TXN]" + }, + { + "name": "metric_L2 Any local request that HITM in another module (per instr)", + "name-txn": "metric_L2 Any local request that HITM in another module per txn", + "expression": "[OCR.READS_TO_CORE.L3_HIT.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.L3_HIT.SNOOP_HITM] / [TXN]" + }, + { + "name": "metric_L2 Any local request that HIT in another module and forwarded(per instr)", + "name-txn": "metric_L2 Any local request that HIT in another module and forwarded per txn", + "expression": "[OCR.READS_TO_CORE.L3_HIT.SNOOP_HIT_WITH_FWD] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.L3_HIT.SNOOP_HIT_WITH_FWD] / [TXN]" + }, + { + "name": "metric_L2 all L2 prefetches(per instr)", + "name-txn": "metric_L2 all L2 prefetches per txn", + "expression": "[OCR.HWPF_L2.ANY_RESPONSE] / [instructions]", + "expression-txn": "[OCR.HWPF_L2.ANY_RESPONSE] / [TXN]" + }, + { + "name": "metric_data_read_L2_Miss_Latency_using_ORO_events(ns)", + "expression": "( 1000000000 * ([OCR.READS_TO_CORE.OUTSTANDING] / [OCR.READS_TO_CORE.ANY_RESPONSE]) / ([cpu-cycles] / [TSC] * [SYSTEM_TSC_FREQ]) )" + }, + { + "name": "metric_L3 MPI (includes code+data+rfo w/ 
prefetches)", + "name-txn": "metric_L3 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[LONGEST_LAT_CACHE.MISS] / [instructions]", + "expression-txn": "[LONGEST_LAT_CACHE.MISS] / [TXN]" + }, + { + "name": "metric_LLC MPI (includes code+data+rfo w/ prefetches)", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_RFO] + [UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO]) / [instructions]", + "name-txn": "metric_LLC misses per txn (includes code+data+rfo w/ prefetches)", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_RFO] + [UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO]) / [TXN]" + }, + { + "name": "metric_LLC total HITM (per instr)", + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]" + }, + { + "name": "metric_LLC total HIT clean line forwards (per instr)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]" + }, + { + "name": "metric_LLC data read MPI (demand+prefetch)", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA]) / 
[instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA]) / [TXN]" + }, + { + "name": "metric_LLC code read MPI (demand+prefetch)", + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [TXN]" + }, + { + "name": "metric_Average LLC demand data read miss latency (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand RFO miss latency (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO] / [UNC_CHA_TOR_INSERTS.IA_MISS_RFO]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_core initiated local dram read bandwidth (MB/sec)", + "expression": "([LONGEST_LAT_CACHE.MISS]) * 64 / 1000000", + "origin": "perfspect" + }, + { + "name": "metric_memory bandwidth read (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT_SCH0.RD] + [UNC_M_CAS_COUNT_SCH1.RD]) * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth write (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT_SCH0.WR] + [UNC_M_CAS_COUNT_SCH1.WR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth total (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT_SCH0.RD] + [UNC_M_CAS_COUNT_SCH1.RD] + [UNC_M_CAS_COUNT_SCH0.WR] + [UNC_M_CAS_COUNT_SCH1.WR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_writes (MB/sec)", + "expression": "([UNC_CHA_TOR_INSERTS.IO_PCIRDCUR] * 64 / 1000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_reads (MB/sec)", + "expression": 
"(([UNC_CHA_TOR_INSERTS.IO_ITOM] + [UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_package power (watts)", + "expression": "[power/energy-pkg/]", + "origin": "perfspect" + }, + { + "name": "metric_DRAM power (watts)", + "expression": "[power/energy-ram/]", + "origin": "perfspect" + }, + { + "name": "metric_core c6 residency %", + "expression": "100 * [cstate_core/c6-residency/] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_package c6 residency %", + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_uncore frequency GHz", + "expression": "([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1" + }, + { + "name": "metric_ITLB (2nd level) MPI", + "name-txn": "metric_ITLB (2nd level) misses per txn", + "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) load MPI", + "name-txn": "metric_DTLB (2nd level) load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) 4KB page load MPI", + "name-txn": "metric_DTLB (2nd level) 4KB page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_4K] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_4K] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) 2MB large page load MPI", + "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) 1GB large page load MPI", + "name-txn": "metric_DTLB (2nd level) 1GB large page load misses per txn", + "expression": 
"[DTLB_LOAD_MISSES.WALK_COMPLETED_1G] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_1G] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) store MPI", + "name-txn": "metric_DTLB (2nd level) store misses per txn", + "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_TMA_Frontend_Bound(%)", + "expression": "100 * ( [TOPDOWN_FE_BOUND.ALL] / ( 6 * [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Fetch_Latency(%)", + "expression": "100*([TOPDOWN_FE_BOUND.FRONTEND_LATENCY] / (6.0 * [cpu-cycles]))" + }, + { + "name": "metric_TMA_....ICache_Misses(%)", + "expression": "100 * ( [TOPDOWN_FE_BOUND.ICACHE] / ( 6 * [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....ITLB_Misses(%)", + "expression": "100 * ( [TOPDOWN_FE_BOUND.ITLB_MISS] / ( 6 * [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....Branch_Resteer(%)", + "expression": "100*([TOPDOWN_FE_BOUND.BRANCH_RESTEER] / (6.0 * [cpu-cycles]))" + }, + { + "name": "metric_TMA_..Fetch_Bandwidth(%)", + "expression": "100*([TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH] / (6.0 * [cpu-cycles]))" + }, + { + "name": "metric_TMA_Bad_Speculation(%)", + "expression": "100 * ( [TOPDOWN_BAD_SPECULATION.ALL] / ( 6 * [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Branch_Mispredicts(%)", + "expression": "100*([TOPDOWN_BAD_SPECULATION.MISPREDICT] / (6.0 * [cpu-cycles]))" + }, + { + "name": "metric_TMA_..Machine_Clears(%)", + "expression": "100*([TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS] / (6.0 * [cpu-cycles]))" + }, + { + "name": "metric_TMA_Backend_Bound(%)", + "expression": "100 * ( [TOPDOWN_BE_BOUND.ALL] / ( 6 * [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Memory_Bound(%)", + "expression": "100*min(1*([TOPDOWN_BE_BOUND.ALL] / (6.0 * [cpu-cycles])), 1*([LD_HEAD.ANY_AT_RET] / [cpu-cycles] + ([TOPDOWN_BE_BOUND.MEM_SCHEDULER] / (6.0 * [cpu-cycles])) * [MEM_SCHEDULER_BLOCK.ST_BUF] / 
[MEM_SCHEDULER_BLOCK.ALL]))" + }, + { + "name": "metric_TMA_....L1_Bound(%)", + "expression": "100*([LD_HEAD.L1_BOUND_AT_RET] / [cpu-cycles])" + }, + { + "name": "metric_TMA_....L2_Bound(%)", + "expression": "100*([MEM_BOUND_STALLS_LOAD.L2_HIT] / [cpu-cycles] - (max(1*(([MEM_BOUND_STALLS_LOAD.ALL] - [LD_HEAD.L1_MISS_AT_RET]) / [cpu-cycles]), 0) * [MEM_BOUND_STALLS_LOAD.L2_HIT] / [MEM_BOUND_STALLS_LOAD.ALL]))" + }, + { + "name": "metric_TMA_....L3_Bound(%)", + "expression": "100*([MEM_BOUND_STALLS_LOAD.LLC_HIT] / [cpu-cycles] - (max(1*(([MEM_BOUND_STALLS_LOAD.ALL] - [LD_HEAD.L1_MISS_AT_RET]) / [cpu-cycles]), 0) * [MEM_BOUND_STALLS_LOAD.LLC_HIT] / [MEM_BOUND_STALLS_LOAD.ALL]))" + }, + { + "name": "metric_TMA_....Store_Bound(%)", + "expression": "100*(([TOPDOWN_BE_BOUND.MEM_SCHEDULER] / (6.0 * [cpu-cycles])) * [MEM_SCHEDULER_BLOCK.ST_BUF] / [MEM_SCHEDULER_BLOCK.ALL])" + }, + { + "name": "metric_TMA_..Core_Bound(%)", + "expression": "100*max(0, 1*([TOPDOWN_BE_BOUND.ALL] / (6.0 * [cpu-cycles]) - min(1*([TOPDOWN_BE_BOUND.ALL] / (6.0 * [cpu-cycles])), 1*([LD_HEAD.ANY_AT_RET] / [cpu-cycles] + ([TOPDOWN_BE_BOUND.MEM_SCHEDULER] / (6.0 * [cpu-cycles])) * [MEM_SCHEDULER_BLOCK.ST_BUF] / [MEM_SCHEDULER_BLOCK.ALL]))))" + }, + { + "name": "metric_TMA_....Serialization(%)", + "expression": "100*([TOPDOWN_BE_BOUND.SERIALIZATION] / (6.0 * [cpu-cycles]))" + }, + { + "name": "metric_TMA_Retiring(%)", + "expression": "100 * ( [TOPDOWN_RETIRING.ALL] / ( 6 * [cpu-cycles] ) )" } -] \ No newline at end of file +] From 08b06d01f2e5208c6ea2f99c04ced2a569e1a47f Mon Sep 17 00:00:00 2001 From: jharper5 Date: Wed, 26 Jun 2024 14:23:02 -0700 Subject: [PATCH 21/23] const_thread_count to CONST_THREAD_COUNT --- cmd/pmu2metrics/resources/bdx_metrics.json | 10 +++++----- cmd/pmu2metrics/resources/clx_metrics.json | 18 +++++++++--------- cmd/pmu2metrics/resources/skx_metrics.json | 18 +++++++++--------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git 
a/cmd/pmu2metrics/resources/bdx_metrics.json b/cmd/pmu2metrics/resources/bdx_metrics.json index 7e46419..cef23e6 100644 --- a/cmd/pmu2metrics/resources/bdx_metrics.json +++ b/cmd/pmu2metrics/resources/bdx_metrics.json @@ -346,22 +346,22 @@ }, { "name": "metric_TMA_......Ports_Utilized_0(%)", - "expression": "100 * (([UOPS_EXECUTED.CORE_i1_c1] / [const_thread_count]) if ([const_thread_count] > 1) else ([RS_EVENTS.EMPTY_CYCLES] if ([CYCLE_ACTIVITY.STALLS_TOTAL] - ([IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])) ) > 0.1 else 0)) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]) ", + "expression": "100 * (([UOPS_EXECUTED.CORE_i1_c1] / [CONST_THREAD_COUNT]) if ([CONST_THREAD_COUNT] > 1) else ([RS_EVENTS.EMPTY_CYCLES] if ([CYCLE_ACTIVITY.STALLS_TOTAL] - ([IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])) ) > 0.1 else 0)) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT]) ", "origin": "perfspect" }, { "name": "metric_TMA_......Ports_Utilized_1(%)", - "expression": "100 * (([UOPS_EXECUTED.CORE_c1] - [UOPS_EXECUTED.CORE_c2]) / [const_thread_count]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "100 * (([UOPS_EXECUTED.CORE_c1] - [UOPS_EXECUTED.CORE_c2]) / [CONST_THREAD_COUNT]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { "name": "metric_TMA_......Ports_Utilized_2(%)", - "expression": "100 * (([UOPS_EXECUTED.CORE_c2] - [UOPS_EXECUTED.CORE_c3]) / [const_thread_count]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "100 * (([UOPS_EXECUTED.CORE_c2] - [UOPS_EXECUTED.CORE_c3]) / [CONST_THREAD_COUNT]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { "name": "metric_TMA_......Ports_Utilized_3m(%)", - "expression": "100 * ([UOPS_EXECUTED.CORE_c3] / [const_thread_count]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / 
[const_thread_count])", + "expression": "100 * ([UOPS_EXECUTED.CORE_c3] / [CONST_THREAD_COUNT]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { @@ -390,7 +390,7 @@ }, { "name": "metric_TMA_..Microcode_Sequencer(%)", - "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] )/ (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))", + "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] )/ (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT]))", "origin": "perfspect" } ] \ No newline at end of file diff --git a/cmd/pmu2metrics/resources/clx_metrics.json b/cmd/pmu2metrics/resources/clx_metrics.json index 9974e96..f167036 100644 --- a/cmd/pmu2metrics/resources/clx_metrics.json +++ b/cmd/pmu2metrics/resources/clx_metrics.json @@ -202,7 +202,7 @@ }, { "name": "metric_UPI Transmit utilization_% (includes control)", - "expression": "100 * (([UNC_UPI_TxL_FLITS.ALL_DATA] + [UNC_UPI_TxL_FLITS.NON_DATA]) / 3) / ((((([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [const_thread_count])) / (([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [const_thread_count])) - [cstate_pkg/c6-residency/])) * ([UNC_UPI_CLOCKTICKS] - [UNC_UPI_L1_POWER_CYCLES])) * 5 / 6))", + "expression": "100 * (([UNC_UPI_TxL_FLITS.ALL_DATA] + [UNC_UPI_TxL_FLITS.NON_DATA]) / 3) / ((((([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [CONST_THREAD_COUNT])) / (([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [CONST_THREAD_COUNT])) - [cstate_pkg/c6-residency/])) * ([UNC_UPI_CLOCKTICKS] - [UNC_UPI_L1_POWER_CYCLES])) * 5 / 6))", "origin": "perfspect" }, { @@ -284,12 +284,12 @@ }, { "name": "metric_TMA_Info_cycles_both_threads_active(%)", - "expression": "100 * ( (1 - ([CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE] / ([CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY] / 2)) ) if [const_thread_count] > 1 else 0)", + "expression": "100 * ( (1 - ([CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE] / ([CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY] / 2)) ) if 
[CONST_THREAD_COUNT] > 1 else 0)", "origin": "perfspect" }, { "name": "metric_TMA_Info_CoreIPC", - "expression": "[instructions] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "[instructions] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { @@ -298,7 +298,7 @@ }, { "name": "metric_TMA_..Frontend_Latency(%)", - "expression": "100 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] /[const_thread_count])", + "expression": "100 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] /[CONST_THREAD_COUNT])", "origin": "perfspect" }, { @@ -328,7 +328,7 @@ }, { "name": "metric_TMA_..Frontend_Bandwidth(%)", - "expression": "100 * ([IDQ_UOPS_NOT_DELIVERED.CORE] - 4 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]) / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))", + "expression": "100 * ([IDQ_UOPS_NOT_DELIVERED.CORE] - 4 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]) / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT]))", "origin": "perfspect" }, { @@ -416,17 +416,17 @@ }, { "name": "metric_TMA_......Ports_Utilized_0(%)", - "expression": "100 * (([UOPS_EXECUTED.CORE_CYCLES_NONE] / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.EXE_BOUND_0_PORTS]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "100 * (([UOPS_EXECUTED.CORE_CYCLES_NONE] / 2) if ([CONST_THREAD_COUNT] > 1) else [EXE_ACTIVITY.EXE_BOUND_0_PORTS]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { "name": "metric_TMA_......Ports_Utilized_1(%)", - "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_1] - [UOPS_EXECUTED.CORE_CYCLES_GE_2]) / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.1_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_1] - [UOPS_EXECUTED.CORE_CYCLES_GE_2]) / 2) if ([CONST_THREAD_COUNT] > 1) 
else [EXE_ACTIVITY.1_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { "name": "metric_TMA_......Ports_Utilized_2(%)", - "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_2] - [UOPS_EXECUTED.CORE_CYCLES_GE_3]) / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.2_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_2] - [UOPS_EXECUTED.CORE_CYCLES_GE_3]) / 2) if ([CONST_THREAD_COUNT] > 1) else [EXE_ACTIVITY.2_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { @@ -460,7 +460,7 @@ }, { "name": "metric_TMA_..Microcode_Sequencer(%)", - "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])))", + "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])))", "origin": "perfspect" } ] \ No newline at end of file diff --git a/cmd/pmu2metrics/resources/skx_metrics.json b/cmd/pmu2metrics/resources/skx_metrics.json index 9974e96..f167036 100644 --- a/cmd/pmu2metrics/resources/skx_metrics.json +++ b/cmd/pmu2metrics/resources/skx_metrics.json @@ -202,7 +202,7 @@ }, { "name": "metric_UPI Transmit utilization_% (includes control)", - "expression": "100 * (([UNC_UPI_TxL_FLITS.ALL_DATA] + [UNC_UPI_TxL_FLITS.NON_DATA]) / 3) / ((((([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [const_thread_count])) / (([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [const_thread_count])) - [cstate_pkg/c6-residency/])) * ([UNC_UPI_CLOCKTICKS] - [UNC_UPI_L1_POWER_CYCLES])) * 5 / 6))", + "expression": "100 * (([UNC_UPI_TxL_FLITS.ALL_DATA] + [UNC_UPI_TxL_FLITS.NON_DATA]) / 3) / ((((([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [CONST_THREAD_COUNT])) / (([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [CONST_THREAD_COUNT])) - [cstate_pkg/c6-residency/])) * 
([UNC_UPI_CLOCKTICKS] - [UNC_UPI_L1_POWER_CYCLES])) * 5 / 6))", "origin": "perfspect" }, { @@ -284,12 +284,12 @@ }, { "name": "metric_TMA_Info_cycles_both_threads_active(%)", - "expression": "100 * ( (1 - ([CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE] / ([CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY] / 2)) ) if [const_thread_count] > 1 else 0)", + "expression": "100 * ( (1 - ([CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE] / ([CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY] / 2)) ) if [CONST_THREAD_COUNT] > 1 else 0)", "origin": "perfspect" }, { "name": "metric_TMA_Info_CoreIPC", - "expression": "[instructions] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "[instructions] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { @@ -298,7 +298,7 @@ }, { "name": "metric_TMA_..Frontend_Latency(%)", - "expression": "100 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] /[const_thread_count])", + "expression": "100 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] /[CONST_THREAD_COUNT])", "origin": "perfspect" }, { @@ -328,7 +328,7 @@ }, { "name": "metric_TMA_..Frontend_Bandwidth(%)", - "expression": "100 * ([IDQ_UOPS_NOT_DELIVERED.CORE] - 4 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]) / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))", + "expression": "100 * ([IDQ_UOPS_NOT_DELIVERED.CORE] - 4 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]) / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT]))", "origin": "perfspect" }, { @@ -416,17 +416,17 @@ }, { "name": "metric_TMA_......Ports_Utilized_0(%)", - "expression": "100 * (([UOPS_EXECUTED.CORE_CYCLES_NONE] / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.EXE_BOUND_0_PORTS]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "100 * (([UOPS_EXECUTED.CORE_CYCLES_NONE] / 2) if ([CONST_THREAD_COUNT] > 1) else [EXE_ACTIVITY.EXE_BOUND_0_PORTS]) / 
([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { "name": "metric_TMA_......Ports_Utilized_1(%)", - "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_1] - [UOPS_EXECUTED.CORE_CYCLES_GE_2]) / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.1_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_1] - [UOPS_EXECUTED.CORE_CYCLES_GE_2]) / 2) if ([CONST_THREAD_COUNT] > 1) else [EXE_ACTIVITY.1_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { "name": "metric_TMA_......Ports_Utilized_2(%)", - "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_2] - [UOPS_EXECUTED.CORE_CYCLES_GE_3]) / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.2_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_2] - [UOPS_EXECUTED.CORE_CYCLES_GE_3]) / 2) if ([CONST_THREAD_COUNT] > 1) else [EXE_ACTIVITY.2_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { @@ -460,7 +460,7 @@ }, { "name": "metric_TMA_..Microcode_Sequencer(%)", - "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])))", + "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])))", "origin": "perfspect" } ] \ No newline at end of file From 525bc5fa270c66ea0d50e2e91f35b8134c4300f0 Mon Sep 17 00:00:00 2001 From: Jason Harper Date: Wed, 26 Jun 2024 17:59:59 -0700 Subject: [PATCH 22/23] fix corrupt Excel report when value is NaN (#330) --- cmd/reporter/report_generator_xlsx.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/reporter/report_generator_xlsx.go b/cmd/reporter/report_generator_xlsx.go index 79705e4..bd43e75 100644 --- 
a/cmd/reporter/report_generator_xlsx.go +++ b/cmd/reporter/report_generator_xlsx.go @@ -6,6 +6,7 @@ package main import ( "fmt" + "math" "os" "path/filepath" "strconv" @@ -67,11 +68,10 @@ func renderExcelTable(tableHeaders []string, tableValues [][]string, f *excelize for _, header := range tableHeaders { // if possible, convert strings to floats before inserting into the sheet floatValue, err := strconv.ParseFloat(header, 64) - if err == nil { + if err == nil && !math.IsNaN(floatValue) { // if it's a number, right align it f.SetCellFloat(reportSheetName, cellName(col, row), floatValue, 1, 64) f.SetCellStyle(reportSheetName, cellName(col, row), cellName(col, row), boldAlignLeft) } else { - f.SetCellStr(reportSheetName, cellName(col, row), header) f.SetCellStyle(reportSheetName, cellName(col, row), cellName(col, row), bold) } @@ -85,7 +85,7 @@ func renderExcelTable(tableHeaders []string, tableValues [][]string, f *excelize for rowIdx, value := range rowValues { // if possible, convert strings to floats before inserting into the sheet floatValue, err := strconv.ParseFloat(value, 64) - if err == nil { + if err == nil && !math.IsNaN(floatValue) { // if it's a number, right align it f.SetCellFloat(reportSheetName, cellName(col, row), floatValue, 1, 64) f.SetCellStyle(reportSheetName, cellName(col, row), cellName(col, row), alignLeft) } else { From 10cc4f4a08b313468be3102b5234c243f3d304d7 Mon Sep 17 00:00:00 2001 From: Jason Harper Date: Thu, 27 Jun 2024 14:17:58 -0700 Subject: [PATCH 23/23] version and 2.11 release notes (#329) * version and 2.11 release notes * add bug fix note --- RELEASE_NOTES | 15 ++++++++++++++- version.txt | 2 +- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 215584b..fbd5c92 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -3,9 +3,22 @@ Intel® System Health Inspector (AKA svr-info) Fully Supported Platforms - Xeon Micro-Architectures: SRF,EMR,SPR,CPX,ICX,CLX,SKX,BDX,HSX -- Operating Systems: 
Ubuntu 16.04, 18.04, 20.04, 22.04, CentOS 7, Amazon Linux 2, Debian 11, RHEL 9, Rocky Linux 8 +- Operating Systems: Ubuntu 18.04, 20.04, 22.04, 24.04, CentOS 7, Amazon Linux 2, Debian 11, RHEL 9, Rocky Linux 8 Note: svr-info may work on other micro-architectures and Linux distributions, but has not been thoroughly tested +2.11.0 +Features Added +- Report Efficiency Latency Control (ELC) configuration on SRF and GNR +- Report PCIe address, NUMA node, and more for NVMe drives +- Add support for PMU profiling on AWS ICX and SPR VMs, and on SRF servers +- Report L3 size on GNR +- Processor TDP added to Summary field of Excel 'brief' report + +Bugs Fixed +- Fix turbo benchmark on systems with many cores, e.g., SRF +- Fix memory performance benchmark when run on systems with large huge page sizes +- Fix Excel report corruption when NaN values result from '-profile pmu' + 2.10.0 Features Added - Support for Sierra Forest Xeon CPUs diff --git a/version.txt b/version.txt index f161b5d..ed0edc8 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -2.10.0 \ No newline at end of file +2.11.0 \ No newline at end of file