diff --git a/CODE_OF_CONDUCT.MD b/CODE_OF_CONDUCT.md similarity index 100% rename from CODE_OF_CONDUCT.MD rename to CODE_OF_CONDUCT.md diff --git a/Makefile b/Makefile index 5627548..fac628a 100644 --- a/Makefile +++ b/Makefile @@ -121,11 +121,11 @@ test: format_check: @echo "Running gofmt -l to check for code formatting issues..." - @test -z $(shell gofmt -l -s internal/commandfile/ internal/core/ internal/cpu/ internal/progress/ internal/target/ cmd/orchestrator/ cmd/collector/ cmd/reporter/ cmd/pmu2metrics/ cmd/msrread/ cmd/msrwrite/) || { echo "[WARN] Formatting issues detected. Resolve with 'make format'"; exit 1; } + @test -z "$(shell gofmt -l -s ./)" || { echo "[WARN] Formatting issues detected. Resolve with 'make format'"; exit 1; } @echo "gofmt detected no issues" check: format_check format: - gofmt -l -w -s internal/commandfile/ internal/core/ internal/cpu/ internal/progress/ internal/target/ orchestrator/ collector/ reporter/ pmu2metrics/ rdmsr/ wrmsr/ + gofmt -l -w -s ./ diff --git a/README.md b/README.md index 0e80229..bcf84a4 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Data can be collected from a single remote target by providing the login credent ./svr-info -ip 10.100.222.123 -user fred -key ~/.ssh/id_rsa ``` ## Multiple Targets -Data can be collected from multiple remote targets by placing login credentials of the targets in a 'targets' file and then referencing that targets file on the svr-info command line. See the included [targets.example](src/orchestrator/targets.example) file for the required file format. +Data can be collected from multiple remote targets by placing login credentials of the targets in a 'targets' file and then referencing that targets file on the svr-info command line. See the included [targets.example](cmd/orchestrator/targets.example) file for the required file format. 
``` ./svr-info -targets ``` @@ -50,19 +50,13 @@ For example, Intel® Memory Latency Checker can be downloaded from here: [MLC](h We welcome bug reports, questions and feature requests. Please submit via Github Issues. ## Building svr-info Due to the large number of build dependencies required, a Docker container-based build environment is provided. Assuming your system has Docker installed (instructions not provided here), the following steps are required to build svr-info: -- `builder/build_docker_image` creates the docker image -- `builder/build` runs `make dist` in the container +- `builder/build` creates the necessary docker images and runs make in the container After a successful build, you will find the build output in the `dist` folder. -Other builder commands available: -- `builder/test` runs the automated tests in the container via `make test` -- `builder/shell` starts the container and provides a bash prompt useful for debugging build problems ### Incremental Builds -After a complete build using the build container, you can perform incremental builds directly on your host assuming dependencies are installed there. This can make the code/build/test cycle much quicker than rebuilding everything using the Docker container. You can look at the Dockerfile in the builder directory to get the build dependencies for everything or, more likely, you only need go(lang) so install the latest and get to work. +After a complete build using the build container, you can perform incremental builds directly on your host assuming dependencies are installed there. This can make the code/build/test cycle much quicker than rebuilding everything using the Docker container. -From the project's root directory, you can use the makefile. There are quite a few targets. Most useful may be `make apps`. This will build all the go-based apps. - -If you are working on a single go-based app. You can run `go build` in the app's source directory to build it. 
+If you are working on a single go-based app, you can run `go build` to build it. ### Including Additional Collection Tools In The Build Additional data collection tools can be built into the svr-info distribution by placing binaries in the bin directory before starting the build. diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 215584b..fbd5c92 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -3,9 +3,22 @@ Intel® System Health Inspector (AKA svr-info) Fully Supported Platforms - Xeon Micro-Architectures: SRF,EMR,SPR,CPX,ICX,CLX,SKX,BDX,HSX -- Operating Systems: Ubuntu 16.04, 18.04, 20.04, 22.04, CentOS 7, Amazon Linux 2, Debian 11, RHEL 9, Rocky Linux 8 +- Operating Systems: Ubuntu 18.04, 20.04, 22.04, 24.04, CentOS 7, Amazon Linux 2, Debian 11, RHEL 9, Rocky Linux 8 Note: svr-info may work on other micro-architectures and Linux distributions, but has not been thoroughly tested +2.11.0 +Features Added +- Report Efficiency Latency Control (ELC) configuration on SRF and GNR +- Report PCIe address, NUMA node, and more for NVME drives +- Add support for PMU profiling on AWS ICX and SPR VMs ... and SRF servers. 
+- Report L3 size on GNR +- Processor TDP added to Summary field of Excel 'brief' report + +Bugs Fixed +- Fix turbo benchmark on systems with many cores, e.g., SRF +- Fix memory performance benchmark when run on systems with large huge page sizes +- Fix Excel report corruption when NaN values result from '-profile pmu' + 2.10.0 Features Added - Support for Sierra Forest Xeon CPUs diff --git a/cmd/orchestrator/resources/collector_reports.yaml.tmpl b/cmd/orchestrator/resources/collector_reports.yaml.tmpl index dbd028a..5b5f852 100644 --- a/cmd/orchestrator/resources/collector_reports.yaml.tmpl +++ b/cmd/orchestrator/resources/collector_reports.yaml.tmpl @@ -60,8 +60,52 @@ commands: - label: maximum frequency command: cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq parallel: true - - label: lsblk -r -o - command: lsblk -r -o NAME,MODEL,SIZE,MOUNTPOINT,FSTYPE,RQ-SIZE,MIN-IO -e7 -e1 + - label: disk info + command: |- + echo "NAME|MODEL|SIZE|MOUNTPOINT|FSTYPE|RQ-SIZE|MIN-IO|FIRMWARE|ADDR|NUMA|LINKSPEED|LINKWIDTH|MAXLINKSPEED|MAXLINKWIDTH" + lsblk -r -o NAME,MODEL,SIZE,MOUNTPOINT,FSTYPE,RQ-SIZE,MIN-IO -e7 -e1 \ + | cut -d' ' -f1,2,3,4,5,6,7 --output-delimiter='|' \ + | while IFS='|' read -r name model size mountpoint fstype rqsize minio ; + do + # skip the lsblk output header + if [ "$name" = "NAME" ] ; then + continue + fi + fw="" + addr="" + numa="" + curlinkspeed="" + curlinkwidth="" + maxlinkspeed="" + maxlinkwidth="" + # replace \x20 with space in model + model=${model//\\x20/ } + # if name refers to an NVMe device, e.g., nvme0n1 - nvme99n99 + if [[ $name =~ ^(nvme[0-9]+)n[0-9]+$ ]]; then + # get the name without the namespace + nvme=${BASH_REMATCH[1]} + if [ -f /sys/block/"$name"/device/firmware_rev ] ; then + fw=$( cat /sys/block/"$name"/device/firmware_rev ) + fi + if [ -f /sys/block/"$name"/device/address ] ; then + addr=$( cat /sys/block/"$name"/device/address ) + fi + if [ -d "/sys/block/$name/device/${nvme}" ]; then + numa=$( cat 
/sys/block/"$name"/device/"${nvme}"/numa_node ) + curlinkspeed=$( cat /sys/block/"$name"/device/"${nvme}"/device/current_link_speed ) + curlinkwidth=$( cat /sys/block/"$name"/device/"${nvme}"/device/current_link_width ) + maxlinkspeed=$( cat /sys/block/"$name"/device/"${nvme}"/device/max_link_speed ) + maxlinkwidth=$( cat /sys/block/"$name"/device/"${nvme}"/device/max_link_width ) + elif [ -d "/sys/block/$name/device/device" ]; then + numa=$( cat /sys/block/"$name"/device/device/numa_node ) + curlinkspeed=$( cat /sys/block/"$name"/device/device/current_link_speed ) + curlinkwidth=$( cat /sys/block/"$name"/device/device/current_link_width ) + maxlinkspeed=$( cat /sys/block/"$name"/device/device/max_link_speed ) + maxlinkwidth=$( cat /sys/block/"$name"/device/device/max_link_width ) + fi + fi + echo "$name|$model|$size|$mountpoint|$fstype|$rqsize|$minio|$fw|$addr|$numa|$curlinkspeed|$curlinkwidth|$maxlinkspeed|$maxlinkwidth" + done parallel: true - label: df -h command: df -h @@ -199,19 +243,77 @@ commands: command: pcm-tpmi 2 0x18 -d -b 15:21 superuser: true parallel: true - - label: active idle utilization point + - label: efficiency latency control command: |- - msrwrite 0xb0 0x80000694 # must write this value to this MSR before reading 0xb1 - msrread -f 15:8 0xb1 # ACTIVE IDLE - UTILIZATION POINT - superuser: true - modprobe: msr - parallel: true - - label: active idle mesh frequency - command: |- - msrwrite 0xb0 0x80000694 # must write this value to this MSR before reading 0xb1 - msrread -f 7:0 0xb1 # ACTIVE IDLE - MESH FREQUENCY + # Script derived from bhs-power-mode script in Intel PCM repository + # Run the pcm-tpmi command to determine I/O and compute dies + output=$(pcm-tpmi 2 0x10 -d -b 26:26) + + # Parse the output to build lists of I/O and compute dies + io_dies=() + compute_dies=() + declare -A die_types + while read -r line; do + if [[ $line == *"instance 0"* ]]; then + die=$(echo "$line" | grep -oP 'entry \K[0-9]+') + if [[ $line == *"value 1"* ]]; 
then + die_types[$die]="IO" + io_dies+=("$die") + elif [[ $line == *"value 0"* ]]; then + die_types[$die]="Compute" + compute_dies+=("$die") + fi + fi + done <<< "$output" + + # Function to extract and calculate metrics from the value + extract_and_print_metrics() { + local value=$1 + local socket_id=$2 + local die=$3 + local die_type=${die_types[$die]} + + # Extract bits and calculate metrics + local min_ratio=$(( (value >> 15) & 0x7F )) + local max_ratio=$(( (value >> 8) & 0x7F )) + local eff_latency_ctrl_ratio=$(( (value >> 22) & 0x7F )) + local eff_latency_ctrl_low_threshold=$(( (value >> 32) & 0x7F )) + local eff_latency_ctrl_high_threshold=$(( (value >> 40) & 0x7F )) + local eff_latency_ctrl_high_threshold_enable=$(( (value >> 39) & 0x1 )) + + # Convert to MHz or percentage + min_ratio=$(( min_ratio * 100 )) + max_ratio=$(( max_ratio * 100 )) + eff_latency_ctrl_ratio=$(( eff_latency_ctrl_ratio * 100 )) + eff_latency_ctrl_low_threshold=$(( (eff_latency_ctrl_low_threshold * 100) / 127 )) + eff_latency_ctrl_high_threshold=$(( (eff_latency_ctrl_high_threshold * 100) / 127 )) + + # Print metrics + echo -n "$socket_id,$die,$die_type,$min_ratio,$max_ratio,$eff_latency_ctrl_ratio," + if [ $die_type == "IO" ] ; then + echo "$eff_latency_ctrl_low_threshold,$eff_latency_ctrl_high_threshold,$eff_latency_ctrl_high_threshold_enable" + else + echo ",," + fi + } + + # Print CSV header + echo "Socket,Die,Type,MIN_RATIO (MHz),MAX_RATIO (MHz),ELC_RATIO (MHz),ELC_LOW_THRESHOLD (%),ELC_HIGH_THRESHOLD (%),ELC_HIGH_THRESHOLD_ENABLE" + + # Iterate over all dies and run pcm-tpmi for each to get the metrics + for die in "${!die_types[@]}"; do + output=$(pcm-tpmi 2 0x18 -d -e "$die") + + # Parse the output and extract metrics for each socket + while read -r line; do + if [[ $line == *"Read value"* ]]; then + value=$(echo "$line" | grep -oP 'value \K[0-9]+') + socket_id=$(echo "$line" | grep -oP 'instance \K[0-9]+') + extract_and_print_metrics "$value" "$socket_id" "$die" + fi + done 
<<< "$output" + done superuser: true - modprobe: msr parallel: true - label: ipmitool sel time get command: LC_ALL=C ipmitool sel time get @@ -242,6 +344,10 @@ commands: superuser: true modprobe: msr parallel: true + - label: pmu driver version + command: dmesg | grep -A 1 "Intel PMU driver" | tail -1 | awk '{print $NF}' + superuser: true + parallel: true - label: lspci -vmm command: lspci -vmm parallel: true @@ -415,10 +521,15 @@ commands: - label: Memory MLC Loaded Latency Test command: |- # measure memory loaded latency + # need at least 2 GB (2,097,152 KB) of huge pages per NUMA node + min_kb=2097152 numa_nodes=$( lscpu | grep "NUMA node(s):" | awk '{print $3}' ) + size_huge_pages_kb=$( cat /proc/meminfo | grep Hugepagesize | awk '{print $2}' ) orig_num_huge_pages=$( cat /proc/sys/vm/nr_hugepages ) - new_num_huge_pages=$( echo "$numa_nodes * 1000" | bc ) - echo $new_num_huge_pages > /proc/sys/vm/nr_hugepages + needed_num_huge_pages=$( echo "$numa_nodes * $min_kb / $size_huge_pages_kb" | bc ) + if [ $needed_num_huge_pages -gt $orig_num_huge_pages ]; then + echo $needed_num_huge_pages > /proc/sys/vm/nr_hugepages + fi mlc --loaded_latency echo $orig_num_huge_pages > /proc/sys/vm/nr_hugepages modprobe: msr @@ -426,10 +537,15 @@ commands: - label: Memory MLC Bandwidth command: |- # measure memory bandwidth matrix + # need at least 2 GB (2,097,152 KB) of huge pages per NUMA node + min_kb=2097152 numa_nodes=$( lscpu | grep "NUMA node(s):" | awk '{print $3}' ) + size_huge_pages_kb=$( cat /proc/meminfo | grep Hugepagesize | awk '{print $2}' ) orig_num_huge_pages=$( cat /proc/sys/vm/nr_hugepages ) - new_num_huge_pages=$( echo "$numa_nodes * 1000" | bc ) - echo $new_num_huge_pages > /proc/sys/vm/nr_hugepages + needed_num_huge_pages=$( echo "$numa_nodes * $min_kb / $size_huge_pages_kb" | bc ) + if [ $needed_num_huge_pages -gt $orig_num_huge_pages ]; then + echo $needed_num_huge_pages > /proc/sys/vm/nr_hugepages + fi mlc --bandwidth_matrix echo $orig_num_huge_pages > 
/proc/sys/vm/nr_hugepages modprobe: msr @@ -451,7 +567,7 @@ commands: - label: CPU Turbo Test command: |- # measure tdp and all-core turbo frequency - ((turbostat -i 2 2>/dev/null &) ; stress-ng --cpu 1 -t 20s 2>&1 ; stress-ng --cpu 0 -t 60s 2>&1 ; pkill -9 -f turbostat) | awk '$0~"stress" {print $0} $1=="Package" || $1=="CPU" || $1=="Core" || $1=="Node" {if(f!=1) print $0;f=1} $1=="-" {print $0}' + ((turbostat --show 'Package','Core','Bzy_MHz','PkgWatt','PkgTmp' -i 2 &) ; stress-ng --cpu 1 -t 20s 2>&1 ; stress-ng --cpu 0 -t 60s 2>&1 ; pkill -9 -f turbostat) | awk '$0~"stress" {print $0} $1=="Package" || $1=="CPU" || $1=="Core" || $1=="Node" {if(f!=1) print $0;f=1} $1=="-" {print $0}' superuser: true modprobe: msr - label: CPU Idle diff --git a/cmd/pmu2metrics/event_defs.go b/cmd/pmu2metrics/event_defs.go index 70b78e1..818559c 100644 --- a/cmd/pmu2metrics/event_defs.go +++ b/cmd/pmu2metrics/event_defs.go @@ -39,7 +39,14 @@ func LoadEventGroups(eventDefinitionOverridePath string, metadata Metadata) (gro return } } else { - if file, err = resources.Open(filepath.Join("resources", fmt.Sprintf("%s_events.txt", strings.ToLower(metadata.Microarchitecture)[:3]))); err != nil { + uarch := strings.ToLower(metadata.Microarchitecture)[:3] + // use alternate events/metrics when TMA fixed counters are not supported + alternate := "" + if (uarch == "icx" || uarch == "spr" || uarch == "emr") && !metadata.FixedCounterTMASupported { + alternate = "_nofixedtma" + } + eventFileName := fmt.Sprintf("%s%s_events.txt", uarch, alternate) + if file, err = resources.Open(filepath.Join("resources", eventFileName)); err != nil { return } } @@ -56,11 +63,7 @@ func LoadEventGroups(eventDefinitionOverridePath string, metadata Metadata) (gro if event, err = parseEventDefinition(line[:len(line)-1]); err != nil { return } - var collectable bool - if collectable, err = isCollectableEvent(event, metadata); err != nil { - return - } - if collectable { + if isCollectableEvent(event, metadata) { group 
= append(group, event) } else { uncollectableEvents.Add(event.Name) @@ -80,20 +83,20 @@ func LoadEventGroups(eventDefinitionOverridePath string, metadata Metadata) (gro } // expand uncore groups for all uncore devices groups, err = expandUncoreGroups(groups, metadata) - // "fixed" PMU counters are not supported on (most) IaaS VMs, so we add a separate group - if !isUncoreSupported(metadata) { - group = GroupDefinition{EventDefinition{Raw: "cpu-cycles"}, EventDefinition{Raw: "instructions"}} - if metadata.RefCyclesSupported { - group = append(group, EventDefinition{Raw: "ref-cycles"}) - } - groups = append(groups, group) - group = GroupDefinition{EventDefinition{Raw: "cpu-cycles:k"}, EventDefinition{Raw: "instructions"}} - if metadata.RefCyclesSupported { - group = append(group, EventDefinition{Raw: "ref-cycles:k"}) - } - groups = append(groups, group) + // // "fixed" PMU counters are not supported on (most) IaaS VMs, so we add a separate group + // if !isUncoreSupported(metadata) { + // group = GroupDefinition{EventDefinition{Raw: "cpu-cycles"}, EventDefinition{Raw: "instructions"}} + // if metadata.RefCyclesSupported { + // group = append(group, EventDefinition{Raw: "ref-cycles"}) + // } + // groups = append(groups, group) + // group = GroupDefinition{EventDefinition{Raw: "cpu-cycles:k"}, EventDefinition{Raw: "instructions"}} + // if metadata.RefCyclesSupported { + // group = append(group, EventDefinition{Raw: "ref-cycles:k"}) + // } + // groups = append(groups, group) - } + // } if uncollectableEvents.Cardinality() != 0 && gCmdLineArgs.verbose { log.Printf("Uncollectable events: %s", uncollectableEvents) } @@ -113,16 +116,14 @@ func isUncoreSupported(metadata Metadata) (supported bool) { } // isCollectableEvent confirms if given event can be collected on the platform -func isCollectableEvent(event EventDefinition, metadata Metadata) (collectable bool, err error) { - collectable = true - // TMA - if !metadata.TMASupported && (event.Name == "TOPDOWN.SLOTS" || 
strings.HasPrefix(event.Name, "PERF_METRICS.")) { - collectable = false - return +func isCollectableEvent(event EventDefinition, metadata Metadata) bool { + // fixed-counter TMA + if !metadata.FixedCounterTMASupported && (event.Name == "TOPDOWN.SLOTS" || strings.HasPrefix(event.Name, "PERF_METRICS.")) { + return false } // short-circuit for cpu events if event.Device == "cpu" && !strings.HasPrefix(event.Name, "OCR") { - return + return true } // short-circuit off-core response events if event.Device == "cpu" && @@ -130,15 +131,14 @@ func isCollectableEvent(event EventDefinition, metadata Metadata) (collectable b isUncoreSupported(metadata) && !(gCmdLineArgs.scope == ScopeProcess) && !(gCmdLineArgs.scope == ScopeCgroup) { - return + return true } // exclude uncore events when // - their corresponding device is not found // - not in system-wide collection scope if event.Device != "cpu" && event.Device != "" { if gCmdLineArgs.scope == ScopeProcess || gCmdLineArgs.scope == ScopeCgroup { - collectable = false - return + return false } deviceExists := false for uncoreDeviceName := range metadata.DeviceIDs { @@ -148,33 +148,29 @@ func isCollectableEvent(event EventDefinition, metadata Metadata) (collectable b } } if !deviceExists { - collectable = false + return false } else if !strings.Contains(event.Raw, "umask") && !strings.Contains(event.Raw, "event") { - collectable = false + return false } - return + return true } // if we got this far, event.Device is empty // is ref-cycles supported? 
if !metadata.RefCyclesSupported && strings.Contains(event.Name, "ref-cycles") { - collectable = false - return + return false } // no uncore means we're on a VM where cpu fixed cycles are likely not supported - if strings.Contains(event.Name, "cpu-cycles") && !isUncoreSupported(metadata) { - collectable = false - return - } + // if strings.Contains(event.Name, "cpu-cycles") && !isUncoreSupported(metadata) { + // return false + // } // no cstate and power events when collecting at process or cgroup scope if (gCmdLineArgs.scope == ScopeProcess || gCmdLineArgs.scope == ScopeCgroup) && (strings.Contains(event.Name, "cstate_") || strings.Contains(event.Name, "power/energy")) { - collectable = false - return + return false } // finally, if it isn't in the perf list output, it isn't collectable name := strings.Split(event.Name, ":")[0] - collectable = strings.Contains(metadata.PerfSupportedEvents, name) - return + return strings.Contains(metadata.PerfSupportedEvents, name) } // parseEventDefinition parses one line from the event definition file into a representative structure diff --git a/cmd/pmu2metrics/metadata.go b/cmd/pmu2metrics/metadata.go index 3cab212..3323827 100644 --- a/cmd/pmu2metrics/metadata.go +++ b/cmd/pmu2metrics/metadata.go @@ -28,23 +28,30 @@ import ( // Metadata is the representation of the platform's state and capabilities type Metadata struct { - CoresPerSocket int `yaml:"CoresPerSocket"` - CPUSocketMap map[int]int - DeviceIDs map[string][]int `yaml:"DeviceIDs"` - Microarchitecture string `yaml:"Microarchitecture"` - ModelName string - PerfSupportedEvents string `yaml:"PerfSupportedEvents"` - RefCyclesSupported bool `yaml:"RefCyclesSupported"` - SocketCount int `yaml:"SocketCount"` - ThreadsPerCore int `yaml:"ThreadsPerCore"` - TMASupported bool `yaml:"TMASupported"` - TSC int `yaml:"TSC"` - TSCFrequencyHz int `yaml:"TSCFrequencyHz"` + CoresPerSocket int `yaml:"CoresPerSocket"` + CPUSocketMap map[int]int + DeviceIDs map[string][]int 
`yaml:"DeviceIDs"` + FixedCounterTMASupported bool `yaml:"FixedCounterTMASupported"` + Microarchitecture string `yaml:"Microarchitecture"` + ModelName string + PerfSupportedEvents string `yaml:"PerfSupportedEvents"` + PMUDriverVersion string `yaml:"PMUDriverVersion"` + RefCyclesSupported bool `yaml:"RefCyclesSupported"` + SocketCount int `yaml:"SocketCount"` + ThreadsPerCore int `yaml:"ThreadsPerCore"` + TSC int `yaml:"TSC"` + TSCFrequencyHz int `yaml:"TSCFrequencyHz"` } // LoadMetadata - populates and returns a Metadata structure containing state of the // system. func LoadMetadata(perfPath string) (metadata Metadata, err error) { + // PMU driver version + metadata.PMUDriverVersion, err = getPMUDriverVersion() + if err != nil { + err = fmt.Errorf("failed to retrieve PMU driver version: %v", err) + return + } // reduce startup time by running the three perf commands in their own threads while // the rest of the metadata is being collected slowFuncChannel := make(chan error) @@ -68,15 +75,15 @@ func LoadMetadata(perfPath string) (metadata Metadata, err error) { } slowFuncChannel <- err }() - // TMA + // Fixed-counter TMA events go func() { var err error var output string - if metadata.TMASupported, output, err = getTMASupported(perfPath); err != nil { + if metadata.FixedCounterTMASupported, output, err = getFixedCounterTMASupported(perfPath); err != nil { err = fmt.Errorf("failed to determine if TMA is supported: %v", err) } - if !metadata.TMASupported && gCmdLineArgs.verbose { - log.Printf("TMA not supported:\n%s\n", output) + if !metadata.FixedCounterTMASupported && gCmdLineArgs.verbose { + log.Printf("TMA fixed counter not supported:\n%s\n", output) } slowFuncChannel <- err }() @@ -178,7 +185,8 @@ func (md Metadata) String() string { "TSC Frequency (Hz): %d, "+ "TSC: %d, "+ "ref-cycles supported: %t, "+ - "TMA events supported: %t, ", + "Fixed Counter TMA events supported: %t, "+ + "PMU Driver version: %s, ", md.ModelName, md.Microarchitecture, md.SocketCount, @@ 
-187,7 +195,8 @@ func (md Metadata) String() string { md.TSCFrequencyHz, md.TSC, md.RefCyclesSupported, - md.TMASupported) + md.FixedCounterTMASupported, + md.PMUDriverVersion) for deviceName, deviceIds := range md.DeviceIDs { var ids []string for _, id := range deviceIds { @@ -294,8 +303,9 @@ func getRefCyclesSupported(perfPath string) (supported bool, output string, err return } -// getTMASupported - checks if the TMA events are supported by perf -func getTMASupported(perfPath string) (supported bool, output string, err error) { +// getFixedCounterTMASupported - checks if the fixed TMA counter events are supported by perf +// We check for the TOPDOWN.SLOTS and PERF_METRICS.BAD_SPECULATION events as an indicator of support for fixed TMA counter support +func getFixedCounterTMASupported(perfPath string) (supported bool, output string, err error) { cmd := exec.Command(perfPath, "stat", "-a", "-e", "'{cpu/event=0x00,umask=0x04,period=10000003,name='TOPDOWN.SLOTS'/,cpu/event=0x00,umask=0x81,period=10000003,name='PERF_METRICS.BAD_SPECULATION'/}'", "sleep", ".1") var outBuffer, errBuffer bytes.Buffer cmd.Stderr = &errBuffer @@ -307,22 +317,39 @@ func getTMASupported(perfPath string) (supported bool, output string, err error) err = nil return } - // event values being equal is 2nd indication that these events are not (properly) supported + // event values being zero or equal to each other is 2nd indication that these events are not (properly) supported output = errBuffer.String() vals := make(map[string]float64) lines := strings.Split(output, "\n") // example line: " 784333932 TOPDOWN.SLOTS (59.75%)" re := regexp.MustCompile(`\s+(\d+)\s+(\w*\.*\w*)\s+.*`) for _, line := range lines { + // count may include commas as thousands separators, remove them + line := strings.ReplaceAll(line, ",", "") match := re.FindStringSubmatch(line) if match != nil { vals[match[2]], err = strconv.ParseFloat(match[1], 64) if err != nil { - return + // this should never happen + panic("failed 
to parse float") } } } - supported = !(vals["TOPDOWN.SLOTS"] == vals["PERF_METRICS.BAD_SPECULATION"]) + topDownSlots := vals["TOPDOWN.SLOTS"] + badSpeculation := vals["PERF_METRICS.BAD_SPECULATION"] + supported = topDownSlots != badSpeculation && topDownSlots != 0 && badSpeculation != 0 + return +} + +func getPMUDriverVersion() (version string, err error) { + cmd := exec.Command("sh", "-c", `dmesg | grep -A 1 "Intel PMU driver" | tail -1 | awk '{print $NF}'`) + var outBuffer, errBuffer bytes.Buffer + cmd.Stderr = &errBuffer + cmd.Stdout = &outBuffer + if err = cmd.Run(); err != nil { + return + } + version = strings.TrimSpace(outBuffer.String()) return } diff --git a/cmd/pmu2metrics/metric_defs.go b/cmd/pmu2metrics/metric_defs.go index c733bad..434ea67 100644 --- a/cmd/pmu2metrics/metric_defs.go +++ b/cmd/pmu2metrics/metric_defs.go @@ -39,7 +39,14 @@ func LoadMetricDefinitions(metricDefinitionOverridePath string, selectedMetrics return } } else { - if bytes, err = resources.ReadFile(filepath.Join("resources", fmt.Sprintf("%s_metrics.json", strings.ToLower(metadata.Microarchitecture)[:3]))); err != nil { + uarch := strings.ToLower(metadata.Microarchitecture)[:3] + // use alternate events/metrics when TMA fixed counters are not supported + alternate := "" + if (uarch == "icx" || uarch == "spr" || uarch == "emr") && !metadata.FixedCounterTMASupported { + alternate = "_nofixedtma" + } + metricFileName := fmt.Sprintf("%s%s_metrics.json", uarch, alternate) + if bytes, err = resources.ReadFile(filepath.Join("resources", metricFileName)); err != nil { return } } @@ -112,7 +119,7 @@ func ConfigureMetrics(metrics []MetricDefinition, evaluatorFunctions map[string] metrics[metricIdx].Expression = strings.ReplaceAll(metrics[metricIdx].Expression, "[CHAS_PER_SOCKET]", chasPerSocket) metrics[metricIdx].Expression = strings.ReplaceAll(metrics[metricIdx].Expression, "[SOCKET_COUNT]", socketCount) metrics[metricIdx].Expression = strings.ReplaceAll(metrics[metricIdx].Expression, 
"[HYPERTHREADING_ON]", hyperThreadingOn) - metrics[metricIdx].Expression = strings.ReplaceAll(metrics[metricIdx].Expression, "[const_thread_count]", threadsPerCore) + metrics[metricIdx].Expression = strings.ReplaceAll(metrics[metricIdx].Expression, "[CONST_THREAD_COUNT]", threadsPerCore) // get a list of the variables in the expression metrics[metricIdx].Variables = make(map[string]int) expressionIdx := 0 diff --git a/cmd/pmu2metrics/resources/bdx_metrics.json b/cmd/pmu2metrics/resources/bdx_metrics.json index 7e46419..cef23e6 100644 --- a/cmd/pmu2metrics/resources/bdx_metrics.json +++ b/cmd/pmu2metrics/resources/bdx_metrics.json @@ -346,22 +346,22 @@ }, { "name": "metric_TMA_......Ports_Utilized_0(%)", - "expression": "100 * (([UOPS_EXECUTED.CORE_i1_c1] / [const_thread_count]) if ([const_thread_count] > 1) else ([RS_EVENTS.EMPTY_CYCLES] if ([CYCLE_ACTIVITY.STALLS_TOTAL] - ([IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])) ) > 0.1 else 0)) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]) ", + "expression": "100 * (([UOPS_EXECUTED.CORE_i1_c1] / [CONST_THREAD_COUNT]) if ([CONST_THREAD_COUNT] > 1) else ([RS_EVENTS.EMPTY_CYCLES] if ([CYCLE_ACTIVITY.STALLS_TOTAL] - ([IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])) ) > 0.1 else 0)) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT]) ", "origin": "perfspect" }, { "name": "metric_TMA_......Ports_Utilized_1(%)", - "expression": "100 * (([UOPS_EXECUTED.CORE_c1] - [UOPS_EXECUTED.CORE_c2]) / [const_thread_count]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "100 * (([UOPS_EXECUTED.CORE_c1] - [UOPS_EXECUTED.CORE_c2]) / [CONST_THREAD_COUNT]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { "name": "metric_TMA_......Ports_Utilized_2(%)", - "expression": "100 * (([UOPS_EXECUTED.CORE_c2] - [UOPS_EXECUTED.CORE_c3]) / [const_thread_count]) 
/ ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "100 * (([UOPS_EXECUTED.CORE_c2] - [UOPS_EXECUTED.CORE_c3]) / [CONST_THREAD_COUNT]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { "name": "metric_TMA_......Ports_Utilized_3m(%)", - "expression": "100 * ([UOPS_EXECUTED.CORE_c3] / [const_thread_count]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "100 * ([UOPS_EXECUTED.CORE_c3] / [CONST_THREAD_COUNT]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { @@ -390,7 +390,7 @@ }, { "name": "metric_TMA_..Microcode_Sequencer(%)", - "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] )/ (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))", + "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] )/ (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT]))", "origin": "perfspect" } ] \ No newline at end of file diff --git a/cmd/pmu2metrics/resources/clx_metrics.json b/cmd/pmu2metrics/resources/clx_metrics.json index 9974e96..f167036 100644 --- a/cmd/pmu2metrics/resources/clx_metrics.json +++ b/cmd/pmu2metrics/resources/clx_metrics.json @@ -202,7 +202,7 @@ }, { "name": "metric_UPI Transmit utilization_% (includes control)", - "expression": "100 * (([UNC_UPI_TxL_FLITS.ALL_DATA] + [UNC_UPI_TxL_FLITS.NON_DATA]) / 3) / ((((([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [const_thread_count])) / (([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [const_thread_count])) - [cstate_pkg/c6-residency/])) * ([UNC_UPI_CLOCKTICKS] - [UNC_UPI_L1_POWER_CYCLES])) * 5 / 6))", + "expression": "100 * (([UNC_UPI_TxL_FLITS.ALL_DATA] + [UNC_UPI_TxL_FLITS.NON_DATA]) / 3) / ((((([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [CONST_THREAD_COUNT])) / (([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [CONST_THREAD_COUNT])) - [cstate_pkg/c6-residency/])) * ([UNC_UPI_CLOCKTICKS] - [UNC_UPI_L1_POWER_CYCLES])) * 5 / 6))", 
"origin": "perfspect" }, { @@ -284,12 +284,12 @@ }, { "name": "metric_TMA_Info_cycles_both_threads_active(%)", - "expression": "100 * ( (1 - ([CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE] / ([CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY] / 2)) ) if [const_thread_count] > 1 else 0)", + "expression": "100 * ( (1 - ([CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE] / ([CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY] / 2)) ) if [CONST_THREAD_COUNT] > 1 else 0)", "origin": "perfspect" }, { "name": "metric_TMA_Info_CoreIPC", - "expression": "[instructions] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "[instructions] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { @@ -298,7 +298,7 @@ }, { "name": "metric_TMA_..Frontend_Latency(%)", - "expression": "100 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] /[const_thread_count])", + "expression": "100 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] /[CONST_THREAD_COUNT])", "origin": "perfspect" }, { @@ -328,7 +328,7 @@ }, { "name": "metric_TMA_..Frontend_Bandwidth(%)", - "expression": "100 * ([IDQ_UOPS_NOT_DELIVERED.CORE] - 4 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]) / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))", + "expression": "100 * ([IDQ_UOPS_NOT_DELIVERED.CORE] - 4 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]) / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT]))", "origin": "perfspect" }, { @@ -416,17 +416,17 @@ }, { "name": "metric_TMA_......Ports_Utilized_0(%)", - "expression": "100 * (([UOPS_EXECUTED.CORE_CYCLES_NONE] / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.EXE_BOUND_0_PORTS]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "100 * (([UOPS_EXECUTED.CORE_CYCLES_NONE] / 2) if ([CONST_THREAD_COUNT] > 1) else [EXE_ACTIVITY.EXE_BOUND_0_PORTS]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { 
"name": "metric_TMA_......Ports_Utilized_1(%)", - "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_1] - [UOPS_EXECUTED.CORE_CYCLES_GE_2]) / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.1_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_1] - [UOPS_EXECUTED.CORE_CYCLES_GE_2]) / 2) if ([CONST_THREAD_COUNT] > 1) else [EXE_ACTIVITY.1_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { "name": "metric_TMA_......Ports_Utilized_2(%)", - "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_2] - [UOPS_EXECUTED.CORE_CYCLES_GE_3]) / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.2_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_2] - [UOPS_EXECUTED.CORE_CYCLES_GE_3]) / 2) if ([CONST_THREAD_COUNT] > 1) else [EXE_ACTIVITY.2_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { @@ -460,7 +460,7 @@ }, { "name": "metric_TMA_..Microcode_Sequencer(%)", - "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])))", + "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])))", "origin": "perfspect" } ] \ No newline at end of file diff --git a/cmd/pmu2metrics/resources/emr_nofixedtma_events.txt b/cmd/pmu2metrics/resources/emr_nofixedtma_events.txt new file mode 100644 index 0000000..d767656 --- /dev/null +++ b/cmd/pmu2metrics/resources/emr_nofixedtma_events.txt @@ -0,0 +1,138 @@ +########################################################################################################### +# Copyright (C) 2021-2023 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause 
+########################################################################################################### + +# Sapphire Rapids and Emerald Rapids event list for platforms that don't have support for the fixed counter +# TMA events, e.g., some AWS VMs. +# Note that there are no more than 10 events per group. On these same platforms, the cpu-cycles fixed +# counter is not supported so a general purpose counter will be used. + +cpu/event=0x51,umask=0x01,period=100003,name='L1D.REPLACEMENT'/, +cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, +cpu/event=0xd1,umask=0x01,period=1000003,name='MEM_LOAD_RETIRED.L1_HIT'/, +cpu/event=0x25,umask=0x1f,period=100003,name='L2_LINES_IN.ALL'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xd1,umask=0x10,period=100021,name='MEM_LOAD_RETIRED.L2_MISS'/, +cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/, +cpu/event=0x11,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, +cpu/event=0x47,umask=0x03,cmask=0x03,period=1000003,name='MEMORY_ACTIVITY.STALLS_L1D_MISS'/, +cpu/event=0xa6,umask=0x40,cmask=0x02,period=1000003,name='EXE_ACTIVITY.BOUND_ON_STORES'/, +cpu/event=0xa6,umask=0x21,cmask=0x05,period=2000003,name='EXE_ACTIVITY.BOUND_ON_LOADS'/, +cpu/event=0xad,umask=0x10,period=1000003,name='INT_MISC.UOP_DROPPING'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x12,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, +cpu/event=0x12,umask=0x04,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, +cpu/event=0x13,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, +cpu/event=0xd1,umask=0x02,period=200003,name='MEM_LOAD_RETIRED.L2_HIT'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x47,umask=0x09,cmask=0x09,period=1000003,name='MEMORY_ACTIVITY.STALLS_L3_MISS'/, +cpu/event=0x80,umask=0x04,period=500009,name='ICACHE_DATA.STALLS'/, +cpu/event=0x83,umask=0x04,period=200003,name='ICACHE_TAG.STALLS'/, +cpu-cycles, +ref-cycles, 
+instructions; + +# events for TMA metrics without fixed counter support (group 1) +cpu/event=0x9c,umask=0x01,name='IDQ_UOPS_NOT_DELIVERED.CORE'/, +cpu/event=0xa4,umask=0x01,name='TOPDOWN.SLOTS_P'/, +cpu/event=0x9c,umask=0x01,cmask=0x06,name='IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE'/, +cpu/event=0xc2,umask=0x02,name='UOPS_RETIRED.SLOTS'/, +cpu/event=0xae,umask=0x01,name='UOPS_ISSUED.ANY'/, +cpu/event=0x87,umask=0x01,name='DECODE.LCP'/, +cpu/event=0x61,umask=0x02,name='DSB2MITE_SWITCHES.PENALTY_CYCLES'/, +cpu-cycles, +ref-cycles, +instructions; + +# events for TMA metrics without fixed counter support (group 2) +cpu/event=0xa4,umask=0x02,name='TOPDOWN.BACKEND_BOUND_SLOTS'/, +cpu/event=0xa4,umask=0x08,name='TOPDOWN.BR_MISPREDICT_SLOTS'/, +cpu/event=0xa4,umask=0x10,name='TOPDOWN.MEMORY_BOUND_SLOTS'/, +cpu/event=0xc2,umask=0x01,name='UOPS_RETIRED.HEAVY'/, +cpu/event=0xe5,umask=0x03,name='MEM_UOP_RETIRED.ANY'/, +cpu/event=0xc0,umask=0x10,name='INST_RETIRED.MACRO_FUSED'/, +cpu/event=0xc4,umask=0x00,name='BR_INST_RETIRED.ALL_BRANCHES'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x47,umask=0x03,cmask=0x03,period=1000003,name='MEMORY_ACTIVITY.STALLS_L1D_MISS'/, +cpu/event=0x47,umask=0x05,cmask=0x05,period=1000003,name='MEMORY_ACTIVITY.STALLS_L2_MISS'/, +cpu/event=0xb0,umask=0x09,cmask=0x01,period=1000003,name='ARITH.DIV_ACTIVE'/, +cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/, +cpu/event=0xd0,umask=0x21,cmask=0x00,period=1000003,name='MEM_INST_RETIRED.LOCK_LOADS'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x79,umask=0x04,cmask=0x01,period=2000003,name='IDQ.MITE_CYCLES_ANY'/, +cpu/event=0x79,umask=0x04,cmask=0x06,period=2000003,name='IDQ.MITE_CYCLES_OK'/, +cpu/event=0x79,umask=0x08,cmask=0x01,period=2000003,name='IDQ.DSB_CYCLES_ANY'/, +cpu/event=0x79,umask=0x08,cmask=0x06,period=2000003,name='IDQ.DSB_CYCLES_OK'/, +cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/,
+cpu/event=0xb7,umask=0x02,period=2000003,name='EXE.AMX_BUSY'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x79,umask=0x08,cmask=0x00,period=2000003,name='IDQ.DSB_UOPS'/, +cpu/event=0x79,umask=0x04,period=100003,name='IDQ.MITE_UOPS'/, +cpu/event=0x79,umask=0x20,period=100003,name='IDQ.MS_UOPS'/, +cpu/event=0xa8,umask=0x01,cmask=0x00,period=2000003,name='LSD.UOPS'/, +cpu-cycles:k, +ref-cycles:k, +instructions:k; + +#OCR +cpu/event=0x2a,umask=0x01,offcore_rsp=0x104004477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x730004477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x1030004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x830004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/; + +#C6 +cstate_core/c6-residency/; +cstate_pkg/c6-residency/; + +#UPI +upi/event=0x02,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/; + +#CHA (Cache) +cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/, +cha/event=0x35,umask=0xc8177e01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE'/, +cha/event=0x36,umask=0xc8177e01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE'/; + +cha/event=0x35,umask=0xC816FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL'/, +cha/event=0x36,umask=0xc816fe01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL'/, +cha/event=0x35,umask=0xC896FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL'/, +cha/event=0x35,umask=0xC8977E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE'/; + +cha/event=0x35,umask=0xccd7fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA'/, +cha/event=0x35,umask=0xc817fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD'/, +cha/event=0x35,umask=0xc897fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF'/, 
+cha/event=0x36,umask=0xC817fe01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD'/; + +#CHA (IO Bandwidth) +cha/event=0x35,umask=0xc8f3ff04,name='UNC_CHA_TOR_INSERTS.IO_PCIRDCUR'/, +cha/event=0x35,umask=0xCC43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOM'/, +cha/event=0x35,umask=0xCD43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR'/, +cha/event=0x01,umask=0x00,name='UNC_CHA_CLOCKTICKS'/; + +#IMC (memory read/writes) +imc/event=0x05,umask=0xcf,name='UNC_M_CAS_COUNT.RD'/, +imc/event=0x05,umask=0xf0,name='UNC_M_CAS_COUNT.WR'/; + +#power +power/energy-pkg/, +power/energy-ram/; diff --git a/cmd/pmu2metrics/resources/emr_nofixedtma_metrics.json b/cmd/pmu2metrics/resources/emr_nofixedtma_metrics.json new file mode 100644 index 0000000..587d6b1 --- /dev/null +++ b/cmd/pmu2metrics/resources/emr_nofixedtma_metrics.json @@ -0,0 +1,349 @@ +[ + { + "name": "metric_CPU operating frequency (in GHz)", + "expression": "(([cpu-cycles] / [ref-cycles] * [SYSTEM_TSC_FREQ]) / 1000000000)" + }, + { + "name": "metric_CPU utilization %", + "expression": "100 * [ref-cycles] / [TSC]" + }, + { + "name": "metric_CPU utilization% in kernel mode", + "expression": "100 * [ref-cycles:k] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_CPI", + "name-txn": "metric_cycles per txn", + "expression": "[cpu-cycles] / [instructions]", + "expression-txn": "[cpu-cycles] / [TXN]" + }, + { + "name": "metric_kernel_CPI", + "name-txn": "metric_kernel_cycles per txn", + "expression": "[cpu-cycles:k] / [instructions:k]", + "expression-txn": "[cpu-cycles:k] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_IPC", + "name-txn": "metric_txn per cycle", + "expression": "[instructions] / [cpu-cycles]", + "expression-txn": "[TXN] / [cpu-cycles]", + "origin": "perfspect" + }, + { + "name": "metric_giga_instructions_per_sec", + "expression": "[instructions] / 1000000000", + "origin": "perfspect" + }, + { + "name": "metric_locks retired per instr", + "name-txn": "metric_locks retired per txn", + "expression": 
"[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", + "origin": "perfmon website" + }, + { + "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "expression": "[L1D.REPLACEMENT] / [instructions]", + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" + }, + { + "name": "metric_L1D demand data read hits per instr", + "name-txn": "metric_L1D demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" + }, + { + "name": "metric_L1-I code read misses (w/ prefetches) per instr", + "name-txn": "metric_L1I code read misses (includes prefetches) per txn", + "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", + "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" + }, + { + "name": "metric_L2 demand data read hits per instr", + "name-txn": "metric_L2 demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" + }, + { + "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[L2_LINES_IN.ALL] / [instructions]", + "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" + }, + { + "name": "metric_L2 demand data read MPI", + "name-txn": "metric_L2 demand data read misses per txn", + "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" + }, + { + "name": "metric_L2 demand code MPI", + "name-txn": "metric_L2 demand code misses per txn", + "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", + "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" + }, + { + "name": "metric_LLC code read MPI (demand+prefetch)", + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": 
"[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [instructions]", + "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [TXN]" + }, + { + "name": "metric_LLC data read MPI (demand+prefetch)", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" + }, + { + "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_Average LLC demand data read miss latency (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)", + "expression": "( 1000000000 * 
([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_UPI Data transmit BW (MB/sec) (only data)", + "expression": "([UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9.0) / 1000000) / 1" + }, + { + "name": "metric_package power (watts)", + "expression": "[power/energy-pkg/]", + "origin": "perfspect" + }, + { + "name": "metric_DRAM power (watts)", + "expression": "[power/energy-ram/]", + "origin": "perfspect" + }, + { + "name": "metric_core c6 residency %", + "expression": "100 * [cstate_core/c6-residency/] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_package c6 residency %", + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_% Uops delivered from decoded Icache (DSB)", + "expression": "100 * ([IDQ.DSB_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )" + }, + { + "name": "metric_% Uops delivered from legacy decode pipeline (MITE)", + "expression": "100 * ([IDQ.MITE_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )" + }, + { + "name": "metric_core initiated local dram read bandwidth (MB/sec)", + "expression": "([OCR.READS_TO_CORE.LOCAL_DRAM] + [OCR.HWPF_L3.L3_MISS_LOCAL]) * 64 / 1000000", + "origin": "perfspect" + }, + { + "name": "metric_core initiated remote dram read bandwidth (MB/sec)", + "expression": "([OCR.READS_TO_CORE.REMOTE_DRAM] + [OCR.HWPF_L3.REMOTE]) * 64 / 1000000", + "origin": "perfspect" + }, + { + "name": "metric_memory bandwidth read (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.RD] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth write (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.WR] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth total (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000) / 1" + }, + { + "name": 
"metric_ITLB (2nd level) MPI", + "name-txn": "metric_ITLB (2nd level) misses per txn", + "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) load MPI", + "name-txn": "metric_DTLB (2nd level) load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) 2MB large page load MPI", + "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) store MPI", + "name-txn": "metric_DTLB (2nd level) store misses per txn", + "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_NUMA %_Reads addressed to local DRAM", + "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])" + }, + { + "name": "metric_NUMA %_Reads addressed to remote DRAM", + "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])" + }, + { + "name": "metric_uncore frequency GHz", + "expression": "([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_writes (MB/sec)", + "expression": 
"([UNC_CHA_TOR_INSERTS.IO_PCIRDCUR] * 64 / 1000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_reads (MB/sec)", + "expression": "(([UNC_CHA_TOR_INSERTS.IO_ITOM] + [UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_TMA_Frontend_Bound(%)", + "expression": "100 * ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Fetch_Latency(%)", + "expression": "100 * ( ( [IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] * ( 6 ) - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_....ICache_Misses(%)", + "expression": "100 * ( [ICACHE_DATA.STALLS] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....ITLB_Misses(%)", + "expression": "100 * ( [ICACHE_TAG.STALLS] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....MS_Switches(%)", + "expression": "100 * ( ( 3 ) * [UOPS_RETIRED.MS:c1:e1] / ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....LCP(%)", + "expression": "100 * ( [DECODE.LCP] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....DSB_Switches(%)", + "expression": "100 * ( [DSB2MITE_SWITCHES.PENALTY_CYCLES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Fetch_Bandwidth(%)", + "expression": "100 * ( max( 0 , ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) - ( ( [IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] * ( 6 ) - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....MITE(%)", + "expression": "100 * ( ( [IDQ.MITE_CYCLES_ANY] - [IDQ.MITE_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, + { + "name": "metric_TMA_....DSB(%)", + "expression": "100 * ( ( [IDQ.DSB_CYCLES_ANY] - [IDQ.DSB_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, + { + "name": "metric_TMA_Bad_Speculation(%)", + "expression": "100 * ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - 
[INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) )" + }, + { + "name": "metric_TMA_..Branch_Mispredicts(%)", + "expression": "100 * ( [TOPDOWN.BR_MISPREDICT_SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Machine_Clears(%)", + "expression": "100 * ( max( 0 , ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) ) - ( [TOPDOWN.BR_MISPREDICT_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_Backend_Bound(%)", + "expression": "100 * ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Memory_Bound(%)", + "expression": "100 * ( [TOPDOWN.MEMORY_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_....L1_Bound(%)", + "expression": "100 * ( max( ( [EXE_ACTIVITY.BOUND_ON_LOADS] - [MEMORY_ACTIVITY.STALLS_L1D_MISS] ) / ( [cpu-cycles] ) , 0 ) )" + }, + { + "name": "metric_TMA_....L2_Bound(%)", + "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L1D_MISS] - [MEMORY_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....L3_Bound(%)", + "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L2_MISS] - [MEMORY_ACTIVITY.STALLS_L3_MISS] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....DRAM_Bound(%)", + "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L3_MISS] / ( [cpu-cycles] ) ) )" + }, + { + "name": "metric_TMA_....Store_Bound(%)", + "expression": "100 * ( [EXE_ACTIVITY.BOUND_ON_STORES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Core_Bound(%)", + "expression": "100 * ( max( 0 , ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [TOPDOWN.MEMORY_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....Divider(%)", + "expression": 
"100 * ( [ARITH.DIV_ACTIVE] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....AMX_Busy(%)", + "expression": "100 * ( [EXE.AMX_BUSY] / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) )" + }, + { + "name": "metric_TMA_Retiring(%)", + "expression": "100 * ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Light_Operations(%)", + "expression": "100 * ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....Memory_Operations(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) ) * [MEM_UOP_RETIRED.ANY] / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_....Fused_Instructions(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) ) * [INST_RETIRED.MACRO_FUSED] / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_....Non_Fused_Branches(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) ) * ( [BR_INST_RETIRED.ALL_BRANCHES] - [INST_RETIRED.MACRO_FUSED] ) / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_..Heavy_Operations(%)", + "expression": "100 * ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_....Few_Uops_Instructions(%)", + "expression": "100 * ( max( 0 , ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.MS] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....Microcode_Sequencer(%)", + "expression": "100 * ( [UOPS_RETIRED.MS] / ( [TOPDOWN.SLOTS_P] ) )" + } +] \ No newline at end of file diff --git 
a/cmd/pmu2metrics/resources/icx_nofixedtma_events.txt b/cmd/pmu2metrics/resources/icx_nofixedtma_events.txt new file mode 100644 index 0000000..00c16cc --- /dev/null +++ b/cmd/pmu2metrics/resources/icx_nofixedtma_events.txt @@ -0,0 +1,148 @@ +########################################################################################################### +# Copyright (C) 2021-2023 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause +########################################################################################################### + +# Icelake event list for platforms that don't have support for the fixed counter TMA events, e.g., some AWS +# VMs. +# Note that there are no more than 10 events per group. On these same platforms, the cpu-cycles fixed +# counter is not supported so a general purpose counter will be used. + +cpu/event=0x51,umask=0x01,period=100003,name='L1D.REPLACEMENT'/, +cpu/event=0xd1,umask=0x01,period=1000003,name='MEM_LOAD_RETIRED.L1_HIT'/, +cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, +cpu/event=0xc3,umask=0x01,cmask=0x01,edge=0x01,period=100003,name='MACHINE_CLEARS.COUNT'/, +cpu/event=0xc5,umask=0x00,period=50021,name='BR_MISP_RETIRED.ALL_BRANCHES'/, +cpu/event=0xf1,umask=0x1f,period=100003,name='L2_LINES_IN.ALL'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xd1,umask=0x10,period=100021,name='MEM_LOAD_RETIRED.L2_MISS'/, +cpu/event=0x79,umask=0x08,cmask=0x00,period=2000003,name='IDQ.DSB_UOPS'/, +cpu/event=0xa8,umask=0x01,cmask=0x00,period=2000003,name='LSD.UOPS'/, +cpu/event=0x48,umask=0x02,period=1000003,name='L1D_PEND_MISS.FB_FULL_PERIODS'/, +cpu-cycles, +ref-cycles, +instructions; + +# events for TMA metrics without fixed counter support (group 1) +cpu/event=0x9c,umask=0x01,name='IDQ_UOPS_NOT_DELIVERED.CORE'/, +cpu/event=0xa4,umask=0x01,name='TOPDOWN.SLOTS_P'/, +cpu/event=0x80,umask=0x04,name='ICACHE_DATA.STALLS'/, +cpu/event=0x83,umask=0x04,name='ICACHE_TAG.STALLS'/, 
+cpu/event=0x79,umask=0x30,name='IDQ.MS_SWITCHES'/, +cpu/event=0x87,umask=0x01,name='DECODE.LCP'/, +cpu/event=0x0d,umask=0x10,period=1000003,name='INT_MISC.UOP_DROPPING'/, +cpu-cycles, +ref-cycles, +instructions; + +# events for TMA metrics without fixed counter support (group 2) +cpu/event=0xab,umask=0x02,name='DSB2MITE_SWITCHES.PENALTY_CYCLES'/, +cpu/event=0xa4,umask=0x02,name='TOPDOWN.BACKEND_BOUND_SLOTS'/, +cpu/event=0x0D,umask=0x01,name='INT_MISC.CLEARS_COUNT'/, +cpu/event=0xc2,umask=0x02,name='UOPS_RETIRED.SLOTS'/, +cpu/event=0xd0,umask=0x83,name='MEM_INST_RETIRED.ANY'/, +cpu/event=0xc4,umask=0x00,name='BR_INST_RETIRED.ALL_BRANCHES'/, +cpu/event=0x9c,umask=0x01,cmask=0x05,period=1000003,name='IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/, +cpu/event=0xa3,umask=0x0C,cmask=0x0C,period=1000003,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/, +cpu/event=0xa3,umask=0x14,cmask=0x14,period=2000003,name='CYCLE_ACTIVITY.STALLS_MEM_ANY'/, +cpu/event=0xa6,umask=0x40,cmask=0x02,period=1000003,name='EXE_ACTIVITY.BOUND_ON_STORES'/, +cpu/event=0xa3,umask=0x04,cmask=0x04,period=1000003,name='CYCLE_ACTIVITY.STALLS_TOTAL'/, +cpu/event=0xa6,umask=0x02,period=2000003,name='EXE_ACTIVITY.1_PORTS_UTIL'/, +cpu/event=0xa6,umask=0x04,period=2000003,name='EXE_ACTIVITY.2_PORTS_UTIL'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xd0,umask=0x21,cmask=0x00,period=100007,name='MEM_INST_RETIRED.LOCK_LOADS'/, +cpu/event=0xd1,umask=0x02,period=200003,name='MEM_LOAD_RETIRED.L2_HIT'/, +cpu/event=0xd1,umask=0x40,period=100007,name='MEM_LOAD_RETIRED.FB_HIT'/, +cpu/event=0xd1,umask=0x08,period=200003,name='MEM_LOAD_RETIRED.L1_MISS'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xa3,umask=0x05,cmask=0x05,period=1000003,name='CYCLE_ACTIVITY.STALLS_L2_MISS'/, +cpu/event=0xa3,umask=0x06,cmask=0x06,period=1000003,name='CYCLE_ACTIVITY.STALLS_L3_MISS'/, 
+cpu/event=0xa3,umask=0x0c,cmask=0x0c,period=1000003,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x79,umask=0x04,cmask=0x01,period=2000003,name='IDQ.MITE_CYCLES_ANY'/, +cpu/event=0x79,umask=0x04,cmask=0x05,period=2000003,name='IDQ.MITE_CYCLES_OK'/, +cpu/event=0x79,umask=0x08,cmask=0x01,period=2000003,name='IDQ.DSB_CYCLES_ANY'/, +cpu/event=0x79,umask=0x08,cmask=0x05,period=2000003,name='IDQ.DSB_CYCLES_OK'/, +cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/, +cpu/event=0x14,umask=0x09,cmask=0x01,period=1000003,name='ARITH.DIVIDER_ACTIVE'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x79,umask=0x04,period=100003,name='IDQ.MITE_UOPS'/, +cpu/event=0x79,umask=0x30,period=100003,name='IDQ.MS_UOPS'/, +cpu/event=0x56,umask=0x01,period=100003,name='UOPS_DECODED.DEC0'/, +cpu/event=0x56,umask=0x01,cmask=0x01,period=100003,name='UOPS_DECODED.DEC0:c1'/, +cpu/event=0x0e,umask=0x01,period=2000003,name='UOPS_ISSUED.ANY'/, +cpu-cycles:k, +ref-cycles:k, +instructions:k; + +# OCR +cpu/event=0xb7,umask=0x01,offcore_rsp=0x104000477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/, +cpu/event=0x85,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, +cpu/event=0x08,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xb7,umask=0x01,offcore_rsp=0x1030000477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x830000477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x730000477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/, +cpu/event=0x08,umask=0x04,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, +cpu/event=0x49,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, 
+cpu-cycles, +ref-cycles, +instructions; + +# C6 +cstate_core/c6-residency/; +cstate_pkg/c6-residency/; + +# UPI +upi/event=0x2,umask=0xf,name='UNC_UPI_TxL_FLITS.ALL_DATA'/; + +# CHA +cha/event=0x00,umask=0x00,name='UNC_CHA_CLOCKTICKS'/; + +cha/event=0x35,umask=0xC8177E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE'/, +cha/event=0x35,umask=0xC816FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL'/, +cha/event=0x35,umask=0xC896FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL'/, +cha/event=0x35,umask=0xC8977E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE'/; + +cha/event=0x36,umask=0xc8177e01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE'/; +cha/event=0x35,umask=0xc88ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF'/, +cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/, +cha/event=0x36,umask=0xC816FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL'/; + +cha/event=0x35,umask=0xccd7fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD'/, +cha/event=0x35,umask=0xc817fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD'/, +cha/event=0x35,umask=0xc897fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF'/, +cha/event=0x36,umask=0xC817FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD'/; + +# memory read/writes +imc/event=0x04,umask=0x0f,name='UNC_M_CAS_COUNT.RD'/, +imc/event=0x04,umask=0x30,name='UNC_M_CAS_COUNT.WR'/; + +# power +power/energy-pkg/, +power/energy-ram/; diff --git a/cmd/pmu2metrics/resources/icx_nofixedtma_metrics.json b/cmd/pmu2metrics/resources/icx_nofixedtma_metrics.json new file mode 100644 index 0000000..5325629 --- /dev/null +++ b/cmd/pmu2metrics/resources/icx_nofixedtma_metrics.json @@ -0,0 +1,329 @@ +[ + { + "name": "metric_CPU operating frequency (in GHz)", + "expression": "(([cpu-cycles] / [ref-cycles] * [SYSTEM_TSC_FREQ]) / 1000000000)" + }, + { + "name": "metric_CPU utilization %", + "expression": "100 * [ref-cycles] / [TSC]" + }, + { + "name": "metric_CPU utilization% in kernel mode", + "expression": "100 * [ref-cycles:k] / [TSC]", +
"origin": "perfspect" + }, + { + "name": "metric_CPI", + "name-txn": "metric_cycles per txn", + "expression": "[cpu-cycles] / [instructions]", + "expression-txn": "[cpu-cycles] / [TXN]" + }, + { + "name": "metric_kernel_CPI", + "name-txn": "metric_kernel_cycles per txn", + "expression": "[cpu-cycles:k] / [instructions:k]", + "expression-txn": "[cpu-cycles:k] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_IPC", + "name-txn": "metric_txn per cycles", + "expression": "[instructions] / [cpu-cycles]", + "expression-txn": "[TXN] / [cpu-cycles]", + "origin": "perfspect" + }, + { + "name": "metric_giga_instructions_per_sec", + "expression": "[instructions] / 1000000000", + "origin": "perfspect" + }, + { + "name": "metric_locks retired per instr", + "name-txn": "metric_locks retired per txn", + "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", + "origin": "perfmon website" + }, + { + "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "expression": "[L1D.REPLACEMENT] / [instructions]", + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" + }, + { + "name": "metric_L1D demand data read hits per instr", + "name-txn": "metric_L1D demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" + }, + { + "name": "metric_L1-I code read misses (w/ prefetches) per instr", + "name-txn": "metric_L1I code read misses (includes prefetches) per txn", + "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", + "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" + }, + { + "name": "metric_L2 demand data read hits per instr", + "name-txn": "metric_L2 demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" + }, + { + "name": "metric_L2 MPI
(includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[L2_LINES_IN.ALL] / [instructions]", + "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" + }, + { + "name": "metric_L2 demand data read MPI", + "name-txn": "metric_L2 demand data read misses per txn", + "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" + }, + { + "name": "metric_L2 demand code MPI", + "name-txn": "metric_L2 demand code misses per txn", + "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", + "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" + }, + { + "name": "metric_LLC code read MPI (demand+prefetch)", + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [TXN]" + }, + { + "name": "metric_LLC data read MPI (demand+prefetch)", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" + }, + { + "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]" + }, + { + "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "expression": 
"[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]" + }, + { + "name": "metric_Average LLC demand data read miss latency (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_UPI Data transmit BW (MB/sec) (only data)", + "expression": "([UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9.0) / 1000000) / 1" + }, + { + "name": "metric_package power (watts)", + "expression": "[power/energy-pkg/]", + "origin": "perfspect" + }, + { + "name": "metric_DRAM power (watts)", + "expression": "[power/energy-ram/]", + "origin": "perfspect" + }, + { + "name": "metric_core c6 residency %", + "expression": "100 * [cstate_core/c6-residency/] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_package c6 residency %", + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_% Uops delivered from decoded Icache (DSB)", + "expression": "100 * ([IDQ.DSB_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )" + }, + { + "name": "metric_% Uops delivered from legacy decode pipeline (MITE)", + "expression": "100 * ([IDQ.MITE_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + 
[IDQ.MS_UOPS] + [LSD.UOPS]) )" + }, + { + "name": "metric_core initiated local dram read bandwidth (MB/sec)", + "expression": "(([OCR.READS_TO_CORE.LOCAL_DRAM] + [OCR.HWPF_L3.L3_MISS_LOCAL]) * 64 / 1000000) / 1" + }, + { + "name": "metric_core initiated remote dram read bandwidth (MB/sec)", + "expression": "(([OCR.READS_TO_CORE.REMOTE_DRAM] + [OCR.HWPF_L3.REMOTE]) * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth read (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.RD] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth write (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.WR] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth total (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_ITLB (2nd level) MPI", + "name-txn": "metric_ITLB (2nd level) misses per txn", + "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) load MPI", + "name-txn": "metric_DTLB (2nd level) load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) 2MB large page load MPI", + "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) store MPI", + "name-txn": "metric_DTLB (2nd level) store misses per txn", + "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_NUMA %_Reads addressed to local DRAM", + "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL]) / 
([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])" + }, + { + "name": "metric_NUMA %_Reads addressed to remote DRAM", + "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])" + }, + { + "name": "metric_uncore frequency GHz", + "expression": "([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1" + }, + { + "name": "metric_TMA_Frontend_Bound(%)", + "expression": "100 * ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Fetch_Latency(%)", + "expression": "100 * ( ( ( 5 ) * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_....ICache_Misses(%)", + "expression": "100 * ( [ICACHE_DATA.STALLS] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....ITLB_Misses(%)", + "expression": "100 * ( [ICACHE_TAG.STALLS] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....MS_Switches(%)", + "expression": "100 * ( ( 3 ) * [IDQ.MS_SWITCHES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....LCP(%)", + "expression": "100 * ( [DECODE.LCP] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....DSB_Switches(%)", + "expression": "100 * ( [DSB2MITE_SWITCHES.PENALTY_CYCLES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Fetch_Bandwidth(%)", + "expression": "100 * ( max( 0 , ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) - ( ( ( 5 ) * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....MITE(%)", + 
"expression": "100 * ( ( [IDQ.MITE_CYCLES_ANY] - [IDQ.MITE_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, + { + "name": "metric_TMA_....DSB(%)", + "expression": "100 * ( ( [IDQ.DSB_CYCLES_ANY] - [IDQ.DSB_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, + { + "name": "metric_TMA_Bad_Speculation(%)", + "expression": "100 * ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) )" + }, + { + "name": "metric_TMA_..Branch_Mispredicts(%)", + "expression": "100 * ( ( [BR_MISP_RETIRED.ALL_BRANCHES] / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] ) ) * ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) ) )" + }, + { + "name": "metric_TMA_..Machine_Clears(%)", + "expression": "100 * ( max( 0 , ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) ) - ( ( [BR_MISP_RETIRED.ALL_BRANCHES] / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] ) ) * ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) ) ) ) )" + }, + { + "name": "metric_TMA_Backend_Bound(%)", + "expression": "100 * ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Memory_Bound(%)", + 
"expression": "100 * ( ( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES] ) / ( [CYCLE_ACTIVITY.STALLS_TOTAL] + ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) + [EXE_ACTIVITY.BOUND_ON_STORES] ) ) * ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_....L1_Bound(%)", + "expression": "100 * ( max( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] - [CYCLE_ACTIVITY.STALLS_L1D_MISS] ) / ( [cpu-cycles] ) , 0 ) )" + }, + { + "name": "metric_TMA_....L2_Bound(%)", + "expression": "100 * ( ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) / ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + [L1D_PEND_MISS.FB_FULL_PERIODS] ) ) * ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) ) )" + }, + { + "name": "metric_TMA_....L3_Bound(%)", + "expression": "100 * ( ( [CYCLE_ACTIVITY.STALLS_L2_MISS] - [CYCLE_ACTIVITY.STALLS_L3_MISS] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....DRAM_Bound(%)", + "expression": "100 * ( ( [CYCLE_ACTIVITY.STALLS_L3_MISS] / ( [cpu-cycles] ) + ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) ) - ( ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) / ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + [L1D_PEND_MISS.FB_FULL_PERIODS] ) ) * ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) ) ) ) )" + }, + { + "name": "metric_TMA_....Store_Bound(%)", + "expression": "100 * ( [EXE_ACTIVITY.BOUND_ON_STORES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Core_Bound(%)", + "expression": "100 * ( max( 0 , ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / 
( [TOPDOWN.SLOTS_P] ) ) - ( ( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES] ) / ( [CYCLE_ACTIVITY.STALLS_TOTAL] + ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) + [EXE_ACTIVITY.BOUND_ON_STORES] ) ) * ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) ) ) )" + }, + { + "name": "metric_TMA_....Divider(%)", + "expression": "100 * ( [ARITH.DIVIDER_ACTIVE] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_Retiring(%)", + "expression": "100 * ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Light_Operations(%)", + "expression": "100 * ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) ) )" + }, + { + "name": "metric_TMA_....Memory_Operations(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) ) ) * [MEM_INST_RETIRED.ANY] / [instructions] )" + }, + { + "name": "metric_TMA_....Branch_Instructions(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) ) ) * [BR_INST_RETIRED.ALL_BRANCHES] / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_..Heavy_Operations(%)", + "expression": "100 * ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * 
[IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] )" + }, + { + "name": "metric_TMA_....Few_Uops_Instructions(%)", + "expression": "100 * ( ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) - ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_....Microcode_Sequencer(%)", + "expression": "100 * ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) )" + } +] \ No newline at end of file diff --git a/cmd/pmu2metrics/resources/skx_metrics.json b/cmd/pmu2metrics/resources/skx_metrics.json index 9974e96..f167036 100644 --- a/cmd/pmu2metrics/resources/skx_metrics.json +++ b/cmd/pmu2metrics/resources/skx_metrics.json @@ -202,7 +202,7 @@ }, { "name": "metric_UPI Transmit utilization_% (includes control)", - "expression": "100 * (([UNC_UPI_TxL_FLITS.ALL_DATA] + [UNC_UPI_TxL_FLITS.NON_DATA]) / 3) / ((((([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [const_thread_count])) / (([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [const_thread_count])) - [cstate_pkg/c6-residency/])) * ([UNC_UPI_CLOCKTICKS] - [UNC_UPI_L1_POWER_CYCLES])) * 5 / 6))", + "expression": "100 * (([UNC_UPI_TxL_FLITS.ALL_DATA] + [UNC_UPI_TxL_FLITS.NON_DATA]) / 3) / ((((([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [CONST_THREAD_COUNT])) / (([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [CONST_THREAD_COUNT])) - [cstate_pkg/c6-residency/])) * ([UNC_UPI_CLOCKTICKS] - [UNC_UPI_L1_POWER_CYCLES])) * 5 / 6))", "origin": "perfspect" }, { @@ -284,12 +284,12 @@ }, { "name": "metric_TMA_Info_cycles_both_threads_active(%)", - "expression": "100 * ( (1 - ([CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE] / ([CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY] / 2)) ) if [const_thread_count] > 
1 else 0)", + "expression": "100 * ( (1 - ([CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE] / ([CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY] / 2)) ) if [CONST_THREAD_COUNT] > 1 else 0)", "origin": "perfspect" }, { "name": "metric_TMA_Info_CoreIPC", - "expression": "[instructions] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "[instructions] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { @@ -298,7 +298,7 @@ }, { "name": "metric_TMA_..Frontend_Latency(%)", - "expression": "100 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] /[const_thread_count])", + "expression": "100 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] /[CONST_THREAD_COUNT])", "origin": "perfspect" }, { @@ -328,7 +328,7 @@ }, { "name": "metric_TMA_..Frontend_Bandwidth(%)", - "expression": "100 * ([IDQ_UOPS_NOT_DELIVERED.CORE] - 4 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]) / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))", + "expression": "100 * ([IDQ_UOPS_NOT_DELIVERED.CORE] - 4 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]) / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT]))", "origin": "perfspect" }, { @@ -416,17 +416,17 @@ }, { "name": "metric_TMA_......Ports_Utilized_0(%)", - "expression": "100 * (([UOPS_EXECUTED.CORE_CYCLES_NONE] / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.EXE_BOUND_0_PORTS]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "100 * (([UOPS_EXECUTED.CORE_CYCLES_NONE] / 2) if ([CONST_THREAD_COUNT] > 1) else [EXE_ACTIVITY.EXE_BOUND_0_PORTS]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { "name": "metric_TMA_......Ports_Utilized_1(%)", - "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_1] - [UOPS_EXECUTED.CORE_CYCLES_GE_2]) / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.1_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / 
[const_thread_count])", + "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_1] - [UOPS_EXECUTED.CORE_CYCLES_GE_2]) / 2) if ([CONST_THREAD_COUNT] > 1) else [EXE_ACTIVITY.1_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { "name": "metric_TMA_......Ports_Utilized_2(%)", - "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_2] - [UOPS_EXECUTED.CORE_CYCLES_GE_3]) / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.2_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_2] - [UOPS_EXECUTED.CORE_CYCLES_GE_3]) / 2) if ([CONST_THREAD_COUNT] > 1) else [EXE_ACTIVITY.2_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])", "origin": "perfspect" }, { @@ -460,7 +460,7 @@ }, { "name": "metric_TMA_..Microcode_Sequencer(%)", - "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])))", + "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [CONST_THREAD_COUNT])))", "origin": "perfspect" } ] \ No newline at end of file diff --git a/cmd/pmu2metrics/resources/spr_nofixedtma_events.txt b/cmd/pmu2metrics/resources/spr_nofixedtma_events.txt new file mode 100644 index 0000000..d767656 --- /dev/null +++ b/cmd/pmu2metrics/resources/spr_nofixedtma_events.txt @@ -0,0 +1,138 @@ +########################################################################################################### +# Copyright (C) 2021-2023 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause +########################################################################################################### + +# Sapphire Rapids and Emerald Rapids event list for platforms that don't have support for the fixed counter +# TMA events, e.g., some AWS VMs. +# Note that there are no more than 10 events per group. 
On these same platforms, the cpu-cycles fixed +# counter is not supported so a general purpose counter will be used. + +cpu/event=0x51,umask=0x01,period=100003,name='L1D.REPLACEMENT'/, +cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, +cpu/event=0xd1,umask=0x01,period=1000003,name='MEM_LOAD_RETIRED.L1_HIT'/, +cpu/event=0x25,umask=0x1f,period=100003,name='L2_LINES_IN.ALL'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xd1,umask=0x10,period=100021,name='MEM_LOAD_RETIRED.L2_MISS'/, +cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/, +cpu/event=0x11,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, +cpu/event=0x47,umask=0x03,cmask=0x03,period=1000003,name='MEMORY_ACTIVITY.STALLS_L1D_MISS'/, +cpu/event=0xa6,umask=0x40,cmask=0x02,period=1000003,name='EXE_ACTIVITY.BOUND_ON_STORES'/, +cpu/event=0xa6,umask=0x21,cmask=0x05,period=2000003,name='EXE_ACTIVITY.BOUND_ON_LOADS'/, +cpu/event=0xad,umask=0x10,period=1000003,name='INT_MISC.UOP_DROPPING'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x12,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, +cpu/event=0x12,umask=0x04,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, +cpu/event=0x13,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, +cpu/event=0xd1,umask=0x02,period=200003,name='MEM_LOAD_RETIRED.L2_HIT'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x47,umask=0x09,cmask=0x09,period=1000003,name='MEMORY_ACTIVITY.STALLS_L3_MISS'/, +cpu/event=0x80,umask=0x04,period=500009,name='ICACHE_DATA.STALLS'/, +cpu/event=0x83,umask=0x04,period=200003,name='ICACHE_TAG.STALLS'/, +cpu-cycles, +ref-cycles, +instructions; + +# events for TMA metrics without fixed counter support (group 1) +cpu/event=0x9c,umask=0x01,name='IDQ_UOPS_NOT_DELIVERED.CORE'/, +cpu/event=0xa4,umask=0x01,name='TOPDOWN.SLOTS_P'/, +cpu/event=0x9c,umask=0x01,cmask=0x06,name='IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE'/,
+cpu/event=0xc2,umask=0x02,name='UOPS_RETIRED.SLOTS'/, +cpu/event=0xae,umask=0x01,name='UOPS_ISSUED.ANY'/, +cpu/event=0x87,umask=0x01,name='DECODE.LCP'/, +cpu/event=0x61,umask=0x02,name='DSB2MITE_SWITCHES.PENALTY_CYCLES'/, +cpu-cycles, +ref-cycles, +instructions; + +# events for TMA metrics without fixed counter support (group 2) +cpu/event=0xa4,umask=0x02,name='TOPDOWN.BACKEND_BOUND_SLOTS'/, +cpu/event=0xa4,umask=0x08,name='TOPDOWN.BR_MISPREDICT_SLOTS'/, +cpu/event=0xa4,umask=0x10,name='TOPDOWN.MEMORY_BOUND_SLOTS'/, +cpu/event=0xc2,umask=0x01,name='UOPS_RETIRED.HEAVY'/, +cpu/event=0xe5,umask=0x03,name='MEM_UOP_RETIRED.ANY'/, +cpu/event=0xc0,umask=0x10,name='INST_RETIRED.MACRO_FUSED'/, +cpu/event=0xc4,umask=0x00,name='BR_INST_RETIRED.ALL_BRANCHES'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x47,umask=0x03,cmask=0x03,period=1000003,name='MEMORY_ACTIVITY.STALLS_L1D_MISS'/, +cpu/event=0x47,umask=0x05,cmask=0x05,period=1000003,name='MEMORY_ACTIVITY.STALLS_L2_MISS'/, +cpu/event=0xb0,umask=0x09,cmask=0x01,period=1000003,name='ARITH.DIV_ACTIVE'/, +cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/, +cpu/event=0xd0,umask=0x21,cmask=0x00,period=1000003,name='MEM_INST_RETIRED.LOCK_LOADS'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x79,umask=0x04,cmask=0x01,period=2000003,name='IDQ.MITE_CYCLES_ANY'/, +cpu/event=0x79,umask=0x04,cmask=0x06,period=2000003,name='IDQ.MITE_CYCLES_OK'/, +cpu/event=0x79,umask=0x08,cmask=0x01,period=2000003,name='IDQ.DSB_CYCLES_ANY'/, +cpu/event=0x79,umask=0x08,cmask=0x06,period=2000003,name='IDQ.DSB_CYCLES_OK'/, +cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/, +cpu/event=0xb7,umask=0x02,period=2000003,name='EXE.AMX_BUSY'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x79,umask=0x08,cmask=0x00,period=2000003,name='IDQ.DSB_UOPS'/, +cpu/event=0x79,umask=0x04,period=100003,name='IDQ.MITE_UOPS'/, +cpu/event=0x79,umask=0x20,period=100003,name='IDQ.MS_UOPS'/, 
+cpu/event=0xa8,umask=0x01,cmask=0x00,period=2000003,name='LSD.UOPS'/, +cpu-cycles:k, +ref-cycles:k, +instructions:k; + +#OCR +cpu/event=0x2a,umask=0x01,offcore_rsp=0x104004477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x730004477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x1030004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x830004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/; + +#C6 +cstate_core/c6-residency/; +cstate_pkg/c6-residency/; + +#UPI +upi/event=0x02,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/; + +#CHA (Cache) +cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/, +cha/event=0x35,umask=0xc8177e01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE'/, +cha/event=0x36,umask=0xc8177e01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE'/; + +cha/event=0x35,umask=0xC816FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL'/, +cha/event=0x36,umask=0xc816fe01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL'/, +cha/event=0x35,umask=0xC896FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL'/, +cha/event=0x35,umask=0xC8977E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE'/; + +cha/event=0x35,umask=0xccd7fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA'/, +cha/event=0x35,umask=0xc817fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD'/, +cha/event=0x35,umask=0xc897fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF'/, +cha/event=0x36,umask=0xC817fe01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD'/; + +#CHA (IO Bandwidth) +cha/event=0x35,umask=0xc8f3ff04,name='UNC_CHA_TOR_INSERTS.IO_PCIRDCUR'/, +cha/event=0x35,umask=0xCC43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOM'/, +cha/event=0x35,umask=0xCD43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR'/, 
+cha/event=0x01,umask=0x00,name='UNC_CHA_CLOCKTICKS'/; + +#IMC (memory read/writes) +imc/event=0x05,umask=0xcf,name='UNC_M_CAS_COUNT.RD'/, +imc/event=0x05,umask=0xf0,name='UNC_M_CAS_COUNT.WR'/; + +#power +power/energy-pkg/, +power/energy-ram/; diff --git a/cmd/pmu2metrics/resources/spr_nofixedtma_metrics.json b/cmd/pmu2metrics/resources/spr_nofixedtma_metrics.json new file mode 100644 index 0000000..587d6b1 --- /dev/null +++ b/cmd/pmu2metrics/resources/spr_nofixedtma_metrics.json @@ -0,0 +1,349 @@ +[ + { + "name": "metric_CPU operating frequency (in GHz)", + "expression": "(([cpu-cycles] / [ref-cycles] * [SYSTEM_TSC_FREQ]) / 1000000000)" + }, + { + "name": "metric_CPU utilization %", + "expression": "100 * [ref-cycles] / [TSC]" + }, + { + "name": "metric_CPU utilization% in kernel mode", + "expression": "100 * [ref-cycles:k] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_CPI", + "name-txn": "metric_cycles per txn", + "expression": "[cpu-cycles] / [instructions]", + "expression-txn": "[cpu-cycles] / [TXN]" + }, + { + "name": "metric_kernel_CPI", + "name-txn": "metric_kernel_cycles per txn", + "expression": "[cpu-cycles:k] / [instructions:k]", + "expression-txn": "[cpu-cycles:k] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_IPC", + "name-txn": "metric_txn per cycle", + "expression": "[instructions] / [cpu-cycles]", + "expression-txn": "[TXN] / [cpu-cycles]", + "origin": "perfspect" + }, + { + "name": "metric_giga_instructions_per_sec", + "expression": "[instructions] / 1000000000", + "origin": "perfspect" + }, + { + "name": "metric_locks retired per instr", + "name-txn": "metric_locks retired per txn", + "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", + "origin": "perfmon website" + }, + { + "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "expression": "[L1D.REPLACEMENT] / 
[instructions]", + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" + }, + { + "name": "metric_L1D demand data read hits per instr", + "name-txn": "metric_L1D demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" + }, + { + "name": "metric_L1-I code read misses (w/ prefetches) per instr", + "name-txn": "metric_L1I code read misses (includes prefetches) per txn", + "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", + "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" + }, + { + "name": "metric_L2 demand data read hits per instr", + "name-txn": "metric_L2 demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" + }, + { + "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[L2_LINES_IN.ALL] / [instructions]", + "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" + }, + { + "name": "metric_L2 demand data read MPI", + "name-txn": "metric_L2 demand data read misses per txn", + "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" + }, + { + "name": "metric_L2 demand code MPI", + "name-txn": "metric_L2 demand code misses per txn", + "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", + "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" + }, + { + "name": "metric_LLC code read MPI (demand+prefetch)", + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [instructions]", + "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [TXN]" + }, + { + "name": "metric_LLC data read MPI (demand+prefetch)", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + 
[UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" + }, + { + "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_Average LLC demand data read miss latency (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_UPI Data transmit BW (MB/sec) (only data)", + "expression": "([UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9.0) / 1000000) / 1" + }, + { + "name": "metric_package power 
(watts)", + "expression": "[power/energy-pkg/]", + "origin": "perfspect" + }, + { + "name": "metric_DRAM power (watts)", + "expression": "[power/energy-ram/]", + "origin": "perfspect" + }, + { + "name": "metric_core c6 residency %", + "expression": "100 * [cstate_core/c6-residency/] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_package c6 residency %", + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_% Uops delivered from decoded Icache (DSB)", + "expression": "100 * ([IDQ.DSB_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )" + }, + { + "name": "metric_% Uops delivered from legacy decode pipeline (MITE)", + "expression": "100 * ([IDQ.MITE_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )" + }, + { + "name": "metric_core initiated local dram read bandwidth (MB/sec)", + "expression": "([OCR.READS_TO_CORE.LOCAL_DRAM] + [OCR.HWPF_L3.L3_MISS_LOCAL]) * 64 / 1000000", + "origin": "perfspect" + }, + { + "name": "metric_core initiated remote dram read bandwidth (MB/sec)", + "expression": "([OCR.READS_TO_CORE.REMOTE_DRAM] + [OCR.HWPF_L3.REMOTE]) * 64 / 1000000", + "origin": "perfspect" + }, + { + "name": "metric_memory bandwidth read (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.RD] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth write (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.WR] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth total (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_ITLB (2nd level) MPI", + "name-txn": "metric_ITLB (2nd level) misses per txn", + "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) load MPI", + "name-txn": "metric_DTLB (2nd level) load misses per txn", + "expression": 
"[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) 2MB large page load MPI", + "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) store MPI", + "name-txn": "metric_DTLB (2nd level) store misses per txn", + "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_NUMA %_Reads addressed to local DRAM", + "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])" + }, + { + "name": "metric_NUMA %_Reads addressed to remote DRAM", + "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])" + }, + { + "name": "metric_uncore frequency GHz", + "expression": "([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_writes (MB/sec)", + "expression": "([UNC_CHA_TOR_INSERTS.IO_PCIRDCUR] * 64 / 1000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_reads (MB/sec)", + "expression": "(([UNC_CHA_TOR_INSERTS.IO_ITOM] + [UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_TMA_Frontend_Bound(%)", + "expression": "100 * ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - 
[INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Fetch_Latency(%)", + "expression": "100 * ( ( [IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] * ( 6 ) - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_....ICache_Misses(%)", + "expression": "100 * ( [ICACHE_DATA.STALLS] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....ITLB_Misses(%)", + "expression": "100 * ( [ICACHE_TAG.STALLS] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....MS_Switches(%)", + "expression": "100 * ( ( 3 ) * [UOPS_RETIRED.MS:c1:e1] / ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....LCP(%)", + "expression": "100 * ( [DECODE.LCP] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....DSB_Switches(%)", + "expression": "100 * ( [DSB2MITE_SWITCHES.PENALTY_CYCLES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Fetch_Bandwidth(%)", + "expression": "100 * ( max( 0 , ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) - ( ( [IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] * ( 6 ) - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....MITE(%)", + "expression": "100 * ( ( [IDQ.MITE_CYCLES_ANY] - [IDQ.MITE_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, + { + "name": "metric_TMA_....DSB(%)", + "expression": "100 * ( ( [IDQ.DSB_CYCLES_ANY] - [IDQ.DSB_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, + { + "name": "metric_TMA_Bad_Speculation(%)", + "expression": "100 * ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) )" + }, + { + "name": "metric_TMA_..Branch_Mispredicts(%)", + "expression": "100 * ( [TOPDOWN.BR_MISPREDICT_SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Machine_Clears(%)", + 
"expression": "100 * ( max( 0 , ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) ) - ( [TOPDOWN.BR_MISPREDICT_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_Backend_Bound(%)", + "expression": "100 * ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Memory_Bound(%)", + "expression": "100 * ( [TOPDOWN.MEMORY_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_....L1_Bound(%)", + "expression": "100 * ( max( ( [EXE_ACTIVITY.BOUND_ON_LOADS] - [MEMORY_ACTIVITY.STALLS_L1D_MISS] ) / ( [cpu-cycles] ) , 0 ) )" + }, + { + "name": "metric_TMA_....L2_Bound(%)", + "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L1D_MISS] - [MEMORY_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....L3_Bound(%)", + "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L2_MISS] - [MEMORY_ACTIVITY.STALLS_L3_MISS] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....DRAM_Bound(%)", + "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L3_MISS] / ( [cpu-cycles] ) ) )" + }, + { + "name": "metric_TMA_....Store_Bound(%)", + "expression": "100 * ( [EXE_ACTIVITY.BOUND_ON_STORES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Core_Bound(%)", + "expression": "100 * ( max( 0 , ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [TOPDOWN.MEMORY_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....Divider(%)", + "expression": "100 * ( [ARITH.DIV_ACTIVE] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....AMX_Busy(%)", + "expression": "100 * ( [EXE.AMX_BUSY] / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) )" + }, + { + "name": "metric_TMA_Retiring(%)", + "expression": "100 * ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Light_Operations(%)", + "expression": 
"100 * ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....Memory_Operations(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) ) * [MEM_UOP_RETIRED.ANY] / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_....Fused_Instructions(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) ) * [INST_RETIRED.MACRO_FUSED] / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_....Non_Fused_Branches(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) ) * ( [BR_INST_RETIRED.ALL_BRANCHES] - [INST_RETIRED.MACRO_FUSED] ) / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_..Heavy_Operations(%)", + "expression": "100 * ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_....Few_Uops_Instructions(%)", + "expression": "100 * ( max( 0 , ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.MS] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....Microcode_Sequencer(%)", + "expression": "100 * ( [UOPS_RETIRED.MS] / ( [TOPDOWN.SLOTS_P] ) )" + } +] \ No newline at end of file diff --git a/cmd/pmu2metrics/resources/srf_events.txt b/cmd/pmu2metrics/resources/srf_events.txt index b57637d..49b3fe1 100644 --- a/cmd/pmu2metrics/resources/srf_events.txt +++ b/cmd/pmu2metrics/resources/srf_events.txt @@ -5,13 +5,107 @@ # SierraForest event list +cpu-cycles:k, +ref-cycles:k, +instructions:k; + +cpu/event=0x08,umask=0x08,name='DTLB_LOAD_MISSES.WALK_COMPLETED_1G'/, 
+cpu/event=0x08,umask=0xe,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, +cpu/event=0x49,umask=0xe,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, +cpu/event=0x12,umask=0x02,name='DTLB_LOAD_MISSES.WALK_COMPLETED_4K'/, +cpu/event=0x12,umask=0x04,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, cpu-cycles, ref-cycles, instructions; -cpu-cycles:k, -ref-cycles:k, -instructions:k; +cpu/event=0x2e,umask=0x41,name='LONGEST_LAT_CACHE.MISS'/, +cpu/event=0x2e,umask=0x4f,name='LONGEST_LAT_CACHE.REFERENCE'/, +cpu/event=0x85,umask=0xe,name='ITLB_MISSES.WALK_COMPLETED'/, +cpu/event=0xd0,umask=0x21,name='MEM_UOPS_RETIRED.LOCK_LOADS'/, +cpu/event=0xd1,umask=0x02,name='MEM_LOAD_UOPS_RETIRED.L2_HIT'/, +cpu/event=0xd1,umask=0x40,name='MEM_LOAD_UOPS_RETIRED.L1_MISS'/, +cpu/event=0xd1,umask=0x1,name='MEM_LOAD_UOPS_RETIRED.L1_HIT'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x71,umask=0x00,name='TOPDOWN_FE_BOUND.ALL'/, +cpu/event=0x71,umask=0x20,name='TOPDOWN_FE_BOUND.ICACHE'/, +cpu/event=0x71,umask=0x10,name='TOPDOWN_FE_BOUND.ITLB_MISS'/, +cpu/event=0x71,umask=0x72,name='TOPDOWN_FE_BOUND.FRONTEND_LATENCY'/, +cpu/event=0x71,umask=0x40,name='TOPDOWN_FE_BOUND.BRANCH_RESTEER'/, +cpu/event=0x71,umask=0x8d,name='TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x80,umask=0x02,name='ICACHE.MISSES'/, +cpu/event=0x05,umask=0xf4,name='LD_HEAD.L1_BOUND_AT_RET'/, +cpu/event=0x72,umask=0x00,name='TOPDOWN_RETIRING.ALL'/, +cpu/event=0x73,umask=0x03,name='TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS'/, +cpu/event=0x73,umask=0x04,name='TOPDOWN_BAD_SPECULATION.MISPREDICT'/, +cpu/event=0x73,umask=0x00,name='TOPDOWN_BAD_SPECULATION.ALL'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x05,umask=0xff,name='LD_HEAD.ANY_AT_RET'/, +cpu/event=0x04,umask=0x07,name='MEM_SCHEDULER_BLOCK.ALL'/, +cpu/event=0x04,umask=0x01,name='MEM_SCHEDULER_BLOCK.ST_BUF'/, +cpu/event=0x74,umask=0x02,name='TOPDOWN_BE_BOUND.MEM_SCHEDULER'/, 
+cpu/event=0x74,umask=0x10,name='TOPDOWN_BE_BOUND.SERIALIZATION'/, +cpu/event=0x74,umask=0x00,name='TOPDOWN_BE_BOUND.ALL'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x05,umask=0x81,name='LD_HEAD.L1_MISS_AT_RET'/, +cpu/event=0x34,umask=0x6f,name='MEM_BOUND_STALLS_LOAD.ALL'/, +cpu/event=0x34,umask=0x01,name='MEM_BOUND_STALLS_LOAD.L2_HIT'/, +cpu/event=0x34,umask=0x06,name='MEM_BOUND_STALLS_LOAD.LLC_HIT'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xb7,umask=0x01,cmask=0x00,offcore_rsp=0x8000100000004477,name='OCR.READS_TO_CORE.OUTSTANDING'/, +cpu/event=0xb7,umask=0x02,cmask=0x00,offcore_rsp=0x100000014477,name='OCR.READS_TO_CORE.ANY_RESPONSE'/; + +cpu/event=0xB7,umask=0x01,offcore_rsp=0x101030004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/, +cpu/event=0xB7,umask=0x01,offcore_rsp=0x100830004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/; + +cpu/event=0xb7,umask=0x01,cmask=0x00,offcore_rsp=0x10244,name='OCR.L2_CODE_MISS'/, +cpu/event=0xb7,umask=0x02,cmask=0x00,offcore_rsp=0x10070,name='OCR.HWPF_L2.ANY_RESPONSE'/; + +cpu/event=0xb7,umask=0x01,cmask=0x00,offcore_rsp=0x1010003C4477,name='OCR.READS_TO_CORE.L3_HIT.SNOOP_HITM'/, +cpu/event=0xb7,umask=0x02,cmask=0x00,offcore_rsp=0x1008003C4477,name='OCR.READS_TO_CORE.L3_HIT.SNOOP_HIT_WITH_FWD'/; + +#CHA (Cache) +cha/event=0x01,umask=0x00,name='UNC_CHA_CLOCKTICKS'/; + +cha/event=0x35,umask=0x00C827FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT'/, +cha/event=0x35,umask=0x00C8A7FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF'/, +cha/event=0x35,umask=0x00C80FFE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/, +cha/event=0x35,umask=0x00C88FFE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF'/; + +cha/event=0x35,umask=0x00CCD7FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA'/, +cha/event=0x35,umask=0x00C807FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_RFO'/, +cha/event=0x35,umask=0x00C887FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF'/, 
+cha/event=0x35,umask=0x00CCC7FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO'/; + +#CHA (IO Bandwidth) +cha/event=0x35,umask=0x00C8F3FF04,name='UNC_CHA_TOR_INSERTS.IO_PCIRDCUR'/, +cha/event=0x35,umask=0x00CC43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOM'/, +cha/event=0x35,umask=0x00CD43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR'/; + +cha/event=0x36,umask=0x00C827FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT'/; + +cha/event=0x36,umask=0x00C807FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO'/; + +#IMC (memory read/writes) +imc/event=0x05,umask=0xCF,name='UNC_M_CAS_COUNT_SCH0.RD'/, +imc/event=0x05,umask=0xF0,name='UNC_M_CAS_COUNT_SCH0.WR'/, +imc/event=0x06,umask=0xCF,name='UNC_M_CAS_COUNT_SCH1.RD'/, +imc/event=0x06,umask=0xF0,name='UNC_M_CAS_COUNT_SCH1.WR'/; #C6 cstate_core/c6-residency/; @@ -19,4 +113,4 @@ cstate_pkg/c6-residency/; #power power/energy-pkg/, -power/energy-ram/; \ No newline at end of file +power/energy-ram/; diff --git a/cmd/pmu2metrics/resources/srf_metrics.json b/cmd/pmu2metrics/resources/srf_metrics.json index 1f38f6e..615d432 100644 --- a/cmd/pmu2metrics/resources/srf_metrics.json +++ b/cmd/pmu2metrics/resources/srf_metrics.json @@ -36,5 +36,270 @@ "name": "metric_giga_instructions_per_sec", "expression": "[instructions] / 1000000000", "origin": "perfspect" + }, + { + "name": "metric_locks retired per instr", + "name-txn": "metric_locks retired per txn", + "expression": "[MEM_UOPS_RETIRED.LOCK_LOADS] / [instructions]", + "expression-txn": "[MEM_UOPS_RETIRED.LOCK_LOADS] / [TXN]" + }, + { + "name": "metric_L1D demand data read MPI", + "name-txn": "metric_L1D demand data read misses per txn", + "expression": "[MEM_LOAD_UOPS_RETIRED.L1_MISS] / [instructions]", + "expression-txn": "[MEM_LOAD_UOPS_RETIRED.L1_MISS] / [TXN]" + }, + { + "name": "metric_L1D demand data read hits per instr", + "name-txn": "metric_L1D demand data read hits per txn", + "expression": "[MEM_LOAD_UOPS_RETIRED.L1_HIT] / [instructions]", + "expression-txn":
"[MEM_LOAD_UOPS_RETIRED.L1_HIT] / [TXN]" + }, + { + "name": "metric_L1-I code read misses (w/ prefetches) per instr", + "name-txn": "metric_L1-I code read misses (w/ prefetches) per txn", + "expression": "[ICACHE.MISSES] / [instructions]", + "expression-txn": "[ICACHE.MISSES] / [TXN]" + }, + { + "name": "metric_L2 demand data read hits per instr", + "name-txn": "metric_L2 demand data read hits per txn", + "expression": "[MEM_LOAD_UOPS_RETIRED.L2_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_UOPS_RETIRED.L2_HIT] / [TXN]" + }, + { + "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[LONGEST_LAT_CACHE.REFERENCE] / [instructions]", + "expression-txn": "[LONGEST_LAT_CACHE.REFERENCE] / [TXN]" + }, + { + "name": "metric_L2 code MPI", + "name-txn": "metric_L2 code misses per txn", + "expression": "[OCR.L2_CODE_MISS] / [instructions]", + "expression-txn": "[OCR.L2_CODE_MISS] / [TXN]" + }, + { + "name": "metric_L2 Any local request that HITM in another module (per instr)", + "name-txn": "metric_L2 Any local request that HITM in another module per txn", + "expression": "[OCR.READS_TO_CORE.L3_HIT.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.L3_HIT.SNOOP_HITM] / [TXN]" + }, + { + "name": "metric_L2 Any local request that HIT in another module and forwarded(per instr)", + "name-txn": "metric_L2 Any local request that HIT in another module and forwarded per txn", + "expression": "[OCR.READS_TO_CORE.L3_HIT.SNOOP_HIT_WITH_FWD] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.L3_HIT.SNOOP_HIT_WITH_FWD] / [TXN]" + }, + { + "name": "metric_L2 all L2 prefetches(per instr)", + "name-txn": "metric_L2 all L2 prefetches per txn", + "expression": "[OCR.HWPF_L2.ANY_RESPONSE] / [instructions]", + "expression-txn": "[OCR.HWPF_L2.ANY_RESPONSE] / [TXN]" + }, + { + "name": "metric_data_read_L2_Miss_Latency_using_ORO_events(ns)", + 
"expression": "( 1000000000 * ([OCR.READS_TO_CORE.OUTSTANDING] / [OCR.READS_TO_CORE.ANY_RESPONSE]) / ([cpu-cycles] / [TSC] * [SYSTEM_TSC_FREQ]) )" + }, + { + "name": "metric_L3 MPI (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L3 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[LONGEST_LAT_CACHE.MISS] / [instructions]", + "expression-txn": "[LONGEST_LAT_CACHE.MISS] / [TXN]" + }, + { + "name": "metric_LLC MPI (includes code+data+rfo w/ prefetches)", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_RFO] + [UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO]) / [instructions]", + "name-txn": "metric_LLC misses per txn (includes code+data+rfo w/ prefetches)", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_RFO] + [UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO]) / [TXN]" + }, + { + "name": "metric_LLC total HITM (per instr)", + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]" + }, + { + "name": "metric_LLC total HIT clean line forwards (per instr)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]" + }, + { + "name": "metric_LLC data read MPI (demand+prefetch)", + "name-txn": 
"metric_LLC data read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA]) / [TXN]" + }, + { + "name": "metric_LLC code read MPI (demand+prefetch)", + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [TXN]" + }, + { + "name": "metric_Average LLC demand data read miss latency (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand RFO miss latency (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO] / [UNC_CHA_TOR_INSERTS.IA_MISS_RFO]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_core initiated local dram read bandwidth (MB/sec)", + "expression": "([LONGEST_LAT_CACHE.MISS]) * 64 / 1000000", + "origin": "perfspect" + }, + { + "name": "metric_memory bandwidth read (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT_SCH0.RD] + [UNC_M_CAS_COUNT_SCH1.RD]) * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth write (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT_SCH0.WR] + [UNC_M_CAS_COUNT_SCH1.WR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth total (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT_SCH0.RD] + [UNC_M_CAS_COUNT_SCH1.RD] + [UNC_M_CAS_COUNT_SCH0.WR] + [UNC_M_CAS_COUNT_SCH1.WR]) * 64 / 1000000) / 1" + }, + { + "name": 
"metric_IO_bandwidth_disk_or_network_writes (MB/sec)", + "expression": "([UNC_CHA_TOR_INSERTS.IO_PCIRDCUR] * 64 / 1000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_reads (MB/sec)", + "expression": "(([UNC_CHA_TOR_INSERTS.IO_ITOM] + [UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_package power (watts)", + "expression": "[power/energy-pkg/]", + "origin": "perfspect" + }, + { + "name": "metric_DRAM power (watts)", + "expression": "[power/energy-ram/]", + "origin": "perfspect" + }, + { + "name": "metric_core c6 residency %", + "expression": "100 * [cstate_core/c6-residency/] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_package c6 residency %", + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_uncore frequency GHz", + "expression": "([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1" + }, + { + "name": "metric_ITLB (2nd level) MPI", + "name-txn": "metric_ITLB (2nd level) misses per txn", + "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) load MPI", + "name-txn": "metric_DTLB (2nd level) load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) 4KB page load MPI", + "name-txn": "metric_DTLB (2nd level) 4KB page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_4K] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_4K] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) 2MB large page load MPI", + "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", + "expression-txn": 
"[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) 1GB large page load MPI", + "name-txn": "metric_DTLB (2nd level) 1GB large page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_1G] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_1G] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) store MPI", + "name-txn": "metric_DTLB (2nd level) store misses per txn", + "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_TMA_Frontend_Bound(%)", + "expression": "100 * ( [TOPDOWN_FE_BOUND.ALL] / ( 6 * [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Fetch_Latency(%)", + "expression": "100*([TOPDOWN_FE_BOUND.FRONTEND_LATENCY] / (6.0 * [cpu-cycles]))" + }, + { + "name": "metric_TMA_....ICache_Misses(%)", + "expression": "100 * ( [TOPDOWN_FE_BOUND.ICACHE] / ( 6 * [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....ITLB_Misses(%)", + "expression": "100 * ( [TOPDOWN_FE_BOUND.ITLB_MISS] / ( 6 * [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....Branch_Resteer(%)", + "expression": "100*([TOPDOWN_FE_BOUND.BRANCH_RESTEER] / (6.0 * [cpu-cycles]))" + }, + { + "name": "metric_TMA_..Fetch_Bandwidth(%)", + "expression": "100*([TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH] / (6.0 * [cpu-cycles]))" + }, + { + "name": "metric_TMA_Bad_Speculation(%)", + "expression": "100 * ( [TOPDOWN_BAD_SPECULATION.ALL] / ( 6 * [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Branch_Mispredicts(%)", + "expression": "100*([TOPDOWN_BAD_SPECULATION.MISPREDICT] / (6.0 * [cpu-cycles]))" + }, + { + "name": "metric_TMA_..Machine_Clears(%)", + "expression": "100*([TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS] / (6.0 * [cpu-cycles]))" + }, + { + "name": "metric_TMA_Backend_Bound(%)", + "expression": "100 * ( [TOPDOWN_BE_BOUND.ALL] / ( 6 * [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Memory_Bound(%)", + "expression": 
"100*min(1*([TOPDOWN_BE_BOUND.ALL] / (6.0 * [cpu-cycles])), 1*([LD_HEAD.ANY_AT_RET] / [cpu-cycles] + ([TOPDOWN_BE_BOUND.MEM_SCHEDULER] / (6.0 * [cpu-cycles])) * [MEM_SCHEDULER_BLOCK.ST_BUF] / [MEM_SCHEDULER_BLOCK.ALL]))" + }, + { + "name": "metric_TMA_....L1_Bound(%)", + "expression": "100*([LD_HEAD.L1_BOUND_AT_RET] / [cpu-cycles])" + }, + { + "name": "metric_TMA_....L2_Bound(%)", + "expression": "100*([MEM_BOUND_STALLS_LOAD.L2_HIT] / [cpu-cycles] - (max(1*(([MEM_BOUND_STALLS_LOAD.ALL] - [LD_HEAD.L1_MISS_AT_RET]) / [cpu-cycles]), 0) * [MEM_BOUND_STALLS_LOAD.L2_HIT] / [MEM_BOUND_STALLS_LOAD.ALL]))" + }, + { + "name": "metric_TMA_....L3_Bound(%)", + "expression": "100*([MEM_BOUND_STALLS_LOAD.LLC_HIT] / [cpu-cycles] - (max(1*(([MEM_BOUND_STALLS_LOAD.ALL] - [LD_HEAD.L1_MISS_AT_RET]) / [cpu-cycles]), 0) * [MEM_BOUND_STALLS_LOAD.LLC_HIT] / [MEM_BOUND_STALLS_LOAD.ALL]))" + }, + { + "name": "metric_TMA_....Store_Bound(%)", + "expression": "100*(([TOPDOWN_BE_BOUND.MEM_SCHEDULER] / (6.0 * [cpu-cycles])) * [MEM_SCHEDULER_BLOCK.ST_BUF] / [MEM_SCHEDULER_BLOCK.ALL])" + }, + { + "name": "metric_TMA_..Core_Bound(%)", + "expression": "100*max(0, 1*([TOPDOWN_BE_BOUND.ALL] / (6.0 * [cpu-cycles]) - min(1*([TOPDOWN_BE_BOUND.ALL] / (6.0 * [cpu-cycles])), 1*([LD_HEAD.ANY_AT_RET] / [cpu-cycles] + ([TOPDOWN_BE_BOUND.MEM_SCHEDULER] / (6.0 * [cpu-cycles])) * [MEM_SCHEDULER_BLOCK.ST_BUF] / [MEM_SCHEDULER_BLOCK.ALL]))))" + }, + { + "name": "metric_TMA_....Serialization(%)", + "expression": "100*([TOPDOWN_BE_BOUND.SERIALIZATION] / (6.0 * [cpu-cycles]))" + }, + { + "name": "metric_TMA_Retiring(%)", + "expression": "100 * ( [TOPDOWN_RETIRING.ALL] / ( 6 * [cpu-cycles] ) )" } -] \ No newline at end of file +] diff --git a/cmd/reporter/report.go b/cmd/reporter/report.go index 7dc954e..e6ebdeb 100644 --- a/cmd/reporter/report.go +++ b/cmd/reporter/report.go @@ -45,6 +45,7 @@ func NewConfigurationReport(sources []*Source, CPUdb cpudb.CPUDB) (report *Repor newPowerTable(sources, Power), 
newUncoreTable(sources, CPUdb, Power), + newEfficiencyLatencyControlTable(sources, Power), }..., ) @@ -94,6 +95,7 @@ func NewBriefReport(sources []*Source, fullReport *Report, CPUdb cpudb.CPUDB) (r tableDiskSummary := newDiskSummaryTable(fullReport.findTable("Disk"), Storage) tableNicSummary := newNICSummaryTable(fullReport.findTable("NIC"), Network) tableAcceleratorSummary := newAcceleratorSummaryTable(fullReport.findTable("Accelerator"), CPUCategory) + tableEfficiencyLatencyControlSummary := newEfficiencyLatencyControlSummaryTable(fullReport.findTable("Efficiency Latency Control"), Power) report.Tables = append(report.Tables, []*Table{ fullReport.findTable("Host"), @@ -108,6 +110,7 @@ func NewBriefReport(sources []*Source, fullReport *Report, CPUdb cpudb.CPUDB) (r newBIOSSummaryTable(fullReport.findTable("BIOS"), Software), newOperatingSystemBriefTable(fullReport.findTable("Operating System"), Software), fullReport.findTable("Power"), + tableEfficiencyLatencyControlSummary, newVulnerabilitySummaryTable(fullReport.findTable("Vulnerability"), Security), newMarketingClaimTable(fullReport, tableNicSummary, tableDiskSummary, tableAcceleratorSummary, NoCategory), }..., diff --git a/cmd/reporter/report_generator_xlsx.go b/cmd/reporter/report_generator_xlsx.go index 79705e4..bd43e75 100644 --- a/cmd/reporter/report_generator_xlsx.go +++ b/cmd/reporter/report_generator_xlsx.go @@ -6,6 +6,7 @@ package main import ( "fmt" + "math" "os" "path/filepath" "strconv" @@ -67,11 +68,10 @@ func renderExcelTable(tableHeaders []string, tableValues [][]string, f *excelize for _, header := range tableHeaders { // if possible, convert strings to floats before inserting into the sheet floatValue, err := strconv.ParseFloat(header, 64) - if err == nil { + if err == nil && !math.IsNaN(floatValue) { // if it's a number, right align it f.SetCellFloat(reportSheetName, cellName(col, row), floatValue, 1, 64) f.SetCellStyle(reportSheetName, cellName(col, row), cellName(col, row), boldAlignLeft) } 
else { - f.SetCellStr(reportSheetName, cellName(col, row), header) f.SetCellStyle(reportSheetName, cellName(col, row), cellName(col, row), bold) } @@ -85,7 +85,7 @@ func renderExcelTable(tableHeaders []string, tableValues [][]string, f *excelize for rowIdx, value := range rowValues { // if possible, convert strings to floats before inserting into the sheet floatValue, err := strconv.ParseFloat(value, 64) - if err == nil { + if err == nil && !math.IsNaN(floatValue) { // if it's a number, right align it f.SetCellFloat(reportSheetName, cellName(col, row), floatValue, 1, 64) f.SetCellStyle(reportSheetName, cellName(col, row), cellName(col, row), alignLeft) } else { diff --git a/cmd/reporter/report_tables.go b/cmd/reporter/report_tables.go index d4b7f29..32f9231 100644 --- a/cmd/reporter/report_tables.go +++ b/cmd/reporter/report_tables.go @@ -40,9 +40,9 @@ func newMarketingClaimTable(fullReport *Report, tableNicSummary *Table, tableDis Category: category, AllHostValues: []HostValues{}, } - // BASELINE: 1-node, 2x Intel® Xeon® , xx cores, HT On/Off?, Turbo On/Off?, NUMA xxx, Integrated Accelerators Available [used]: xxx, Total Memory xxx GB (xx slots/ xx GB/ xxxx MHz [run @ xxxx MHz] ), , , , . Software: WORKLOAD+VERSION, COMPILER, LIBRARIES, OTHER_SW. Test by Intel as of . - template := "1-node, %sx %s, %s cores, HT %s, Turbo %s, NUMA %s, Integrated Accelerators Available [used]: %s, Total Memory %s, BIOS %s, microcode %s, %s, %s, %s, %s. Software: WORKLOAD+VERSION, COMPILER, LIBRARIES, OTHER_SW. Test by Intel as of %s." - var date, socketCount, cpuModel, coreCount, htOnOff, turboOnOff, numaNodes, installedMem, biosVersion, uCodeVersion, nics, disks, operatingSystem, kernelVersion string + // BASELINE: 1-node, 2x Intel® Xeon® , xx cores, 100W TDP, HT On/Off?, Turbo On/Off?, NUMA xxx, Integrated Accelerators Available [used]: xxx, Total Memory xxx GB (xx slots/ xx GB/ xxxx MHz [run @ xxxx MHz] ), , , , . Software: WORKLOAD+VERSION, COMPILER, LIBRARIES, OTHER_SW. 
Test by Intel as of . + template := "1-node, %sx %s, %s cores, %s TDP, HT %s, Turbo %s, NUMA %s, Integrated Accelerators Available [used]: %s, Total Memory %s, BIOS %s, microcode %s, %s, %s, %s, %s. Software: WORKLOAD+VERSION, COMPILER, LIBRARIES, OTHER_SW. Test by Intel as of %s." + var date, socketCount, cpuModel, coreCount, tdp, htOnOff, turboOnOff, numaNodes, installedMem, biosVersion, uCodeVersion, nics, disks, operatingSystem, kernelVersion string for sourceIdx, source := range fullReport.Sources { var hostValues = HostValues{ @@ -56,6 +56,10 @@ func newMarketingClaimTable(fullReport *Report, tableNicSummary *Table, tableDis socketCount, _ = fullReport.findTable("CPU").getValue(sourceIdx, "Sockets") cpuModel, _ = fullReport.findTable("CPU").getValue(sourceIdx, "CPU Model") coreCount, _ = fullReport.findTable("CPU").getValue(sourceIdx, "Cores per Socket") + tdp, _ = fullReport.findTable("Power").getValue(sourceIdx, "TDP") + if tdp == "" { + tdp = "?" + } hyperthreading, _ := fullReport.findTable("CPU").getValue(sourceIdx, "Hyperthreading") if hyperthreading == "Enabled" { htOnOff = "On" @@ -83,7 +87,7 @@ func newMarketingClaimTable(fullReport *Report, tableNicSummary *Table, tableDis disks, _ = tableDiskSummary.getValue(sourceIdx, "Disk") operatingSystem, _ = fullReport.findTable("Operating System").getValue(sourceIdx, "OS") kernelVersion, _ = fullReport.findTable("Operating System").getValue(sourceIdx, "Kernel") - claim := fmt.Sprintf(template, socketCount, cpuModel, coreCount, htOnOff, turboOnOff, numaNodes, accelerators, installedMem, biosVersion, uCodeVersion, nics, disks, operatingSystem, kernelVersion, date) + claim := fmt.Sprintf(template, socketCount, cpuModel, coreCount, tdp, htOnOff, turboOnOff, numaNodes, accelerators, installedMem, biosVersion, uCodeVersion, nics, disks, operatingSystem, kernelVersion, date) hostValues.Values = append(hostValues.Values, []string{claim}) table.AllHostValues = append(table.AllHostValues, hostValues) } @@ -1014,6 
+1018,55 @@ func newPowerTable(sources []*Source, category TableCategory) (table *Table) { return } +// newEfficiencyLatencyControlTable builds the full "Efficiency Latency Control" table: one +// HostValues entry per source, populated from source.getEfficiencyLatencyControl(). +func newEfficiencyLatencyControlTable(sources []*Source, category TableCategory) (table *Table) { + table = &Table{ + Name: "Efficiency Latency Control", + Category: category, + AllHostValues: []HostValues{}, + } + for _, source := range sources { + var hostValues = HostValues{ + Name: source.getHostname(), + } + hostValues.ValueNames, hostValues.Values = source.getEfficiencyLatencyControl() + table.AllHostValues = append(table.AllHostValues, hostValues) + } + return +} + +// newEfficiencyLatencyControlSummaryTable condenses the full table (tableELC, must be non-nil) +// into one "ELC Mode" value per host: the non-empty column-9 entries of that host's rows joined +// with ", ", or "N/A" when there are none. +func newEfficiencyLatencyControlSummaryTable(tableELC *Table, category TableCategory) (table *Table) { + table = &Table{ + Name: "Efficiency Latency Control", + Category: category, + AllHostValues: []HostValues{}, + } + for _, srcHv := range tableELC.AllHostValues { + var hostValues = HostValues{ + Name: srcHv.Name, + ValueNames: []string{"ELC Mode"}, + } + var modes []string + for _, row := range srcHv.Values { + // column 9 carries the mode; the length guard skips short rows instead of panicking + if len(row) > 9 && row[9] != "" { + modes = append(modes, row[9]) + } + } + hostValues.Values = make([][]string, 1) + hostValues.Values[0] = append(hostValues.Values[0], strings.Join(modes, ", ")) + if hostValues.Values[0][0] == "" { + hostValues.Values[0][0] = "N/A" + } + table.AllHostValues = append(table.AllHostValues, hostValues) + } + return +} + func newGPUTable(sources []*Source, category TableCategory) (table *Table) { table = &Table{ Name: "GPU", @@ -1442,40 +1495,49 @@ func newDiskTable(sources []*Source, category TableCategory) (table *Table) { Category: category, AllHostValues: []HostValues{}, } + var infoFields = []string{"NAME", "MODEL", "SIZE", "MOUNTPOINT", "FSTYPE", "RQ-SIZE", "MIN-IO", "FIRMWARE", "ADDR", "NUMA", "LINKSPEED", "LINKWIDTH", "MAXLINKSPEED", "MAXLINKWIDTH"} for _, source := range sources { var hostValues = HostValues{ Name: source.getHostname(), ValueNames: []string{ - "NAME", - "MODEL", - "SIZE", - "MOUNTPOINT", - "FSTYPE", - "RQ-SIZE", - "MIN-IO", - "FwRev", + "Name", +
"Model", + "Size", + "Mount Point", + "Type", + "Request Queue Size", + "Minimum I/O Size", + "Firmware Version", + "PCIe Address", + "NUMA Node", + "Link Speed", + "Link Width", + "Max Link Speed", + "Max Link Width", }, Values: [][]string{}, } - for i, line := range source.getCommandOutputLines("lsblk -r -o") { - fields := strings.Split(line, " ") - if len(fields) != len(hostValues.ValueNames)-1 { - log.Printf("lsblk field count mismatch: %s", strings.Join(fields, ",")) + for i, line := range source.getCommandOutputLines("disk info") { + fields := strings.Split(line, "|") + if len(fields) != len(infoFields) { + log.Printf("field count mismatch: %s", strings.Join(fields, ",")) continue } if i == 0 { // headers are in the first line for idx, field := range fields { - if field != hostValues.ValueNames[idx] { - log.Printf("lsblk field name mismatch: %s", strings.Join(fields, ",")) + if field != infoFields[idx] { + log.Printf("field name mismatch: %s", strings.Join(fields, ",")) break } } continue } // clean up the model name - fields[1] = strings.ReplaceAll(fields[1], `\x20`, " ") fields[1] = strings.TrimSpace(fields[1]) - fields = append(fields, source.getDiskFwRev(fields[0])) + // if we don't have a firmware version, try to get it from another source + if fields[7] == "" { + fields[7] = source.getDiskFwRev(fields[0]) + } hostValues.Values = append(hostValues.Values, fields) } table.AllHostValues = append(table.AllHostValues, hostValues) @@ -1601,6 +1657,7 @@ func newPMUTable(sources []*Source, category TableCategory) (table *Table) { var hostValues = HostValues{ Name: source.getHostname(), ValueNames: []string{ + "PMU Driver Version", "cpu_cycles", "instructions", "ref_cycles", @@ -1616,12 +1673,13 @@ func newPMUTable(sources []*Source, category TableCategory) (table *Table) { }, Values: [][]string{}, } - lines := source.getCommandOutputLines("msrbusy") var vals []string + vals = append(vals, source.getCommandOutputLine("pmu driver version")) + lines := 
source.getCommandOutputLines("msrbusy") if len(lines) == 2 { - vals = strings.Split(lines[1], "|") + vals = append(vals, strings.Split(lines[1], "|")...) } else { - for range hostValues.ValueNames { + for i := 0; i < len(hostValues.ValueNames)-1; i++ { vals = append(vals, "") } } diff --git a/cmd/reporter/resources/insights.grl b/cmd/reporter/resources/insights.grl index 508ad62..eec6e68 100644 --- a/cmd/reporter/resources/insights.grl +++ b/cmd/reporter/resources/insights.grl @@ -22,7 +22,8 @@ rule DIMMSpeed { (Report.GetValue("Configuration", "CPU", "Microarchitecture") == "CLX" && Report.GetValueAsInt("Configuration", "DIMM", "Speed") < 2933) || (Report.GetValue("Configuration", "CPU", "Microarchitecture") == "ICX" && Report.GetValueAsInt("Configuration", "DIMM", "Speed") < 3200) || (Report.GetValue("Configuration", "CPU", "Microarchitecture").Contains("SPR") && Report.GetValueAsInt("Configuration", "DIMM", "Speed") < 4800) || - (Report.GetValue("Configuration", "CPU", "Microarchitecture").Contains("EMR") && Report.GetValueAsInt("Configuration", "DIMM", "Speed") < 5600) + (Report.GetValue("Configuration", "CPU", "Microarchitecture").Contains("EMR") && Report.GetValueAsInt("Configuration", "DIMM", "Speed") < 5600) || + (Report.GetValue("Configuration", "CPU", "Microarchitecture").Contains("SRF") && Report.GetValueAsInt("Configuration", "DIMM", "Speed") < 6400) ) then Report.AddInsight( @@ -120,6 +121,18 @@ rule FrequencyGovernor { Retract("FrequencyGovernor"); } +rule ELCMode { + when + Report.GetValuesFromColumn("Configuration", "Efficiency Latency Control", 9).Count("Default") != 0 || + Report.GetValuesFromColumn("Configuration", "Efficiency Latency Control", 9).Count("Custom") != 0 + then + Report.AddInsight( + "Efficiency Latency Control mode is not set to 'Latency Optimized' on all IO dies.", + "Consider setting the Efficiency Latency Control mode to 'Latency Optimized'." 
+ ); + Retract("ELCMode"); +} + rule TurboBoost { when Report.GetValue("Configuration", "CPU", "Intel Turbo Boost") != "" && diff --git a/cmd/reporter/source.go b/cmd/reporter/source.go index cbadef2..a971335 100644 --- a/cmd/reporter/source.go +++ b/cmd/reporter/source.go @@ -1013,7 +1013,7 @@ func (s *Source) getTDP() (val string) { msrHex := s.getCommandOutputLine("rdmsr 0x610") msr, err := strconv.ParseInt(msrHex, 16, 0) if err == nil && msr != 0 { - val = fmt.Sprint(msr/8) + " watts" + val = fmt.Sprint(msr/8) + "W" } return } @@ -1246,3 +1246,40 @@ func (s *Source) getPMUMetrics() (orderedMetricNames []string, timeStamps []floa } return } + +func (s *Source) getEfficiencyLatencyControl() (valueNames []string, values [][]string) { + output := strings.Join(s.getCommandOutputLines("efficiency latency control"), "\n") + if output == "" { + return + } + r := csv.NewReader(strings.NewReader(output)) + rows, err := r.ReadAll() + if err != nil { + log.Printf("failed to read ELC CSV") + return + } + if len(rows) < 2 { + log.Printf("no ELC data found") + return + } + // first row is headers / valueNames + valueNames = rows[0] + // 2nd-nth rows are values + values = rows[1:] + // let's add an interpretation of the values in an additional column + valueNames = append(valueNames, "Mode") + for i, row := range values { + var mode string + if row[2] == "IO" { + if row[5] == "0" && row[6] == "0" && row[7] == "0" { + mode = "Latency Optimized" + } else if row[5] == "800" && row[6] == "10" && row[7] == "94" { + mode = "Default" + } else { + mode = "Custom" + } + } + values[i] = append(values[i], mode) + } + return +} diff --git a/go.mod b/go.mod index 60ea59f..8c12606 100644 --- a/go.mod +++ b/go.mod @@ -30,8 +30,8 @@ require ( github.com/intel/svr-info/internal/util v0.0.0-00010101000000-000000000000 github.com/xuri/excelize/v2 v2.8.1 golang.org/x/exp v0.0.0-20240409090435-93d18d7e34b8 - golang.org/x/term v0.19.0 - golang.org/x/text v0.14.0 + golang.org/x/term v0.21.0 + 
golang.org/x/text v0.16.0 gopkg.in/yaml.v2 v2.4.0 ) @@ -64,11 +64,11 @@ require ( github.com/xuri/nfp v0.0.0-20240318013403-ab9948c2c4a7 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - golang.org/x/crypto v0.22.0 // indirect + golang.org/x/crypto v0.23.0 // indirect golang.org/x/mod v0.17.0 // indirect - golang.org/x/net v0.24.0 // indirect + golang.org/x/net v0.25.0 // indirect golang.org/x/sync v0.7.0 // indirect - golang.org/x/sys v0.19.0 // indirect - golang.org/x/tools v0.20.0 // indirect + golang.org/x/sys v0.21.0 // indirect + golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect gopkg.in/warnings.v0 v0.1.2 // indirect ) diff --git a/go.sum b/go.sum index 388c583..b76bf38 100644 --- a/go.sum +++ b/go.sum @@ -40,7 +40,6 @@ github.com/go-git/go-billy/v5 v5.5.0 h1:yEY4yhzCDuMGSv83oGxiBotRzhwhNr8VZyphhiu+ github.com/go-git/go-billy/v5 v5.5.0/go.mod h1:hmexnoNsr2SJU1Ju67OaNz5ASJY3+sHgFRpCtpDCKow= github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399 h1:eMje31YglSBqCdIqdhKBW8lokaMrL3uTkpGYlE2OOT4= github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399/go.mod h1:1OCfN199q1Jm3HZlxleg+Dw/mwps2Wbk9frAWm+4FII= -github.com/go-git/go-git/v5 v5.11.0/go.mod h1:6GFcX2P3NM7FPBfpePbpLd21XxsgdAt+lKqXmCUiUCY= github.com/go-git/go-git/v5 v5.12.0 h1:7Md+ndsjrzZxbddRDZjF14qK+NN56sy6wkqaVrjZtys= github.com/go-git/go-git/v5 v5.12.0/go.mod h1:FTM9VKtnI2m65hNI/TenDDDnUf2Q9FHnXYjuz9i5OEY= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= @@ -79,8 +78,6 @@ github.com/richardlehane/msoleps v1.0.3 h1:aznSZzrwYRl3rLKRT3gUk9am7T/mLNSnJINvN github.com/richardlehane/msoleps v1.0.3/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= 
-github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8= -github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= @@ -114,8 +111,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.3.1-0.20221117191849-2c476679df9a/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/crypto v0.7.0/go.mod h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU= -golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= -golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= +golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/exp v0.0.0-20240409090435-93d18d7e34b8 h1:ESSUROHIBHg7USnszlcdmjBEwdMj9VUvU+OPk4yl2mc= golang.org/x/exp v0.0.0-20240409090435-93d18d7e34b8/go.mod h1:/lliqkxwWAhPjf5oSOIJup2XcqJaw8RGS6k3TGEc7GI= golang.org/x/image v0.14.0 h1:tNgSxAFe3jC4uYqvZdTr84SZoM1KfwdC9SKIFrLjFn4= @@ -131,8 +128,8 @@ golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= -golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= -golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= 
+golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -151,15 +148,15 @@ golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= -golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= +golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= -golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q= -golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= +golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= +golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod 
h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= @@ -167,14 +164,14 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.20.0 h1:hz/CVckiOxybQvFw6h7b/q80NTr9IUQb4s1IIzW7KNY= -golang.org/x/tools v0.20.0/go.mod h1:WvitBU7JJf6A4jOdg4S1tviW9bhUxkgeCui/0JHctQg= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/internal/progress/go.mod b/internal/progress/go.mod index 99b9e35..9fd395c 100644 --- a/internal/progress/go.mod +++ 
b/internal/progress/go.mod @@ -2,6 +2,6 @@ module github.com/svr-info/internal/progress go 1.22 -require golang.org/x/term v0.19.0 +require golang.org/x/term v0.21.0 -require golang.org/x/sys v0.19.0 // indirect +require golang.org/x/sys v0.21.0 // indirect diff --git a/internal/progress/go.sum b/internal/progress/go.sum index 54492a6..6208220 100644 --- a/internal/progress/go.sum +++ b/internal/progress/go.sum @@ -1,4 +1,4 @@ -golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= -golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q= -golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= +golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= +golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= +golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= diff --git a/third_party/build.Dockerfile b/third_party/build.Dockerfile index 7ad9c56..0c72d59 100644 --- a/third_party/build.Dockerfile +++ b/third_party/build.Dockerfile @@ -4,7 +4,7 @@ # build image (third_party directory): # $ GITHUB_ACCESS_TOKEN= # $ docker image build -f build.Dockerfile --tag svr-info-third-party:v1 . 
-FROM ubuntu:16.04 as builder +FROM ubuntu:18.04 as builder ENV LANG en_US.UTF-8 ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y apt-utils locales wget curl git netcat-openbsd software-properties-common jq zip unzip @@ -12,7 +12,7 @@ RUN locale-gen en_US.UTF-8 && echo "LANG=en_US.UTF-8" > /etc/default/locale RUN add-apt-repository ppa:git-core/ppa -y RUN apt-get update && apt-get install -y git build-essential autotools-dev automake \ gawk zlib1g-dev libtool libaio-dev libaio1 pandoc pkgconf libcap-dev docbook-utils \ - libreadline-dev default-jre default-jdk cmake flex bison + libreadline-dev default-jre default-jdk cmake flex bison libssl-dev ENV JAVA_HOME=/usr/lib/jvm/java-1.11.0-openjdk-amd64 diff --git a/version.txt b/version.txt index f161b5d..ed0edc8 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -2.10.0 \ No newline at end of file +2.11.0 \ No newline at end of file