From acc456ed9048e551cf2dc4c7f5795ea98d8eef47 Mon Sep 17 00:00:00 2001 From: hilldani Date: Fri, 19 May 2023 10:11:03 -0700 Subject: [PATCH] expanded TMA, updated html, similarity analysis, older kernel uncore detection (#32) --- Makefile | 1 + README.md | 19 +- _version.txt | 2 +- events/clx_skx.txt | 49 +- events/icx.txt | 236 +++++---- events/metric_icx.json | 48 +- events/metric_skx_clx.json | 755 ++++++++++++++------------- events/metric_spr.json | 37 +- events/spr.txt | 117 +++-- perf-collect.py | 19 +- perf-postprocess.py | 91 +++- similarity-analyzer/dopca.py | 696 +++++++++++++----------- similarity-analyzer/requirements.txt | 2 + src/base.html | 515 ++++++++++++++++++ src/basic_stats.py | 228 -------- src/icicle.py | 112 ---- src/perf_helpers.py | 2 + src/report.py | 55 -- 18 files changed, 1686 insertions(+), 1298 deletions(-) create mode 100644 src/base.html delete mode 100644 src/basic_stats.py delete mode 100644 src/icicle.py delete mode 100644 src/report.py diff --git a/Makefile b/Makefile index b32c931..1ba0b43 100644 --- a/Makefile +++ b/Makefile @@ -46,6 +46,7 @@ build-public/postprocess: --add-data "./events/metric_bdx.json:." \ --add-data "./events/metric_icx.json:." \ --add-data "./events/metric_spr.json:." \ + --add-data "./src/base.html:." \ --runtime-tmpdir . \ --exclude-module readline cp $(TMPDIR)/dist/perf-postprocess build/ diff --git a/README.md b/README.md index 9f6811d..ad10f85 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # PerfSpect · [![Build](https://github.com/intel/PerfSpect/actions/workflows/build.yml/badge.svg)](https://github.com/intel/PerfSpect/actions/workflows/build.yml)[![License](https://img.shields.io/badge/License-BSD--3-blue)](https://github.com/intel/PerfSpect/blob/master/LICENSE) -[Quick Start](#quick-start-requires-perf-installed) | [Requirements](#requirements) | [Build from source](#build-from-source) | [Caveats](#caveats) | [How to contribute](#how-to-contribute) +[Quick Start](#quick-start-requires-perf-installed) | [Output](#output) | [Requirements](#requirements) | [Build from source](#build-from-source) | [Caveats](#caveats) | [How to contribute](#how-to-contribute) PerfSpect is a system performance characterization tool built on top of linux perf. It contains two parts @@ -30,13 +30,22 @@ sudo ./perf-collect --timeout 10 ./perf-postprocess ``` +## Output + +perf-collect outputs: +1. `perfstat.csv`: raw event counts with system metadata + +perf-postprocess outputs: +1. `metric_out.sys.average.csv`: average metrics +2. `metric_out.sys.csv`: metric values at every interval +3. `metric_out.html`: html view of a few select metrics + +![basic_stats](https://raw.githubusercontent.com/wiki/intel/PerfSpect/newhtml.gif) + ## Deploy in Kubernetes Modify the template [deamonset.yml](docs/daemonset.yml) to deploy in kubernetes -![basic_stats](https://raw.githubusercontent.com/wiki/intel/PerfSpect/basic_stats.JPG) -![perfspect-demo1](https://raw.githubusercontent.com/wiki/intel/PerfSpect/demo.gif) - ## Requirements ### Packages: @@ -75,8 +84,6 @@ make ## Caveats 1. The tool can collect only the counters supported by underlying linux perf version. -2. If you run into locale issues - `UnicodeDecodeError: 'ascii' codec can't decode byte 0xc2 in position 4519: ordinal not in range(128)`, more than likely the locales needs to be set appropriately. You could also try running post-process step with `LC_ALL=C.UTF-8 LANG=C.UTF-8 ./perf-postprocess -r result.csv` -3. The html report creation is not yet supported for cid collection. ## How to contribute diff --git a/_version.txt b/_version.txt index 9d4f823..963ed7c 100644 --- a/_version.txt +++ b/_version.txt @@ -1 +1 @@ -1.2.9 +1.2.10 diff --git a/events/clx_skx.txt b/events/clx_skx.txt index 59f51d6..b4d8bee 100644 --- a/events/clx_skx.txt +++ b/events/clx_skx.txt @@ -35,11 +35,25 @@ cstate_core/c6-residency/; cstate_pkg/c6-residency/; cpu/event=0xb0,umask=0x10,period=100003,name='OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD'/, -cpu/event=0xa8,umask=0x01,period=2000003,name='LSD.UOPS'/, cpu-cycles, ref-cycles, instructions; +cpu/event=0xd1,umask=0x40,period=100007,name='MEM_LOAD_RETIRED.FB_HIT'/, +cpu/event=0xd1,umask=0x08,period=100003,name='MEM_LOAD_RETIRED.L1_MISS'/, +cpu/event=0x48,umask=0x02,cmask=0x01,period=2000003,name='L1D_PEND_MISS.FB_FULL:c1'/, +cpu/event=0xa3,umask=0x04,cmask=0x04,period=2000003,name='CYCLE_ACTIVITY.STALLS_TOTAL'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xa3,umask=0x10,cmask=0x16,period=2000003,name='CYCLE_ACTIVITY.CYCLES_MEM_ANY'/, +cpu/event=0xa3,umask=0x08,cmask=0x08,period=2000003,name='CYCLE_ACTIVITY.CYCLES_L1D_MISS'/, +cpu-cycles, +ref-cycles, +instructions; + + #avx related power levels cpu/event=0x28,umask=0x07,period=200003,name='CORE_POWER.LVL0_TURBO_LICENSE'/, cpu/event=0x28,umask=0x18,period=200003,name='CORE_POWER.LVL1_TURBO_LICENSE'/, @@ -48,9 +62,10 @@ cpu/event=0x0e,umask=0x01,period=2000003,name='UOPS_ISSUED.ANY'/; cpu/event=0x3c,umask=0x0,period=2000003,name='CPU_CLK_UNHALTED.THREAD_ANY'/, cpu/event=0x9c,umask=0x01,period=2000003,name='IDQ_UOPS_NOT_DELIVERED.CORE'/, -cpu/event=0xc2,umask=0x02,period=2000003,name='UOPS_RETIRED.RETIRE_SLOTS'/, +cpu/event=0xc2,umask=0x02,period=2000003,name='UOPS_RETIRED.RETIRE_SLOTS'/; #INT_MISC.RECOVERY_CYCLES_ANY -cpu/event=0x0d,umask=0x01,period=2000003,name='INT_MISC.RECOVERY_CYCLES_ANY'/; +cpu/event=0x0d,umask=0x01,period=2000003,name='INT_MISC.RECOVERY_CYCLES_ANY'/, +cpu/event=0x0d,umask=0x01,period=2000003,name='INT_MISC.RECOVERY_CYCLES'/; cpu/event=0x79,umask=0x30,period=2000003,name='IDQ.MS_UOPS'/, cpu/event=0x60,umask=0x10,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD'/, @@ -67,7 +82,7 @@ cpu-cycles; cpu/event=0xa3,umask=0x14,cmask=0x14,period=2000003,name='CYCLE_ACTIVITY.STALLS_MEM_ANY'/, cpu/event=0xa3,umask=0x0c,cmask=0x0c,period=2000003,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/, -cpu/event=0x08,umask=0x20,period=2000003,name='DTLB_LOAD_MISSES.STLB_HIT'/, +cpu/event=0x08,umask=0x20,cmask=0x01,period=2000003,name='DTLB_LOAD_MISSES.STLB_HIT:c1'/, cpu/event=0x08,umask=0x10,cmask=0x01,period=100003,name='DTLB_LOAD_MISSES.WALK_ACTIVE'/, cpu-cycles; @@ -90,18 +105,17 @@ cpu/event=0x60,umask=0x10,cmask=0x01,period=2000003,name='OFFCORE_REQUESTS_OUTST cpu/event=0x49,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, cpu-cycles; -cpu/event=0x60,umask=0x01,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD'/, cpu/event=0x60,umask=0x01,cmask=0x01,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD'/, cpu-cycles; cpu/event=0x80,umask=0x4,name='ICACHE_16B.IFDATA_STALL'/, -cpu/event=0x80,umask=0x4,cmask=0x1,edge=0x1,name='ICACHE_16B_c1_e1_IFDATA_STALL'/, +cpu/event=0x80,umask=0x4,cmask=0x1,edge=0x1,name='ICACHE_16B.IFDATA_STALL:c1:e1'/, cpu/event=0x85,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, cpu/event=0x85,umask=0x10,period=100003,name='ITLB_MISSES.WALK_ACTIVE'/, instructions, cpu-cycles; -cpu/event=0x49,umask=0x20,period=100003,name='DTLB_STORE_MISSES.STLB_HIT'/, +cpu/event=0x49,umask=0x20,cmask=0x01,period=100003,name='DTLB_STORE_MISSES.STLB_HIT:c1'/, cpu/event=0x49,umask=0x10,period=100003,name='DTLB_STORE_MISSES.WALK_ACTIVE'/, cpu/event=0x14,umask=0x01,period=2000003,name='ARITH.DIVIDER_ACTIVE'/, cpu/event=0xb1,umask=0x02,inv=0x1,cmask=0x1,period=2000003,name='UOPS_EXECUTED.CORE_CYCLES_NONE'/, @@ -111,7 +125,6 @@ cpu-cycles; cpu/event=0xb1,umask=0x02,cmask=0x1,period=2000003,name='UOPS_EXECUTED.CORE_CYCLES_GE_1'/, cpu/event=0xb1,umask=0x02,cmask=0x2,period=2000003,name='UOPS_EXECUTED.CORE_CYCLES_GE_2'/, cpu/event=0xb1,umask=0x02,cmask=0x3,period=2000003,name='UOPS_EXECUTED.CORE_CYCLES_GE_3'/, -cpu/event=0xb1,umask=0x02,cmask=0x4,period=2000003,name='UOPS_EXECUTED.CORE_CYCLES_GE_4'/, cpu-cycles; cpu/event=0x3c,umask=0x2,name='CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE'/, @@ -129,11 +142,6 @@ cpu/event=0xb7,umask=0x01,offcore_rsp=0x3FB80007F7,name='OCR.ALL_READS.L3_MISS_L imc/event=0x04,umask=0x03,name='UNC_M_CAS_COUNT.RD'/, imc/event=0x04,umask=0x0c,name='UNC_M_CAS_COUNT.WR'/; -#UNC_M_RPQ_INSERTS/OCCUPANCY -imc/event=0x10,umask=0x0,name='UNC_M_RPQ_INSERTS'/, -imc/event=0x80,umask=0x0,name='UNC_M_RPQ_OCCUPANCY'/, -imc/event=0,umask=0,name='UNC_M_CLOCKTICKS'/; - #demand reads local and remote collected separately cha/event=0x35,umask=0x21,config1=0x4043200000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x40432'/, cha/event=0x36,umask=0x21,config1=0x4043200000000,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40432'/; @@ -141,16 +149,10 @@ cha/event=0x35,umask=0x21,config1=0x4043100000000,name='UNC_CHA_TOR_INSERTS.IA_M cha/event=0x36,umask=0x21,config1=0x4043100000000,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40431'/; #UNC_CHA_TOR_INSERTS.IA_MISS_CRD,UNC_CHA_TOR_OCCUPANCY.IA_MISS_CRD -cha/event=0x35,umask=0x21,config1=0x12CC023300000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233'/, -cha/event=0x36,umask=0x21,config1=0x12CC023300000000,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x12CC0233'/; +cha/event=0x35,umask=0x21,config1=0x12CC023300000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233'/; #UNC_CHA_TOR_INSERTS.IA_MISS_RFO,UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO -cha/event=0x35,umask=0x21,config1=0x12C4003300000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x12C40033'/, -cha/event=0x36,umask=0x21,config1=0x12C4003300000000,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x12C40033'/; - -#UNC_CHA_TOR_INSERTS.IA_HIT_DRD,UNC_CHA_TOR_OCCUPANCY.IA_HIT_DRD -cha/event=0x35,umask=0x11,config1=0x4043300000000,name='UNC_CHA_TOR_INSERTS.IA_HIT.0x40433'/, -cha/event=0x36,umask=0x11,config1=0x4043300000000,name='UNC_CHA_TOR_OCCUPANCY.IA_HIT.0x40433'/; +cha/event=0x35,umask=0x21,config1=0x12C4003300000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x12C40033'/; #UNC_CHA_TOR_INSERTS.IA_MISS_DEMAND_RD,UNC_CHA_TOR_OCCUPANCY.IA_MISS_DEMAND_RD (demand data only - both local and remote) cha/event=0x35,umask=0x21,config1=0x4043300000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x40433'/, @@ -158,9 +160,7 @@ cha/event=0x36,umask=0x21,config1=0x4043300000000,name='UNC_CHA_TOR_OCCUPANCY.IA cha/event=0x0,umask=0x0,name='UNC_CHA_CLOCKTICKS'/; #UNC_CHA_TOR_INSERTS.IA_MISS_DRD,UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD -cha/event=0x35,umask=0x21,config1=0x12D4043300000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433'/, -cha/event=0x36,umask=0x21,config1=0x12D4043300000000,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x12D40433'/; -#cha/event=0xa5,umask=0x02,name='UNC_CHA_FAST_ASSERTED.HORZ'/; +cha/event=0x35,umask=0x21,config1=0x12D4043300000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433'/; #IO bandwidth iio/event=0x83,umask=0x04,ch_mask=0x00,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0'/, @@ -180,7 +180,6 @@ upi/event=0x1,umask=0x0,name='UNC_UPI_CLOCKTICKS'/; upi/event=0x21,umask=0x0,name='UNC_UPI_L1_POWER_CYCLES'/; - #power related power/energy-pkg/, power/energy-ram/; diff --git a/events/icx.txt b/events/icx.txt index 822ff50..87afb61 100644 --- a/events/icx.txt +++ b/events/icx.txt @@ -4,67 +4,27 @@ ########################################################################################################### # Icelake event list (default) - cpu/event=0x51,umask=0x01,period=100003,name='L1D.REPLACEMENT'/, -cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, cpu/event=0xd1,umask=0x01,period=1000003,name='MEM_LOAD_RETIRED.L1_HIT'/, -cpu/event=0xd1,umask=0x02,period=200003,name='MEM_LOAD_RETIRED.L2_HIT'/, +cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, +cpu/event=0xe6,umask=0x01,period=100003,name='BACLEARS.ANY'/, +cpu/event=0x0d,umask=0x80,period=500009,name='INT_MISC.CLEAR_RESTEER_CYCLES'/, +cpu/event=0xc3,umask=0x01,cmask=0x01,edge=0x01,period=100003,name='MACHINE_CLEARS.COUNT'/, +cpu/event=0xc5,umask=0x00,period=50021,name='BR_MISP_RETIRED.ALL_BRANCHES'/, +cpu/event=0xb1,umask=0x01,cmask=0x03,period=2000003,name='UOPS_EXECUTED.CYCLES_GE_3'/, cpu-cycles, ref-cycles, instructions; -cpu/event=0xd1,umask=0x40,period=100007,name='MEM_LOAD_RETIRED.FB_HIT'/, -cpu/event=0xd1,umask=0x08,period=200003,name='MEM_LOAD_RETIRED.L1_MISS'/, -cpu/event=0xd1,umask=0x04,period=100021,name='MEM_LOAD_RETIRED.L3_HIT'/, -cpu/event=0x48,umask=0x02,period=1000003,name='L1D_PEND_MISS.FB_FULL_PERIODS'/, -cpu-cycles, -ref-cycles; - -cpu/event=0xd2,umask=0x04,period=20011,name='MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD'/, -cpu/event=0xd2,umask=0x02,period=20011,name='MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD'/, -cpu/event=0xd2,umask=0x01,period=20011,name='MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS'/, -cpu/event=0x48,umask=0x04,period=1000003,name='L1D_PEND_MISS.L2_STALL'/, -cpu-cycles; - -cpu/event=0xa3,umask=0x0C,cmask=0x0C,period=1000003,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/, -cpu/event=0xa3,umask=0x05,cmask=0x05,period=1000003,name='CYCLE_ACTIVITY.STALLS_L2_MISS'/, -cpu/event=0xa3,umask=0x04,cmask=0x04,period=1000003,name='CYCLE_ACTIVITY.STALLS_TOTAL'/, -cpu/event=0xa3,umask=0x06,cmask=0x06,period=1000003,name='CYCLE_ACTIVITY.STALLS_L3_MISS'/, -cpu-cycles; - -cpu/event=0xa3,umask=0x08,cmask=0x08,period=1000003,name='CYCLE_ACTIVITY.CYCLES_L1D_MISS'/, -cpu/event=0xa3,umask=0x10,cmask=0x10,period=1000003,name='CYCLE_ACTIVITY.CYCLES_MEM_ANY'/, -cpu/event=0x03,umask=0x02,period=100003,name='LD_BLOCKS.STORE_FORWARD'/, -cpu-cycles; - - cpu/event=0xf1,umask=0x1f,period=100003,name='L2_LINES_IN.ALL'/, cpu/event=0xd1,umask=0x10,period=100021,name='MEM_LOAD_RETIRED.L2_MISS'/, -cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/, -cpu/event=0x85,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, +cpu/event=0x60,umask=0x10,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD'/, +cpu/event=0xb0,umask=0x10,period=100003,name='OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD'/, cpu-cycles, ref-cycles, instructions; -cpu/event=0x80,umask=0x4,period=500009,name='ICACHE_16B.IFDATA_STALL'/, -cpu/event=0x80,umask=0x4,cmask=0x01,edge=0x01,period=500009,name='ICACHE_16B_c1_e1_IFDATA_STALL'/, -cpu/event=0x85,umask=0x10,period=100003,name='ITLB_MISSES.WALK_ACTIVE'/, -instructions, -cpu-cycles; - -cpu/event=0x08,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, -cpu/event=0x08,umask=0x04,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, -cpu/event=0x49,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, -cpu-cycles:k, -ref-cycles:k, -instructions:k; - -#C6 -cstate_core/c6-residency/; -cstate_pkg/c6-residency/; - -cpu/event=0xa8,umask=0x01,period=2000003,name='LSD.UOPS'/, -#avx related power levels +cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/, cpu/event=0x28,umask=0x07,period=200003,name='CORE_POWER.LVL0_TURBO_LICENSE'/, cpu/event=0x28,umask=0x18,period=200003,name='CORE_POWER.LVL1_TURBO_LICENSE'/, cpu/event=0x28,umask=0x20,period=200003,name='CORE_POWER.LVL2_TURBO_LICENSE'/, @@ -72,23 +32,31 @@ cpu-cycles, ref-cycles, instructions; -cpu/event=0x9c,umask=0x01,period=1000003,name='IDQ_UOPS_NOT_DELIVERED.CORE'/, -cpu/event=0x9c,umask=0x01,cmask=0x05,period=1000003,name='IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE'/, -cpu/event=0xd2,umask=0x02,period=1000003,name='MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT'/, -cpu/event=0xd2,umask=0x04,period=1000003,name='MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM'/, -cpu-cycles; +# OCR group 1 (ICX PMU supports a maximum of two OCR counters per group) +cpu/event=0xb7,umask=0x01,offcore_rsp=0x104000477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/, +cpu/event=0x85,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, +cpu/event=0x08,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, +cpu-cycles, +ref-cycles, +instructions; -cpu/event=0x60,umask=0x10,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD'/, -cpu/event=0xb0,umask=0x10,period=100003,name='OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD'/, -cpu/event=0x0e,umask=0x01,period=2000003,name='UOPS_ISSUED.ANY'/, -cpu/event=0x83,umask=0x04,period=200003,name='ICACHE_64B.IFTAG_STALL'/, +# OCR group 2 (ICX PMU supports a maximum of two OCR counters per group) +cpu/event=0xb7,umask=0x01,offcore_rsp=0x730000477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/, +cpu/event=0x08,umask=0x04,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, +cpu/event=0x49,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, cpu-cycles, ref-cycles, instructions; -cpu/event=0x60,umask=0x08,cmask=0x01,period=1000003,name='OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD'/, -cpu/event=0x60,umask=0x08,cmask=0x04,period=1000003,name='OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD:c4'/, -cpu-cycles; +cpu/event=0xb7,umask=0x01,offcore_rsp=0x1030000477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x830000477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/, +cpu/event=0x80,umask=0x4,period=500009,name='ICACHE_16B.IFDATA_STALL'/, +cpu/event=0x83,umask=0x04,period=200003,name='ICACHE_64B.IFTAG_STALL'/, +cpu-cycles, +ref-cycles, +instructions; #TMA related cpu/event=0x00,umask=0x04,period=10000003,name='TOPDOWN.SLOTS'/, @@ -96,78 +64,129 @@ cpu/event=0x00,umask=0x81,period=10000003,name='PERF_METRICS.BAD_SPECULATION'/, cpu/event=0x00,umask=0x83,period=10000003,name='PERF_METRICS.BACKEND_BOUND'/, cpu/event=0x00,umask=0x82,period=10000003,name='PERF_METRICS.FRONTEND_BOUND'/, cpu/event=0x00,umask=0x80,period=10000003,name='PERF_METRICS.RETIRING'/, +cpu/event=0x9c,umask=0x01,cmask=0x05,period=1000003,name='IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE'/, cpu/event=0x0d,umask=0x10,period=1000003,name='INT_MISC.UOP_DROPPING'/, -cpu/event=0x0d,umask=0x01,cmask=0x01,edge=0x01,period=500009,name='INT_MISC.RECOVERY_CYCLES:c1:e1'/; - +cpu/event=0x0d,umask=0x01,cmask=0x01,edge=0x01,period=500009,name='INT_MISC.RECOVERY_CYCLES:c1:e1'/, +cpu-cycles, +ref-cycles, +instructions; +cpu/event=0x79,umask=0x04,cmask=0x01,period=2000003,name='IDQ.MITE_CYCLES_ANY'/, +cpu/event=0x79,umask=0x04,cmask=0x05,period=2000003,name='IDQ.MITE_CYCLES_OK'/, +cpu/event=0x79,umask=0x08,cmask=0x01,period=2000003,name='IDQ.DSB_CYCLES_ANY'/, +cpu/event=0x79,umask=0x08,cmask=0x05,period=2000003,name='IDQ.DSB_CYCLES_OK'/, cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/, cpu/event=0xa6,umask=0x80,period=2000003,name='EXE_ACTIVITY.3_PORTS_UTIL:u0x80'/, cpu/event=0xa2,umask=0x02,period=2000003,name='RESOURCE_STALLS.SCOREBOARD'/, +cpu/event=0x14,umask=0x09,cmask=0x01,period=1000003,name='ARITH.DIVIDER_ACTIVE'/, +cpu-cycles, +ref-cycles, instructions; -cpu/event=0x0d,umask=0x80,period=500009,name='INT_MISC.CLEAR_RESTEER_CYCLES'/, -cpu/event=0xe6,umask=0x01,period=100003,name='BACLEARS.ANY'/, -cpu/event=0xc3,umask=0x01,cmask=0x01,edge=0x01,period=100003,name='MACHINE_CLEARS.COUNT'/, -cpu/event=0xc5,umask=0x00,period=50021,name='BR_MISP_RETIRED.ALL_BRANCHES'/, -cpu-cycles; - -cpu/event=0x3c,umask=0x02,period=25003,name='CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE'/, -cpu/event=0x3c,umask=0x08,period=2000003,name='CPU_CLK_UNHALTED.REF_DISTRIBUTED'/, -cpu/event=0x3c,umask=0x01,period=25003,name='CPU_CLK_UNHALTED.REF_XCLK_ANY'/, -cpu/event=0x14,umask=0x09,cmask=0x01,period=1000003,name='ARITH.DIVIDER_ACTIVE'/; +cpu/event=0xa3,umask=0x0C,cmask=0x0C,period=1000003,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/, +cpu/event=0x08,umask=0x20,cmask=0x01,period=100003,name='DTLB_LOAD_MISSES.STLB_HIT:c1'/, +cpu/event=0x08,umask=0x10,cmask=0x01,period=100003,name='DTLB_LOAD_MISSES.WALK_ACTIVE'/, +cpu/event=0xa3,umask=0x14,cmask=0x14,period=2000003,name='CYCLE_ACTIVITY.STALLS_MEM_ANY'/, +cpu/event=0xa6,umask=0x40,cmask=0x02,period=1000003,name='EXE_ACTIVITY.BOUND_ON_STORES'/, +cpu/event=0xa3,umask=0x04,cmask=0x04,period=1000003,name='CYCLE_ACTIVITY.STALLS_TOTAL'/, +cpu/event=0xa6,umask=0x02,period=2000003,name='EXE_ACTIVITY.1_PORTS_UTIL'/, +cpu/event=0xa6,umask=0x04,period=2000003,name='EXE_ACTIVITY.2_PORTS_UTIL'/, +cpu-cycles, +ref-cycles, +instructions; -#offcore response -cpu/event=0xb7,umask=0x01,offcore_rsp=0x10003C0001,name='OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM'/, -cpu/event=0xb7,umask=0x01,offcore_rsp=0x8003C0001,name='OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD'/, +cpu/event=0xd0,umask=0x21,cmask=0x00,period=100007,name='MEM_INST_RETIRED.LOCK_LOADS'/, +cpu/event=0x24,umask=0xe2,cmask=0x00,period=200003,name='L2_RQSTS.ALL_RFO'/, +cpu/event=0xd0,umask=0x82,cmask=0x00,period=1000003,name='MEM_INST_RETIRED.ALL_STORES'/, +cpu/event=0x24,umask=0xc2,cmask=0x00,period=200003,name='L2_RQSTS.RFO_HIT'/, cpu-cycles, -ref-cycles; +ref-cycles, +instructions; -# OCR group 1 (ICX PMU supports a maximum of two OCR counters per group) -cpu/event=0xb7,umask=0x01,offcore_rsp=0x104000477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/, -cpu/event=0xb7,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/; +cpu/event=0xd1,umask=0x02,period=200003,name='MEM_LOAD_RETIRED.L2_HIT'/, +cpu/event=0xd1,umask=0x40,period=100007,name='MEM_LOAD_RETIRED.FB_HIT'/, +cpu/event=0xd1,umask=0x08,period=200003,name='MEM_LOAD_RETIRED.L1_MISS'/, +cpu/event=0x48,umask=0x02,period=1000003,name='L1D_PEND_MISS.FB_FULL_PERIODS'/, +cpu-cycles, +ref-cycles, +instructions; -cpu/event=0xb7,umask=0x01,offcore_rsp=0x1030000477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/, -cpu/event=0xb7,umask=0x01,offcore_rsp=0x830000477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/; +cpu/event=0xa3,umask=0x08,cmask=0x08,period=1000003,name='CYCLE_ACTIVITY.CYCLES_L1D_MISS'/, +cpu/event=0xa3,umask=0x05,cmask=0x05,period=1000003,name='CYCLE_ACTIVITY.STALLS_L2_MISS'/, +cpu/event=0x60,umask=0x08,cmask=0x01,period=1000003,name='OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD'/, +cpu/event=0x60,umask=0x08,cmask=0x04,period=1000003,name='OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD:c4'/, +cpu-cycles, +ref-cycles, +instructions; -# OCR group 2 (ICX PMU supports a maximum of two OCR counters per group) -cpu/event=0xb7,umask=0x01,offcore_rsp=0x730000477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/, -cpu/event=0xb7,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/; +cpu/event=0xb7,umask=0x01,offcore_rsp=0x10003C0001,name='OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x8003C0001,name='OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD'/, +cpu/event=0xd2,umask=0x02,period=1000003,name='MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT'/, +cpu/event=0xd2,umask=0x04,period=1000003,name='MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM'/, +cpu-cycles, +ref-cycles, +instructions; -#power related -power/energy-pkg/, -power/energy-ram/; +cpu/event=0xd3,umask=0x02,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM'/, +cpu/event=0xd3,umask=0x01,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM'/, +cpu/event=0xd3,umask=0x08,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD'/, +cpu/event=0xd3,umask=0x04,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM'/, +cpu/event=0xb1,umask=0x10,cmask=0x00,period=2000003,name='UOPS_EXECUTED.X87'/, +cpu/event=0xb1,umask=0x01,cmask=0x00,period=2000003,name='UOPS_EXECUTED.THREAD'/, +cpu/event=0xc7,umask=0x02,umask=0x03,period=100003,name='FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03'/, +cpu/event=0xc7,umask=0x04,umask=0xfc,period=100003,name='FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0xfc'/, +cpu-cycles, +ref-cycles, +instructions; -cpu/event=0xa6,umask=0x40,cmask=0x02,period=1000003,name='EXE_ACTIVITY.BOUND_ON_STORES'/, -cpu/event=0xa6,umask=0x02,period=2000003,name='EXE_ACTIVITY.1_PORTS_UTIL'/, -cpu/event=0xa6,umask=0x04,period=2000003,name='EXE_ACTIVITY.2_PORTS_UTIL'/, -cpu/event=0x79,umask=0x04,period=100003,name='IDQ.MITE_UOPS'/, +cpu/event=0xd1,umask=0x80,cmask=0x00,period=100003,name='MEM_LOAD_RETIRED.LOCAL_PMM'/, +cpu/event=0xd3,umask=0x10,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM'/, +cpu/event=0xa3,umask=0x06,cmask=0x06,period=1000003,name='CYCLE_ACTIVITY.STALLS_L3_MISS'/, cpu-cycles, +ref-cycles, instructions; -cpu/event=0xb1,umask=0x01,cmask=0x03,period=2000003,name='UOPS_EXECUTED.CYCLES_GE_3'/, +cpu/event=0x79,umask=0x04,period=100003,name='IDQ.MITE_UOPS'/, cpu/event=0x79,umask=0x30,period=100003,name='IDQ.MS_UOPS'/, cpu/event=0x56,umask=0x01,period=100003,name='UOPS_DECODED.DEC0'/, cpu/event=0x56,umask=0x01,cmask=0x01,period=100003,name='UOPS_DECODED.DEC0:c1'/, -cpu-cycles; +cpu/event=0x0e,umask=0x01,period=2000003,name='UOPS_ISSUED.ANY'/, +cpu/event=0xa3,umask=0x10,cmask=0x10,period=1000003,name='CYCLE_ACTIVITY.CYCLES_MEM_ANY'/, +cpu/event=0x3c,umask=0x02,period=25003,name='CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE'/, +cpu/event=0x3c,umask=0x08,period=2000003,name='CPU_CLK_UNHALTED.REF_DISTRIBUTED'/, +cpu-cycles:k, +ref-cycles:k, +instructions:k; -cpu/event=0xa3,umask=0x14,cmask=0x14,period=2000003,name='CYCLE_ACTIVITY.STALLS_MEM_ANY'/, +cpu/event=0x60,umask=0x04,cmask=0x01,period=1000003,name='OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO'/, +cpu/event=0xb7,umask=0x01,cmask=0x00,offcore_rsp=0x10003C0002,name='OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM'/, cpu/event=0xa3,umask=0x0c,cmask=0x0c,period=1000003,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/, -cpu/event=0x08,umask=0x20,cmask=0x01,period=100003,name='DTLB_LOAD_MISSES.STLB_HIT:c1'/, -cpu/event=0x08,umask=0x10,cmask=0x01,period=100003,name='DTLB_LOAD_MISSES.WALK_ACTIVE'/, -cpu-cycles; +cpu-cycles, +ref-cycles, +instructions; -#UPI related -upi/event=0x2,umask=0xf,name='UNC_UPI_TxL_FLITS.ALL_DATA'/, -upi/event=0x2,umask=0x97,name='UNC_UPI_TxL_FLITS.NON_DATA'/, -upi/event=0x1,umask=0x0,name='UNC_UPI_CLOCKTICKS'/; +#C6 +cstate_core/c6-residency/; +cstate_pkg/c6-residency/; -cha/event=0x35,umask=0xC8177E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE'/, -cha/event=0x36,umask=0xc8177e01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE'/; +#power related +power/energy-pkg/, +power/energy-ram/; -cha/event=0x35,umask=0xc88ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF'/, -cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/; +# UPI related +upi/event=0x2,umask=0xf,name='UNC_UPI_TxL_FLITS.ALL_DATA'/; +# CHA events +cha/event=0x00,umask=0x00,name='UNC_CHA_CLOCKTICKS'/; + +cha/event=0x35,umask=0xC8177E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE'/, cha/event=0x35,umask=0xC816FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL'/, +cha/event=0x35,umask=0xC896FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL'/, +cha/event=0x35,umask=0xC8977E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE'/; + +cha/event=0x36,umask=0xc8177e01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE'/; +cha/event=0x35,umask=0xc88ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF'/, +cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/, cha/event=0x36,umask=0xC816FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL'/; cha/event=0x35,umask=0xccd7fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD'/, @@ -175,11 +194,6 @@ cha/event=0x35,umask=0xc817fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD'/, cha/event=0x35,umask=0xc897fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF'/, cha/event=0x36,umask=0xC817FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD'/; -cha/event=0x35,umask=0xC896FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL'/, -cha/event=0x35,umask=0xC8977E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE'/; - -cha/event=0x00,umask=0x00,name='UNC_CHA_CLOCKTICKS'/; - #memory read/writes imc/event=0x04,umask=0x0f,name='UNC_M_CAS_COUNT.RD'/, imc/event=0x04,umask=0x30,name='UNC_M_CAS_COUNT.WR'/; \ No newline at end of file diff --git a/events/metric_icx.json b/events/metric_icx.json index a31e7b2..0f3679f 100644 --- a/events/metric_icx.json +++ b/events/metric_icx.json @@ -215,6 +215,14 @@ "name": "metric_TMA_..Fetch_Bandwidth(%)", "expression": "100 * ( max( 0 , ( [PERF_METRICS.FRONTEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) ) - ( ( ( 5 ) * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS] ) ) ) )" }, + { + "name": "metric_TMA_....MITE(%)", + "expression": "100 * ( ( [IDQ.MITE_CYCLES_ANY] - [IDQ.MITE_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, + { + "name": "metric_TMA_....DSB(%)", + "expression": "100 * ( ( [IDQ.DSB_CYCLES_ANY] - [IDQ.DSB_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, { "name": "metric_TMA_Bad_Speculation(%)", "expression": "100 * ( max( 1 - ( ( [PERF_METRICS.FRONTEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) + ( ( 5 ) * [INT_MISC.RECOVERY_CYCLES:c1:e1] ) / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) ) , 0 ) )" @@ -244,8 +252,8 @@ "expression": "100 * ( min( ( 7 ) * [DTLB_LOAD_MISSES.STLB_HIT:c1] + [DTLB_LOAD_MISSES.WALK_ACTIVE] , max( [CYCLE_ACTIVITY.CYCLES_MEM_ANY] - [CYCLE_ACTIVITY.CYCLES_L1D_MISS] , 0 ) ) / ( [cpu-cycles] ) )" }, { - "name": "metric_TMA_......Store_Fwd_Blk(%)", - "expression": "100 * ( min( ( 13 * [LD_BLOCKS.STORE_FORWARD] / ( [cpu-cycles] ) ) , ( 1 ) ) )" + "name": "metric_TMA_......Lock_Latency(%)", + "expression": "100 * ( min( ( ( 16 * max( 0 , [MEM_INST_RETIRED.LOCK_LOADS] - [L2_RQSTS.ALL_RFO] ) + ( [MEM_INST_RETIRED.LOCK_LOADS] / [MEM_INST_RETIRED.ALL_STORES] ) * ( ( 10 ) * [L2_RQSTS.RFO_HIT] + ( min( [cpu-cycles] , [OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO] ) ) ) ) / ( [cpu-cycles] ) ) , ( 1 ) ) )" }, { "name": "metric_TMA_....L2_Bound(%)", @@ -255,21 +263,13 @@ "name": "metric_TMA_....L3_Bound(%)", "expression": "100 * ( ( [CYCLE_ACTIVITY.STALLS_L2_MISS] - [CYCLE_ACTIVITY.STALLS_L3_MISS] ) / ( [cpu-cycles] ) )" }, - { - "name": "metric_TMA_......Contested_Accesses(%)", - "expression": "100 * ( min( ( ( ( ( 48 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) - ( 4 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) ) * ( [MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM] * ( [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] / ( [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] + [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD] ) ) ) + ( ( 47.5 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) - ( 4 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) ) * ( [MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS] ) ) * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) / 2 ) / ( [cpu-cycles] ) ) , ( 1 ) ) )" - }, { "name": "metric_TMA_......Data_Sharing(%)", "expression": "100 * ( min( ( ( ( 47.5 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) - ( 4 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) ) * ( [MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT] + [MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM] * ( 1 - ( [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] / ( [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] + [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD] ) ) ) ) * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) / 2 ) / ( [cpu-cycles] ) ) , ( 1 ) ) )" }, { - "name": "metric_TMA_......L3_Hit_Latency(%)", - "expression": "100 * ( min( ( ( ( 23 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) - ( 4 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) ) * ( [MEM_LOAD_RETIRED.L3_HIT] ) * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) / 2 ) / ( [cpu-cycles] ) ) , ( 1 ) ) )" - }, - { - "name": "metric_TMA_......SQ_Full(%)", - "expression": "100 * ( [L1D_PEND_MISS.L2_STALL] / ( [cpu-cycles] ) )" + "name": "metric_TMA_....DRAM_Bound(%)", + "expression": "100 * ( min( ( ( ( [CYCLE_ACTIVITY.STALLS_L3_MISS] / ( [cpu-cycles] ) + ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) ) - ( ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) / ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + [L1D_PEND_MISS.FB_FULL_PERIODS] ) ) * ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) ) ) ) - ( min( ( ( ( ( 1 - ( ( ( 19 * ( [MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + 10 * ( ( [MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + ( [MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + ( [MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) ) ) / ( ( 19 * ( [MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + 10 * ( ( [MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + ( [MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + ( [MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) ) ) + ( 25 * ( ( [MEM_LOAD_RETIRED.LOCAL_PMM] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) ) + 33 * ( ( [MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) ) ) ) ) ) ) * ( [CYCLE_ACTIVITY.STALLS_L3_MISS] / ( [cpu-cycles] ) + ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) ) - ( ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) / ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + [L1D_PEND_MISS.FB_FULL_PERIODS] ) ) * ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) ) ) ) ) if ( ( 1000000 ) * ( [MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM] + [MEM_LOAD_RETIRED.LOCAL_PMM] ) > [MEM_LOAD_RETIRED.L1_MISS] ) else 0 ) ) , ( 1 ) ) ) ) ) , ( 1 ) ) )" }, { "name": "metric_TMA_......MEM_Bandwidth(%)", @@ -284,12 +284,12 @@ "expression": "100 * ( [EXE_ACTIVITY.BOUND_ON_STORES] / ( [cpu-cycles] ) )" }, { - "name": "metric_TMA_..Core_Bound(%)", - "expression": "100 * ( max( 0 , ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) + ( ( 5 ) * [INT_MISC.RECOVERY_CYCLES:c1:e1] ) / ( [TOPDOWN.SLOTS] ) ) - ( ( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES] ) / ( [CYCLE_ACTIVITY.STALLS_TOTAL] + ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) + [EXE_ACTIVITY.BOUND_ON_STORES] ) ) * ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) + ( ( 5 ) * [INT_MISC.RECOVERY_CYCLES:c1:e1] ) / ( [TOPDOWN.SLOTS] ) ) ) ) )" + "name": "metric_TMA_......False_Sharing(%)", + "expression": "100 * ( min( ( ( 48 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) * [OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM] / ( [cpu-cycles] ) ) , ( 1 ) ) )" }, { - "name": "metric_TMA_....Divider(%)", - "expression": "100 * ( [ARITH.DIVIDER_ACTIVE] / ( [cpu-cycles] ) )" + "name": "metric_TMA_..Core_Bound(%)", + "expression": "100 * ( max( 0 , ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) + ( ( 5 ) * [INT_MISC.RECOVERY_CYCLES:c1:e1] ) / ( [TOPDOWN.SLOTS] ) ) - ( ( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES] ) / ( [CYCLE_ACTIVITY.STALLS_TOTAL] + ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) + [EXE_ACTIVITY.BOUND_ON_STORES] ) ) * ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) + ( ( 5 ) * [INT_MISC.RECOVERY_CYCLES:c1:e1] ) / ( [TOPDOWN.SLOTS] ) ) ) ) )" }, { "name": "metric_TMA_....Ports_Utilization(%)", @@ -319,6 +319,18 @@ "name": "metric_TMA_..Light_Operations(%)", "expression": "100 * ( max( 0 , ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) - ( ( ( ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) ) )" }, + { + "name": "metric_TMA_....FP_Arith(%)", + "expression": "100 * ( ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * [UOPS_EXECUTED.X87] / [UOPS_EXECUTED.THREAD] ) + ( ( [FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) + ( min( ( ( [FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0xfc] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) ) )" + }, + { + "name": "metric_TMA_......FP_Scalar(%)", + "expression": "100 * ( ( [FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) )" + }, + { + "name": "metric_TMA_......FP_Vector(%)", + "expression": "100 * ( min( ( ( [FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0xfc] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) )" + }, { "name": "metric_TMA_..Heavy_Operations(%)", "expression": "100 * ( ( ( ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] )" @@ -327,10 +339,6 @@ "name": "metric_TMA_....Microcode_Sequencer(%)", "expression": "100 * ( ( ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS] ) )" }, - { - "name": "metric_TMA_Info_Core_CoreIPC", - "expression": "[instructions] / [CPU_CLK_UNHALTED.DISTRIBUTED]" - }, { "name": "metric_TMA_Info_System_SMT_2T_Utilization", "expression": "1 - [CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE] / [CPU_CLK_UNHALTED.REF_DISTRIBUTED] if [SOCKET_COUNT] > 1 else 0" diff --git a/events/metric_skx_clx.json b/events/metric_skx_clx.json index 2b207e8..8ac91ba 100644 --- a/events/metric_skx_clx.json +++ b/events/metric_skx_clx.json @@ -1,359 +1,398 @@ [ - { - "name": "metric_CPU operating frequency (in GHz)", - "expression": "([cpu-cycles] / [ref-cycles]) * ([SYSTEM_TSC_FREQ] / 1000000000)" - }, - { - "name": "metric_CPU utilization %", - "expression": "100 * [ref-cycles] / [TSC]" - }, - { - "name": "metric_CPU utilization% in kernel mode", - "expression": "100 * [ref-cycles:k] / [TSC]" - }, - { - "name": "metric_CPI", - "expression": "[cpu-cycles] / [instructions]" - }, - { - "name": "metric_kernel_CPI", - "expression": "[cpu-cycles:k] / [instructions:k]" - }, - { - "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", - "tags": "transaction", - "expression": "[L1D.REPLACEMENT] / [instructions]" - }, - { - "name": "metric_L1D demand data read hits per instr", - "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]" - }, - { - "name": "metric_L1-I code read misses (w/ prefetches) per instr", - "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]" - }, - { - "name": "metric_L2 demand data read hits per instr", - "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]" - }, - { - "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", - "expression": "[L2_LINES_IN.ALL] / [instructions]" - }, - { - "name": "metric_L2 demand data read MPI", - "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]" - }, - { - "name": "metric_L2 demand code MPI", - "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]" - }, - { - "name": "metric_LLC MPI (includes code+data+rfo w/ prefetches)", - "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12C40033]) / [instructions]" - }, - { - "name": "metric_LLC code read MPI (demand+prefetch)", - "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] / [instructions]" - }, - { - "name": "metric_LLC data read MPI (demand+prefetch)", - "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] / [instructions]" - }, - { - "name": "metric_LLC total HITM (per instr)", - "expression": "[OCR.ALL_READS.L3_MISS.REMOTE_HITM] / [instructions]" - }, - { - "name": "metric_LLC total HIT clean line forwards (per instr)", - "expression": "[OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD] / [instructions]" - }, - { - "name": "metric_Average LLC data read miss latency (in clks)", - "expression": "[OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD] / [OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD]" - }, - { - "name": "metric_Average LLC data read miss latency (in ns)", - "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40433] / [UNC_CHA_TOR_INSERTS.IA_MISS.0x40433]) / ( [UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) )" - }, - { - "name": "metric_Average LLC data read miss latency for LOCAL requests (in ns)", - "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40432] / [UNC_CHA_TOR_INSERTS.IA_MISS.0x40432]) / ( [UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) )" - }, - { - "name": "metric_Average LLC data read miss latency for REMOTE requests (in ns)", - "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40431] / [UNC_CHA_TOR_INSERTS.IA_MISS.0x40431]) / ( [UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) )" - }, - { - "name": "metric_ITLB MPI", - "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]" - }, - { - "name": "metric_ITLB large page MPI", - "expression": "[ITLB_MISSES.WALK_COMPLETED_2M_4M] / [instructions]" - }, - { - "name": "metric_DTLB load MPI", - "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]" - }, - { - "name": "metric_DTLB 4KB page load MPI", - "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_4K] / [instructions]" - }, - { - "name": "metric_DTLB 2MB large page load MPI", - "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]" - }, - { - "name": "metric_DTLB 1GB large page load MPI", - "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_1G] / [instructions]" - }, - { - "name": "metric_DTLB store MPI", - "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]" - }, - { - "name": "metric_DTLB load miss latency (in core clks)", - "expression": "[DTLB_LOAD_MISSES.WALK_ACTIVE] / [DTLB_LOAD_MISSES.WALK_COMPLETED]" - }, - { - "name": "metric_DTLB store miss latency (in core clks)", - "expression": "[DTLB_STORE_MISSES.WALK_ACTIVE] / [DTLB_STORE_MISSES.WALK_COMPLETED]" - }, - { - "name": "metric_ITLB miss latency (in core clks)", - "expression": "[ITLB_MISSES.WALK_ACTIVE] / [ITLB_MISSES.WALK_COMPLETED]" - }, - { - "name": "metric_NUMA %_Reads addressed to local DRAM", - "expression": "100 * [UNC_CHA_TOR_INSERTS.IA_MISS.0x40432] / ([UNC_CHA_TOR_INSERTS.IA_MISS.0x40432] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x40431])" - }, - { - "name": "metric_NUMA %_Reads addressed to remote DRAM", - "expression": "100 * [UNC_CHA_TOR_INSERTS.IA_MISS.0x40431] / ([UNC_CHA_TOR_INSERTS.IA_MISS.0x40432] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x40431])" - }, - { - "name": "metric_UPI Data transmit BW (MB/sec) (only data)", - "expression": "[UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9) / 1000000" - }, - { - "name": "metric_UPI Transmit utilization_% (includes control)", - "expression": "100 * (([UNC_UPI_TxL_FLITS.ALL_DATA] + [UNC_UPI_TxL_FLITS.NON_DATA]) / 3) / ((((([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [const_thread_count])) / (([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [const_thread_count])) - [cstate_pkg/c6-residency/])) * ([UNC_UPI_CLOCKTICKS] - [UNC_UPI_L1_POWER_CYCLES])) * 5 / 6))" - }, - { - "name": "metric_uncore frequency GHz", - "expression": "[UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000" - }, - { - "name": "metric_package power (watts)", - "expression": "[power/energy-pkg/]" - }, - { - "name": "metric_DRAM power (watts)", - "expression": "[power/energy-ram/]" - }, - { - "name": "metric_core c6 residency %", - "expression": "100 * [cstate_core/c6-residency/] / [TSC]" - }, - { - "name": "metric_package c6 residency %", - "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]" - }, - { - "name": "metric_core % cycles in non AVX license", - "expression": "(100 * [CORE_POWER.LVL0_TURBO_LICENSE]) / ([CORE_POWER.LVL0_TURBO_LICENSE] + [CORE_POWER.LVL1_TURBO_LICENSE] + [CORE_POWER.LVL2_TURBO_LICENSE])" - }, - { - "name": "metric_core % cycles in AVX2 license", - "expression": "(100 * [CORE_POWER.LVL1_TURBO_LICENSE]) / ([CORE_POWER.LVL0_TURBO_LICENSE] + [CORE_POWER.LVL1_TURBO_LICENSE] + [CORE_POWER.LVL2_TURBO_LICENSE])" - }, - { - "name": "metric_core % cycles in AVX-512 license", - "expression": "(100 * [CORE_POWER.LVL2_TURBO_LICENSE]) / ([CORE_POWER.LVL0_TURBO_LICENSE] + [CORE_POWER.LVL1_TURBO_LICENSE] + [CORE_POWER.LVL2_TURBO_LICENSE])" - }, - { - "name": "metric_core initiated local dram read bandwidth (MB/sec)", - "expression": "[OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP] * 64 / 1000000" - }, - { - "name": "metric_core initiated remote dram read bandwidth (MB/sec)", - "expression": "[OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP_ocr_msr_3fB80007f7] * 64 / 1000000" - }, - { - "name": "metric_memory bandwidth read (MB/sec)", - "expression": "[UNC_M_CAS_COUNT.RD] * 64 / 1000000" - }, - { - "name": "metric_memory bandwidth write (MB/sec)", - "expression": "[UNC_M_CAS_COUNT.WR] * 64 / 1000000" - }, - { - "name": "metric_memory bandwidth total (MB/sec)", - "expression": "([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000" - }, - { - "name": "metric_IO_bandwidth_disk_or_network_writes (MB/sec)", - "expression": "([UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0] + [UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1] + [UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2] + [UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3]) * 4 / 1000000" - }, - { - "name": "metric_IO_bandwidth_disk_or_network_reads (MB/sec)", - "expression": "([UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0] + [UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1] + [UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2] + [UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3]) * 4 / 1000000" - }, - { - "name": "metric_TMA_Info_cycles_both_threads_active(%)", - "expression": "100 * ( (1 - ([CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE] / ([CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY] / 2)) ) if [const_thread_count] > 1 else 0)" - }, - { - "name": "metric_TMA_Info_CoreIPC", - "expression": "[instructions] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" - }, - { - "name": "metric_TMA_Frontend_Bound(%)", - "expression": "100 * [IDQ_UOPS_NOT_DELIVERED.CORE] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))" - }, - { - "name": "metric_TMA_..Frontend_Latency(%)", - "expression": "100 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] /[const_thread_count])" - }, - { - "name": "metric_TMA_....ICache_Misses(%)", - "expression": "100 * ([ICACHE_16B.IFDATA_STALL] + 2 * [ICACHE_16B_c1_e1_IFDATA_STALL]) / [cpu-cycles]" - }, - { - "name": "metric_TMA_....ITLB_Misses(%)", - "expression": "100 * [ICACHE_64B.IFTAG_STALL] / [cpu-cycles]" - }, - { - "name": "metric_TMA_....Branch_Resteers(%)", - "expression": "100 * ([INT_MISC.CLEAR_RESTEER_CYCLES] + 9 * [BACLEARS.ANY]) / [cpu-cycles]" - }, - { - "name": "metric_TMA_......Mispredicts_Resteers(%)", - "expression": "100 * [INT_MISC.CLEAR_RESTEER_CYCLES] * ([BR_MISP_RETIRED.ALL_BRANCHES] / ([BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT])) / [cpu-cycles]" - }, - { - "name": "metric_TMA_......Clears_Resteers(%)", - "expression": "100 * [INT_MISC.CLEAR_RESTEER_CYCLES] * (1 - ([BR_MISP_RETIRED.ALL_BRANCHES] / ([BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT]))) / [cpu-cycles]" - }, - { - "name": "metric_TMA_......Unknown_Branches_Resteers(%)", - "expression": "100 * (9 * [BACLEARS.ANY]) / [cpu-cycles]" - }, - { - "name": "metric_TMA_..Frontend_Bandwidth(%)", - "expression": "100 * ([IDQ_UOPS_NOT_DELIVERED.CORE] - 4 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]) / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))" - }, - { - "name": "metric_TMA_Bad_Speculation(%)", - "expression": "100 * ([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + ((4 * [INT_MISC.RECOVERY_CYCLES_ANY]) / [const_thread_count])) / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])) " - }, - { - "name": "metric_TMA_..Branch_Mispredicts(%)", - "expression": "100 * ([BR_MISP_RETIRED.ALL_BRANCHES] / ([BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT])) * ([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + (4 * [INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count])) / (4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]) " - }, - { - "name": "metric_TMA_..Machine_Clears(%)", - "expression": "100 * ([MACHINE_CLEARS.COUNT] / ([BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT])) * ([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + (4 * [INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count])) / (4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" - }, - { - "name": "metric_TMA_Backend_Bound(%)", - "expression": "100 - (100 * ([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + 4 * ([INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count]) + [IDQ_UOPS_NOT_DELIVERED.CORE] + [UOPS_RETIRED.RETIRE_SLOTS]) / (4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])) " - }, - { - "name": "metric_TMA_..Memory_Bound(%)", - "expression": "100 * (1 - (([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + 4 * ([INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count]) + [IDQ_UOPS_NOT_DELIVERED.CORE] + [UOPS_RETIRED.RETIRE_SLOTS]) / (4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))) * ([CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES]) / ([EXE_ACTIVITY.EXE_BOUND_0_PORTS] + [EXE_ACTIVITY.1_PORTS_UTIL] + ([EXE_ACTIVITY.2_PORTS_UTIL] if ([instructions] / [cpu-cycles]) > 1.8 else 0) + [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES])" - }, - { - "name": "metric_TMA_....L1_Bound(%)", - "expression": "100 * ([CYCLE_ACTIVITY.STALLS_MEM_ANY] - [CYCLE_ACTIVITY.STALLS_L1D_MISS]) / [cpu-cycles]" - }, - { - "name": "metric_TMA_......DTLB_Load(%)", - "expression": "100 * (7 * [DTLB_LOAD_MISSES.STLB_HIT] + [DTLB_LOAD_MISSES.WALK_ACTIVE]) / [cpu-cycles]" - }, - { - "name": "metric_TMA_......Store_Fwd_Blk(%)", - "expression": "100 * (13 * [LD_BLOCKS.STORE_FORWARD]) / [cpu-cycles]" - }, - { - "name": "metric_TMA_....L2_Bound(%)", - "expression": "100 * ([CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS]) / [cpu-cycles]" - }, - { - "name": "metric_TMA_....L3_Bound(%)", - "expression": "100 * ([CYCLE_ACTIVITY.STALLS_L2_MISS] - [CYCLE_ACTIVITY.STALLS_L3_MISS]) / [cpu-cycles]" - }, - { - "name": "metric_TMA_......L3_Latency(%)", - "expression": "100 * (((min([OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD], [cpu-cycles]) - min([OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD], [cpu-cycles])) / [cpu-cycles]) - ((min([OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6], [cpu-cycles]) - min([OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6] , [cpu-cycles])) / [cpu-cycles]))" - }, - { - "name": "metric_TMA_......L3_Bandwidth(%)", - "expression": "100 * (min([OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6], [cpu-cycles]) - min([OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6], [cpu-cycles])) / [cpu-cycles]" - }, - { - "name": "metric_TMA_......SQ_Full(%)", - "expression": "100 * ([OFFCORE_REQUESTS_BUFFER.SQ_FULL] / [const_thread_count]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" - }, - { - "name": "metric_TMA_....MEM_Bound(%)", - "expression": "100 * [CYCLE_ACTIVITY.STALLS_L3_MISS] / [cpu-cycles]" - }, - { - "name": "metric_TMA_......MEM_Bandwidth(%)", - "expression": "100 * min([OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6] , [cpu-cycles]) / [cpu-cycles]" - }, - { - "name": "metric_TMA_......MEM_Latency(%)", - "expression": "100 * (min([OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD] , [cpu-cycles]) - min([OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6] , [cpu-cycles]))/ [cpu-cycles]" - }, - { - "name": "metric_TMA_....Store_Bound(%)", - "expression": "100 * [EXE_ACTIVITY.BOUND_ON_STORES] / [cpu-cycles]" - }, - { - "name": "metric_TMA_......DTLB_Store(%)", - "expression": "100 * (7 * [DTLB_STORE_MISSES.STLB_HIT] + [DTLB_STORE_MISSES.WALK_ACTIVE]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" - }, - { - "name": "metric_TMA_..Core_Bound(%)", - "expression": "100 * (1 - (([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + (4 * ([INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count])) + [IDQ_UOPS_NOT_DELIVERED.CORE] + [UOPS_RETIRED.RETIRE_SLOTS]) / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])))) * (1 - (([CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES]) / ([EXE_ACTIVITY.EXE_BOUND_0_PORTS] + [EXE_ACTIVITY.1_PORTS_UTIL] + ([EXE_ACTIVITY.2_PORTS_UTIL] if ([instructions] / [cpu-cycles]) > 1.8 else 0) + [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES])))" - }, - { - "name": "metric_TMA_....Divider(%)", - "expression": "100 * [ARITH.DIVIDER_ACTIVE] / [cpu-cycles]" - }, - { - "name": "metric_TMA_....Ports_Utilization(%)", - "expression": "100 * (([EXE_ACTIVITY.EXE_BOUND_0_PORTS] + [EXE_ACTIVITY.1_PORTS_UTIL] + ([EXE_ACTIVITY.2_PORTS_UTIL] if ([instructions] / [cpu-cycles]) > 1.8 else 0) + [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES]) - [CYCLE_ACTIVITY.STALLS_MEM_ANY] - [EXE_ACTIVITY.BOUND_ON_STORES]) / [cpu-cycles]" - }, - { - "name": "metric_TMA_......0_Port_Utilized(%)", - "expression": "100 * (([UOPS_EXECUTED.CORE_CYCLES_NONE] / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.EXE_BOUND_0_PORTS]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" - }, - { - "name": "metric_TMA_......1_Port_Utilized(%)", - "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_1] - [UOPS_EXECUTED.CORE_CYCLES_GE_2]) / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.1_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" - }, - { - "name": "metric_TMA_......2_Port_Utilized(%)", - "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_2] - [UOPS_EXECUTED.CORE_CYCLES_GE_3]) / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.2_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" - }, - { - "name": "metric_TMA_......3m_Ports_Utilized(%)", - "expression": "100 * [UOPS_EXECUTED.CORE_CYCLES_GE_3] / [CPU_CLK_UNHALTED.THREAD_ANY]" - }, - { - "name": "metric_TMA_Retiring(%)", - "expression": "100 * [UOPS_RETIRED.RETIRE_SLOTS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))" - }, - { - "name": "metric_TMA_..Base(%)", - "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))) - (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))))" - }, - { - "name": "metric_TMA_..Microcode_Sequencer(%)", - "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])))" - } -] \ No newline at end of file + { + "name": "metric_CPU operating frequency (in GHz)", + "expression": "(([cpu-cycles] / [ref-cycles] * [SYSTEM_TSC_FREQ]) / 1000000000)" + }, + { + "name": "metric_CPU utilization %", + "expression": "100 * [ref-cycles] / [TSC]" + }, + { + "name": "metric_CPU utilization% in kernel mode", + "expression": "100 * [ref-cycles:k] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_CPI", + "expression": "[cpu-cycles] / [instructions]" + }, + { + "name": "metric_kernel_CPI", + "expression": "[cpu-cycles:k] / [instructions:k]", + "origin": "perfspect" + }, + { + "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", + "expression": "[L1D.REPLACEMENT] / [instructions]" + }, + { + "name": "metric_L1D demand data read hits per instr", + "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]" + }, + { + "name": "metric_L1-I code read misses (w/ prefetches) per instr", + "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]" + }, + { + "name": "metric_L2 demand data read hits per instr", + "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]" + }, + { + "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", + "expression": "[L2_LINES_IN.ALL] / [instructions]" + }, + { + "name": "metric_L2 demand data read MPI", + "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]" + }, + { + "name": "metric_L2 demand code MPI", + "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]" + }, + { + "name": "metric_LLC MPI (includes code+data+rfo w/ prefetches)", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12C40033]) / [instructions]", + "origin": "perfspect" + }, + { + "name": "metric_LLC code read MPI (demand+prefetch)", + "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] / [instructions]" + }, + { + "name": "metric_LLC data read MPI (demand+prefetch)", + "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] / [instructions]" + }, + { + "name": "metric_LLC total HITM (per instr)", + "expression": "[OCR.ALL_READS.L3_MISS.REMOTE_HITM] / [instructions]", + "origin": "perfspect" + }, + { + "name": "metric_LLC total HIT clean line forwards (per instr)", + "expression": "[OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD] / [instructions]", + "origin": "perfspect" + }, + { + "name": "metric_Average LLC data read miss latency (in clks)", + "expression": "[OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD] / [OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD]", + "origin": "perfspect" + }, + { + "name": "metric_Average LLC data read miss latency (in ns)", + "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40433] / [UNC_CHA_TOR_INSERTS.IA_MISS.0x40433]) / ( [UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) )", + "origin": "perfspect" + }, + { + "name": "metric_Average LLC data read miss latency for LOCAL requests (in ns)", + "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40432] / [UNC_CHA_TOR_INSERTS.IA_MISS.0x40432]) / ( [UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) )", + "origin": "perfspect" + }, + { + "name": "metric_Average LLC data read miss latency for REMOTE requests (in ns)", + "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40431] / [UNC_CHA_TOR_INSERTS.IA_MISS.0x40431]) / ( [UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) )", + "origin": "perfspect" + }, + { + "name": "metric_ITLB MPI", + "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]" + }, + { + "name": "metric_ITLB large page MPI", + "expression": "[ITLB_MISSES.WALK_COMPLETED_2M_4M] / [instructions]" + }, + { + "name": "metric_DTLB load MPI", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]" + }, + { + "name": "metric_DTLB 4KB page load MPI", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_4K] / [instructions]", + "origin": "perfspect" + }, + { + "name": "metric_DTLB 2MB large page load MPI", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]" + }, + { + "name": "metric_DTLB 1GB large page load MPI", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_1G] / [instructions]", + "origin": "perfspect" + }, + { + "name": "metric_DTLB store MPI", + "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]" + }, + { + "name": "metric_DTLB load miss latency (in core clks)", + "expression": "[DTLB_LOAD_MISSES.WALK_ACTIVE] / [DTLB_LOAD_MISSES.WALK_COMPLETED]", + "origin": "perfspect" + }, + { + "name": "metric_DTLB store miss latency (in core clks)", + "expression": "[DTLB_STORE_MISSES.WALK_ACTIVE] / [DTLB_STORE_MISSES.WALK_COMPLETED]", + "origin": "perfspect" + }, + { + "name": "metric_ITLB miss latency (in core clks)", + "expression": "[ITLB_MISSES.WALK_ACTIVE] / [ITLB_MISSES.WALK_COMPLETED]", + "origin": "perfspect" + }, + { + "name": "metric_NUMA %_Reads addressed to local DRAM", + "expression": "100 * [UNC_CHA_TOR_INSERTS.IA_MISS.0x40432] / ([UNC_CHA_TOR_INSERTS.IA_MISS.0x40432] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x40431])" + }, + { + "name": "metric_NUMA %_Reads addressed to remote DRAM", + "expression": "100 * [UNC_CHA_TOR_INSERTS.IA_MISS.0x40431] / ([UNC_CHA_TOR_INSERTS.IA_MISS.0x40432] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x40431])" + }, + { + "name": "metric_UPI Data transmit BW (MB/sec) (only data)", + "expression": "([UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9.0) / 1000000) / 1" + }, + { + "name": "metric_UPI Transmit utilization_% (includes control)", + "expression": "100 * (([UNC_UPI_TxL_FLITS.ALL_DATA] + [UNC_UPI_TxL_FLITS.NON_DATA]) / 3) / ((((([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [const_thread_count])) / (([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [const_thread_count])) - [cstate_pkg/c6-residency/])) * ([UNC_UPI_CLOCKTICKS] - [UNC_UPI_L1_POWER_CYCLES])) * 5 / 6))", + "origin": "perfspect" + }, + { + "name": "metric_uncore frequency GHz", + "expression": "([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1" + }, + { + "name": "metric_package power (watts)", + "expression": "[power/energy-pkg/]", + "origin": "perfspect" + }, + { + "name": "metric_DRAM power (watts)", + "expression": "[power/energy-ram/]", + "origin": "perfspect" + }, + { + "name": "metric_core c6 residency %", + "expression": "100 * [cstate_core/c6-residency/] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_package c6 residency %", + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_core % cycles in non AVX license", + "expression": "(100 * [CORE_POWER.LVL0_TURBO_LICENSE]) / ([CORE_POWER.LVL0_TURBO_LICENSE] + [CORE_POWER.LVL1_TURBO_LICENSE] + [CORE_POWER.LVL2_TURBO_LICENSE])", + "origin": "perfspect" + }, + { + "name": "metric_core % cycles in AVX2 license", + "expression": "(100 * [CORE_POWER.LVL1_TURBO_LICENSE]) / ([CORE_POWER.LVL0_TURBO_LICENSE] + [CORE_POWER.LVL1_TURBO_LICENSE] + [CORE_POWER.LVL2_TURBO_LICENSE])", + "origin": "perfspect" + }, + { + "name": "metric_core % cycles in AVX-512 license", + "expression": "(100 * [CORE_POWER.LVL2_TURBO_LICENSE]) / ([CORE_POWER.LVL0_TURBO_LICENSE] + [CORE_POWER.LVL1_TURBO_LICENSE] + [CORE_POWER.LVL2_TURBO_LICENSE])", + "origin": "perfspect" + }, + { + "name": "metric_core initiated local dram read bandwidth (MB/sec)", + "expression": "[OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP] * 64 / 1000000", + "origin": "perfspect" + }, + { + "name": "metric_core initiated remote dram read bandwidth (MB/sec)", + "expression": "[OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP_ocr_msr_3fB80007f7] * 64 / 1000000", + "origin": "perfspect" + }, + { + "name": "metric_memory bandwidth read (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.RD] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth write (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.WR] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth total (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_writes (MB/sec)", + "expression": "(([UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0] + [UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1] + [UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2] + [UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3]) * 4 / 1000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_reads (MB/sec)", + "expression": "(([UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0] + [UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1] + [UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2] + [UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3]) * 4 / 1000000) / 1" + }, + { + "name": "metric_TMA_Info_cycles_both_threads_active(%)", + "expression": "100 * ( (1 - ([CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE] / ([CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY] / 2)) ) if [const_thread_count] > 1 else 0)", + "origin": "perfspect" + }, + { + "name": "metric_TMA_Info_CoreIPC", + "expression": "[instructions] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "origin": "perfspect" + }, + { + "name": "metric_TMA_Frontend_Bound(%)", + "expression": "100 * ( [IDQ_UOPS_NOT_DELIVERED.CORE] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) )" + }, + { + "name": "metric_TMA_..Frontend_Latency(%)", + "expression": "100 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] /[const_thread_count])", + "origin": "perfspect" + }, + { + "name": "metric_TMA_....ICache_Misses(%)", + "expression": "100 * ( ( [ICACHE_16B.IFDATA_STALL] + 2 * [ICACHE_16B.IFDATA_STALL:c1:e1] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....ITLB_Misses(%)", + "expression": "100 * ( [ICACHE_64B.IFTAG_STALL] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....Branch_Resteers(%)", + "expression": "100 * ( [INT_MISC.CLEAR_RESTEER_CYCLES] / ( [cpu-cycles] ) + ( ( 9 ) * [BACLEARS.ANY] / ( [cpu-cycles] ) ) )" + }, + { + "name": "metric_TMA_......Mispredicts_Resteers(%)", + "expression": "100 * ( ( [BR_MISP_RETIRED.ALL_BRANCHES] / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] ) ) * [INT_MISC.CLEAR_RESTEER_CYCLES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_......Clears_Resteers(%)", + "expression": "100 * ( ( 1 - ( [BR_MISP_RETIRED.ALL_BRANCHES] / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] ) ) ) * [INT_MISC.CLEAR_RESTEER_CYCLES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_......Unknown_Branches_Resteers(%)", + "expression": "100 * (9 * [BACLEARS.ANY]) / [cpu-cycles]", + "origin": "perfspect" + }, + { + "name": "metric_TMA_..Frontend_Bandwidth(%)", + "expression": "100 * ([IDQ_UOPS_NOT_DELIVERED.CORE] - 4 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]) / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))", + "origin": "perfspect" + }, + { + "name": "metric_TMA_Bad_Speculation(%)", + "expression": "100 * ( ( [UOPS_ISSUED.ANY] - ( [UOPS_RETIRED.RETIRE_SLOTS] ) + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) )" + }, + { + "name": "metric_TMA_..Branch_Mispredicts(%)", + "expression": "100 * ( ( [BR_MISP_RETIRED.ALL_BRANCHES] / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] ) ) * ( ( [UOPS_ISSUED.ANY] - ( [UOPS_RETIRED.RETIRE_SLOTS] ) + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) )" + }, + { + "name": "metric_TMA_..Machine_Clears(%)", + "expression": "100 * ( ( ( [UOPS_ISSUED.ANY] - ( [UOPS_RETIRED.RETIRE_SLOTS] ) + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) - ( ( [BR_MISP_RETIRED.ALL_BRANCHES] / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] ) ) * ( ( [UOPS_ISSUED.ANY] - ( [UOPS_RETIRED.RETIRE_SLOTS] ) + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) ) )" + }, + { + "name": "metric_TMA_Backend_Bound(%)", + "expression": "100 * ( 1 - ( [IDQ_UOPS_NOT_DELIVERED.CORE] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) - ( [UOPS_ISSUED.ANY] + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) )" + }, + { + "name": "metric_TMA_..Memory_Bound(%)", + "expression": "100 * ( ( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES] ) / ( [CYCLE_ACTIVITY.STALLS_TOTAL] + ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) + [EXE_ACTIVITY.BOUND_ON_STORES] ) ) * ( 1 - ( [IDQ_UOPS_NOT_DELIVERED.CORE] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) - ( [UOPS_ISSUED.ANY] + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) )" + }, + { + "name": "metric_TMA_....L1_Bound(%)", + "expression": "100 * ( max( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] - [CYCLE_ACTIVITY.STALLS_L1D_MISS] ) / ( [cpu-cycles] ) , 0 ) )" + }, + { + "name": "metric_TMA_......DTLB_Load(%)", + "expression": "100 * ( min( ( 9 ) * [DTLB_LOAD_MISSES.STLB_HIT:c1] + [DTLB_LOAD_MISSES.WALK_ACTIVE] , max( [CYCLE_ACTIVITY.CYCLES_MEM_ANY] - [CYCLE_ACTIVITY.CYCLES_L1D_MISS] , 0 ) ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_......Store_Fwd_Blk(%)", + "expression": "100 * ( min( ( 13 * [LD_BLOCKS.STORE_FORWARD] / ( [cpu-cycles] ) ) , ( 1 ) ) )" + }, + { + "name": "metric_TMA_....L2_Bound(%)", + "expression": "100 * ( ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) / ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + [L1D_PEND_MISS.FB_FULL:c1] ) ) * ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) ) )" + }, + { + "name": "metric_TMA_....L3_Bound(%)", + "expression": "100 * ( ( [CYCLE_ACTIVITY.STALLS_L2_MISS] - [CYCLE_ACTIVITY.STALLS_L3_MISS] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_......L3_Latency(%)", + "expression": "100 * (((min([OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD], [cpu-cycles]) - min([OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD], [cpu-cycles])) / [cpu-cycles]) - ((min([OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6], [cpu-cycles]) - min([OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6] , [cpu-cycles])) / [cpu-cycles]))", + "origin": "perfspect" + }, + { + "name": "metric_TMA_......L3_Bandwidth(%)", + "expression": "100 * (min([OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6], [cpu-cycles]) - min([OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6], [cpu-cycles])) / [cpu-cycles]", + "origin": "perfspect" + }, + { + "name": "metric_TMA_......SQ_Full(%)", + "expression": "100 * ( ( ( [OFFCORE_REQUESTS_BUFFER.SQ_FULL] / 2 ) if [HYPERTHREADING_ON] else [OFFCORE_REQUESTS_BUFFER.SQ_FULL] ) / ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) )" + }, + { + "name": "metric_TMA_....MEM_Bound(%)", + "expression": "100 * [CYCLE_ACTIVITY.STALLS_L3_MISS] / [cpu-cycles]", + "origin": "perfspect" + }, + { + "name": "metric_TMA_......MEM_Bandwidth(%)", + "expression": "100 * min([OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6] , [cpu-cycles]) / [cpu-cycles]", + "origin": "perfspect" + }, + { + "name": "metric_TMA_......MEM_Latency(%)", + "expression": "100 * (min([OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD] , [cpu-cycles]) - min([OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6] , [cpu-cycles]))/ [cpu-cycles]", + "origin": "perfspect" + }, + { + "name": "metric_TMA_....Store_Bound(%)", + "expression": "100 * ( [EXE_ACTIVITY.BOUND_ON_STORES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_......DTLB_Store(%)", + "expression": "100 * ( min( ( ( ( 9 ) * [DTLB_STORE_MISSES.STLB_HIT:c1] + [DTLB_STORE_MISSES.WALK_ACTIVE] ) / ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) , ( 1 ) ) )" + }, + { + "name": "metric_TMA_..Core_Bound(%)", + "expression": "100 * ( ( 1 - ( [IDQ_UOPS_NOT_DELIVERED.CORE] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) - ( [UOPS_ISSUED.ANY] + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) - ( ( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES] ) / ( [CYCLE_ACTIVITY.STALLS_TOTAL] + ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) + [EXE_ACTIVITY.BOUND_ON_STORES] ) ) * ( 1 - ( [IDQ_UOPS_NOT_DELIVERED.CORE] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) - ( [UOPS_ISSUED.ANY] + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) ) )" + }, + { + "name": "metric_TMA_....Divider(%)", + "expression": "100 * ( [ARITH.DIVIDER_ACTIVE] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....Ports_Utilization(%)", + "expression": "100 * ( ( [EXE_ACTIVITY.EXE_BOUND_0_PORTS] + ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) ) / ( [cpu-cycles] ) if ( [ARITH.DIVIDER_ACTIVE] < ( [CYCLE_ACTIVITY.STALLS_TOTAL] - [CYCLE_ACTIVITY.STALLS_MEM_ANY] ) ) else ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_......0_Port_Utilized(%)", + "expression": "100 * (([UOPS_EXECUTED.CORE_CYCLES_NONE] / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.EXE_BOUND_0_PORTS]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "origin": "perfspect" + }, + { + "name": "metric_TMA_......1_Port_Utilized(%)", + "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_1] - [UOPS_EXECUTED.CORE_CYCLES_GE_2]) / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.1_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "origin": "perfspect" + }, + { + "name": "metric_TMA_......2_Port_Utilized(%)", + "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_2] - [UOPS_EXECUTED.CORE_CYCLES_GE_3]) / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.2_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "origin": "perfspect" + }, + { + "name": "metric_TMA_......3m_Ports_Utilized(%)", + "expression": "100 * [UOPS_EXECUTED.CORE_CYCLES_GE_3] / [CPU_CLK_UNHALTED.THREAD_ANY]", + "origin": "perfspect" + }, + { + "name": "metric_TMA_Retiring(%)", + "expression": "100 * ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) )" + }, + { + "name": "metric_TMA_..Base(%)", + "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))) - (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))))", + "origin": "perfspect" + }, + { + "name": "metric_TMA_..Microcode_Sequencer(%)", + "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])))", + "origin": "perfspect" + } +] diff --git a/events/metric_spr.json b/events/metric_spr.json index 08c89c4..c992a99 100644 --- a/events/metric_spr.json +++ b/events/metric_spr.json @@ -207,6 +207,14 @@ "name": "metric_TMA_..Fetch_Bandwidth(%)", "expression": "100 * ( max( 0 , ( [PERF_METRICS.FRONTEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) ) - ( ( [PERF_METRICS.FETCH_LATENCY] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) ) ) ) )" }, + { + "name": "metric_TMA_....MITE(%)", + "expression": "100 * ( ( [IDQ.MITE_CYCLES_ANY] - [IDQ.MITE_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, + { + "name": "metric_TMA_....DSB(%)", + "expression": "100 * ( ( [IDQ.DSB_CYCLES_ANY] - [IDQ.DSB_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, { "name": "metric_TMA_Bad_Speculation(%)", "expression": "100 * ( max( 1 - ( ( [PERF_METRICS.FRONTEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) ) , 0 ) )" @@ -236,8 +244,8 @@ "expression": "100 * ( min( ( 7 ) * [DTLB_LOAD_MISSES.STLB_HIT:c1] + [DTLB_LOAD_MISSES.WALK_ACTIVE] , max( [CYCLE_ACTIVITY.CYCLES_MEM_ANY] - [MEMORY_ACTIVITY.CYCLES_L1D_MISS] , 0 ) ) / ( [cpu-cycles] ) )" }, { - "name": "metric_TMA_......Split_Loads(%)", - "expression": "100 * ( min( ( ( [L1D_PEND_MISS.PENDING] / [MEM_LOAD_COMPLETED.L1_MISS_ANY] ) * [LD_BLOCKS.NO_SR] / ( [cpu-cycles] ) ) , ( 1 ) ) )" + "name": "metric_TMA_......Lock_Latency(%)", + "expression": "100 * ( min( ( ( 16 * max( 0 , [MEM_INST_RETIRED.LOCK_LOADS] - [L2_RQSTS.ALL_RFO] ) + ( [MEM_INST_RETIRED.LOCK_LOADS] / [MEM_INST_RETIRED.ALL_STORES] ) * ( ( 10 ) * [L2_RQSTS.RFO_HIT] + ( min( [cpu-cycles] , [OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO] ) ) ) ) / ( [cpu-cycles] ) ) , ( 1 ) ) )" }, { "name": "metric_TMA_....L2_Bound(%)", @@ -247,6 +255,14 @@ "name": "metric_TMA_....L3_Bound(%)", "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L2_MISS] - [MEMORY_ACTIVITY.STALLS_L3_MISS] ) / ( [cpu-cycles] ) )" }, + { + "name": "metric_TMA_......Data_Sharing(%)", + "expression": "100 * ( min( ( ( ( 79.5 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) - ( 4 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) ) * ( [MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD] + [MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD] * ( 1 - ( [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] / ( [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] + [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD] ) ) ) ) * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) / 2 ) / ( [cpu-cycles] ) ) , ( 1 ) ) )" + }, + { + "name": "metric_TMA_....DRAM_Bound(%)", + "expression": "100 * ( min( ( ( ( [MEMORY_ACTIVITY.STALLS_L3_MISS] / ( [cpu-cycles] ) ) - ( min( ( ( ( ( 1 - ( ( ( 19 * ( [MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + 10 * ( ( [MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + ( [MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + ( [MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) ) ) / ( ( 19 * ( [MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + 10 * ( ( [MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + ( [MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + ( [MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) ) ) + ( 25 * ( ( [MEM_LOAD_RETIRED.LOCAL_PMM] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) ) + 33 * ( ( [MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) ) ) ) ) ) ) * ( [MEMORY_ACTIVITY.STALLS_L3_MISS] / ( [cpu-cycles] ) ) ) if ( ( 1000000 ) * ( [MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM] + [MEM_LOAD_RETIRED.LOCAL_PMM] ) > [MEM_LOAD_RETIRED.L1_MISS] ) else 0 ) ) , ( 1 ) ) ) ) ) , ( 1 ) ) )" + }, { "name": "metric_TMA_......MEM_Bandwidth(%)", "expression": "100 * ( ( min( [cpu-cycles] , [OFFCORE_REQUESTS_OUTSTANDING.DATA_RD:c4] ) ) / ( [cpu-cycles] ) )" @@ -260,12 +276,12 @@ "expression": "100 * ( [EXE_ACTIVITY.BOUND_ON_STORES] / ( [cpu-cycles] ) )" }, { - "name": "metric_TMA_..Core_Bound(%)", - "expression": "100 * ( max( 0 , ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) - ( [PERF_METRICS.MEMORY_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) ) )" + "name": "metric_TMA_......False_Sharing(%)", + "expression": "100 * ( min( ( ( 80 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) * [OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM] / ( [cpu-cycles] ) ) , ( 1 ) ) )" }, { - "name": "metric_TMA_....Divider(%)", - "expression": "100 * ( [ARITH.DIV_ACTIVE] / ( [cpu-cycles] ) )" + "name": "metric_TMA_..Core_Bound(%)", + "expression": "100 * ( max( 0 , ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) - ( [PERF_METRICS.MEMORY_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) ) )" }, { "name": "metric_TMA_....Ports_Utilization(%)", @@ -295,6 +311,10 @@ "name": "metric_TMA_..Light_Operations(%)", "expression": "100 * ( max( 0 , ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) - ( [PERF_METRICS.HEAVY_OPERATIONS] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) ) )" }, + { + "name": "metric_TMA_....FP_Arith(%)", + "expression": "100 * ( ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * [UOPS_EXECUTED.X87] / [UOPS_EXECUTED.THREAD] ) + ( ( [FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03] + [FP_ARITH_INST_RETIRED2.SCALAR] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) + ( min( ( ( [FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0x3c] + [FP_ARITH_INST_RETIRED2.VECTOR] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) ) + ( [AMX_OPS_RETIRED.BF16:c1] / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) )" + }, { "name": "metric_TMA_..Heavy_Operations(%)", "expression": "100 * ( [PERF_METRICS.HEAVY_OPERATIONS] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) )" @@ -308,11 +328,6 @@ "expression": "[instructions] / [cpu-cycles]", "origin": "perfspect" }, - { - "name": "metric_TMA_Info_Core_ILP", - "expression": "[instructions] / [CPU_CLK_UNHALTED.DISTRIBUTED]", - "origin": "perfspect" - }, { "name": "metric_TMA_Info_System_SMT_2T_Utilization", "expression": "(1 - [CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE] / [CPU_CLK_UNHALTED.REF_DISTRIBUTED]) if [SOCKET_COUNT] > 1 else 0", diff --git a/events/spr.txt b/events/spr.txt index 16bc52a..eba75f1 100644 --- a/events/spr.txt +++ b/events/spr.txt @@ -8,23 +8,23 @@ cpu/event=0x51,umask=0x01,period=100003,name='L1D.REPLACEMENT'/, cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, cpu/event=0xd1,umask=0x01,period=1000003,name='MEM_LOAD_RETIRED.L1_HIT'/, -cpu/event=0xa3,umask=0x04,cmask=0x04,period=1000003,name='CYCLE_ACTIVITY.STALLS_TOTAL'/, -cpu-cycles, -ref-cycles, -instructions; - -cpu/event=0x80,umask=0x04,period=500009,name='ICACHE_DATA.STALLS'/, -cpu/event=0x83,umask=0x04,period=200003,name='ICACHE_TAG.STALLS'/, -cpu/event=0xa3,umask=0x08,cmask=0x08,period=1000003,name='CYCLE_ACTIVITY.CYCLES_L1D_MISS'/, -cpu/event=0xa3,umask=0x10,cmask=0x10,period=1000003,name='CYCLE_ACTIVITY.CYCLES_MEM_ANY'/, +cpu/event=0x25,umask=0x1f,period=100003,name='L2_LINES_IN.ALL'/, +cpu/event=0xa6,umask=0x02,period=2000003,name='EXE_ACTIVITY.1_PORTS_UTIL'/, +cpu/event=0xa6,umask=0x04,period=2000003,name='EXE_ACTIVITY.2_PORTS_UTIL'/, +cpu/event=0xa6,umask=0x80,period=2000003,name='EXE_ACTIVITY.3_PORTS_UTIL:u0x80'/, +cpu/event=0xa6,umask=0xc,period=2000003,name='EXE_ACTIVITY.2_PORTS_UTIL:u0xc'/, cpu-cycles, ref-cycles, instructions; -cpu/event=0x25,umask=0x1f,period=100003,name='L2_LINES_IN.ALL'/, cpu/event=0xd1,umask=0x10,period=100021,name='MEM_LOAD_RETIRED.L2_MISS'/, cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/, cpu/event=0x11,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, +cpu/event=0x47,umask=0x03,cmask=0x03,period=1000003,name='MEMORY_ACTIVITY.STALLS_L1D_MISS'/, +cpu/event=0xa6,umask=0x40,cmask=0x02,period=1000003,name='EXE_ACTIVITY.BOUND_ON_STORES'/, +cpu/event=0xa6,umask=0x21,cmask=0x05,period=2000003,name='EXE_ACTIVITY.BOUND_ON_LOADS'/, +cpu/event=0xad,umask=0x10,period=1000003,name='INT_MISC.UOP_DROPPING'/, +cpu/event=0xad,umask=0x40,period=1000003,name='INT_MISC.UNKNOWN_BRANCH_CYCLES'/, cpu-cycles, ref-cycles, instructions; @@ -33,13 +33,10 @@ cpu/event=0x12,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, cpu/event=0x12,umask=0x04,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, cpu/event=0x13,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, cpu/event=0xd1,umask=0x02,period=200003,name='MEM_LOAD_RETIRED.L2_HIT'/, -cpu-cycles, -ref-cycles, -instructions; - -# OFFCORE -cpu/event=0x20,umask=0x08,cmask=0x01,period=1000003,name='OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD'/, -cpu/event=0x20,umask=0x08,cmask=0x04,period=1000003,name='OFFCORE_REQUESTS_OUTSTANDING.DATA_RD:c4'/, +cpu/event=0x3c,umask=0x02,period=25003,name='CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE'/, +cpu/event=0x3c,umask=0x08,period=2000003,name='CPU_CLK_UNHALTED.REF_DISTRIBUTED'/, +cpu/event=0xa2,umask=0x02,period=2000003,name='RESOURCE_STALLS.SCOREBOARD'/, +cpu/event=0xa3,umask=0x04,cmask=0x04,period=1000003,name='CYCLE_ACTIVITY.STALLS_TOTAL'/, cpu-cycles, ref-cycles, instructions; @@ -53,9 +50,9 @@ cpu/event=0x00,umask=0x86,period=10000003,name='PERF_METRICS.FETCH_LATENCY'/, cpu/event=0x00,umask=0x87,period=10000003,name='PERF_METRICS.MEMORY_BOUND'/, cpu/event=0x00,umask=0x85,period=10000003,name='PERF_METRICS.BRANCH_MISPREDICTS'/, cpu/event=0x00,umask=0x84,period=10000003,name='PERF_METRICS.HEAVY_OPERATIONS'/, -cpu/event=0xad,umask=0x10,period=1000003,name='INT_MISC.UOP_DROPPING'/, -cpu/event=0xad,umask=0x40,period=1000003,name='INT_MISC.UNKNOWN_BRANCH_CYCLES'/, -cpu/event=0xa6,umask=0x21,cmask=0x05,period=2000003,name='EXE_ACTIVITY.BOUND_ON_LOADS'/, +cpu/event=0x47,umask=0x09,cmask=0x09,period=1000003,name='MEMORY_ACTIVITY.STALLS_L3_MISS'/, +cpu/event=0x80,umask=0x04,period=500009,name='ICACHE_DATA.STALLS'/, +cpu/event=0x83,umask=0x04,period=200003,name='ICACHE_TAG.STALLS'/, cpu-cycles, ref-cycles, instructions; @@ -64,68 +61,86 @@ cpu/event=0x47,umask=0x03,cmask=0x03,period=1000003,name='MEMORY_ACTIVITY.STALLS cpu/event=0x12,umask=0x20,cmask=0x01,period=100003,name='DTLB_LOAD_MISSES.STLB_HIT:c1'/, cpu/event=0x12,umask=0x10,cmask=0x01,period=100003,name='DTLB_LOAD_MISSES.WALK_ACTIVE'/, cpu/event=0x47,umask=0x05,cmask=0x05,period=1000003,name='MEMORY_ACTIVITY.STALLS_L2_MISS'/, +cpu/event=0xa3,umask=0x10,cmask=0x10,period=1000003,name='CYCLE_ACTIVITY.CYCLES_MEM_ANY'/, +cpu/event=0xb0,umask=0x09,cmask=0x01,period=1000003,name='ARITH.DIV_ACTIVE'/, +cpu/event=0xad,umask=0x80,period=500009,name='INT_MISC.CLEAR_RESTEER_CYCLES'/, +cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/, cpu-cycles, ref-cycles, instructions; -cpu/event=0x47,umask=0x09,cmask=0x09,period=1000003,name='MEMORY_ACTIVITY.STALLS_L3_MISS'/, -cpu/event=0xa6,umask=0x40,cmask=0x02,period=1000003,name='EXE_ACTIVITY.BOUND_ON_STORES'/, -cpu/event=0xa6,umask=0x02,period=2000003,name='EXE_ACTIVITY.1_PORTS_UTIL'/, -cpu/event=0xa6,umask=0x04,period=2000003,name='EXE_ACTIVITY.2_PORTS_UTIL'/, +cpu/event=0xce,umask=0x02,cmask=0x01,period=100003,name='AMX_OPS_RETIRED.BF16:c1'/, +cpu/event=0xd3,umask=0x10,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM'/, +cpu/event=0xd1,umask=0x08,cmask=0x00,period=200003,name='MEM_LOAD_RETIRED.L1_MISS'/, +cpu/event=0xd1,umask=0x80,cmask=0x00,period=1000003,name='MEM_LOAD_RETIRED.LOCAL_PMM'/, +cpu/event=0xb1,umask=0x01,cmask=0x03,period=2000003,name='UOPS_EXECUTED.CYCLES_GE_3'/, +cpu/event=0xb1,umask=0x01,cmask=0x00,period=2000003,name='UOPS_EXECUTED.THREAD'/, +cpu/event=0xb1,umask=0x10,cmask=0x00,period=2000003,name='UOPS_EXECUTED.X87'/, +cpu/event=0xc2,umask=0x04,period=2000003,name='UOPS_RETIRED.MS'/, cpu-cycles, ref-cycles, instructions; -cpu/event=0x43,umask=0xfd,period=2000003,name='MEM_LOAD_COMPLETED.L1_MISS_ANY'/, -cpu/event=0xa2,umask=0x02,period=2000003,name='RESOURCE_STALLS.SCOREBOARD'/, -cpu/event=0xa6,umask=0x80,period=2000003,name='EXE_ACTIVITY.3_PORTS_UTIL:u0x80'/, -cpu/event=0xa6,umask=0xc,period=2000003,name='EXE_ACTIVITY.2_PORTS_UTIL:u0xc'/, +cpu/event=0xd0,umask=0x21,cmask=0x00,period=1000003,name='MEM_INST_RETIRED.LOCK_LOADS'/, +cpu/event=0xd0,umask=0x82,cmask=0x00,period=1000003,name='MEM_INST_RETIRED.ALL_STORES'/, +cpu/event=0x24,umask=0xe2,cmask=0x00,period=2000003,name='L2_RQSTS.ALL_RFO'/, +cpu/event=0x24,umask=0xc2,cmask=0x00,period=2000003,name='L2_RQSTS.RFO_HIT'/, +cpu/event=0xcf,umask=0x03,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED2.SCALAR'/, +cpu/event=0xcf,umask=0x1c,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED2.VECTOR'/, +cpu/event=0xc7,umask=0x02,umask=0x03,period=100003,name='FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03'/, +cpu/event=0xc7,umask=0x04,umask=0x3c,period=100003,name='FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0x3c'/, cpu-cycles, ref-cycles, instructions; -cpu/event=0xad,umask=0x80,period=500009,name='INT_MISC.CLEAR_RESTEER_CYCLES'/, -cpu/event=0xb1,umask=0x01,cmask=0x03,period=2000003,name='UOPS_EXECUTED.CYCLES_GE_3'/, -cpu/event=0x48,umask=0x01,period=1000003,name='L1D_PEND_MISS.PENDING'/, -cpu/event=0x03,umask=0x88,period=100003,name='LD_BLOCKS.NO_SR'/, +cpu/event=0x2a,umask=0x01,cmask=0x00,offcore_rsp=0x8003C0001,name='OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD'/, +cpu/event=0x2a,umask=0x01,cmask=0x00,offcore_rsp=0x10003C0002,name='OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM'/, +cpu/event=0x20,umask=0x04,cmask=0x01,period=1000003,name='OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO'/, +cpu/event=0xd1,umask=0x40,cmask=0x00,period=100007,name='MEM_LOAD_RETIRED.FB_HIT'/, cpu-cycles, ref-cycles, instructions; -cpu/event=0x00,umask=0x04,period=10000003,name='TOPDOWN.SLOTS'/, -cpu/event=0xc2,umask=0x04,period=2000003,name='UOPS_RETIRED.MS'/, -cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/, -cpu/event=0x47,umask=0x02,cmask=0x02,period=1000003,name='MEMORY_ACTIVITY.CYCLES_L1D_MISS'/, +cpu/event=0x79,umask=0x04,cmask=0x01,period=2000003,name='IDQ.MITE_CYCLES_ANY'/, +cpu/event=0x79,umask=0x04,cmask=0x06,period=2000003,name='IDQ.MITE_CYCLES_OK'/, +cpu/event=0x79,umask=0x08,cmask=0x01,period=2000003,name='IDQ.DSB_CYCLES_ANY'/, +cpu/event=0x79,umask=0x08,cmask=0x06,period=2000003,name='IDQ.DSB_CYCLES_OK'/, cpu-cycles, ref-cycles, instructions; -cpu/event=0x3c,umask=0x02,period=25003,name='CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE'/, -cpu/event=0x3c,umask=0x08,period=2000003,name='CPU_CLK_UNHALTED.REF_DISTRIBUTED'/, -cpu/event=0xb0,umask=0x09,cmask=0x01,period=1000003,name='ARITH.DIV_ACTIVE'/, +cpu/event=0xd3,umask=0x02,cmask=0x00,period=1000003,name='MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM'/, +cpu/event=0xd3,umask=0x01,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x104004477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x730004477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/, cpu-cycles, ref-cycles, instructions; -#offcore response -cpu/event=0x2a,umask=0x01,offcore_rsp=0x104004477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/, -cpu/event=0x2a,umask=0x01,offcore_rsp=0x730004477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/, -cpu-cycles:k, -ref-cycles:k, -instructions:k; - -cpu/event=0x2a,umask=0x01,offcore_rsp=0x1030004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/, -cpu/event=0x2a,umask=0x01,offcore_rsp=0x830004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/, +cpu/event=0xd2,umask=0x02,cmask=0x00,period=20011,name='MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD'/, +cpu/event=0xd2,umask=0x04,cmask=0x00,period=20011,name='MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD'/, +cpu/event=0x47,umask=0x02,cmask=0x02,period=1000003,name='MEMORY_ACTIVITY.CYCLES_L1D_MISS'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/, cpu-cycles, ref-cycles, instructions; +cpu/event=0x2a,umask=0x01,cmask=0x00,offcore_rsp=0x1030004477,name='OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM'/, cpu/event=0x2a,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/, -cpu/event=0x2a,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/, +cpu/event=0x20,umask=0x08,cmask=0x01,period=1000003,name='OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD'/, +cpu/event=0x20,umask=0x08,cmask=0x04,period=1000003,name='OFFCORE_REQUESTS_OUTSTANDING.DATA_RD:c4'/, cpu-cycles, ref-cycles, instructions; +cpu/event=0xd3,umask=0x08,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD'/, +cpu/event=0xd3,umask=0x04,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x1030004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x830004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/, +cpu-cycles:k, +ref-cycles:k, +instructions:k; + #C6 cstate_core/c6-residency/; cstate_pkg/c6-residency/; @@ -135,9 +150,7 @@ power/energy-pkg/, power/energy-ram/; #UPI -upi/event=0x02,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/, -upi/event=0x02,umask=0x97,name='UNC_UPI_TxL_FLITS.NON_DATA'/, -upi/event=0x1,umask=0x0,name='UNC_UPI_CLOCKTICKS'/; +upi/event=0x02,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/; #CHA (Cache) cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/, diff --git a/perf-collect.py b/perf-collect.py index 2247b74..5616026 100644 --- a/perf-collect.py +++ b/perf-collect.py @@ -24,6 +24,7 @@ "Cascadelake", "Icelake", "SapphireRapids", + "EmeraldRapids", ] @@ -236,6 +237,7 @@ def validate_file(fname): crash("Input argument muxinterval is too large, max is [1s or 1000ms]") # select architecture default event file if not supplied + have_uncore = True procinfo = perf_helpers.get_cpuinfo() arch, cpuname = perf_helpers.get_arch_and_name(procinfo) if not arch: @@ -251,6 +253,9 @@ def validate_file(fname): eventfile = "icx.txt" elif arch == "sapphirerapids": eventfile = "spr.txt" + elif arch == "emeraldrapids": + eventfile = "spr.txt" + have_uncore = False if eventfile is None: crash(f"failed to match architecture ({arch}) to event file name.") @@ -283,17 +288,21 @@ def validate_file(fname): # get perf events to collect collection_events = [] - imc, upi = perf_helpers.get_imc_upi_count() - cha = perf_helpers.get_cha_count() - have_uncore = True - if imc == 0 and cha == 0 and upi == 0: + sys_devs = perf_helpers.get_sys_devices() + if ( + "uncore_cha" not in sys_devs + and "uncore_cbox" not in sys_devs + and "uncore_upi" not in sys_devs + and "uncore_qpi" not in sys_devs + and "uncore_imc" not in sys_devs + ): logging.info("disabling uncore (possibly in a vm?)") have_uncore = False if arch == "icelake": logging.warning( "Due to lack of vPMU support, TMA L1 events will not be collected" ) - if arch == "sapphirerapids": + if arch == "sapphirerapids" or arch == "emeraldrapids": logging.warning( "Due to lack of vPMU support, TMA L1 & L2 events will not be collected" ) diff --git a/perf-postprocess.py b/perf-postprocess.py index 7628725..c948286 100644 --- a/perf-postprocess.py +++ b/perf-postprocess.py @@ -18,7 +18,6 @@ from src.common import crash from src import common from src import perf_helpers -from src import report class Mode(Enum): @@ -86,10 +85,13 @@ def get_args(script_path): action="store_true", ) parser.add_argument( - "--rawevents", help="save raw events in .csv format", action="store_true" + "-f", + "--fail-postprocessing", + help="gives exit code 1 when postprocessing detects missing event or zero division errors", + action="store_true", ) parser.add_argument( - "-html", "--html", type=str, default=None, help="Static HTML report" + "--rawevents", help="save raw events in .csv format", action="store_true" ) args = parser.parse_args() @@ -367,7 +369,7 @@ def get_metric_file_name(microarchitecture): metric_file = "metric_skx_clx.json" elif microarchitecture == "icelake": metric_file = "metric_icx.json" - elif microarchitecture == "sapphirerapids": + elif microarchitecture == "sapphirerapids" or microarchitecture == "emeraldrapids": metric_file = "metric_spr.json" else: crash("Suitable metric file not found") @@ -564,6 +566,62 @@ def generate_metrics_averages( return +def row(df, name): + if name in df.index: + return json.dumps(df.loc[name, :].values.flatten().tolist()) + else: + return "[]" + + +def write_html(time_series_df, perf_mode, out_file_path): + html_file = "base.html" + if getattr(sys, "frozen", False): + basepath = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__))) + html_file = os.path.join(basepath, html_file) + elif __file__: + html_file = script_path + "/src/" + html_file + else: + crash("Unknown application type") + + html = "" + with open(html_file, "r") as f_html: + html = f_html.read() + + # only show TMA if system-wide mode + if perf_mode == Mode.System: + time_series_df.index.name = "metrics" + for metric in [ + ["CPUUTIL", "metric_CPU utilization %"], + ["CPIDATA", "metric_CPI"], + ["CPUFREQ", "metric_CPU operating frequency (in GHz)"], + ["CPIDATA", "metric_CPI"], + ["PKGPOWER", "metric_package power (watts)"], + ["DRAMPOWER", "metric_DRAM power (watts)"], + ["L1DATA", "metric_L1D MPI (includes data+rfo w/ prefetches)"], + ["L2DATA", "metric_L2 MPI (includes code+data+rfo w/ prefetches)"], + ["LLCDATA", "metric_LLC data read MPI (demand+prefetch)"], + ["READDATA", "metric_memory bandwidth read (MB/sec)"], + ["WRITEDATA", "metric_memory bandwidth write (MB/sec)"], + ["TOTALDATA", "metric_memory bandwidth total (MB/sec)"], + ["REMOTENUMA", "metric_NUMA %_Reads addressed to remote DRAM"], + ]: + html = html.replace(metric[0], row(time_series_df, metric[1])) + + avg = time_series_df.mean(numeric_only=True, axis=1).to_frame() + for number in [ + ["FRONTEND", "metric_TMA_Frontend_Bound(%)"], + ["BACKEND", "metric_TMA_Backend_Bound(%)"], + ["CORE", "metric_TMA_..Core_Bound(%)"], + ["MEMORY", "metric_TMA_..Memory_Bound(%)"], + ["BADSPECULATION", "metric_TMA_Bad_Speculation(%)"], + ["RETIRING", "metric_TMA_Retiring(%)"], + ]: + html = html.replace(number[0], str(avg.loc[number[1], 0])) + + with open(os.path.splitext(out_file_path)[0] + ".html", "w") as file: + file.write(html) + + def log_skip_metric(metric, instance, msg): logging.warning( msg @@ -585,6 +643,7 @@ def generate_metrics( metrics, perf_mode, verbose=False, + fail_postprocessing=False, ): time_slice_groups = perf_data_df.groupby("ts", sort=False) time_metrics_result = {} @@ -725,8 +784,14 @@ def generate_metrics( logging.warning( str(len(errors[error])) + " " + error + ": " + str(errors[error]) ) + if fail_postprocessing and ( + len(errors["MISSING EVENTS"]) > 0 or len(errors["ZERO DIVISION"]) > 0 + ): + crash("Failing due to postprocessing errors") generate_metrics_time_series(time_series_df, perf_mode, out_file_path) generate_metrics_averages(time_series_df, perf_mode, out_file_path) + if perf_mode == Mode.System: + write_html(time_series_df, perf_mode, out_file_path) return @@ -879,19 +944,11 @@ def generate_raw_events(perf_data_df, out_file_path, perf_mode): metrics, perf_mode, args.verbose, + args.fail_postprocessing, ) logging.info( "Generated results file(s) in: " + out_file_path.rsplit("/", 1)[0] ) - if args.html: - report.write_html( - cgroup_id_out_file_path, - perf_mode, - meta_data["constants"]["CONST_ARCH"], - args.html.replace( - ".html", "_" + meta_data["CGROUP_HASH"][cgroup_id] + ".html" - ), - ) # generate metrics for system, persocket or percore else: generate_metrics( @@ -902,13 +959,7 @@ def generate_raw_events(perf_data_df, out_file_path, perf_mode): metrics, perf_mode, args.verbose, + args.fail_postprocessing, ) logging.info("Generated results file(s) in: " + out_file_path.rsplit("/", 1)[0]) - if args.html: - report.write_html( - out_file_path, - perf_mode, - meta_data["constants"]["CONST_ARCH"], - args.html, - ) logging.info("Done!") diff --git a/similarity-analyzer/dopca.py b/similarity-analyzer/dopca.py index 95daba3..e6f1cd7 100644 --- a/similarity-analyzer/dopca.py +++ b/similarity-analyzer/dopca.py @@ -1,294 +1,402 @@ -#!/usr/bin/env python3 - -########################################################################################################### -# Copyright (C) 2021-2023 Intel Corporation -# SPDX-License-Identifier: BSD-3-Clause -########################################################################################################### - -import os -import sys -import logging -import subprocess # nosec -from argparse import ArgumentParser -import pandas as pd -import numpy as np - - -def verify_args(args): - if not args.files: - parser.print_help() - logger.error("files is a required field") - sys.exit(1) - basepath = os.getcwd() - outfilecsv = os.path.join(basepath, args.out + ".csv") - if os.path.exists(outfilecsv): - logger.warning(f"The {outfilecsv} exists already!") - sys.exit(1) - if args.march and args.march not in ("CLX", "ICX"): - logger.warning(f"The current released version doesn't support {args.march}") - parser.print_help() - sys.exit(1) - try: - files = args.files.split(",") - if "" in files: - logger.error("File name cannot be null/empty string") - sys.exit(1) - component_size = len(files) - if component_size in (0, 1) and not args.march: - logger.error( - f"The number of components requested is {component_size}, a minimum of 2 is required..." - ) - raise Exception - except Exception as invalid_comp_size: - raise SystemExit( - 'Minimum of 2 input files required and must contain "," delimiter between them' - ) from invalid_comp_size - if args.label: - if "" in args.label: - logger.error("label cannot be null/empty string") - parser.print_help() - sys.exit(1) - if component_size != len(args.label): - logger.warning(f"The size of labels {args.label} don't match with input files {args.files}") - parser.print_help() - sys.exit(1) - return component_size - -def get_version(): - basepath = os.getcwd() - version_file = os.path.join(basepath, "_version.txt") - if os.access(version_file, os.R_OK): - with open(version_file) as vfile: - version = vfile.readline() - else: - raise SystemError("version file isn't accessible") - return version - -def setup_custom_logger(name, debug): - formatter = logging.Formatter( - fmt="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S" - ) - handler = logging.FileHandler("log.txt", mode="w") - handler.setFormatter(formatter) - screen_handler = logging.StreamHandler(stream=sys.stdout) - screen_handler.setFormatter(formatter) - custom_logger = logging.getLogger(name) - if debug: - custom_logger.setLevel(logging.DEBUG) - else: - custom_logger.setLevel(logging.INFO) - custom_logger.addHandler(handler) - custom_logger.addHandler(screen_handler) - return custom_logger - -def handle_nan(data, comp_size): - logger.debug("Checking for NaN in telemetry input files") - df = pd.DataFrame(data) - deleted_workload_profiles = [] - if not df.isnull().values.any(): - logger.debug("No NaN found in telemetry input files") - else: - logger.warning("NaN found in the input telemetry files, attempting to fix them") - df_thresh_nan = df.dropna(thresh=0.8*len(df.columns)) - diff_df = pd.merge(df, df_thresh_nan, how='outer', indicator='Exist') - diff_df = diff_df.loc[diff_df['Exist'] != 'both'] - deleted_row_indices = diff_df.index.tolist() - if deleted_row_indices: - if len(deleted_row_indices) in (comp_size, comp_size-1): - #too many workload profiles have NaN greater than threshold, must quit similarity analysis - logger.error("Attempted dropping of NaNs resulted in fewer #input profiles without NaN....quiting similarity analysis") - sys.exit(1) - logger.warning("The following input files contain NaN and will no longer be considered for similarity analysis") - inp_files = args.files.split(",") - for row in deleted_row_indices: - for index, filename in enumerate(inp_files): - if row == index: - comp_size = comp_size - 1 - logger.warning(f"{filename}") - if args.label: - deleted_workload_profiles.append(args.label[index]) - else: - deleted_workload_profiles.append(filename) - df = data = df_thresh_nan - if df.isnull().values.any(): - logger.debug(f"A total of {df.isnull().sum().sum()} NaN found in your telemetry files and these will be replaced with large negative number") - data = df.fillna(-99999) - return data, df.shape[0], deleted_workload_profiles - -def dopca(dataset, colnames, n_components, cols): - # lazy loading - from sklearn.preprocessing import StandardScaler - from sklearn.decomposition import PCA - logger.info("starting PCA") - # cleaning and separating dimensions - logger.debug(f"deleting colnames {colnames[0]}") - del colnames[0] - num_val = dataset.loc[:, colnames].values - num_val, n_components, del_rows = handle_nan(num_val, n_components) - if del_rows: - for profiles in del_rows: - try: - cols.remove(profiles) - except ValueError as e: - logger.error(e) - sys.exit(1) - # normalizing the metrics - num_val = StandardScaler().fit_transform(num_val) - logger.debug(f"Post normalizing metrics, num_val: {num_val}") - # PCA analysis, Create PCA model - #pca = PCA(n_components=n_components) #Limitation: If the n_components(no of workloads) are greater than num_val(the number of features), it will throw error. - - n_components = min(len(num_val), len(colnames)) #Solution: To scale it for any number of workloads, generate PCAs equivalent to number of features/performance matrics (instead of number of workloads) that we have for each workload. - pca = PCA(n_components=n_components) - - # transform - principal_components = pca.fit_transform(num_val) - principal_df = pd.DataFrame( - data=principal_components, - columns=["PC" + str(i) for i in range(1, n_components + 1)], - ) - logger.debug(f"explained variance ratio: {pca.explained_variance_ratio_}") - metric_df = pd.DataFrame(cols, columns=["Metric"]) - # concatenating the dataframe along axis = 1 - final_dataframe = pd.concat([principal_df, metric_df], axis=1) - logger.debug(f"principalDF:\n\n {principal_df}") - logger.debug(f"finalDF:\n\n {final_dataframe}") - logger.info("PCA completed") - return final_dataframe - -# plot along PCs -def plotpca(rownames, dataframe): - # lazy loading - from matplotlib import pyplot as plt - from matplotlib import cm as colmgr - - logger.info("PCA plot initiated") - fig = plt.figure(figsize=(8, 8)) - plot = fig.add_subplot(1, 1, 1) - plot.set_xlabel("Principal Component 1", fontsize=15) - plot.set_ylabel("Principal Component 2", fontsize=15) - plot.set_title("Similarity Analyzer", fontsize=20) - xs = np.arange(len(rownames)) - ys = [i + xs + (i * xs) ** 2 for i in range(len(rownames))] - colors = colmgr.rainbow(np.linspace(0, 1, len(ys))) - for target, color in zip(rownames, colors): - indices_to_keep = dataframe["Metric"] == target - pc1 = dataframe.loc[indices_to_keep, "PC1"] - pc2 = dataframe.loc[indices_to_keep, "PC2"] - plot.scatter(pc1, pc2, c=color.reshape(1, -1), s=50) - plot.annotate(target, (pc1, pc2)) - plt.xlabel("PC1", fontsize=8) - plt.ylabel("PC2", fontsize=8) - plot.grid() - plt.savefig(outfile) - logger.info(f"PCA plot saved at {outfile}") - - -if __name__ == "__main__": - parser = ArgumentParser(description="Similarity Analyzer") - required_arg = parser.add_argument_group("required arguments") - required_arg.add_argument( - "-f", "--files", type=str, default=None, help='excel files delimited by ","' - ) - parser.add_argument( - "-p", - "--postprocessType", - type=str, - default="perfspect", - help="pmu postprocessing tool used (perfspect)", - ) - parser.add_argument( - "-o", "--out", type=str, default="sim_workload", help="output file name" - ) - parser.add_argument( - "-d", "--debug", dest="debug", default=False, action="store_true" - ) - parser.add_argument( - "-v", "--version", help="prints the version of the tool", action="store_true" - ) - parser.add_argument( - "-m", - "--march", - help="plot pca against reference SPECcpu2017 (int_rate) components based on architecture specified. Expected values: ICX/CLX", - ) - parser.add_argument( - "-l", - "--label", - type=str, - help='label each workload profiles which will be used to plot for similarity analysis; This must map to corresponding input files delimited by ","', - ) - args = parser.parse_args() - logger = setup_custom_logger("similarity_analyzer", args.debug) - if args.version: - print(get_version()) - sys.exit(0) - if args.label: - args.label = args.label.split(",") - logger.info(f"starting similarity analyzer {get_version()}") - comp_size = verify_args(args) - if args.march: - import glob - if args.postprocessType == "perfspect": - spec_profiles = glob.glob("Reference/" + args.march + "/*.csv") - else: - logger.error("Similarity Analyzer supports perfspect telemetry data only") - sys.exit(1) - for spec in spec_profiles: - args.files += "," + spec - logger.debug(f"The files being compared are: {args.files}") - cmd = [] - cmd.append("python3") - cmd.append("data_formatter/main.py") - cmd.append("-f") - cmd.append(args.files) - cmd.append("-m") - cmd.append("d") - cmd.append("-o") - cmd.append(args.out + ".csv") - if args.postprocessType == "perfspect": - cmd.append("-p") - logger.debug(f"The command used by data formatter: {cmd}") - logger.info( - f"Initiating data_formatter with {args.postprocessType} pmu postprocessor" - ) - with subprocess.Popen( # nosec - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE - ) as process: - out, err = process.communicate() - if err: - logger.error(err.decode()) - sys.exit(1) - if "Data compared and stored" in str(out): - logger.info(f"data formatter collated pmu metrics at {args.out}.csv file") - else: - logger.error( - "data formatter wasn't able to collate all the pmu metrics from input files" - ) - data = args.out + ".csv" - outfile = args.out + ".png" - pd_data = pd.read_csv(data) - pd_data = pd_data.rename( - columns={i: i[14:] for i in pd_data.columns if i.startswith("Reference")} - ) - if not args.label and args.postprocessType == "perfspect": - pd_data = pd_data.rename( - columns={i: i[:-4] for i in pd_data.columns if i != "Metric"} - ) - elif args.label and args.postprocessType == "perfspect": - pd_data = pd_data.rename( - columns={i: str(j) for i,j in zip(pd_data.drop(columns="Metric").columns, args.label)} - ) - else: - logger.error("Similarity Analyzer supports perfspect telemetry data only") - sys.exit(1) - logger.debug(f"dataset before transpose:\n {pd_data}") - columns = list(pd_data.columns) - columns.remove("Metric") - pd_data.set_index("Metric", inplace=True) - pd_data = pd_data.T - pd_data.insert(loc=0, column="metric", value=columns) - logger.debug(f"dataset post transpose:\n {pd_data}") - column_names = pd_data.columns.tolist() - final_df = dopca(pd_data, column_names, comp_size, columns) - row_names = final_df["Metric"].values - plotpca(row_names, final_df) \ No newline at end of file +#!/usr/bin/env python3 + +########################################################################################################### +# Copyright (C) 2021-2023 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause +########################################################################################################### +import os +import sys +import logging +import subprocess # nosec +from argparse import ArgumentParser +import pandas as pd +import numpy as np +from pca import pca +import matplotlib.pyplot as plt +from matplotlib import cm as colmgr +from sklearn.preprocessing import StandardScaler +from sklearn.decomposition import PCA +from scipy.cluster import hierarchy + + +def verify_args(parser, args): + if not args.files: + parser.print_help() + logger.error("files is a required field") + sys.exit(1) + if args.march and args.march not in ("CLX", "ICX"): + logger.warning(f"The current released version doesn't support {args.march}") + parser.print_help() + sys.exit(1) + try: + print(args.files) + args.files = args.files.split(",") + print(args.files) + if "" in args.files: + logger.error("File name cannot be null/empty string") + sys.exit(1) + component_size = len(args.files) + if component_size in (0, 1) and not args.march: + logger.error( + f"The number of components requested is {component_size}, a minimum of 2 is required..." + ) + raise Exception + except Exception as invalid_comp_size: + raise SystemExit( + 'Minimum of 2 input files required and must contain "," delimiter between them' + ) from invalid_comp_size + if args.label: + if "" in args.label: + logger.error("label cannot be null/empty string") + parser.print_help() + sys.exit(1) + if component_size != len(args.label): + logger.warning( + f"The size of labels {args.label} don't match with input files {args.files}" + ) + parser.print_help() + sys.exit(1) + return component_size + + +def get_version(): + basepath = os.getcwd() + version_file = os.path.join(basepath, "_version.txt") + if os.access(version_file, os.R_OK): + with open(version_file) as vfile: + version = vfile.readline() + else: + raise SystemError("version file isn't accessible") + return version + + +def setup_custom_logger(name, debug): + formatter = logging.Formatter( + fmt="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S" + ) + handler = logging.FileHandler("log.txt", mode="w") + handler.setFormatter(formatter) + screen_handler = logging.StreamHandler(stream=sys.stdout) + screen_handler.setFormatter(formatter) + custom_logger = logging.getLogger(name) + if debug: + custom_logger.setLevel(logging.DEBUG) + else: + custom_logger.setLevel(logging.INFO) + custom_logger.addHandler(handler) + custom_logger.addHandler(screen_handler) + return custom_logger + + +def handle_nan(data, comp_size): + logger.debug("Checking for NaN in telemetry input files") + df = pd.DataFrame(data).fillna(0) + deleted_workload_profiles = [] + if not df.isnull().values.any(): + logger.debug("No NaN found in telemetry input files") + else: + logger.warning("NaN found in the input telemetry files, attempting to fix them") + df_thresh_nan = df.dropna(thresh=0.8 * len(df.columns)) + diff_df = pd.merge(df, df_thresh_nan, how="outer", indicator="Exist") + diff_df = diff_df.loc[diff_df["Exist"] != "both"] + deleted_row_indices = diff_df.index.tolist() + if deleted_row_indices: + if len(deleted_row_indices) in (comp_size, comp_size - 1): + # too many workload profiles have NaN greater than threshold, must quit similarity analysis + logger.error( + "Attempted dropping of NaNs resulted in fewer #input profiles without NaN....quiting similarity analysis" + ) + sys.exit(1) + logger.warning( + "The following input files contain NaN and will no longer be considered for similarity analysis" + ) + inp_files = args.files + for row in deleted_row_indices: + for index, filename in enumerate(inp_files): + if row == index: + comp_size = comp_size - 1 + logger.warning(f"{filename}") + if args.label: + deleted_workload_profiles.append(args.label[index]) + else: + deleted_workload_profiles.append(filename) + df = data = df_thresh_nan + if df.isnull().values.any(): + logger.debug( + f"A total of {df.isnull().sum().sum()} NaN found in your telemetry files and these will be replaced with large negative number" + ) + data = df.fillna(-99999) + return data, df.shape[0], deleted_workload_profiles + + +def add_dimension_to_data(dataset, metric_name, metric_names): + new_vec = [0] * len(metric_names) + new_vec[metric_names.index(metric_name)] = 100 + dataset.loc[len(dataset.index)] = new_vec + + +def dopca(org_dataset, metric_names, org_workload_names, dimensions): + workload_names = org_workload_names.copy() + # Make a coupy of dataset + dataset = org_dataset.copy() + dataset.columns = metric_names + print(dataset) + print(workload_names) + vec = [0] * len(metric_names) + print(vec) + print(len(vec)) + dataset.loc[len(dataset.index)] = vec + workload_names.append("Origin") + + for d in dimensions: + add_dimension_to_data(dataset, d[1], metric_names) + workload_names.append(d[0]) + dataset.index = workload_names + print("after adding dimensions") + print(dataset) + logger.info("starting PCA") + # Cleaning and separating dimensions + num_val = dataset.loc[:, metric_names].values + num_val, n_components, del_rows = handle_nan(num_val, 2) + if del_rows: + for profiles in del_rows: + try: + workload_names.remove(profiles) + except ValueError as e: + logger.error(e) + sys.exit(1) + # Normalizing the metrics + num_val = StandardScaler().fit_transform(num_val) + logger.debug(f"Post normalizing metrics, num_val: {num_val}") + # To scale to any number of workloads, generate PCAs equivalent to minimum between workloads and features + n_components = min(len(num_val), len(metric_names)) + pca = PCA(n_components=n_components) + # Transform + principal_components = pca.fit_transform(num_val) + principal_df = pd.DataFrame( + data=principal_components, + columns=["PC" + str(i) for i in range(1, n_components + 1)], + ) + logger.debug(f"explained variance ratio: {pca.explained_variance_ratio_}") + metric_df = pd.DataFrame(workload_names, columns=["Metric"]) + # concatenating the dataframe along axis = 1 + final_dataframe = pd.concat([principal_df, metric_df], axis=1) + logger.debug(f"principalDF:\n\n {principal_df}") + logger.debug(f"finalDF:\n\n {final_dataframe}") + logger.info("PCA completed") + return final_dataframe + + +def do_hierarchy(dataset, features_names, workload_names, outfile_hierarchy): + Y = hierarchy.linkage(dataset) + print("hierarchy") + print(dataset) + print(workload_names) + _ = hierarchy.dendrogram( + Y, labels=workload_names, show_leaf_counts=True, leaf_rotation=90 + ) + plt.ylabel("distance") + plt.savefig(outfile_hierarchy) + plt.clf() + logger.info(f"Hierarchy plot saved at {outfile_hierarchy}") + + +def dopca_density(dataset, features_names, workload_names, outfile_pca2): + logger.info("starting PCA-2") + + # Initialize + model = pca(normalize=True) + # Fit transform and include the column labels and row labels + dataset = dataset.reset_index(drop=True) + dataset = dataset.apply(pd.to_numeric, errors="ignore") + dataset.columns = features_names + # generate scatter plot with density and workload labels + results = model.fit_transform( + dataset, col_labels=features_names, row_labels=["0" for x in workload_names] + ) + pc_data_frame = results["PC"] + c = 0 + model.scatter(HT2=True, density=True) + for index, row in pc_data_frame.iterrows(): + plt.text(row["PC1"], row["PC2"], workload_names[c], fontsize=16) + c += 1 + plt.savefig(outfile_pca2) + plt.clf() + logger.info("PCA-2 completed") + logger.info(f"PCA_2 plot saved at {outfile_pca2}") + return results + + +# plot along PCs +def plotpca(rownames, dataframe, outfile_pca, dimensions): + logger.info("PCA plot initiated") + fig = plt.figure(figsize=(8, 8)) + plot = fig.add_subplot(1, 1, 1) + plot.set_xlabel("Principal Component 1", fontsize=15) + plot.set_ylabel("Principal Component 2", fontsize=15) + plot.set_title("Similarity Analyzer", fontsize=20) + xs = np.arange(len(rownames)) + ys = [i + xs + (i * xs) ** 2 for i in range(len(rownames))] + colors = colmgr.rainbow(np.linspace(0, 1, len(ys))) + for target, color in zip(rownames, colors): + indices_to_keep = dataframe["Metric"] == target + pc1 = dataframe.loc[indices_to_keep, "PC1"] + pc2 = dataframe.loc[indices_to_keep, "PC2"] + plot.scatter(pc1, pc2, c=color.reshape(1, -1), s=50) + plot.annotate(target, (pc1, pc2)) + plt.xlabel("PC1", fontsize=8) + plt.ylabel("PC2", fontsize=8) + plt.grid() + # add arrows + origin_vector = dataframe[dataframe["Metric"] == "Origin"] + for d in dimensions: + end_vector = dataframe[dataframe["Metric"] == d[0]] + plt.arrow( + origin_vector["PC1"].values[0], + origin_vector["PC2"].values[0], + 3 * (end_vector["PC1"].values[0] - origin_vector["PC1"].values[0]), + 3 * (end_vector["PC2"].values[0] - origin_vector["PC2"].values[0]), + length_includes_head=True, + width=0.1, + ) + + plt.savefig(outfile_pca) + plt.clf() + + logger.info(f"PCA plot saved at {outfile_pca}") + + +def get_args(): + parser = ArgumentParser(description="Similarity Analyzer") + required_arg = parser.add_argument_group("required arguments") + required_arg.add_argument( + "-f", "--files", type=str, default=None, help='excel files delimited by ","' + ) + parser.add_argument( + "-o", "--out", type=str, default="sim_workload", help="output file name" + ) + parser.add_argument( + "-d", "--debug", dest="debug", default=False, action="store_true" + ) + parser.add_argument( + "-v", "--version", help="prints the version of the tool", action="store_true" + ) + parser.add_argument( + "-m", + "--march", + help="plot pca against reference SPECcpu2017 (int_rate) components based on architecture specified. Expected values: ICX/CLX", + ) + parser.add_argument( + "-l", + "--label", + type=str, + help='label each workload profiles which will be used to plot for similarity analysis; This must map to corresponding input files delimited by ","', + ) + args = parser.parse_args() + if args.version: + print(get_version()) + sys.exit(0) + if args.label: + args.label = args.label.split(",") + print("verifying args") + verify_args(parser, args) + print("verifying args done") + + return parser.parse_args() + + +def format_data_for_PCA(args): + cmd = [] + cmd.append("python3") + cmd.append("data_formatter/main.py") + cmd.append("-f") + cmd.append(args.files) + cmd.append("-m") + cmd.append("d") + cmd.append("-o") + cmd.append(args.out + ".csv") + cmd.append("-p") + logger.debug(f"The command used by data formatter: {cmd}") + print(cmd) + with subprocess.Popen( # nosec + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) as process: + out, err = process.communicate() + if err: + logger.error(err.decode()) + sys.exit(1) + if "Data compared and stored" in str(out): + logger.info(f"data formatter collated pmu metrics at {args.out}.csv file") + else: + logger.error( + "data formatter wasn't able to collate all the pmu metrics from input files" + ) + + +def get_formatted_data_for_PCA(data_file_path): + pd_data = pd.read_csv(data_file_path) + pd_data = pd_data.rename( + columns={i: i[14:] for i in pd_data.columns if i.startswith("Reference")} + ) + if not args.label: + pd_data = pd_data.rename( + columns={i: i[:-4] for i in pd_data.columns if i != "Metric"} + ) + elif args.label: + pd_data = pd_data.rename( + columns={ + i: str(j) + for i, j in zip(pd_data.drop(columns="Metric").columns, args.label) + } + ) + return pd_data + + +if __name__ == "__main__": + args = get_args() + logger = setup_custom_logger("similarity_analyzer", args.debug) + logger.info(f"starting similarity analyzer {get_version()}") + + format_data_for_PCA(args) + + data_file_path = args.out + ".csv" + outfile = args.out + ".png" + + pd_data = get_formatted_data_for_PCA(data_file_path) + features_names = pd_data["Metric"].tolist() + + pd_data = pd_data.iloc[:, 1:] + logger.debug(f"dataset before transpose:\n {pd_data}") + + workload_names = list(pd_data.columns) + + pd_data = pd_data.T + pd_data = pd_data.reset_index(drop=True) + + pd_data.insert(loc=0, column="metric", value=workload_names) + pd_data.set_index("metric", inplace=True) + + logger.debug(f"dataset post transpose:\n {pd_data}") + features_index = pd_data.columns.tolist() + + pd_data = pd_data.fillna(0) + + # PCA + dimensions = [ + ("Front-end", "metric_TMA_Frontend_Bound(%)"), + ("Back-end", "metric_TMA_Backend_Bound(%)"), + ] + final_df = dopca(pd_data, features_names, workload_names, dimensions) + row_names = final_df["Metric"].values + outfile_pca = args.out + "_pca.png" + plotpca(row_names, final_df, outfile_pca, dimensions) + + # PCA with density + outfile_pca_density = args.out + "_pca_density.png" + final_df = dopca_density( + pd_data, features_names, workload_names, outfile_pca_density + ) + + # Hierarchy + outfile_hierarchy = args.out + "_hierarchy.png" + do_hierarchy(pd_data, features_index, workload_names, outfile_hierarchy) diff --git a/similarity-analyzer/requirements.txt b/similarity-analyzer/requirements.txt index 2a2e4d5..6675382 100644 --- a/similarity-analyzer/requirements.txt +++ b/similarity-analyzer/requirements.txt @@ -3,3 +3,5 @@ pandas scikit_learn simplejson xlrd +pca +scipy \ No newline at end of file diff --git a/src/base.html b/src/base.html new file mode 100644 index 0000000..f94622f --- /dev/null +++ b/src/base.html @@ -0,0 +1,515 @@ + + + + + PerfSpect + + + + + + + + + + + + + + + +
+ + + + \ No newline at end of file diff --git a/src/basic_stats.py b/src/basic_stats.py deleted file mode 100644 index 75288ff..0000000 --- a/src/basic_stats.py +++ /dev/null @@ -1,228 +0,0 @@ -#!/usr/bin/env python3 - -########################################################################################################### -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: BSD-3-Clause -########################################################################################################### - -import os -import pandas as pd -import plotly -import plotly.graph_objects as go -import tempfile -from yattag import Doc -from src.common import crash -from collections import OrderedDict - - -os.environ["MPLCONFIGDIR"] = tempfile.mkdtemp() -doc, tag, text = Doc().tagtext() - - -def get_fig(df, y, name, title, title_text): - # Plot row 1 col 1 - fig = go.Figure() - for i in range(len(y)): - if y[i] not in df.columns: - continue - fig.add_trace( - go.Scatter( - x=df["time"], - y=df[y[i]], - name=name[i], - showlegend=True, - ) - ) - fig.update_layout(title=title) - fig.update_yaxes(title_text=title_text) - if y[0] == "metric_CPU utilization %": - fig.update_layout(yaxis_range=[0, 100]) - return fig - - -def get_row_header(): - fig = '
' - return fig - - -def get_row_footer(): - fig = "
" - return fig - - -def get_col(html_list): - col_start = '
' - col_end = "
" - fig = col_start + html_list.pop(0) + col_end - return fig - - -def row_of_3(html_list): - return ( - get_row_header() - + get_col(html_list) - + get_col(html_list) - + get_col(html_list) - + get_row_footer() - ) - - -def row_of_2(html_list): - return get_row_header() + get_col(html_list) + get_col(html_list) + get_row_footer() - - -def row_of_1(html_list): - return get_row_header() + get_col(html_list) + get_row_footer() - - -def get_stats_plot(input_file, arch): - try: - df = pd.read_csv(input_file, keep_default_na=False) - - except FileNotFoundError: - crash(f"{input} file not found") - figure_to_column_dict = OrderedDict() - figure_to_column_dict["CPU Operating Frequency"] = { - "metrics_prefixes": ["metric_CPU operating frequency (in GHz)"], - "Y_axis_text": "Freq (GHz)", - "name_prefix": ["Frequency"], - } - figure_to_column_dict["CPU Utilization"] = { - "metrics_prefixes": [ - "metric_CPU utilization %", - "metric_CPU utilization% in kernel mode", - ], - "Y_axis_text": "Percentage", - "name_prefix": ["User", "Kernel"], - } - figure_to_column_dict["CPI"] = { - "metrics_prefixes": ["metric_CPI", "metric_kernel_CPI"], - "Y_axis_text": "CPI", - "name_prefix": ["CPI", "Kernel CPI"], - } - figure_to_column_dict["Power"] = { - "metrics_prefixes": [ - "metric_package power (watts)", - "metric_DRAM power (watts)", - ], - "Y_axis_text": "Watts", - "name_prefix": ["Package", "DRAM"], - } - figure_to_column_dict["Memory Bandwidth"] = { - "metrics_prefixes": [ - "metric_memory bandwidth read (MB/sec)", - "metric_memory bandwidth write (MB/sec)", - "metric_memory bandwidth total (MB/sec)", - ], - "Y_axis_text": "MB/sec", - "name_prefix": ["Read", "Write", "Total"], - } - figure_to_column_dict["AVX Percentage"] = { - "metrics_prefixes": [ - "metric_core % cycles in non AVX license", - "metric_core % cycles in AVX2 license", - "metric_core % cycles in AVX-512 license", - ], - "Y_axis_text": "Percentage", - "name_prefix": ["AVX", "AVX2", "AVX512"], - } - figure_to_column_dict["NUMA Locality DRAM Reads %"] = { - "metrics_prefixes": [ - "metric_NUMA %_Reads addressed to local DRAM", - "metric_NUMA %_Reads addressed to remote DRAM", - ], - "Y_axis_text": "Percentage", - "name_prefix": ["Local", "Remote"], - } - figure_to_column_dict["TMA"] = { - "metrics_prefixes": [ - "metric_TMA_Frontend_Bound(%)", - "metric_TMA_Backend_Bound(%)", - ], - "Y_axis_text": "Percentage", - "name_prefix": ["TMA_Frontend", "TMA_Backend"], - } - figure_to_column_dict["Cache MPI"] = { - "metrics_prefixes": [ - "metric_L1D MPI (includes data+rfo w/ prefetches)", - "metric_L2 MPI (includes code+data+rfo w/ prefetches)", - "metric_LLC data read MPI (demand+prefetch)", - ], - "Y_axis_text": "MPI", - "name_prefix": ["L1D MPI", "L2 MPI", "LLC MPI"], - } - - figure_list = [] - for figure_title in figure_to_column_dict: - figure_data = figure_to_column_dict[figure_title] - for metric_index, metric_prefix in enumerate(figure_data["metrics_prefixes"]): - for column in df.columns: - if metric_prefix in column: - if "cols" not in figure_data: - figure_data["cols"] = [] - if "names" not in figure_data: - figure_data["names"] = [] - figure_data["cols"].append(column) - series_name = ( - figure_data["name_prefix"][metric_index] - + "_" - + column.replace(metric_prefix, "") - ) - figure_data["names"].append(series_name) - if "cols" in figure_data: - fig = get_fig( - df, - y=figure_data["cols"], - title=figure_title, - title_text=figure_data["Y_axis_text"], - name=figure_data["names"], - ) - figure_list.append(fig) - - for fig in figure_list: - # update layout - fig.update_layout( - font=dict(family="Courier New, monospace", size=14, color="Black") - ) - - fig.update_layout(paper_bgcolor="#f0f0f5", plot_bgcolor="white") - - fig.update_layout( - title_font_family="Open Sans", - title_font_color="Black", - ) - - fig.update_layout(autosize=True, margin=dict(l=20, r=30, b=20, t=70)) - - fig.update_layout( - legend=dict( - orientation="h", yanchor="bottom", y=1.01, xanchor="right", x=1.0 - ), - legend_groupclick="toggleitem", - ) - - # update axes - # fig.update_yaxes(showline=True, linewidth=2, linecolor="black", mirror=True) - fig.update_yaxes(rangemode="tozero") - fig.update_xaxes(showline=True, linewidth=1.5, linecolor="black") - fig.update_yaxes(showline=True, linewidth=1.5, linecolor="black") - fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor="LightPink") - fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor="LightPink") - fig.update_xaxes(ticks="inside", tickwidth=2, tickcolor="black", ticklen=6) - fig.update_yaxes(ticks="inside", tickwidth=2, tickcolor="black", ticklen=6) - - html_fig_list = [] - for fig in figure_list: - html_fig = plotly.offline.plot(fig, include_plotlyjs=False, output_type="div") - html_fig_list.append(html_fig) - - fig = "" - while len(html_fig_list) > 0: - if len(html_fig_list) >= 3: - fig = fig + row_of_3(html_fig_list) - if len(html_fig_list) == 2: - fig = fig + row_of_2(html_fig_list) - if len(html_fig_list) == 1: - fig = fig + row_of_1(html_fig_list) - - return fig diff --git a/src/icicle.py b/src/icicle.py deleted file mode 100644 index d640855..0000000 --- a/src/icicle.py +++ /dev/null @@ -1,112 +0,0 @@ -#!/usr/bin/env python3 - -########################################################################################################### -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: BSD-3-Clause -########################################################################################################### - -import numpy as np -import pandas as pd -import plotly.graph_objects as go -from yattag import Doc -from src.common import crash - -doc, tag, text = Doc().tagtext() -metric_parent = {} - -""" returns icicle figure with L1, L2, L3 and L4 TMA """ - - -def get_icicle(input_csv): - L1 = "pipeline" - L2 = "" - L3 = "" - try: - df = pd.read_csv(input_csv, keep_default_na=False) - except FileNotFoundError: - crash(f"{input_csv} File not found") - unwanted = ["%", "metric_TMA_", ".", "(", ")", "metric_TMAM_"] - df = df.replace("N/A", np.nan) - - TMA = df[df["metrics"].str.startswith("metric_TMA")] - - """ assign parent to each metric """ - for metric in TMA["metrics"]: - if metric == "pipeline": - metric_parent[metric] = "" - if any( - x in metric.lower() - for x in ["frontend_bound", "bad_speculation", "backend_bound", "retiring"] - ): - metric_parent[metric] = "pipeline" - L1 = metric - if metric.count(".") == 2: - metric_parent[metric] = L1 - L2 = metric - if metric.count(".") == 4: - metric_parent[metric] = L2 - L3 = metric - if metric.count(".") == 6: - metric_parent[metric] = L3 - - """ get parents """ - parent, ignore = get_parents(TMA["metrics"].tolist()) - TMA = TMA[~TMA["metrics"].isin(ignore)] - - """ prepare data """ - tma = TMA.copy() - for item in unwanted: - tma["metrics"] = tma["metrics"].str.replace(item, "", regex=False) - characters = ["pipeline"] + tma["metrics"].tolist() - parent = [""] + parent - new_data = [] - # pipeline_avg = TMA[TMA["metrics"] =="metric_TMA_Frontend_Bound(%)"].avg.iloc[0] + TMA[TMA["metrics"] =="metric_TMA_Bad_Speculation(%)"].avg.iloc[0] + TMA[TMA["metrics"] =="metric_TMA_Backend_Bound(%)"].avg.iloc[0] + TMA[TMA["metrics"] =="metric_TMA_Retiring(%)"].avg.iloc[0] - new_data.insert( - 0, {"metrics": "pipeline", "avg": 100, "p95": 100, "min": 100, "max": 100} - ) - TMA = pd.concat([pd.DataFrame(new_data), TMA], ignore_index=True) - TMA["parent"] = parent - TMA["id"] = characters - - """ plot icicle """ - fig = go.Figure() - fig.add_trace( - go.Icicle( - ids=TMA.id, - labels=TMA.id, - parents=TMA.parent, - root_color="lightgrey", - tiling=dict(orientation="v"), - ) - ) - fig.update_traces( - text=TMA.avg.round(decimals=2), - textinfo="label+text", - textposition="top center", - ) - - fig.update_layout( - # autosize=False, - # height=300, - # width=200, - margin=dict(t=50, l=25, r=25, b=25) - ) - return fig - - -def strip_unwanted(metric_name): - unwanted = ["%", "metric_TMA_", ".", "(", ")", "metric_TMAM_"] - for char in unwanted: - metric_name = metric_name.replace(char, "") - return metric_name - - -def get_parents(metrics): - parent = [] - no_parent = [] - for metric in metrics: - try: - parent.append(strip_unwanted(metric_parent[metric])) - except KeyError: - no_parent.append(metric) - return parent, no_parent diff --git a/src/perf_helpers.py b/src/perf_helpers.py index f3ed434..35f5332 100644 --- a/src/perf_helpers.py +++ b/src/perf_helpers.py @@ -339,6 +339,8 @@ def get_arch_and_name(procinfo): arch = "icelake" elif model == 143 and cpufamily == 6 and stepping >= 3: arch = "sapphirerapids" + elif model == 207 and cpufamily == 6: + arch = "emeraldrapids" return arch, modelname diff --git a/src/report.py b/src/report.py deleted file mode 100644 index 95c8dc0..0000000 --- a/src/report.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python3 - -########################################################################################################### -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: BSD-3-Clause -########################################################################################################### - -import logging -from src import basic_stats -from src import icicle -from yattag import Doc, indent - - -def write_html(in_file, perf_mode, arch, html_report_out, data_type="both"): - if data_type not in ("tma", "basic", "both"): - data_type = "both" - if str(perf_mode) == "Mode.System": - tma_inp = in_file.replace(".csv", ".sys.csv") - tma_inp_avg = in_file.replace(".csv", ".sys.average.csv") - elif str(perf_mode) == "Mode.Socket": - tma_inp = in_file.replace(".csv", ".socket.csv") - tma_inp_avg = in_file.replace(".csv", ".socket.average.csv") - elif str(perf_mode) == "Mode.Core": - tma_inp = in_file.replace(".csv", ".core.csv") - tma_inp_avg = in_file.replace(".csv", ".core.average.csv") - - doc, tag, text = Doc().tagtext() - with tag("html"): - with tag("style"): - text("h1{text-align: center;background-color: #00ccff;}") - text("h2{text-align: center;background-color: #e6faff;}") - with tag("head"): - doc.asis('') - with tag("h1"): - text("Intel® PerfSpect Report") - with tag("body"): - if data_type in ("both", "tma"): - fig1 = icicle.get_icicle(tma_inp_avg) - with tag("h2", align="center"): - text("TopDown Microarchitecture Analysis (TMA)") - with doc.tag("div"): - doc.attr(id="tma") - doc.asis(fig1.to_html(full_html=False, include_plotlyjs="cdn")) - if data_type in ("both", "basic"): - fig2 = basic_stats.get_stats_plot(tma_inp, arch) - with tag("h2", align="center"): - text("Basic Statistics") - with doc.tag("div"): - doc.attr(id="basic_stats") - doc.stag("br") - doc.asis(fig2) - result = indent(doc.getvalue()) - with open(html_report_out, "w") as file: - file.write(result) - logging.info(f"static HTML file written at {html_report_out}")