From 78cbddb7a6d6aa28ce0c280956eb161514c08284 Mon Sep 17 00:00:00 2001 From: Wendy Wang Date: Tue, 2 Jan 2024 11:30:10 +0800 Subject: [PATCH] Add 4 cstate cases covering cstate perf and cpu offline online stress Signed-off-by: Wendy Wang --- cstate/README.md | 3 +- cstate/powermgr_cstate_tests.sh | 154 +++++++++++++++++++++++++++----- cstate/tests-client | 1 + cstate/tests-server | 4 + 4 files changed, 140 insertions(+), 22 deletions(-) diff --git a/cstate/README.md b/cstate/README.md index 0387487..839bceb 100644 --- a/cstate/README.md +++ b/cstate/README.md @@ -1,6 +1,7 @@ # Release Notes for CPU Core Cstate cases -The CPU Core C-state cases are designed for Intel® Architecture-based platforms. +The CPU Core and Package cstate cases are designed for Intel® Architecture-based +platforms. The cases are designed for intel_idle driver, not legacy acpi_idle driver. Considering Intel® Server and Client platforms have different Core C-states behavior. So created two tests files to distinguish the cases running on different test units: diff --git a/cstate/powermgr_cstate_tests.sh b/cstate/powermgr_cstate_tests.sh index bde5f42..9690e3c 100755 --- a/cstate/powermgr_cstate_tests.sh +++ b/cstate/powermgr_cstate_tests.sh @@ -14,11 +14,14 @@ CPU_IDLE_SYSFS_PATH="/sys/devices/system/cpu/cpuidle" current_cpuidle_driver=$(cat "$CPU_IDLE_SYSFS_PATH"/current_driver) -#Turbostat tool is required to run core cstate cases +# Turbostat tool is required to run core cstate cases turbostat sleep 1 1>/dev/null 2>&1 || block_test "Turbostat tool is required to \ to run CPU core cstate cases, please get it from latest upstream kernel-tools." -#Function to verify if Intel_idle driver refer to BIOS _CST table +# Perf tool is required to run this cstate perf cases +perf list 1>/dev/null 2>&1 || block_test "perf tool is required to run cstate perf cases" + +# Function to verify if Intel_idle driver refer to BIOS _CST table test_cstate_table_name() { local cstate_name="" local name="" @@ -34,7 +37,7 @@ test_cstate_table_name() { fi } -#Function to verify if current idle driver is intel_idle +# Function to verify if current idle driver is intel_idle check_intel_idle() { [[ $current_cpuidle_driver == "intel_idle" ]] || { block_test "If the platform does not support Intel_Idle driver yet, \ @@ -42,7 +45,7 @@ please ignore this test case." } } -#Function to switch each core cstate +# Function to switch each core cstate test_cstate_switch_idle() { local usage_before=() local usage_after=() @@ -114,8 +117,8 @@ test_cstate_switch_intel_idle() { test_cstate_switch_idle } -#The Core C7 is only supported on Intel® Client platform -#This function is to check Core C7 residency during runtime +# The Core C7 is only supported on Intel® Client platform +# This function is to check Core C7 residency during runtime judge_cc7_residency_during_idle() { columns="Core,CPU%c1,CPU%c6,CPU%c7" turbostat_output=$(turbostat -i 10 --quiet \ @@ -134,8 +137,8 @@ when $current_cpuidle_driver is running." when $current_cpuidle_driver is running" } -#The Core C7 is only supported on Intel® Client platforms -#This function is to check Core C7 residency for S2idle path +# The Core C7 is only supported on Intel® Client platforms +# This function is to check Core C7 residency for S2idle path judge_cc7_residency_during_s2idle() { columns="Core,CPU%c1,CPU%c6,CPU%c7" turbostat_output=$( @@ -157,7 +160,7 @@ when $current_cpuidle_driver is running" when $current_cpuidle_driver is running." } -#The Core C6 is the deepest cstate on Intel® Server platforms +# The Core C6 is the deepest cstate on Intel® Server platforms test_server_all_cpus_deepest_cstate() { local unexpected_cstate=0.00 @@ -175,8 +178,8 @@ test_server_all_cpus_deepest_cstate() { fi } -#The Core C6 is only supported on Intel® Server platform -#This function is to check Core C6 residency during runtime +# The Core C6 is only supported on Intel® Server platform +# This function is to check Core C6 residency during runtime judge_cc6_residency_during_idle() { columns="Core,CPU%c1,CPU%c6" turbostat_output=$(turbostat -i 10 --quiet \ @@ -217,15 +220,15 @@ cc_state_disable_enable() { local setting=$2 for ((i = 0; i < cpu_num; i++)); do - #Find Core Cx state + # Find Core Cx state cc_num=$(grep . /sys/devices/system/cpu/cpu0/cpuidle/state*/name | sed -n "/$cc$/p" | awk -F "/" '{print $8}' | cut -c 6) test_print_trc "Core $cc state name is: $cc_num" [[ -n "$cc_num" ]] || block_test "Did not get Core $cc state." - #Change Core Cx state + # Change Core Cx state do_cmd "echo $setting > /sys/devices/system/cpu/cpu$i/cpuidle/state$cc_num/disable" deeper=$(("$cc_num" + 1)) - #Change deeper Core Cx state + # Change deeper Core Cx state for ((j = deeper; j < state_num; j++)); do do_cmd "echo $setting > /sys/devices/system/cpu/cpu$i/cpuidle/state$j/disable" done @@ -245,8 +248,8 @@ disable_cc_check_pc() { cc_state_disable_enable "$cc" 1 - #Check Package Cstates, CC10 disable--> expect PC8 only - #CC8 and deeper disable--> PC6 only + # Check Package Cstates, CC10 disable--> expect PC8 only + # CC8 and deeper disable--> PC6 only tc_out=$(turbostat -q --show $columns -i 1 sleep 20 2>&1) [[ -n "$tc_out" ]] || die "Did not get turbostat log" test_print_trc "turbostat tool output: $tc_out" @@ -272,10 +275,7 @@ $pc_y residency: $pc_y_res; $pc_n residency: $pc_n_res" # perf tool listed cstate is based on kernel MSRs, turbostat tool listed # cstate is based on user space MSRs, these two outputs should be aligned # also client and server platforms support different core and pkg cstates. -perf_cstate_list() { - #Perf tool is required to run this case - perf list 1>/dev/null 2>&1 || block_test "perf tool is required to run this cstate case" - +perf_client_cstate_list() { tc_out=$(turbostat -q --show idle sleep 1 2>&1) [[ -n "$tc_out" ]] || block_test "Did not get turbostat log" test_print_trc "turbostat tool output: $tc_out" @@ -338,6 +338,106 @@ to 30 us:" do_cmd "echo > $idle_debugfs/control" } +# Server platforms support different core cstate and package cstate +perf_server_cstate_list() { + tc_out=$(turbostat -q --show idle sleep 1 2>&1) + [[ -n "$tc_out" ]] || block_test "Did not get turbostat log" + test_print_trc "turbostat tool output: $tc_out" + tc_out_cstate_list=$(echo "$tc_out" | grep -E "^POLL") + + perf_cstates=$(perf list | grep cstate) + [[ -n "$perf_cstates" ]] || block_test "Did not get cstate events by perf list" + test_print_trc "perf list shows cstate events: $perf_cstates" + perf_core_cstate_num=$(perf list | grep -c cstate_core) + for ((i = 1; i <= perf_core_cstate_num; i++)); do + perf_core_cstate=$(perf list | grep cstate_core | sed -n "$i, 1p") + if [[ $perf_core_cstate =~ c1 ]] && [[ $tc_out_cstate_list =~ CPU%c1 ]]; then + test_print_trc "$perf_core_cstate is supported and aligned with turbostat" + elif [[ $perf_core_cstate =~ c6 ]] && [[ $tc_out_cstate_list =~ CPU%c6 ]]; then + test_print_trc "$perf_core_cstate is supported and aligned with turbostat" + else + die "perf list shows unexpected core_cstate event." + fi + done + + perf_pkg_cstate_num=$(perf list | grep -c cstate_pkg) + for ((i = 1; i <= perf_pkg_cstate_num; i++)); do + perf_pkg_cstate=$(perf list | grep cstate_pkg | sed -n "$i, 1p") + if [[ $perf_pkg_cstate =~ c2 ]] && [[ $tc_out_cstate_list =~ Pkg%pc2 ]]; then + test_print_trc "$perf_pkg_cstate is supported and aligned with turbostat" + elif [[ $perf_pkg_cstate =~ c6 ]] && [[ $tc_out_cstate_list =~ Pkg%pc6 ]]; then + test_print_trc "$perf_pkg_cstate is supported and aligned with turbostat" + else + die "perf list shows unexpected pkg_cstate event." + fi + done +} + +# Verify if server cstate_core or cstate_pkg pmu event updates during idle +perf_server_cstat_update() { + local cstate_name=$1 + + perf_cstates=$(perf list | grep "$cstate_name" 2>&1) + perf_cstates_num=$(perf list | grep -c "$cstate_name" 2>&1) + [[ -n $perf_cstates ]] || block_test "Did not get $cstate_name event by perf list" + + # Sleep 20 seconds to capture the cstate counter update + for ((i = 1; i <= perf_cstates_num; i++)); do + perf_cstate=$(echo "$perf_cstates" | awk '{print $1}' | sed -n "$i, 1p" 2>&1) + test_print_trc "perf event name: $perf_cstate" + option="$option -e $perf_cstate" + test_print_trc "option name: $option" + done + do_cmd "perf stat -o out.txt --per-socket $option sleep 20" + test_print_trc "$cstate_name perf events log:" + do_cmd "cat out.txt" + perf_cstates_sockets=$(grep cstate out.txt | awk '{print $NF}' | wc -l 2>&1) + + if ! counter=$(grep cstate out.txt | awk '{print $3}'); then + block_test "Did not get $cstate_name perf event: $counter" + else + for ((i = 1; i <= perf_cstates_sockets; i++)); do + perf_cstat_counter=$(grep cstate out.txt | awk '{print $3}' | sed -n "$i, 1p" 2>&1) + perf_cstat_name=$(grep cstate out.txt | awk '{print $4}' | sed -n "$i, 1p" 2>&1) + if [[ $perf_cstat_counter -eq 0 ]]; then + die "$perf_cstat_name event counter shows 0" + else + test_print_trc "$perf_cstat_name event counter is updated" + fi + done + fi +} + +# Function to do CPU offline and online short stress +cpu_off_on_stress() { + local cycle=$1 + local dmesg_log + + last_dmesg_timestamp + + cpu_num=$(lscpu | grep "On-line CPU" | awk '{print $NF}' | awk -F "-" '{print $2}') + [ -n "$cpu_num" ] || block_test "On-line CPU is not available." + test_print_trc "The max CPU number is: $cpu_num " + + for ((i = 1; i <= cycle; i++)); do + test_print_trc "CPUs offline online stress cycle$i" + for ((j = 1; j <= cpu_num; j++)); do + do_cmd "echo 0 > /sys/devices/system/cpu/cpu$j/online" + done + sleep 1 + for ((j = 1; j <= cpu_num; j++)); do + do_cmd "echo 1 > /sys/devices/system/cpu/cpu$j/online" + done + done + + dmesg_log=$(extract_case_dmesg) + if echo "$dmesg_log" | grep -iE "fail|Call Trace|error|BUG|err"; then + die "Kernel dmesg shows failure after CPU offline/online stress: $dmesg_log" + else + test_print_trc "Kernel dmesg shows Okay after CPU offline/online stress." + fi +} + while getopts :t:H arg; do case $arg in t) @@ -378,7 +478,7 @@ core_cstate_test() { disable_cc_check_pc C10 Pkg%pc8 Pk%pc10 ;; verify_cstate_list_by_perf) - perf_cstate_list + perf_client_cstate_list ;; verify_residency_latency_override) override_residency_latency @@ -389,6 +489,18 @@ core_cstate_test() { verify_server_all_cores_cstate6) test_server_all_cpus_deepest_cstate ;; + verify_server_cstate_list) + perf_server_cstate_list + ;; + verify_server_perf_core_cstat_update) + perf_server_cstat_update cstate_core + ;; + verify_server_perf_pkg_cstat_update) + perf_server_cstat_update cstate_pkg + ;; + verify_cpu_offline_online_stress) + cpu_off_on_stress 5 + ;; *) block_test "Wrong Case Id is assigned: $CASE_ID" ;; diff --git a/cstate/tests-client b/cstate/tests-client index 04e9492..2815c4d 100755 --- a/cstate/tests-client +++ b/cstate/tests-client @@ -9,3 +9,4 @@ powermgr_cstate_tests.sh -t verify_client_pkg6_by_disabling_cc8 powermgr_cstate_tests.sh -t verify_client_pkg8_by_disabling_cc10 powermgr_cstate_tests.sh -t verify_cstate_list_by_perf powermgr_cstate_tests.sh -t verify_residency_latency_override +powermgr_cstate_tests.sh -t verify_cpu_offline_online_stress diff --git a/cstate/tests-server b/cstate/tests-server index 537484e..885bf29 100755 --- a/cstate/tests-server +++ b/cstate/tests-server @@ -5,3 +5,7 @@ powermgr_cstate_tests.sh -t verify_cstate_name powermgr_cstate_tests.sh -t verify_server_all_cores_cstate6 powermgr_cstate_tests.sh -t verify_server_core_cstate6_residency powermgr_cstate_tests.sh -t verify_residency_latency_override +powermgr_cstate_tests.sh -t verify_server_cstate_list +powermgr_cstate_tests.sh -t verify_server_perf_core_cstat_update +powermgr_cstate_tests.sh -t verify_server_perf_pkg_cstat_update +powermgr_cstate_tests.sh -t verify_cpu_offline_online_stress