Skip to content

Commit

Permalink
ras: add LMCE feature test cases
Browse files Browse the repository at this point in the history
Add 18 test cases for LMCE feature.

Signed-off-by: Lai, Yi1 <[email protected]>
  • Loading branch information
laifryiee committed Aug 28, 2024
1 parent a12f961 commit 0f7c740
Show file tree
Hide file tree
Showing 5 changed files with 205 additions and 2 deletions.
8 changes: 6 additions & 2 deletions BM/ras/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SUBDIRS := mce-test
SUBDIRS := mce-test ras-tools

.PHONY: all
all: $(SUBDIRS)
Expand All @@ -10,7 +10,11 @@ $(SUBDIRS):
.PHONY: install
install:
for dir in $(SUBDIRS); do \
$(MAKE) -C $$dir install; \
if [ "$$dir" = "mce-test" ]; then \
$(MAKE) -C $$dir install; \
else \
echo "Skipping install in $$dir"; \
fi \
done

.PHONY: clean
Expand Down
1 change: 1 addition & 0 deletions BM/ras/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ You can run the cases one by one, e.g. command

```
./mce_test.sh -t apei-inj
./lmce_test.sh -t sameaddr_samecore_instr/instr
```
You also can run the cases together with runtests command, e.g.

Expand Down
125 changes: 125 additions & 0 deletions BM/ras/lmce_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0-only
# Copyright (c) 2024 Intel Corporation
# Author: Yi Lai <[email protected]>
# @Desc Test script to verify Intel RAS LMCE functionality

cd "$(dirname "$0")" 2>/dev/null || exit 1
source ../.env
source ./ras_common.sh

# Print the command-line synopsis of this script on stdout.
usage() {
  local script_name=${0##*/}

  printf '%s\n' \
    "usage: ./${script_name} [-t TESTCASE_ID] [-H]" \
    "-t TEST CASE ID" \
    "-H show this"
}

# Check whether LMCE feature is supported
# Reads MSR 0x3a (IA32_FEATURE_CONTROL) with rdmsr and tests bit 20 (LMCE_ON).
# Calls die when the MSR cannot be read or when the bit is clear.
lmce_support_check() {
  local feature_control
  local lmce_on

  # Keep the assignment separate from "local" so the rdmsr exit status is not masked.
  if ! feature_control=$(rdmsr 0x3a) || [ -z "$feature_control" ]; then
    die "Failed to read IA32_FEATURE_CONTROL (MSR 0x3a), cannot check LMCE support"
  fi

  # The rdmsr output is prefixed with 0x here so the shell parses it as hex.
  lmce_on=$(( (0x$feature_control >> 20) & 1 ))
  if [ "$lmce_on" -eq 1 ]; then
    test_print_trc "LMCE feature is supported"
  else
    die "LMCE feature is not supported"
  fi
}

# Report the verdict of one LMCE test case.
#   $1 - test case name used in the PASS/FAIL message
#   $2 - exit code of the injection tool; a missing value counts as failure
# PASS requires both a zero exit code and an "LMCE" record in ${MCELOG_LOGFILE};
# anything else ends in die.
lmce_check_result() {
  local testcase=$1
  # Default to non-zero: an empty operand would evaluate as 0 inside [[ ]].
  local rc=${2:-1}

  # Quoted so that an empty/unset log path makes grep fail instead of reading stdin.
  if [[ $rc -eq 0 ]] && grep -q LMCE "${MCELOG_LOGFILE}"; then
    test_print_trc "${testcase} Test PASS"
  else
    die "${testcase} Test FAIL"
  fi
}

# Run the LMCE scenario named by $TEST_SCENARIO and report PASS/FAIL.
#
# Scenario IDs have the form <addr>_<cpu>_<access>:
#   addr:   sameaddr (adds -a to the ./lmce command line) | diffaddr
#   cpu:    samecore (-c 1) | samesocket (-c 2) | diffsocket (-c 3)
#   access: instr/instr | instr/data | data/data (passed upper-cased via -t)
#
# The scenario is validated before CMCI is touched, so an invalid ID cannot
# leave the machine with CMCI disabled.
lmce_test() {
  local scenario=$TEST_SCENARIO
  local addr_mode cpu_mode access_mode
  local -a lmce_args=()
  local lmce_rc

  # Misspelled ID that was the only accepted form of this scenario; keep it
  # working alongside the correctly spelled one.
  if [ "$scenario" = "diffddr_diffsocket_data/data" ]; then
    scenario="diffaddr_diffsocket_data/data"
  fi

  IFS=_ read -r addr_mode cpu_mode access_mode <<< "$scenario"

  case $addr_mode in
    sameaddr) lmce_args+=(-a) ;;
    diffaddr) ;;
    *) die "Invalid test scenario: ${TEST_SCENARIO}" ;;
  esac

  case $cpu_mode in
    samecore)   lmce_args+=(-c 1) ;;
    samesocket) lmce_args+=(-c 2) ;;
    diffsocket) lmce_args+=(-c 3) ;;
    *) die "Invalid test scenario: ${TEST_SCENARIO}" ;;
  esac

  case $access_mode in
    instr/instr) lmce_args+=(-t INSTR/INSTR) ;;
    instr/data)  lmce_args+=(-t INSTR/DATA) ;;
    data/data)   lmce_args+=(-t DATA/DATA) ;;
    *) die "Invalid test scenario: ${TEST_SCENARIO}" ;;
  esac

  cd ras-tools/ || die "Cannot enter ras-tools directory"

  disable_cmci              # disable MCE CMCI before LMCE test execution
  : > "${MCELOG_LOGFILE}"   # clear previous decoded MCE event records
  ./lmce "${lmce_args[@]}"
  lmce_rc=$?                # capture now: enable_cmci below would overwrite $?
  enable_cmci               # restore ENV
  lmce_check_result "$TEST_SCENARIO" "$lmce_rc"
}

# Parse command-line options.
#   -t ID  select the test scenario (stored in TEST_SCENARIO)
#   -H     print the synopsis and exit successfully
# The leading ':' in the optstring selects silent error reporting, so unknown
# options and missing arguments are reported by the branches below.
while getopts :t:H arg; do
  case "$arg" in
    H)
      if usage; then
        exit 0
      fi
      ;;
    t) TEST_SCENARIO=$OPTARG ;;
    :)
      usage
      die "Option -$OPTARG requires an argument."
      ;;
    \?)
      usage
      die "Invalid Option -$OPTARG"
      ;;
  esac
done

# A scenario is mandatory: fail early with the synopsis rather than starting a run with none.
if [ -z "${TEST_SCENARIO:-}" ]; then
  usage
  die "Option -t TESTCASE_ID is required."
fi

pkg_check_install msr-tools cpuid mcelog # install pre-requisite packages (msr-tools provides rdmsr/wrmsr)
lmce_support_check # check whether LMCE feature is supported; uses rdmsr, so it runs after the package check
mcelog_config # configure mcelog service
lmce_test
48 changes: 48 additions & 0 deletions BM/ras/ras_common.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
cd "$(dirname "$0")" 2>/dev/null || exit 1
source ../.env

MCELOG_LOGFILE=/var/log/mcelog # log file handed to mcelog via --logfile
MCA_BANK_NUM=$((0x$(rdmsr 0x179) & 0xFF)) # 0x179 is IA32_MCG_CAP; its low 8 bits are taken as the number of MCA banks
IA32_MCi_CTL2=$(rdmsr 0x280) # 0x280 is the CTL2 register of bank 0 (bank N is at 0x280 + N); read once, when this file is sourced

# check whether one package is installed, if not, install the package
# Usage: pkg_check_install pkg_name_1 pkg_name_2 pkg_name_x
pkg_check_install() {
Expand All @@ -21,3 +25,47 @@ pkg_check_install() {
fi
done
}

# check mcelog service is properly configured and running
# Ensures an mcelog process exists that was started with --daemon and
# --logfile=${MCELOG_LOGFILE}. Starts one when none is running; kills and
# restarts it when the running instance lacks either option.
# Returns the exit status of the mcelog launch when it had to start one.
mcelog_config() {
  local cmdline

  if ! pgrep -x mcelog >/dev/null 2>&1; then
    test_print_trc "mcelog service is not running, start the service"
    /usr/sbin/mcelog --ignorenodev --daemon --logfile="${MCELOG_LOGFILE}"
    # return, not exit: this file is sourced, so exit would terminate the
    # calling test script (with success) before it runs any test.
    return
  fi

  # pgrep -a prints the full command line of each match; check it as one string
  # instead of relying on word splitting.
  cmdline=$(pgrep -a -x mcelog)
  if [[ $cmdline != *"--daemon"* || $cmdline != *"--logfile=${MCELOG_LOGFILE}"* ]]; then
    test_print_trc "mcelog service is not properly configured, reload the service."
    pkill -9 -x mcelog
    /usr/sbin/mcelog --ignorenodev --daemon --logfile="${MCELOG_LOGFILE}"
  fi
}

# Per-bank CTL2 values (MSR 0x280 + bank) remembered by disable_cmci, indexed
# by bank number, so that enable_cmci can put back exactly what was there.
CMCI_SAVED_CTL2=()

# disable MCE CMCI
# For every MCA bank (0 .. MCA_BANK_NUM-1): read MSR 0x280 + bank, remember the
# value, clear bit 30 (the CMCI enable bit) and write the result to all processors.
# Banks whose MSR cannot be read are skipped.
disable_cmci() {
  local bank msr_address current_value current_value_dec new_value_dec

  for (( bank = 0; bank < MCA_BANK_NUM; bank++ )); do
    msr_address=$((0x280 + bank))
    # NOTE(review): CPU 0's value is used as the template for all CPUs, as before.
    current_value=$(rdmsr -p 0 "$msr_address") || continue
    [ -n "$current_value" ] || continue
    current_value_dec=$((0x$current_value))

    # Keep the first value seen, so calling disable_cmci twice does not
    # overwrite the original setting with the already-disabled one.
    if [ -z "${CMCI_SAVED_CTL2[bank]:-}" ]; then
      CMCI_SAVED_CTL2[bank]=$current_value_dec
    fi

    new_value_dec=$((current_value_dec & ~(1 << 30)))
    wrmsr -a "$msr_address" "$(printf '0x%x' "$new_value_dec")" # write to all processors
  done
}

# enable MCE CMCI
# For every MCA bank: restore the value remembered by disable_cmci. When no
# value was remembered for a bank, set bit 30 on top of the bank's current
# value instead, so that other fields of the register are left untouched.
enable_cmci() {
  local bank msr_address current_value restore_value

  for (( bank = 0; bank < MCA_BANK_NUM; bank++ )); do
    msr_address=$((0x280 + bank))
    if [ -n "${CMCI_SAVED_CTL2[bank]:-}" ]; then
      restore_value=${CMCI_SAVED_CTL2[bank]}
      unset "CMCI_SAVED_CTL2[$bank]"
    else
      current_value=$(rdmsr -p 0 "$msr_address") || continue
      [ -n "$current_value" ] || continue
      restore_value=$(( 0x$current_value | (1 << 30) ))
    fi
    wrmsr -a "$msr_address" "$(printf '0x%x' "$restore_value")" # write to all processors
  done
}
25 changes: 25 additions & 0 deletions BM/ras/tests
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Intel® Architecture-based platforms.
# @other_dep: general_test.sh -t module -p einj @ No module einj

# error injection test cases
mce_test.sh -t apei-inj
mce_test.sh -t core_recovery_ifu
mce_test.sh -t core_recovery_dcu
Expand All @@ -10,3 +11,27 @@ mce_test.sh -t einj-ext
mce_test.sh -t emca-inj
mce_test.sh -t erst-inject
mce_test.sh -t pfa

# LMCE test cases - three-dimensional matrix
# error-injected addresses: same address vs. different address
# CPUs that threads run on: same core, different core but same socket vs. different socket
# access type to trigger the fault (Instruction Fetch/Data Access): INSTR/INSTR, INSTR/DATA vs. DATA/DATA
lmce_test.sh -t sameaddr_samecore_instr/instr
lmce_test.sh -t sameaddr_samecore_instr/data
lmce_test.sh -t sameaddr_samecore_data/data
lmce_test.sh -t sameaddr_samesocket_instr/instr
lmce_test.sh -t sameaddr_samesocket_instr/data
lmce_test.sh -t sameaddr_samesocket_data/data
lmce_test.sh -t sameaddr_diffsocket_instr/instr
lmce_test.sh -t sameaddr_diffsocket_instr/data
lmce_test.sh -t sameaddr_diffsocket_data/data
lmce_test.sh -t diffaddr_samecore_instr/instr
lmce_test.sh -t diffaddr_samecore_instr/data
lmce_test.sh -t diffaddr_samecore_data/data
lmce_test.sh -t diffaddr_samesocket_instr/instr
lmce_test.sh -t diffaddr_samesocket_instr/data
lmce_test.sh -t diffaddr_samesocket_data/data
lmce_test.sh -t diffaddr_diffsocket_instr/instr
lmce_test.sh -t diffaddr_diffsocket_instr/data
lmce_test.sh -t diffddr_diffsocket_data/data

0 comments on commit 0f7c740

Please sign in to comment.