Skip to content

Commit

Permalink
Enhance E2E summary (#1376)
Browse files Browse the repository at this point in the history
  • Loading branch information
mengfei25 authored Feb 20, 2025
1 parent d4fd7dc commit ae651dd
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 59 deletions.
110 changes: 88 additions & 22 deletions .github/scripts/e2e_summary.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,101 @@
# Path of the accuracy checker script, resolved relative to the directory
# this script was invoked from.
check_file="$(dirname "${0}")/../ci_expected_accuracy/check_expected.py"
# First positional argument: the directory that is searched (recursively,
# with find) for the *_xpu_accuracy.csv result files.
results_dir="${1}"

#######################################
# Print an HTML table with one row per (suite, model) and one cell per
# (mode, dtype) holding the accuracy status read from the result CSVs.
# A cell is wrapped in a LaTeX colour macro when the model is mentioned in
# the matching check log /tmp/tmp-<suite>-<mode>-<dtype>.txt.
#
# Globals:
#   results_dir     (read)    directory searched for *_xpu_accuracy.csv files
#   <mode>_<dtype>  (written) e.g. training_float32; exported, as before
# Arguments:
#   None
# Outputs:
#   HTML table on stdout.
#   /tmp/tmp-result.txt is recreated with one "<color> <flag>" line per
#   table cell; flag > 0 marks "Real failed" / "Failed to passed" cases.
#######################################
function get_model_result() {
    local result_file="/tmp/tmp-result.txt"
    local csv_files suite_list suite model_list model dtype mode
    local csv check_log colorful cell var_name

    cat <<'EOF'

<table><thead>
<tr>
<th rowspan=2> Suite </th><th rowspan=2> Model </th>
<th colspan=5> Training </th><th colspan=5> Inference </th>
</tr><tr>
<th> float32 </th><th> bfloat16 </th><th> float16 </th><th> amp_bf16 </th><th> amp_fp16 </th>
<th> float32 </th><th> bfloat16 </th><th> float16 </th><th> amp_bf16 </th><th> amp_fp16 </th>
</tr>
</thead><tbody>
EOF

    # Scan the results tree once instead of once per table cell.
    csv_files="$(find "${results_dir}" -name "*.csv" | grep -E "_xpu_accuracy.csv")" || true
    suite_list="$(sed "s/.*inductor_//;s/_[abf].*//" <<<"${csv_files}" | sort -u)"

    # Always (re)create the result file so readers never hit a missing file,
    # even when no suite was found.
    : > "${result_file}"

    # Suite and model names are whitespace-free tokens; word splitting of the
    # lists below is intentional.
    for suite in ${suite_list}; do
        model_list="$(
            grep -E "${suite}.*_xpu_accuracy.csv" <<<"${csv_files}" |
                while IFS= read -r csv; do cat "${csv}"; done |
                grep "^xpu," | cut -d, -f2 | sort -u
        )" || true
        for model in ${model_list}; do
            for dtype in float32 bfloat16 float16 amp_bf16 amp_fp16; do
                for mode in training inference; do
                    check_log="/tmp/tmp-${suite}-${mode}-${dtype}.txt"
                    # -w -F: match the model name as a whole, literal word so
                    # that e.g. "bert" is not flagged because "bert_large"
                    # failed. Assumes names in the check log are delimited by
                    # non-word characters (quotes, commas, brackets).
                    # A missing log simply means "nothing to report".
                    colorful="$(
                        grep -w -F -- "${model}" "${check_log}" 2>/dev/null |
                            awk 'BEGIN{
                                color = "black";
                                exit_label = 0;
                            }{
                                if ($0 ~/Real failed/){
                                    color="red";
                                    exit_label++;
                                }else if ($0 ~/Expected failed/){
                                    color="blue";
                                }else if ($0 ~/Warning timeout/){
                                    color="orange";
                                }else if ($0 ~/New models/){
                                    color="blue";
                                }else if ($0 ~/Failed to passed/){
                                    color="green";
                                    exit_label++;
                                }
                            }END{print color, exit_label}'
                    )" || true
                    echo "${colorful}" >> "${result_file}"

                    # 4th CSV column is the accuracy status of the model.
                    # __color__ is a placeholder turned into \color when the
                    # row is printed, so the underscore escaping below does
                    # not touch plain cells.
                    cell="$(
                        grep -E "${suite}_${dtype}_${mode}_xpu_accuracy.csv" <<<"${csv_files}" |
                            while IFS= read -r csv; do
                                grep -h -F -- ",${model}," "${csv}"
                            done |
                            cut -d, -f4 |
                            awk -v c="${colorful%% *}" '{
                                if (c == "black") {
                                    print $0
                                } else {
                                    printf("$${__color__{%s}%s}$$", c, $0)
                                }
                            }'
                    )" || true
                    # Dynamic variable name without eval: the value is never
                    # re-parsed as shell code.
                    export "${mode}_${dtype}=${cell}"
                done
            done

            # Emit the row: training cells first, then inference cells.
            {
                printf '<tr>\n<td>%s</td>\n<td>%s</td>\n' "${suite}" "${model}"
                for mode in training inference; do
                    for dtype in float32 bfloat16 float16 amp_bf16 amp_fp16; do
                        var_name="${mode}_${dtype}"
                        printf '<td>%s</td>\n' "${!var_name}"
                    done
                done
                printf '</tr>\n'
            } | sed '/__color__/{s/__color__/\\color/g;s/_/\\_/g}'
        done
    done
    printf '%s\n\n' "</tbody></table>"
}

# Accuracy
accuracy=$(find "${results_dir}" -name "*.csv" |grep -E "_xpu_accuracy.csv" -c)
if [ "${accuracy}" -gt 0 ];then
printf "#### Note:
\$\${\\color{red}Red}\$\$: the failed cases which need look into
\$\${\\color{green}Green}\$\$: the new passed cases which need update reference
\$\${\\color{blue}Blue}\$\$: the expected failed or new enabled cases
\$\${\\color{orange}Orange}\$\$: the warning cases
Empty means the cases NOT run\n\n"
echo "### Accuracy"
printf "| Category | Total | \$\${\\color{green}Passed}\$\$ | Pass Rate | \$\${\\color{red}Failed}\$\$ | "
printf "\$\${\\color{blue}Xfailed}\$\$ | \$\${\\color{orange}Timeout}\$\$ | New Passed | New Enabled | Not Run |\n"
printf "| Category | Total | Passed | Pass Rate | \$\${\\color{red}Failed}\$\$ | "
printf "\$\${\\color{blue}Xfailed}\$\$ | \$\${\\color{orange}Timeout}\$\$ | "
printf "\$\${\\color{green}New Passed}\$\$ | \$\${\\color{blue}New Enabled}\$\$ | Not Run |\n"
printf "| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |"
echo > tmp-summary.txt
echo > tmp-details.txt
echo > /tmp/tmp-summary.txt
echo > /tmp/tmp-details.txt
for csv in $(find "${results_dir}" -name "*.csv" |grep -E "_xpu_accuracy.csv" |sort)
do
category="$(echo "${csv}" |sed 's/.*inductor_//;s/_xpu_accuracy.*//')"
suite="$(echo "${csv}" |sed 's/.*inductor_//;s/_.*//;s/timm/timm_models/')"
mode="$(echo "${csv}" |sed 's/_xpu_accuracy.*//;s/.*_//')"
dt="$(echo "${csv}" |sed -E 's/.*inductor_[a-z]*_//;s/models_//;s/_infer.*|_train.*//')"
python "${check_file}" --suite "${suite}" --mode "${mode}" --dtype "${dt}" --csv_file "${csv}" > tmp-result.txt
test_result="$(sed 's/, /,/g' tmp-result.txt |awk '{
dtype="$(echo "${csv}" |sed -E 's/.*inductor_[a-z]*_//;s/models_//;s/_infer.*|_train.*//')"
python "${check_file}" --suite "${suite}" --mode "${mode}" --dtype "${dtype}" --csv_file "${csv}" > "/tmp/tmp-${suite}-${mode}-${dtype}.txt"
test_result="$(sed 's/, /,/g' "/tmp/tmp-${suite}-${mode}-${dtype}.txt" |awk '{
if($0 ~/Total/){
total = $3;
}
Expand Down Expand Up @@ -57,22 +135,10 @@ if [ "${accuracy}" -gt 0 ];then
printf(" %d | %d | %s | %d | %d | %d | %d | %d | %d\n",
total, passed, pass_rate, failed, xfail, timeout, new_passed, new_enabled, not_run);
}')"
echo "| ${category} | ${test_result} |" >> tmp-summary.txt
sed -i '
s/Real failed models:/$${\\color{red}Real \\space failed \\space models}$$:/g;
s/Expected failed models:/$${\\color{blue}Expected \\space failed \\space models}$$:/g;
s/Warning timeout models:/$${\\color{orange}Warning \\space timeout \\space models}$$:/g;
s/Failed to passed models:/$${\\color{green}Failed \\space to \\space passed \\space models}$$:/g;
' tmp-result.txt
{
echo "<table><thead><tr><th colspan=2>$(sed 's/=//g' tmp-result.txt |head -n 1)</th></tr></thead><tbody>"
sed "1d" tmp-result.txt |awk -F: '{printf("<tr><td>%s</td><td>%s</td></tr>\n", $1, $2)}'
echo -e "</tbody></table>\n"
} >> tmp-details.txt
echo "| ${category} | ${test_result} |" >> /tmp/tmp-summary.txt
done
cat tmp-summary.txt
grep -v "<td> 0 \[\]</td>" tmp-details.txt
rm -rf tmp-*.txt
cat /tmp/tmp-summary.txt
get_model_result
fi

# Performance
Expand Down
3 changes: 3 additions & 0 deletions .github/scripts/lintrunner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ if ! command -v lintrunner &> /dev/null; then
python3 -m pip install lintrunner==0.12.7
fi

# Suppress ShellCheck SC2154 ("variable is referenced but not assigned") for this lint run
export SHELLCHECK_OPTS="-e SC2154"

# This has already been cached in the docker image
lintrunner init 2> /dev/null

Expand Down
17 changes: 6 additions & 11 deletions .github/workflows/nightly_ondemand.yml
Original file line number Diff line number Diff line change
Expand Up @@ -286,18 +286,13 @@ jobs:
find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days
tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs
# Print summary
rm -rf /tmp/tmp-*.txt
bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
timeout_models=$(grep "timeout models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
if [ ${timeout_models} -ne 0 ];then
TIMEOUT_MODELS="$(
grep -B 1 "timeout models: [1-9]" ${GITHUB_STEP_SUMMARY}
)"
echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}"
fi
if [ ${failed_models} -ne 0 ];then
grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
exit 1
exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt)
if [ ${exit_label} -ne 0 ];then
grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1
echo "There are ${exit_label} cases that need look into!!! Please check them"
exit ${exit_label}
fi
- name: Upload Inductor XPU E2E Data
if: ${{ ! cancelled() }}
Expand Down
17 changes: 6 additions & 11 deletions .github/workflows/nightly_ondemand_rolling.yml
Original file line number Diff line number Diff line change
Expand Up @@ -297,18 +297,13 @@ jobs:
find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days
tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs
# Print summary
rm -rf /tmp/tmp-*.txt
bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
timeout_models=$(grep "timeout models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
if [ ${timeout_models} -ne 0 ];then
TIMEOUT_MODELS="$(
grep -B 1 "timeout models: [1-9]" ${GITHUB_STEP_SUMMARY}
)"
echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}"
fi
if [ ${failed_models} -ne 0 ];then
grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
exit 1
exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt)
if [ ${exit_label} -ne 0 ];then
grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1
echo "There are ${exit_label} cases that need look into!!! Please check them"
exit ${exit_label}
fi
- name: Upload Inductor XPU E2E Data
if: ${{ ! cancelled() }}
Expand Down
17 changes: 6 additions & 11 deletions .github/workflows/nightly_ondemand_whl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -242,18 +242,13 @@ jobs:
find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days
tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs
# Print summary
rm -rf /tmp/tmp-*.txt
bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
timeout_models=$(grep "timeout models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
if [ ${timeout_models} -ne 0 ];then
TIMEOUT_MODELS="$(
grep -B 1 "timeout models: [1-9]" ${GITHUB_STEP_SUMMARY}
)"
echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}"
fi
if [ ${failed_models} -ne 0 ];then
grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
exit 1
exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt)
if [ ${exit_label} -ne 0 ];then
grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1
echo "There are ${exit_label} cases that need look into!!! Please check them"
exit ${exit_label}
fi
- name: Upload Inductor XPU E2E Data
if: ${{ ! cancelled() }}
Expand Down
10 changes: 6 additions & 4 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -194,11 +194,13 @@ jobs:
rm -rf ${{ github.workspace }}/upload_files
cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
# Print summary
rm -rf /tmp/tmp-*.txt
bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
if [ ${failed_models} -ne 0 ];then
grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
exit 1
exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt)
if [ ${exit_label} -ne 0 ];then
grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1
echo "There are ${exit_label} cases that need look into!!! Please check them"
exit ${exit_label}
fi
- name: Upload Inductor XPU E2E Data
if: ${{ ! cancelled() }}
Expand Down

0 comments on commit ae651dd

Please sign in to comment.