Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance E2E summary #1376

Merged
merged 13 commits into from
Feb 20, 2025
110 changes: 88 additions & 22 deletions .github/scripts/e2e_summary.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,101 @@
results_dir="$1"
check_file="$(dirname "$0")/../ci_expected_accuracy/check_expected.py"

#######################################
# Print an HTML table with one row per (suite, model) and one cell per
# mode/dtype combination, holding the accuracy status read from the
# "*_xpu_accuracy.csv" files found under ${results_dir}.
#
# A cell is wrapped in a GitHub-markdown colour macro when the model name
# appears on a "Real failed" (red), "Expected failed" / "New models" (blue),
# "Warning timeout" (orange) or "Failed to passed" (green) line of the
# matching /tmp/tmp-<suite>-<mode>-<dtype>.txt report. Cells stay empty when
# no CSV row exists for that combination.
#
# Globals:
#   results_dir (read) - directory searched recursively for CSV files
#   <mode>_<dtype> (written, exported) - e.g. training_float32; cell contents
#     of the most recently processed model
# Arguments:
#   None
# Outputs:
#   The HTML table on stdout.
#   One "<color> <exit_label>" line per cell appended to /tmp/tmp-result.txt
#   (the file is recreated on every call).
#######################################
function get_model_result() {
    local suite_list model_list suite model dtype mode colorful context

    printf '%s\n' \
        '' \
        '<table><thead>' \
        '<tr>' \
        '<th rowspan=2> Suite </th><th rowspan=2> Model </th>' \
        '<th colspan=5> Training </th><th colspan=5> Inference </th>' \
        '</tr><tr>' \
        '<th> float32 </th><th> bfloat16 </th><th> float16 </th><th> amp_bf16 </th><th> amp_fp16 </th>' \
        '<th> float32 </th><th> bfloat16 </th><th> float16 </th><th> amp_bf16 </th><th> amp_fp16 </th>' \
        '</tr>' \
        '</thead><tbody>'

    # Suite name = the part of the file name between "inductor_" and the
    # first "_a" / "_b" / "_f" (i.e. the start of the dtype).
    suite_list=$(
        find "${results_dir}" -name "*.csv" |grep -E "_xpu_accuracy.csv" |\
            sed "s/.*inductor_//;s/_[abf].*//" |sort |uniq
    )
    rm -f /tmp/tmp-result.txt
    # Suite and model names are deliberately word-split: one item per word.
    for suite in ${suite_list}
    do
        model_list=$(
            find "${results_dir}" -name "*.csv" |grep -E ".*${suite}.*_xpu_accuracy.csv" |\
                xargs cat |grep "^xpu," |cut -d, -f2 |sort |uniq
        )
        for model in ${model_list}
        do
            for dtype in float32 bfloat16 float16 amp_bf16 amp_fp16
            do
                for mode in training inference
                do
                    # Classify the model from the report lines that mention it.
                    # The report is absent when this combination did not run;
                    # grep's complaint is discarded and the cell stays black.
                    # -F: the model name is a literal string, not a regex.
                    colorful=$(
                        grep -F -- "${model}" "/tmp/tmp-${suite}-${mode}-${dtype}.txt" 2>/dev/null |awk '
                            BEGIN { color = "black"; exit_label = 0 }
                            /Real failed/      { color = "red"; exit_label++; next }
                            /Expected failed/  { color = "blue"; next }
                            /Warning timeout/  { color = "orange"; next }
                            /New models/       { color = "blue"; next }
                            /Failed to passed/ { color = "green"; exit_label++; next }
                            END { print color, exit_label }'
                    )
                    printf '%s\n' "${colorful}" >> /tmp/tmp-result.txt
                    # 4th CSV column is the accuracy status. -h keeps grep from
                    # prefixing file names when several CSVs match. Only the
                    # first matching row is used. "__color__" is a placeholder
                    # that is turned into "\color" when the row is printed.
                    context=$(
                        find "${results_dir}" -name "*.csv" |\
                            grep -E ".*${suite}_${dtype}_${mode}_xpu_accuracy.csv" |\
                            xargs grep -hF -- ",${model}," |cut -d, -f4 |\
                            awk -v c="${colorful%% *}" 'NR == 1 {
                                if (c == "black") {
                                    print $0
                                } else {
                                    printf("$${__color__{%s}%s}$$", c, $0)
                                }
                            }'
                    )
                    # Plain quoted assignment instead of eval: the CSV content
                    # is data and must never be parsed as shell code.
                    export "${mode}_${dtype}=${context}"
                done
            done
            # Only lines carrying a colour macro get their underscores escaped,
            # so the suite/model cells are left untouched.
            printf '%s\n' \
                '<tr>' \
                "<td>${suite}</td>" \
                "<td>${model}</td>" \
                "<td>${training_float32}</td>" \
                "<td>${training_bfloat16}</td>" \
                "<td>${training_float16}</td>" \
                "<td>${training_amp_bf16}</td>" \
                "<td>${training_amp_fp16}</td>" \
                "<td>${inference_float32}</td>" \
                "<td>${inference_bfloat16}</td>" \
                "<td>${inference_float16}</td>" \
                "<td>${inference_amp_bf16}</td>" \
                "<td>${inference_amp_fp16}</td>" \
                '</tr>' |sed '/__color__/{s/__color__/\\color/g;s/_/\\_/g}'
        done
    done
    printf '%s\n' '</tbody></table>' ''
}

# Accuracy
accuracy=$(find "${results_dir}" -name "*.csv" |grep -E "_xpu_accuracy.csv" -c)
if [ "${accuracy}" -gt 0 ];then
printf "#### Note:
\$\${\\color{red}Red}\$\$: the failed cases which need look into
\$\${\\color{green}Green}\$\$: the new passed cases which need update reference
\$\${\\color{blue}Blue}\$\$: the expected failed or new enabled cases
\$\${\\color{orange}Orange}\$\$: the warning cases
Empty means the cases NOT run\n\n"
echo "### Accuracy"
printf "| Category | Total | \$\${\\color{green}Passed}\$\$ | Pass Rate | \$\${\\color{red}Failed}\$\$ | "
printf "\$\${\\color{blue}Xfailed}\$\$ | \$\${\\color{orange}Timeout}\$\$ | New Passed | New Enabled | Not Run |\n"
printf "| Category | Total | Passed | Pass Rate | \$\${\\color{red}Failed}\$\$ | "
printf "\$\${\\color{blue}Xfailed}\$\$ | \$\${\\color{orange}Timeout}\$\$ | "
printf "\$\${\\color{green}New Passed}\$\$ | \$\${\\color{blue}New Enabled}\$\$ | Not Run |\n"
printf "| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |"
echo > tmp-summary.txt
echo > tmp-details.txt
echo > /tmp/tmp-summary.txt
echo > /tmp/tmp-details.txt
for csv in $(find "${results_dir}" -name "*.csv" |grep -E "_xpu_accuracy.csv" |sort)
do
category="$(echo "${csv}" |sed 's/.*inductor_//;s/_xpu_accuracy.*//')"
suite="$(echo "${csv}" |sed 's/.*inductor_//;s/_.*//;s/timm/timm_models/')"
mode="$(echo "${csv}" |sed 's/_xpu_accuracy.*//;s/.*_//')"
dt="$(echo "${csv}" |sed -E 's/.*inductor_[a-z]*_//;s/models_//;s/_infer.*|_train.*//')"
python "${check_file}" --suite "${suite}" --mode "${mode}" --dtype "${dt}" --csv_file "${csv}" > tmp-result.txt
test_result="$(sed 's/, /,/g' tmp-result.txt |awk '{
dtype="$(echo "${csv}" |sed -E 's/.*inductor_[a-z]*_//;s/models_//;s/_infer.*|_train.*//')"
python "${check_file}" --suite "${suite}" --mode "${mode}" --dtype "${dtype}" --csv_file "${csv}" > "/tmp/tmp-${suite}-${mode}-${dtype}.txt"
test_result="$(sed 's/, /,/g' "/tmp/tmp-${suite}-${mode}-${dtype}.txt" |awk '{
if($0 ~/Total/){
total = $3;
}
Expand Down Expand Up @@ -57,22 +135,10 @@ if [ "${accuracy}" -gt 0 ];then
printf(" %d | %d | %s | %d | %d | %d | %d | %d | %d\n",
total, passed, pass_rate, failed, xfail, timeout, new_passed, new_enabled, not_run);
}')"
echo "| ${category} | ${test_result} |" >> tmp-summary.txt
sed -i '
s/Real failed models:/$${\\color{red}Real \\space failed \\space models}$$:/g;
s/Expected failed models:/$${\\color{blue}Expected \\space failed \\space models}$$:/g;
s/Warning timeout models:/$${\\color{orange}Warning \\space timeout \\space models}$$:/g;
s/Failed to passed models:/$${\\color{green}Failed \\space to \\space passed \\space models}$$:/g;
' tmp-result.txt
{
echo "<table><thead><tr><th colspan=2>$(sed 's/=//g' tmp-result.txt |head -n 1)</th></tr></thead><tbody>"
sed "1d" tmp-result.txt |awk -F: '{printf("<tr><td>%s</td><td>%s</td></tr>\n", $1, $2)}'
echo -e "</tbody></table>\n"
} >> tmp-details.txt
echo "| ${category} | ${test_result} |" >> /tmp/tmp-summary.txt
done
cat tmp-summary.txt
grep -v "<td> 0 \[\]</td>" tmp-details.txt
rm -rf tmp-*.txt
cat /tmp/tmp-summary.txt
get_model_result
fi

# Performance
Expand Down
3 changes: 3 additions & 0 deletions .github/scripts/lintrunner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ if ! command -v lintrunner &> /dev/null; then
python3 -m pip install lintrunner==0.12.7
fi

# Ignoring errors in one specific run
export SHELLCHECK_OPTS="-e SC2154"

# This has already been cached in the docker image
lintrunner init 2> /dev/null

Expand Down
17 changes: 6 additions & 11 deletions .github/workflows/nightly_ondemand.yml
Original file line number Diff line number Diff line change
Expand Up @@ -286,18 +286,13 @@ jobs:
find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days
tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs
# Print summary
rm -rf /tmp/tmp-*.txt
bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
timeout_models=$(grep "timeout models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
if [ ${timeout_models} -ne 0 ];then
TIMEOUT_MODELS="$(
grep -B 1 "timeout models: [1-9]" ${GITHUB_STEP_SUMMARY}
)"
echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}"
fi
if [ ${failed_models} -ne 0 ];then
grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
exit 1
exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt)
if [ ${exit_label} -ne 0 ];then
grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1
echo "There are ${exit_label} cases that need look into!!! Please check them"
exit ${exit_label}
fi
- name: Upload Inductor XPU E2E Data
if: ${{ ! cancelled() }}
Expand Down
17 changes: 6 additions & 11 deletions .github/workflows/nightly_ondemand_rolling.yml
Original file line number Diff line number Diff line change
Expand Up @@ -297,18 +297,13 @@ jobs:
find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days
tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs
# Print summary
rm -rf /tmp/tmp-*.txt
bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
timeout_models=$(grep "timeout models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
if [ ${timeout_models} -ne 0 ];then
TIMEOUT_MODELS="$(
grep -B 1 "timeout models: [1-9]" ${GITHUB_STEP_SUMMARY}
)"
echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}"
fi
if [ ${failed_models} -ne 0 ];then
grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
exit 1
exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt)
if [ ${exit_label} -ne 0 ];then
grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1
echo "There are ${exit_label} cases that need look into!!! Please check them"
exit ${exit_label}
fi
- name: Upload Inductor XPU E2E Data
if: ${{ ! cancelled() }}
Expand Down
17 changes: 6 additions & 11 deletions .github/workflows/nightly_ondemand_whl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -242,18 +242,13 @@ jobs:
find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days
tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs
# Print summary
rm -rf /tmp/tmp-*.txt
bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
timeout_models=$(grep "timeout models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
if [ ${timeout_models} -ne 0 ];then
TIMEOUT_MODELS="$(
grep -B 1 "timeout models: [1-9]" ${GITHUB_STEP_SUMMARY}
)"
echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}"
fi
if [ ${failed_models} -ne 0 ];then
grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
exit 1
exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt)
if [ ${exit_label} -ne 0 ];then
grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1
echo "There are ${exit_label} cases that need look into!!! Please check them"
exit ${exit_label}
fi
- name: Upload Inductor XPU E2E Data
if: ${{ ! cancelled() }}
Expand Down
10 changes: 6 additions & 4 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -194,11 +194,13 @@ jobs:
rm -rf ${{ github.workspace }}/upload_files
cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
# Print summary
rm -rf /tmp/tmp-*.txt
bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
if [ ${failed_models} -ne 0 ];then
grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
exit 1
exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt)
if [ ${exit_label} -ne 0 ];then
grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1
echo "There are ${exit_label} cases that need look into!!! Please check them"
exit ${exit_label}
fi
- name: Upload Inductor XPU E2E Data
if: ${{ ! cancelled() }}
Expand Down