Enhance E2E summary (#1376)

intel · Feb 20, 2025 · ae651dd · ae651dd
1 parent d4fd7dc
commit ae651dd
Show file tree

Hide file tree

Showing 6 changed files with 115 additions and 59 deletions.
diff --git a/.github/scripts/e2e_summary.sh b/.github/scripts/e2e_summary.sh
@@ -3,23 +3,101 @@
 results_dir="$1"
 check_file="$(dirname "$0")/../ci_expected_accuracy/check_expected.py"
 
+# Print an HTML table with one row per (suite, model) and one column per
+# mode/dtype combination. Each cell holds the accuracy status read from the
+# matching *_xpu_accuracy.csv, wrapped in a colour marker chosen from the
+# per-combination report /tmp/tmp-<suite>-<mode>-<dtype>.txt.
+# Globals:   results_dir (read); <mode>_<dtype> variables (written, exported)
+# Outputs:   the HTML table on stdout; one "<color> <count>" line per cell is
+#            appended to /tmp/tmp-result.txt
+function get_model_result() {
+    local suite_list model_list suite model dtype mode colorful context
+    echo -e "\n<table><thead>
+        <tr>
+            <th rowspan=2> Suite </th><th rowspan=2> Model </th>
+            <th colspan=5> Training </th><th colspan=5> Inference </th>
+        </tr><tr>
+            <th> float32 </th><th> bfloat16 </th><th> float16 </th><th> amp_bf16 </th><th> amp_fp16 </th>
+            <th> float32 </th><th> bfloat16 </th><th> float16 </th><th> amp_bf16 </th><th> amp_fp16 </th>
+        </tr>
+    </thead><tbody>"
+    # Suite name = file name part between "inductor_" and the dtype suffix
+    suite_list=$(
+        find "${results_dir}" -name "*.csv" |grep -E "_xpu_accuracy.csv" |\
+        sed "s/.*inductor_//;s/_[abf].*//" |sort |uniq
+    )
+    # Start from an empty file (not a missing one) so readers can always open it
+    : > /tmp/tmp-result.txt
+    for suite in ${suite_list}
+    do
+        model_list=$(
+            find "${results_dir}" -name "*.csv" |grep -E ".*${suite}.*_xpu_accuracy.csv" |\
+            xargs cat |grep "^xpu," |cut -d, -f2 |sort |uniq
+        )
+        for model in ${model_list}
+        do
+            for dtype in float32 bfloat16 float16 amp_bf16 amp_fp16
+            do
+                for mode in training inference
+                do
+                    # Model names are literal text, so match them as fixed strings.
+                    # NOTE(review): this is still a substring match, so a model whose
+                    # name is a prefix of another also picks up that model's lines.
+                    # A missing report file yields "black 0".
+                    colorful=$(grep -F -- "${model}" "/tmp/tmp-${suite}-${mode}-${dtype}.txt" 2>/dev/null |awk 'BEGIN{
+                        color = "black";
+                        exit_label = 0;
+                    }{
+                        if ($0 ~/Real failed/){
+                            color="red";
+                            exit_label++;
+                        }else if ($0 ~/Expected failed/){
+                            color="blue";
+                        }else if ($0 ~/Warning timeout/){
+                            color="orange";
+                        }else if ($0 ~/New models/){
+                            color="blue";
+                        }else if ($0 ~/Failed to passed/){
+                            color="green";
+                            exit_label++;
+                        }
+                    }END{print color, exit_label}')
+                    echo "${colorful}" >> /tmp/tmp-result.txt
+                    # Field 4 of the model row is the accuracy status; non-black cells are
+                    # wrapped in a placeholder that the sed below turns into \color{...}
+                    context=$(find "${results_dir}" -name "*.csv" |\
+                        grep -E ".*${suite}_${dtype}_${mode}_xpu_accuracy.csv" |xargs grep -F -- ",${model}," |cut -d, -f4 |\
+                        awk -v c="${colorful/ *}" '{if(c=="black") {print $0}else {printf("$${__color__{%s}%s}$$", c, $0)}}')
+                    # Assign by name without eval so CSV content is never parsed as shell code
+                    export "${mode}_${dtype}=${context}"
+                done
+            done
+            echo -e "<tr>
+                    <td>${suite}</td>
+                    <td>${model}</td>
+                    <td>${training_float32}</td>
+                    <td>${training_bfloat16}</td>
+                    <td>${training_float16}</td>
+                    <td>${training_amp_bf16}</td>
+                    <td>${training_amp_fp16}</td>
+                    <td>${inference_float32}</td>
+                    <td>${inference_bfloat16}</td>
+                    <td>${inference_float16}</td>
+                    <td>${inference_amp_bf16}</td>
+                    <td>${inference_amp_fp16}</td>
+                </tr>" |sed '/__color__/{s/__color__/\\color/g;s/_/\\_/g}'
+        done
+    done
+    echo -e "</tbody></table>\n"
+}
+
 # Accuracy
 accuracy=$(find "${results_dir}" -name "*.csv" |grep -E "_xpu_accuracy.csv" -c)
 if [ "${accuracy}" -gt 0 ];then
+    printf "#### Note:
+\$\${\\color{red}Red}\$\$: the failed cases which need look into
+\$\${\\color{green}Green}\$\$: the new passed cases which need update reference
+\$\${\\color{blue}Blue}\$\$: the expected failed or new enabled cases
+\$\${\\color{orange}Orange}\$\$: the warning cases
+Empty means the cases NOT run\n\n"
     echo "### Accuracy"
-    printf "| Category | Total | \$\${\\color{green}Passed}\$\$ | Pass Rate | \$\${\\color{red}Failed}\$\$ | "
-    printf "\$\${\\color{blue}Xfailed}\$\$ | \$\${\\color{orange}Timeout}\$\$ | New Passed | New Enabled | Not Run |\n"
+    printf "| Category | Total | Passed | Pass Rate | \$\${\\color{red}Failed}\$\$ | "
+    printf "\$\${\\color{blue}Xfailed}\$\$ | \$\${\\color{orange}Timeout}\$\$ | "
+    printf "\$\${\\color{green}New Passed}\$\$ | \$\${\\color{blue}New Enabled}\$\$ | Not Run |\n"
     printf "| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |"
-    echo > tmp-summary.txt
-    echo > tmp-details.txt
+    echo > /tmp/tmp-summary.txt
+    echo > /tmp/tmp-details.txt
     for csv in $(find "${results_dir}" -name "*.csv" |grep -E "_xpu_accuracy.csv" |sort)
     do
         category="$(echo "${csv}" |sed 's/.*inductor_//;s/_xpu_accuracy.*//')"
         suite="$(echo "${csv}" |sed 's/.*inductor_//;s/_.*//;s/timm/timm_models/')"
         mode="$(echo "${csv}" |sed 's/_xpu_accuracy.*//;s/.*_//')"
-        dt="$(echo "${csv}" |sed -E 's/.*inductor_[a-z]*_//;s/models_//;s/_infer.*|_train.*//')"
-        python "${check_file}" --suite "${suite}" --mode "${mode}" --dtype "${dt}" --csv_file "${csv}" > tmp-result.txt
-        test_result="$(sed 's/, /,/g' tmp-result.txt |awk '{
+        dtype="$(echo "${csv}" |sed -E 's/.*inductor_[a-z]*_//;s/models_//;s/_infer.*|_train.*//')"
+        python "${check_file}" --suite "${suite}" --mode "${mode}" --dtype "${dtype}" --csv_file "${csv}" > "/tmp/tmp-${suite}-${mode}-${dtype}.txt"
+        test_result="$(sed 's/, /,/g' "/tmp/tmp-${suite}-${mode}-${dtype}.txt" |awk '{
             if($0 ~/Total/){
                 total = $3;
             }
@@ -57,22 +135,10 @@ if [ "${accuracy}" -gt 0 ];then
             printf(" %d | %d | %s | %d | %d | %d | %d | %d | %d\n",
                 total, passed, pass_rate, failed, xfail, timeout, new_passed, new_enabled, not_run);
         }')"
-        echo "| ${category} | ${test_result} |" >> tmp-summary.txt
-        sed -i '
-            s/Real failed models:/$${\\color{red}Real \\space failed \\space models}$$:/g;
-            s/Expected failed models:/$${\\color{blue}Expected \\space failed \\space models}$$:/g;
-            s/Warning timeout models:/$${\\color{orange}Warning \\space timeout \\space models}$$:/g;
-            s/Failed to passed models:/$${\\color{green}Failed \\space to \\space passed \\space models}$$:/g;
-        ' tmp-result.txt
-        {
-            echo "<table><thead><tr><th colspan=2>$(sed 's/=//g' tmp-result.txt |head -n 1)</th></tr></thead><tbody>"
-            sed "1d" tmp-result.txt |awk -F: '{printf("<tr><td>%s</td><td>%s</td></tr>\n", $1, $2)}'
-            echo -e "</tbody></table>\n"
-        } >> tmp-details.txt
+        echo "| ${category} | ${test_result} |" >> /tmp/tmp-summary.txt
     done
-    cat tmp-summary.txt
-    grep -v "<td> 0 \[\]</td>" tmp-details.txt
-    rm -rf tmp-*.txt
+    cat /tmp/tmp-summary.txt
+    get_model_result
 fi
 
 # Performance

diff --git a/.github/scripts/lintrunner.sh b/.github/scripts/lintrunner.sh
@@ -23,6 +23,9 @@ if ! command -v lintrunner &> /dev/null; then
     python3 -m pip install lintrunner==0.12.7
 fi
 
+# Suppress ShellCheck SC2154 ("variable is referenced but not assigned") for every script checked in this lint run
+export SHELLCHECK_OPTS="-e SC2154"
+
 # This has already been cached in the docker image
 lintrunner init 2> /dev/null
 

diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml
@@ -286,18 +286,13 @@ jobs:
           find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days
           tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs
           # Print summary
+          rm -rf /tmp/tmp-*.txt
           bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
-          failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
-          timeout_models=$(grep "timeout models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
-          if [ ${timeout_models} -ne 0 ];then
-            TIMEOUT_MODELS="$(
-              grep -B 1 "timeout models: [1-9]" ${GITHUB_STEP_SUMMARY}
-            )"
-            echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}"
-          fi
-          if [ ${failed_models} -ne 0 ];then
-            grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
-            exit 1
+          # Count the result cells flagged for attention. The result file is absent
+          # when the run produced no accuracy CSVs, so only read it if it exists.
+          exit_label=0
+          if [ -f /tmp/tmp-result.txt ];then
+            exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt)
+          fi
+          if [ "${exit_label}" -ne 0 ];then
+            grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1
+            echo "There are ${exit_label} cases that need look into!!! Please check them"
+            # Exit statuses wrap modulo 256, so a raw count of 256 would report success
+            exit 1
           fi
       - name: Upload Inductor XPU E2E Data
         if: ${{ ! cancelled() }}

diff --git a/.github/workflows/nightly_ondemand_rolling.yml b/.github/workflows/nightly_ondemand_rolling.yml
@@ -297,18 +297,13 @@ jobs:
           find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days
           tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs
           # Print summary
+          rm -rf /tmp/tmp-*.txt
           bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
-          failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
-          timeout_models=$(grep "timeout models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
-          if [ ${timeout_models} -ne 0 ];then
-            TIMEOUT_MODELS="$(
-              grep -B 1 "timeout models: [1-9]" ${GITHUB_STEP_SUMMARY}
-            )"
-            echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}"
-          fi
-          if [ ${failed_models} -ne 0 ];then
-            grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
-            exit 1
+          # Count the result cells flagged for attention. The result file is absent
+          # when the run produced no accuracy CSVs, so only read it if it exists.
+          exit_label=0
+          if [ -f /tmp/tmp-result.txt ];then
+            exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt)
+          fi
+          if [ "${exit_label}" -ne 0 ];then
+            grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1
+            echo "There are ${exit_label} cases that need look into!!! Please check them"
+            # Exit statuses wrap modulo 256, so a raw count of 256 would report success
+            exit 1
           fi
       - name: Upload Inductor XPU E2E Data
         if: ${{ ! cancelled() }}

diff --git a/.github/workflows/nightly_ondemand_whl.yml b/.github/workflows/nightly_ondemand_whl.yml
@@ -242,18 +242,13 @@ jobs:
           find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days
           tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs
           # Print summary
+          rm -rf /tmp/tmp-*.txt
           bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
-          failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
-          timeout_models=$(grep "timeout models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
-          if [ ${timeout_models} -ne 0 ];then
-            TIMEOUT_MODELS="$(
-              grep -B 1 "timeout models: [1-9]" ${GITHUB_STEP_SUMMARY}
-            )"
-            echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}"
-          fi
-          if [ ${failed_models} -ne 0 ];then
-            grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
-            exit 1
+          # Count the result cells flagged for attention. The result file is absent
+          # when the run produced no accuracy CSVs, so only read it if it exists.
+          exit_label=0
+          if [ -f /tmp/tmp-result.txt ];then
+            exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt)
+          fi
+          if [ "${exit_label}" -ne 0 ];then
+            grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1
+            echo "There are ${exit_label} cases that need look into!!! Please check them"
+            # Exit statuses wrap modulo 256, so a raw count of 256 would report success
+            exit 1
           fi
       - name: Upload Inductor XPU E2E Data
         if: ${{ ! cancelled() }}

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
@@ -194,11 +194,13 @@ jobs:
           rm -rf ${{ github.workspace }}/upload_files
           cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
           # Print summary
+          rm -rf /tmp/tmp-*.txt
           bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
-          failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
-          if [ ${failed_models} -ne 0 ];then
-            grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
-            exit 1
+          # Count the result cells flagged for attention. The result file is absent
+          # when the run produced no accuracy CSVs, so only read it if it exists.
+          exit_label=0
+          if [ -f /tmp/tmp-result.txt ];then
+            exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt)
+          fi
+          if [ "${exit_label}" -ne 0 ];then
+            grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1
+            echo "There are ${exit_label} cases that need look into!!! Please check them"
+            # Exit statuses wrap modulo 256, so a raw count of 256 would report success
+            exit 1
           fi
       - name: Upload Inductor XPU E2E Data
         if: ${{ ! cancelled() }}