Merge branch 'add_modeltest' of https://github.com/opea-project/GenAIEval into add_modeltest
opea-project · May 15, 2024 · 89084f5 · 89084f5
2 parents 0454b21 + 9403582
commit 89084f5
Show file tree

Hide file tree

Showing 4 changed files with 45 additions and 17 deletions.
diff --git a/.github/workflows/model_test_cpu.yml b/.github/workflows/model_test_cpu.yml
@@ -15,6 +15,13 @@
 name: Model Test on CPU
 
 on:
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
+    paths:
+      - .github/workflows/model_test_cpu.yml
+      - GenAIEval/**
+      - setup.py
   workflow_dispatch:
 
 # If there is a new commit, the previous jobs will be canceled
@@ -45,7 +52,7 @@ jobs:
     steps:
       - name: Clean Up Working Directory
         run: sudo rm -rf ${{github.workspace}}/*
-      
+
       - name: Load environment variables
         run:
           cat ~/actions-runner4/.env >> $GITHUB_ENV
@@ -80,7 +87,7 @@ jobs:
         run: |
             docker exec ${{ env.CONTAINER_NAME }} \
             bash -c "cd /GenAIEval/.github/workflows/scripts/models \
-            && bash model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}"
+            && bash -x model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}"
 
       - name: Collect Log
         run: |
@@ -101,7 +108,7 @@ jobs:
             ${{ github.workspace }}/summary.log
           if-no-files-found: ignore # 'warn' or 'ignore' are also available, defaults to `warn`
           retention-days: 60 # 1 <= retention-days <= 90
-  
+
   Genreate-Report:
     runs-on: ubuntu-latest
     needs: [Evaluation-Workflow]
@@ -143,7 +150,7 @@ jobs:
           echo "------ Generating final report.html ------"
           cd ${{ env.OUT_SCRIPT_PATH }}
           mkdir -p generated
-          /usr/bin/bash generate_report.sh
+          /usr/bin/bash -x generate_report.sh
         env:
           RUN_DISPLAY_URL: https://github.com/opea-project/GenAIEval/actions/runs/${{ github.run_id }}
           BUILD_NUMBER: ${{ github.run_id }}

diff --git a/.github/workflows/model_test_hpu.yml b/.github/workflows/model_test_hpu.yml
@@ -15,6 +15,13 @@
 name: Model Test on HPU
 
 on:
+  pull_request:
+      branches: [main]
+      types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
+      paths:
+        - .github/workflows/model_test_hpu.yml
+        - GenAIEval/**
+        - setup.py
   workflow_dispatch:
 
 # If there is a new commit, the previous jobs will be canceled
@@ -76,7 +83,7 @@ jobs:
         run: |
             docker exec ${{ env.CONTAINER_NAME }} \
             bash -c "cd /GenAIEval/.github/workflows/scripts/models \
-            && bash model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}"
+            && bash -x model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}"
 
       - name: Collect Log
         run: |
@@ -97,7 +104,7 @@ jobs:
             ${{ github.workspace }}/summary.log
           if-no-files-found: ignore # 'warn' or 'ignore' are also available, defaults to `warn`
           retention-days: 60 # 1 <= retention-days <= 90
-  
+
   Genreate-Report:
     runs-on: ubuntu-latest
     needs: [Evaluation-Workflow]
@@ -139,7 +146,7 @@ jobs:
           echo "------ Generating final report.html ------"
           cd ${{ env.OUT_SCRIPT_PATH }}
           mkdir -p generated
-          /usr/bin/bash generate_report.sh
+          /usr/bin/bash -x generate_report.sh
         env:
           RUN_DISPLAY_URL: https://github.com/opea-project/GenAIEval/actions/runs/${{ github.run_id }}
           BUILD_NUMBER: ${{ github.run_id }}

diff --git a/.github/workflows/scripts/models/generate_report.sh b/.github/workflows/scripts/models/generate_report.sh
@@ -1,4 +1,18 @@
 #!/bin/bash
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 set -x
 WORKSPACE=generated
 last_log_path=FinalReport
@@ -18,7 +32,7 @@ function main {
 }
 
 function preprocessing {
-    for file_path in ./*
+    for file_path in log/*
     do
         if [[ -d ${file_path} ]] && [[ -f ${file_path}/summary.log ]]; then
             cat ${file_path}/summary.log >> ${summaryLog}
@@ -64,19 +78,19 @@ function generate_results {
         </tr>
 eof
 
-    devices=$(cat ${summaryLog} | cut -d',' -f1 | awk '!a[$0]++')
+    devices=$(cat ${summaryLog} | cut -d';' -f1 | awk '!a[$0]++')
     for device in ${devices[@]}; do
-        models=$(cat ${summaryLog} | grep "${device}," | cut -d',' -f2 | awk '!a[$0]++')
+        models=$(cat ${summaryLog} | grep "${device};" | cut -d';' -f2 | awk '!a[$0]++')
         for model in ${models[@]}; do
-            tasks=$(cat ${summaryLog} | grep "${device},${model}," | cut -d',' -f3 | awk '!a[$0]++')
+            tasks=$(cat ${summaryLog} | grep "${device};${model};" | cut -d';' -f3 | awk '!a[$0]++')
             for task in ${tasks[@]}; do
-                datasets=$(cat ${summaryLog} | grep "${device},${model},${task}," | cut -d',' -f4 | awk '!a[$0]++')
+                datasets=$(cat ${summaryLog} | grep "${device};${model};${task};" | cut -d';' -f4 | awk '!a[$0]++')
                 for dataset in ${datasets[@]}; do
-                    benchmark_pattern="${device},${model},${task},${dataset},"
-                    acc=$(cat ${summaryLog} | grep "${benchmark_pattern}" | cut -d',' -f5 | awk '!a[$0]++')
+                    benchmark_pattern="${device};${model};${task};${dataset};"
+                    acc=$(cat ${summaryLog} | grep "${benchmark_pattern}" | cut -d';' -f5 | awk '!a[$0]++')
                     acc_last=nan
                     if [ $(cat ${summaryLogLast} | grep -c "${benchmark_pattern}") != 0 ]; then
-                        acc_last=$(cat ${summaryLogLast} | grep "${benchmark_pattern}" | cut -d',' -f5 | awk '!a[$0]++')
+                        acc_last=$(cat ${summaryLogLast} | grep "${benchmark_pattern}" | cut -d';' -f5 | awk '!a[$0]++')
                     fi
                     generate_core
                 done
@@ -129,7 +143,7 @@ function generate_core {
             show_benchmark(acc)
             // Last
             printf("</tr>\n<tr><td>Last</td>")
-            show_benchmark(acc_l)          
+            show_benchmark(acc_l)
             // current vs last
             printf("</tr>\n<tr><td>New/Last</td>");
             compare_new_last(acc,acc_l)

diff --git a/.github/workflows/scripts/models/model_test.sh b/.github/workflows/scripts/models/model_test.sh
@@ -87,7 +87,7 @@ function run_benchmark() {
         --tasks ${datasets} \
         --device ${device} \
         --batch_size 112  2>&1 | tee ${overall_log}
-    
+
     echo "print log content:"
     cat ${overall_log}
     status=$?