Skip to content

Commit

Permalink
Merge branch 'add_modeltest' of https://github.com/opea-project/GenAI…
Browse files Browse the repository at this point in the history
…Eval into add_modeltest
  • Loading branch information
VincyZhang committed May 15, 2024
2 parents 0454b21 + 9403582 commit 89084f5
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 17 deletions.
15 changes: 11 additions & 4 deletions .github/workflows/model_test_cpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@
name: Model Test on CPU

on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- .github/workflows/model_test_cpu.yml
- GenAIEval/**
- setup.py
workflow_dispatch:

# If there is a new commit, the previous jobs will be canceled
Expand Down Expand Up @@ -45,7 +52,7 @@ jobs:
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*

- name: Load environment variables
run:
cat ~/actions-runner4/.env >> $GITHUB_ENV
Expand Down Expand Up @@ -80,7 +87,7 @@ jobs:
run: |
docker exec ${{ env.CONTAINER_NAME }} \
bash -c "cd /GenAIEval/.github/workflows/scripts/models \
&& bash model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}"
&& bash -x model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}"
- name: Collect Log
run: |
Expand All @@ -101,7 +108,7 @@ jobs:
${{ github.workspace }}/summary.log
if-no-files-found: ignore # 'warn' or 'error' are also available, defaults to `warn`
retention-days: 60 # 1 <= retention-days <= 90

Genreate-Report:
runs-on: ubuntu-latest
needs: [Evaluation-Workflow]
Expand Down Expand Up @@ -143,7 +150,7 @@ jobs:
echo "------ Generating final report.html ------"
cd ${{ env.OUT_SCRIPT_PATH }}
mkdir -p generated
/usr/bin/bash generate_report.sh
/usr/bin/bash -x generate_report.sh
env:
# Link to this workflow run; the URL scheme must appear exactly once.
RUN_DISPLAY_URL: https://github.com/opea-project/GenAIEval/actions/runs/${{ github.run_id }}
BUILD_NUMBER: ${{ github.run_id }}
Expand Down
13 changes: 10 additions & 3 deletions .github/workflows/model_test_hpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@
name: Model Test on HPU

on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- .github/workflows/model_test_hpu.yml
- GenAIEval/**
- setup.py
workflow_dispatch:

# If there is a new commit, the previous jobs will be canceled
Expand Down Expand Up @@ -76,7 +83,7 @@ jobs:
run: |
docker exec ${{ env.CONTAINER_NAME }} \
bash -c "cd /GenAIEval/.github/workflows/scripts/models \
&& bash model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}"
&& bash -x model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}"
- name: Collect Log
run: |
Expand All @@ -97,7 +104,7 @@ jobs:
${{ github.workspace }}/summary.log
if-no-files-found: ignore # 'warn' or 'error' are also available, defaults to `warn`
retention-days: 60 # 1 <= retention-days <= 90

Genreate-Report:
runs-on: ubuntu-latest
needs: [Evaluation-Workflow]
Expand Down Expand Up @@ -139,7 +146,7 @@ jobs:
echo "------ Generating final report.html ------"
cd ${{ env.OUT_SCRIPT_PATH }}
mkdir -p generated
/usr/bin/bash generate_report.sh
/usr/bin/bash -x generate_report.sh
env:
# Link to this workflow run; the URL scheme must appear exactly once.
RUN_DISPLAY_URL: https://github.com/opea-project/GenAIEval/actions/runs/${{ github.run_id }}
BUILD_NUMBER: ${{ github.run_id }}
Expand Down
32 changes: 23 additions & 9 deletions .github/workflows/scripts/models/generate_report.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
#!/bin/bash
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -x
WORKSPACE=generated
last_log_path=FinalReport
Expand All @@ -18,7 +32,7 @@ function main {
}

function preprocessing {
for file_path in ./*
for file_path in log/*
do
if [[ -d ${file_path} ]] && [[ -f ${file_path}/summary.log ]]; then
cat ${file_path}/summary.log >> ${summaryLog}
Expand Down Expand Up @@ -64,19 +78,19 @@ function generate_results {
</tr>
eof

devices=$(cat ${summaryLog} | cut -d',' -f1 | awk '!a[$0]++')
devices=$(cat ${summaryLog} | cut -d';' -f1 | awk '!a[$0]++')
for device in ${devices[@]}; do
models=$(cat ${summaryLog} | grep "${device}," | cut -d',' -f2 | awk '!a[$0]++')
models=$(cat ${summaryLog} | grep "${device};" | cut -d';' -f2 | awk '!a[$0]++')
for model in ${models[@]}; do
tasks=$(cat ${summaryLog} | grep "${device},${model}," | cut -d',' -f3 | awk '!a[$0]++')
tasks=$(cat ${summaryLog} | grep "${device};${model};" | cut -d';' -f3 | awk '!a[$0]++')
for task in ${tasks[@]}; do
datasets=$(cat ${summaryLog} | grep "${device},${model},${task}," | cut -d',' -f4 | awk '!a[$0]++')
datasets=$(cat ${summaryLog} | grep "${device};${model};${task};" | cut -d';' -f4 | awk '!a[$0]++')
for dataset in ${datasets[@]}; do
benchmark_pattern="${device},${model},${task},${dataset},"
acc=$(cat ${summaryLog} | grep "${benchmark_pattern}" | cut -d',' -f5 | awk '!a[$0]++')
benchmark_pattern="${device};${model};${task};${dataset};"
acc=$(cat ${summaryLog} | grep "${benchmark_pattern}" | cut -d';' -f5 | awk '!a[$0]++')
acc_last=nan
if [ $(cat ${summaryLogLast} | grep -c "${benchmark_pattern}") != 0 ]; then
acc_last=$(cat ${summaryLogLast} | grep "${benchmark_pattern}" | cut -d',' -f5 | awk '!a[$0]++')
acc_last=$(cat ${summaryLogLast} | grep "${benchmark_pattern}" | cut -d';' -f5 | awk '!a[$0]++')
fi
generate_core
done
Expand Down Expand Up @@ -129,7 +143,7 @@ function generate_core {
show_benchmark(acc)
// Last
printf("</tr>\n<tr><td>Last</td>")
show_benchmark(acc_l)
show_benchmark(acc_l)
// current vs last
printf("</tr>\n<tr><td>New/Last</td>");
compare_new_last(acc,acc_l)
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/scripts/models/model_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ function run_benchmark() {
--tasks ${datasets} \
--device ${device} \
--batch_size 112 2>&1 | tee ${overall_log}

# Record the exit status of the preceding pipeline before running anything
# else; captured after `cat`, `$?` would describe `cat` instead.
# NOTE(review): unless `set -o pipefail` is active, `$?` here is the status of
# `tee` (the last pipeline stage); consider `${PIPESTATUS[0]}` if the status of
# the first pipeline command is what callers need — confirm against the full script.
status=$?
echo "print log content:"
cat "${overall_log}"
Expand Down

0 comments on commit 89084f5

Please sign in to comment.