From f16742c846d34aa9da3e955b5600fcd0c8609726 Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Mon, 17 Jun 2024 16:53:34 +0800 Subject: [PATCH 01/17] fix coverage Signed-off-by: Sun, Xuehao --- .github/workflows/scripts/unittest/calc_coverage.sh | 2 +- .github/workflows/scripts/unittest/unittest.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/scripts/unittest/calc_coverage.sh b/.github/workflows/scripts/unittest/calc_coverage.sh index 16cd09e9..947f7810 100644 --- a/.github/workflows/scripts/unittest/calc_coverage.sh +++ b/.github/workflows/scripts/unittest/calc_coverage.sh @@ -3,7 +3,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -source ../../change_color +source ../change_color LOG_DIR=$1 coverage_compare="${LOG_DIR}/coverage_compare.html" coverage_log_pr="${LOG_DIR}/UnitTestPR-test/coverage_pr" diff --git a/.github/workflows/scripts/unittest/unittest.sh b/.github/workflows/scripts/unittest/unittest.sh index fab9145f..d539cc9c 100644 --- a/.github/workflows/scripts/unittest/unittest.sh +++ b/.github/workflows/scripts/unittest/unittest.sh @@ -28,7 +28,7 @@ function pytest() { export GLOG_minloglevel=2 genaieval_path=$(python -c 'import GenAIEval; import os; print(os.path.dirname(GenAIEval.__file__))') - find . -name "test*.py" | sed 's,\.\/,coverage run --source='"${genaieval_path}"' --append ,g' | sed 's/$/ --verbose/' >run.sh + find . -name "test*.py" | sed "s,\.\/,coverage run --source=\"${genaieval_path}\" --append ,g" | sed 's/$/ --verbose/' >run.sh coverage erase # run UT From 5fd821709b607b7dc4eada5cf379fdb9f63079bb Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Mon, 17 Jun 2024 16:55:25 +0800 Subject: [PATCH 02/17] fix CI paths Signed-off-by: Sun, Xuehao --- .github/workflows/unittest.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index 354fc2f1..e9e329e7 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -13,6 +13,7 @@ on: - GenAIEval/** - setup.py - tests/** + - .github/workflows/scripts/unittest/** workflow_dispatch: # If there is a new commit, the previous jobs will be canceled From ab56cbc4f76db1b0e43401ff0b164361a044d30d Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Mon, 17 Jun 2024 17:02:59 +0800 Subject: [PATCH 03/17] fix package name Signed-off-by: Sun, Xuehao --- .github/workflows/scripts/unittest/unittest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scripts/unittest/unittest.sh b/.github/workflows/scripts/unittest/unittest.sh index d539cc9c..f8ca3f2f 100644 --- a/.github/workflows/scripts/unittest/unittest.sh +++ b/.github/workflows/scripts/unittest/unittest.sh @@ -27,7 +27,7 @@ function pytest() { ut_log_name="${LOG_DIR}/unit_test_$1.log" export GLOG_minloglevel=2 - genaieval_path=$(python -c 'import GenAIEval; import os; print(os.path.dirname(GenAIEval.__file__))') + genaieval_path=$(python -c 'import opea-eval; import os; print(os.path.dirname(opea-eval.__file__))') find . -name "test*.py" | sed "s,\.\/,coverage run --source=\"${genaieval_path}\" --append ,g" | sed 's/$/ --verbose/' >run.sh coverage erase From fcaabd6e8cf9ec634c987ce7f5c085164a1d77d4 Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Mon, 17 Jun 2024 18:53:48 +0800 Subject: [PATCH 04/17] fix name Signed-off-by: Sun, Xuehao --- .github/workflows/scripts/unittest/unittest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scripts/unittest/unittest.sh b/.github/workflows/scripts/unittest/unittest.sh index f8ca3f2f..05e42c88 100644 --- a/.github/workflows/scripts/unittest/unittest.sh +++ b/.github/workflows/scripts/unittest/unittest.sh @@ -27,7 +27,7 @@ function pytest() { ut_log_name="${LOG_DIR}/unit_test_$1.log" export GLOG_minloglevel=2 - genaieval_path=$(python -c 'import opea-eval; import os; print(os.path.dirname(opea-eval.__file__))') + genaieval_path=$(python -c 'import evals; import os; print(os.path.dirname(evals.__file__))') find . -name "test*.py" | sed "s,\.\/,coverage run --source=\"${genaieval_path}\" --append ,g" | sed 's/$/ --verbose/' >run.sh coverage erase From 888f450288ea4b80aa1058bed379a1417dfb3d1a Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Mon, 17 Jun 2024 19:35:42 +0800 Subject: [PATCH 05/17] fix Signed-off-by: Sun, Xuehao --- tests/test_lm_eval.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_lm_eval.py b/tests/test_lm_eval.py index 552f33ed..5a88c21e 100644 --- a/tests/test_lm_eval.py +++ b/tests/test_lm_eval.py @@ -25,6 +25,7 @@ def test_lm_eval(self): device="cpu", batch_size=1, limit=5, + trust_remote_code=True, ) results = evaluate(args) self.assertEqual(results["results"]["piqa"]["acc,none"], 0.6) From 854672be7273c43e724ff6fcf3a860e3a76486e1 Mon Sep 17 00:00:00 2001 From: "Wang, Chang" Date: Tue, 18 Jun 2024 10:49:14 +0800 Subject: [PATCH 06/17] Update test_lm_eval.py --- tests/test_lm_eval.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_lm_eval.py b/tests/test_lm_eval.py index 5a88c21e..207ceeb0 100644 --- a/tests/test_lm_eval.py +++ b/tests/test_lm_eval.py @@ -24,8 +24,7 @@ def test_lm_eval(self): tasks="piqa", device="cpu", batch_size=1, - limit=5, - trust_remote_code=True, + limit=5 ) results = evaluate(args) self.assertEqual(results["results"]["piqa"]["acc,none"], 0.6) From caa28cd76e62d32b4d7bd8403a1a3271e2c6e2b3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Jun 2024 02:49:21 +0000 Subject: [PATCH 07/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_lm_eval.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tests/test_lm_eval.py b/tests/test_lm_eval.py index 207ceeb0..63e0b7ce 100644 --- a/tests/test_lm_eval.py +++ b/tests/test_lm_eval.py @@ -18,13 +18,7 @@ def test_lm_eval(self): user_model = AutoModelForCausalLM.from_pretrained(model_name_or_path) tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) args = LMEvalParser( - model="hf", - user_model=user_model, - tokenizer=tokenizer, - tasks="piqa", - device="cpu", - batch_size=1, - limit=5 + model="hf", user_model=user_model, tokenizer=tokenizer, tasks="piqa", device="cpu", batch_size=1, limit=5 ) results = evaluate(args) self.assertEqual(results["results"]["piqa"]["acc,none"], 0.6) From 83d905dbe02685855d8cc938fa1e9fe6c3b91410 Mon Sep 17 00:00:00 2001 From: changwangss Date: Mon, 17 Jun 2024 23:18:16 -0700 Subject: [PATCH 08/17] adapte datasets 2.2.0 change Signed-off-by: changwangss --- evals/evaluation/lm_evaluation_harness/accuracy.py | 1 + 1 file changed, 1 insertion(+) diff --git a/evals/evaluation/lm_evaluation_harness/accuracy.py b/evals/evaluation/lm_evaluation_harness/accuracy.py index 4c862524..376996f2 100644 --- a/evals/evaluation/lm_evaluation_harness/accuracy.py +++ b/evals/evaluation/lm_evaluation_harness/accuracy.py @@ -41,6 +41,7 @@ def cli_evaluate(args) -> None: eval_logger.setLevel(getattr(logging, f"{args.verbosity}")) eval_logger.info(f"Verbosity set to {args.verbosity}") os.environ["TOKENIZERS_PARALLELISM"] = "false" + os.environ["HF_DATASETS_TRUST_REMOTE_CODE"] = "true" if args.predict_only: args.log_samples = True From 634537a608708e0fed111d532c6da896f09218bc Mon Sep 17 00:00:00 2001 From: changwangss Date: Mon, 17 Jun 2024 23:34:42 -0700 Subject: [PATCH 09/17] set the env default to 1 Signed-off-by: changwangss --- evals/evaluation/lm_evaluation_harness/accuracy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evals/evaluation/lm_evaluation_harness/accuracy.py b/evals/evaluation/lm_evaluation_harness/accuracy.py index 376996f2..11995074 100644 --- a/evals/evaluation/lm_evaluation_harness/accuracy.py +++ b/evals/evaluation/lm_evaluation_harness/accuracy.py @@ -41,7 +41,7 @@ def cli_evaluate(args) -> None: eval_logger.setLevel(getattr(logging, f"{args.verbosity}")) eval_logger.info(f"Verbosity set to {args.verbosity}") os.environ["TOKENIZERS_PARALLELISM"] = "false" - os.environ["HF_DATASETS_TRUST_REMOTE_CODE"] = "true" + os.environ["HF_DATASETS_TRUST_REMOTE_CODE"] = "1" if args.predict_only: args.log_samples = True From b052f7e0fa8466f1854c09143b5948f582da1da9 Mon Sep 17 00:00:00 2001 From: changwangss Date: Tue, 18 Jun 2024 01:48:41 -0700 Subject: [PATCH 10/17] fix trust_remote_code issur Signed-off-by: changwangss --- evals/evaluation/lm_evaluation_harness/accuracy.py | 1 - .../lm_eval/models/huggingface.py | 2 +- tests/test_lm_eval.py | 11 +++++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/evals/evaluation/lm_evaluation_harness/accuracy.py b/evals/evaluation/lm_evaluation_harness/accuracy.py index 11995074..4c862524 100644 --- a/evals/evaluation/lm_evaluation_harness/accuracy.py +++ b/evals/evaluation/lm_evaluation_harness/accuracy.py @@ -41,7 +41,6 @@ def cli_evaluate(args) -> None: eval_logger.setLevel(getattr(logging, f"{args.verbosity}")) eval_logger.info(f"Verbosity set to {args.verbosity}") os.environ["TOKENIZERS_PARALLELISM"] = "false" - os.environ["HF_DATASETS_TRUST_REMOTE_CODE"] = "1" if args.predict_only: args.log_samples = True diff --git a/evals/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py b/evals/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py index 1631672c..0d8c595b 100644 --- a/evals/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py +++ b/evals/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py @@ -88,7 +88,7 @@ def __init__( dtype: Optional[Union[str, torch.dtype]] = "auto", batch_size: Optional[Union[int, str]] = 1, max_batch_size: Optional[int] = 64, - trust_remote_code: Optional[bool] = False, + trust_remote_code: Optional[bool] = True, use_fast_tokenizer: Optional[bool] = True, add_bos_token: Optional[bool] = False, prefix_token_id: Optional[int] = None, diff --git a/tests/test_lm_eval.py b/tests/test_lm_eval.py index 63e0b7ce..4fb23e70 100644 --- a/tests/test_lm_eval.py +++ b/tests/test_lm_eval.py @@ -18,10 +18,17 @@ def test_lm_eval(self): user_model = AutoModelForCausalLM.from_pretrained(model_name_or_path) tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) args = LMEvalParser( - model="hf", user_model=user_model, tokenizer=tokenizer, tasks="piqa", device="cpu", batch_size=1, limit=5 + model="hf", + user_model=user_model, + tokenizer=tokenizer, + tasks="lambada_openai", + device="cpu", + batch_size=1, + limit=5, + trust_remote_code=True, ) results = evaluate(args) - self.assertEqual(results["results"]["piqa"]["acc,none"], 0.6) + self.assertEqual(results["results"]["lambada_openai"]["acc,none"], 0.6) if __name__ == "__main__": From 50f86f63745482ad4b8df9e1eab2b4ddc1e4a4d5 Mon Sep 17 00:00:00 2001 From: changwangss Date: Tue, 18 Jun 2024 02:00:12 -0700 Subject: [PATCH 11/17] fix piqa to lambda_openai Signed-off-by: changwangss --- .github/workflows/model_test_cpu.yml | 2 +- .github/workflows/model_test_hpu.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/model_test_cpu.yml b/.github/workflows/model_test_cpu.yml index 3fe43edf..ce43a9e3 100644 --- a/.github/workflows/model_test_cpu.yml +++ b/.github/workflows/model_test_cpu.yml @@ -34,7 +34,7 @@ jobs: matrix: include: - modelName: "opt-125m" - datasets: "piqa" + datasets: "lambada_openai" device: "cpu" tasks: "text-generation" fail-fast: true diff --git a/.github/workflows/model_test_hpu.yml b/.github/workflows/model_test_hpu.yml index 4914a68d..613372ef 100644 --- a/.github/workflows/model_test_hpu.yml +++ b/.github/workflows/model_test_hpu.yml @@ -34,7 +34,7 @@ jobs: matrix: include: - modelName: "opt-125m" - datasets: "piqa" + datasets: "lambada_openai" device: "hpu" tasks: "text-generation" fail-fast: true From 762643e1962dc8146084225d528bdf764301a128 Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Tue, 18 Jun 2024 19:48:18 +0800 Subject: [PATCH 12/17] test Signed-off-by: Sun, Xuehao --- .github/workflows/scripts/unittest/unittest.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/scripts/unittest/unittest.sh b/.github/workflows/scripts/unittest/unittest.sh index 05e42c88..fb8c1685 100644 --- a/.github/workflows/scripts/unittest/unittest.sh +++ b/.github/workflows/scripts/unittest/unittest.sh @@ -27,7 +27,7 @@ function pytest() { ut_log_name="${LOG_DIR}/unit_test_$1.log" export GLOG_minloglevel=2 - genaieval_path=$(python -c 'import evals; import os; print(os.path.dirname(evals.__file__))') + genaieval_path=$(python3 -c 'import evals; import os; print(os.path.dirname(evals.__file__))') find . -name "test*.py" | sed "s,\.\/,coverage run --source=\"${genaieval_path}\" --append ,g" | sed 's/$/ --verbose/' >run.sh coverage erase @@ -37,6 +37,11 @@ function pytest() { $BOLD_YELLOW && echo "------UT start-------" && $RESET bash run.sh 2>&1 | tee -a ${ut_log_name} $BOLD_YELLOW && echo "------UT end -------" && $RESET + which pip + which pip3 + which python + which python3 + python3 -c 'import evals; import os; print(os.path.dirname(evals.__file__))' # run coverage report coverage report -m --rcfile=${COVERAGE_RCFILE} | tee ${coverage_log_dir}/coverage.log From b3163675dfed0dd4ea11810d3791309fda2dac1b Mon Sep 17 00:00:00 2001 From: chensuyue Date: Mon, 24 Jun 2024 10:22:40 +0800 Subject: [PATCH 13/17] fix python path --- .github/workflows/scripts/unittest/unittest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scripts/unittest/unittest.sh b/.github/workflows/scripts/unittest/unittest.sh index fb8c1685..2eb79d56 100644 --- a/.github/workflows/scripts/unittest/unittest.sh +++ b/.github/workflows/scripts/unittest/unittest.sh @@ -27,7 +27,7 @@ function pytest() { ut_log_name="${LOG_DIR}/unit_test_$1.log" export GLOG_minloglevel=2 - genaieval_path=$(python3 -c 'import evals; import os; print(os.path.dirname(evals.__file__))') + genaieval_path=$(python3 -c 'import evals; print(evals.__path__[0])') find . -name "test*.py" | sed "s,\.\/,coverage run --source=\"${genaieval_path}\" --append ,g" | sed 's/$/ --verbose/' >run.sh coverage erase From fea4b67820123e8836f39663ff5dadaf99fdb5b9 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Mon, 24 Jun 2024 10:24:07 +0800 Subject: [PATCH 14/17] remove test code --- .github/workflows/scripts/unittest/unittest.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/scripts/unittest/unittest.sh b/.github/workflows/scripts/unittest/unittest.sh index 2eb79d56..e1076565 100644 --- a/.github/workflows/scripts/unittest/unittest.sh +++ b/.github/workflows/scripts/unittest/unittest.sh @@ -37,11 +37,6 @@ function pytest() { $BOLD_YELLOW && echo "------UT start-------" && $RESET bash run.sh 2>&1 | tee -a ${ut_log_name} $BOLD_YELLOW && echo "------UT end -------" && $RESET - which pip - which pip3 - which python - which python3 - python3 -c 'import evals; import os; print(os.path.dirname(evals.__file__))' # run coverage report coverage report -m --rcfile=${COVERAGE_RCFILE} | tee ${coverage_log_dir}/coverage.log From 6a4d0619e286e0a3ced376a50f3aaaeca966fe9d Mon Sep 17 00:00:00 2001 From: chensuyue Date: Mon, 24 Jun 2024 12:20:09 +0800 Subject: [PATCH 15/17] update evals installation --- .github/workflows/scripts/install_evals.sh | 9 +++++++++ .github/workflows/unittest.yml | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/scripts/install_evals.sh diff --git a/.github/workflows/scripts/install_evals.sh b/.github/workflows/scripts/install_evals.sh new file mode 100644 index 00000000..f4eee178 --- /dev/null +++ b/.github/workflows/scripts/install_evals.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +echo -e "\n Install GenAIEval ... " +cd /GenAIEval +python -m pip install --no-cache-dir -r requirements.txt +python setup.py bdist_wheel +pip install dist/opea_eval*.whl + +pip list \ No newline at end of file diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index e9e329e7..ca76f308 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -62,7 +62,7 @@ jobs: - name: Install Dependencies run: | - docker exec ${{ env.CONTAINER_NAME }} bash -c "cd /GenAIEval && pip install -r requirements.txt && python setup.py install" + docker exec ${{ env.CONTAINER_NAME }} bash -c "bash /GenAIEval/.github/workflows/scripts/install_evals.sh" - name: Run UT run: | From 2a013d47f21e8efb57ad8448c4cffdcd9408dfef Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 24 Jun 2024 04:20:22 +0000 Subject: [PATCH 16/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .github/workflows/scripts/install_evals.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/scripts/install_evals.sh b/.github/workflows/scripts/install_evals.sh index f4eee178..56c0eb43 100644 --- a/.github/workflows/scripts/install_evals.sh +++ b/.github/workflows/scripts/install_evals.sh @@ -1,9 +1,12 @@ #!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + echo -e "\n Install GenAIEval ... " cd /GenAIEval python -m pip install --no-cache-dir -r requirements.txt python setup.py bdist_wheel pip install dist/opea_eval*.whl -pip list \ No newline at end of file +pip list From 06e8cfd7e8660aceeba14405eb4c3cc28d6bf375 Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Mon, 24 Jun 2024 13:06:40 +0800 Subject: [PATCH 17/17] fix path Signed-off-by: Sun, Xuehao --- .github/workflows/scripts/unittest/calc_coverage.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/scripts/unittest/calc_coverage.sh b/.github/workflows/scripts/unittest/calc_coverage.sh index 947f7810..e2e1cc23 100644 --- a/.github/workflows/scripts/unittest/calc_coverage.sh +++ b/.github/workflows/scripts/unittest/calc_coverage.sh @@ -6,8 +6,8 @@ source ../change_color LOG_DIR=$1 coverage_compare="${LOG_DIR}/coverage_compare.html" -coverage_log_pr="${LOG_DIR}/UnitTestPR-test/coverage_pr" -coverage_log_base="${LOG_DIR}/UnitTestbaseline/coverage_base" +coverage_log_pr="${LOG_DIR}/UnitTestPR-test/pr" +coverage_log_base="${LOG_DIR}/UnitTestbaseline/base" function get_coverage_data() { # Input argument