From 226e7fd171f00a604becb1cb62aa8d08b472ab06 Mon Sep 17 00:00:00 2001
From: "Wang, Chang"
Date: Fri, 31 May 2024 14:06:40 +0800
Subject: [PATCH] Rename GenAIEval and Docker folder and set version, binary
 name (#22)

Signed-off-by: changwangss
Co-authored-by: chensuyue
---
 .github/workflows/model_test_cpu.yml                   | 10 ++--------
 .github/workflows/model_test_hpu.yml                   |  2 +-
 .github/workflows/scripts/models/generate_report.sh    |  4 ++--
 .github/workflows/scripts/models/model_test.sh         |  4 ++--
 .github/workflows/scripts/unittest/compare_coverage.sh |  2 +-
 .github/workflows/unittest.yml                         |  6 +-----
 README.md                                              | 12 ++++++------
 {Docker => docker}/hpu.dockerfile                      |  0
 {GenAIEval => evals}/__init__.py                       |  0
 {GenAIEval => evals}/benchmark/__init__.py             |  0
 {GenAIEval => evals}/benchmark/chatqna_benchmark.py    |  0
 {GenAIEval => evals}/benchmark/data.json               |  0
 {GenAIEval => evals}/evaluation/__init__.py            |  0
 .../bigcode_evaluation_harness/__init__.py             |  0
 .../bigcode_evaluation_harness/accuracy.py             |  0
 .../bigcode_evaluation_harness/arguments.py            |  0
 .../bigcode_evaluation_harness/examples/main.py        |  2 +-
 .../evaluation/lm_evaluation_harness/__init__.py       |  0
 .../evaluation/lm_evaluation_harness/accuracy.py       |  0
 .../evaluation/lm_evaluation_harness/arguments.py      |  0
 .../lm_evaluation_harness/examples/main.py             |  2 +-
 .../lm_evaluation_harness/lm_eval/__init__.py          |  0
 .../lm_evaluation_harness/lm_eval/evaluator.py         |  0
 .../lm_evaluation_harness/lm_eval/models/__init__.py   |  0
 .../lm_eval/models/huggingface.py                      |  0
 setup.py                                               |  4 ++--
 tests/requirements.txt                                 |  1 +
 tests/test_bigcode_eval.py                             |  2 +-
 tests/test_lm_eval.py                                  |  2 +-
 29 files changed, 22 insertions(+), 31 deletions(-)
 rename {Docker => docker}/hpu.dockerfile (100%)
 rename {GenAIEval => evals}/__init__.py (100%)
 rename {GenAIEval => evals}/benchmark/__init__.py (100%)
 rename {GenAIEval => evals}/benchmark/chatqna_benchmark.py (100%)
 rename {GenAIEval => evals}/benchmark/data.json (100%)
 rename {GenAIEval => evals}/evaluation/__init__.py (100%)
 rename {GenAIEval => evals}/evaluation/bigcode_evaluation_harness/__init__.py (100%)
 rename {GenAIEval => evals}/evaluation/bigcode_evaluation_harness/accuracy.py (100%)
 rename {GenAIEval => evals}/evaluation/bigcode_evaluation_harness/arguments.py (100%)
 rename {GenAIEval => evals}/evaluation/bigcode_evaluation_harness/examples/main.py (90%)
 rename {GenAIEval => evals}/evaluation/lm_evaluation_harness/__init__.py (100%)
 rename {GenAIEval => evals}/evaluation/lm_evaluation_harness/accuracy.py (100%)
 rename {GenAIEval => evals}/evaluation/lm_evaluation_harness/arguments.py (100%)
 rename {GenAIEval => evals}/evaluation/lm_evaluation_harness/examples/main.py (90%)
 rename {GenAIEval => evals}/evaluation/lm_evaluation_harness/lm_eval/__init__.py (100%)
 rename {GenAIEval => evals}/evaluation/lm_evaluation_harness/lm_eval/evaluator.py (100%)
 rename {GenAIEval => evals}/evaluation/lm_evaluation_harness/lm_eval/models/__init__.py (100%)
 rename {GenAIEval => evals}/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py (100%)
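Migration note: every module moves verbatim from the GenAIEval package to evals (100% similarity in every rename except the two examples/main.py files, which only rewrite their import line), so downstream code should need nothing beyond updating the top-level package name. A minimal sketch, assuming a hypothetical consumer module that used the old namespace:

```python
# Migration sketch (hypothetical downstream code): only the top-level
# package name changes, GenAIEval -> evals; paths below it are unchanged.

# Before this patch:
# from GenAIEval.evaluation.lm_evaluation_harness import LMEvalParser, evaluate
# from GenAIEval.evaluation.bigcode_evaluation_harness import BigcodeEvalParser

# After this patch:
from evals.evaluation.lm_evaluation_harness import LMEvalParser, evaluate
from evals.evaluation.bigcode_evaluation_harness import BigcodeEvalParser
```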
diff --git a/.github/workflows/model_test_cpu.yml b/.github/workflows/model_test_cpu.yml
index ed70411d..498ded08 100644
--- a/.github/workflows/model_test_cpu.yml
+++ b/.github/workflows/model_test_cpu.yml
@@ -53,10 +53,6 @@ jobs:
     - name: Clean Up Working Directory
       run: sudo rm -rf ${{github.workspace}}/*
 
-    - name: Load environment variables
-      run:
-        cat ~/actions-runner4/.env >> $GITHUB_ENV
-
     - name: Checkout out Repo
       uses: actions/checkout@v4
       with:
@@ -65,7 +61,7 @@ jobs:
       # We need this because GitHub needs to clone the branch to pipeline
     - name: Docker Build
       run: |
-        docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
+        docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
 
     - name: Docker Run
       run: |
@@ -74,9 +70,7 @@ jobs:
          docker rm -vf ${{ env.CONTAINER_NAME }} || true
        fi
        docker run -dit --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} -v /dev/shm:/dev/shm \
-        -v ${{ github.workspace }}:/GenAIEval \
-        -e http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" -e https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" \
-        ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }}
+        -v ${{ github.workspace }}:/GenAIEval ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }}
 
     - name: Binary build
       run: |
diff --git a/.github/workflows/model_test_hpu.yml b/.github/workflows/model_test_hpu.yml
index 1e6f2316..4a99de9c 100644
--- a/.github/workflows/model_test_hpu.yml
+++ b/.github/workflows/model_test_hpu.yml
@@ -61,7 +61,7 @@ jobs:
       # We need this because GitHub needs to clone the branch to pipeline
     - name: Docker Build
       run: |
-        docker build --target hpu --build-arg REPO_PATH="." -f ${{ github.workspace }}/Docker/hpu.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
+        docker build --target hpu --build-arg REPO_PATH="." -f ${{ github.workspace }}/docker/hpu.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
 
     - name: Docker Run
       run: |
diff --git a/.github/workflows/scripts/models/generate_report.sh b/.github/workflows/scripts/models/generate_report.sh
index 4db273f5..8d5f2c71 100644
--- a/.github/workflows/scripts/models/generate_report.sh
+++ b/.github/workflows/scripts/models/generate_report.sh
@@ -48,7 +48,7 @@ function generate_html_overview {
-        ITREX Tests
+        GenAIEval Tests
         [ Job-${BUILD_NUMBER} ]
         Test Status: ${JOB_STATUS}
         Summary
@@ -58,7 +58,7 @@ function generate_html_overview {
         ${Test_Info_Title}
-        ITREX
+        GenAIEval
         ${Test_Info}
diff --git a/.github/workflows/scripts/models/model_test.sh b/.github/workflows/scripts/models/model_test.sh
index 7d460ac2..92e55090 100644
--- a/.github/workflows/scripts/models/model_test.sh
+++ b/.github/workflows/scripts/models/model_test.sh
@@ -39,9 +39,9 @@ working_dir=""
 main() {
     case ${tasks} in
         "text-generation")
-            working_dir="/GenAIEval/GenAIEval/evaluation/lm_evaluation_harness/examples";;
+            working_dir="/GenAIEval/evals/evaluation/lm_evaluation_harness/examples";;
         "code-generation")
-            working_dir="/GenAIEval/GenAIEval/evaluation/bigcode_evaluation_harness/examples";;
+            working_dir="/GenAIEval/evals/evaluation/bigcode_evaluation_harness/examples";;
         *) echo "Not suppotted task"; exit 1;;
     esac
diff --git a/.github/workflows/scripts/unittest/compare_coverage.sh b/.github/workflows/scripts/unittest/compare_coverage.sh
index 88a4e1e5..55b75f44 100644
--- a/.github/workflows/scripts/unittest/compare_coverage.sh
+++ b/.github/workflows/scripts/unittest/compare_coverage.sh
@@ -20,7 +20,7 @@ coverage_PR_lines_rate=$5
 coverage_base_lines_rate=$6
 coverage_PR_branches_rate=$7
 coverage_base_branches_rate=$8
-module_name="GenAIEval"
+module_name="evals"
 [[ ! -f $coverage_pr_log ]] && exit 1
 [[ ! -f $coverage_base_log ]] && exit 1
 file_name="./coverage_compare"
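Coverage note: compare_coverage.sh only parses rates out of the reports, so the change above just filters coverage on the renamed module. A rough local approximation of that measurement, assuming the `coverage` package is installed and the repo's tests/ directory is discoverable via unittest (the real CI invocation lives outside this patch):

```python
# Sketch: measure line coverage of the renamed package, roughly what the CI
# compares. Assumes `pip install coverage` and a runnable tests/ directory.
import unittest

import coverage

cov = coverage.Coverage(source=["evals"])  # was source=["GenAIEval"]
cov.start()
suite = unittest.defaultTestLoader.discover("tests")
unittest.TextTestRunner().run(suite)
cov.stop()
cov.save()
cov.report()  # prints the line rate that compare_coverage.sh would parse
```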
diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
index 4c3807f6..cc4a2712 100644
--- a/.github/workflows/unittest.yml
+++ b/.github/workflows/unittest.yml
@@ -49,9 +49,6 @@ jobs:
     steps:
       - name: Clean Up Working Directory
         run: sudo rm -rf ${{github.workspace}}/*
-      - name: Load environment variables
-        run:
-          cat ~/actions-runner4/.env >> $GITHUB_ENV
       - name: Checkout out Repo
         uses: actions/checkout@v4
         with:
@@ -61,7 +58,7 @@ jobs:
 
       - name: Docker Build
         run: |
-          docker build --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
+          docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
 
       - name: Docker Run
         run: |
@@ -70,7 +67,6 @@ jobs:
            docker rm -vf ${{ env.CONTAINER_NAME }} || true
          fi
          docker run -dit --memory="4g" --memory-reservation="1g" --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} --shm-size="1g" \
-          -e http_proxy="${{ env.HTTP_PROXY_CONTAINER_RUN }}" -e https_proxy="${{ env.HTTPS_PROXY_CONTAINER_RUN }}" \
           -v ${{ github.workspace }}:/GenAIEval ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }}
 
       - name: Install Dependencies
diff --git a/README.md b/README.md
index d4b4e2a3..23838d92 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ For evaluating the models on text-generation tasks, we follow the [lm-evaluation
 ```shell
 # pip install --upgrade-strategy eager optimum[habana]
-cd GenAIEval/evaluation/lm_evaluation_harness/examples
+cd evals/evaluation/lm_evaluation_harness/examples
 python main.py \
     --model gaudi-hf \
     --model_args pretrained=EleutherAI/gpt-j-6B \
@@ -29,7 +29,7 @@ python main.py \
 ##### CPU
 ```shell
-cd GenAIEval/evaluation/lm_evaluation_harness/examples
+cd evals/evaluation/lm_evaluation_harness/examples
 python main.py \
     --model hf \
     --model_args pretrained=EleutherAI/gpt-j-6B \
@@ -39,7 +39,7 @@ python main.py \
 ```
 #### function call usage
 ```python
-from GenAIEval.evaluation.lm_evaluation_harness import LMEvalParser, evaluate
+from evals.evaluation.lm_evaluation_harness import LMEvalParser, evaluate
 
 args = LMevalParser(
     model="hf",
@@ -69,7 +69,7 @@ docker run -p 9006:9006 --ipc=host -e MODEL="hf" -e MODEL_ARGS="pretrained=Inte
 - set `base_url`, `tokenizer` and `--model genai-hf`
 ```
-cd GenAIEval/evaluation/lm_evaluation_harness/examples
+cd evals/evaluation/lm_evaluation_harness/examples
 
 python main.py \
     --model genai-hf \
@@ -83,7 +83,7 @@ For evaluating the models on coding tasks or specifically coding LLMs, we follow
 #### command line usage
 ```shell
-cd GenAIEval/evaluation/bigcode_evaluation_harness/examples
+cd evals/evaluation/bigcode_evaluation_harness/examples
 python main.py \
     --model "codeparrot/codeparrot-small" \
     --tasks "humaneval" \
@@ -93,7 +93,7 @@ python main.py \
 ```
 #### function call usage
 ```python
-from GenAIEval.evaluation.bigcode_evaluation_harness import BigcodeEvalParser, evaluate
+from evals.evaluation.bigcode_evaluation_harness import BigcodeEvalParser, evaluate
 
 args = BigcodeEvalParser(
     user_model=user_model,
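The README's function-call snippet is truncated in the hunk above, and its `args = LMevalParser(` disagrees with the imported name `LMEvalParser` (the unit tests import `LMEvalParser`). A filled-in sketch under the new namespace; the keyword arguments mirror the CLI flags shown above and are an assumption, not a confirmed signature:

```python
from evals.evaluation.lm_evaluation_harness import LMEvalParser, evaluate

# Keyword names mirror the CLI flags above (--model, --model_args, --tasks,
# --device, --batch_size); treat the exact signature as an assumption.
args = LMEvalParser(
    model="hf",
    model_args="pretrained=EleutherAI/gpt-j-6B",
    tasks="hellaswag",
    device="cpu",
    batch_size=8,
)
results = evaluate(args)
print(results)
```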
diff --git a/Docker/hpu.dockerfile b/docker/hpu.dockerfile
similarity index 100%
rename from Docker/hpu.dockerfile
rename to docker/hpu.dockerfile
diff --git a/GenAIEval/__init__.py b/evals/__init__.py
similarity index 100%
rename from GenAIEval/__init__.py
rename to evals/__init__.py
diff --git a/GenAIEval/benchmark/__init__.py b/evals/benchmark/__init__.py
similarity index 100%
rename from GenAIEval/benchmark/__init__.py
rename to evals/benchmark/__init__.py
diff --git a/GenAIEval/benchmark/chatqna_benchmark.py b/evals/benchmark/chatqna_benchmark.py
similarity index 100%
rename from GenAIEval/benchmark/chatqna_benchmark.py
rename to evals/benchmark/chatqna_benchmark.py
diff --git a/GenAIEval/benchmark/data.json b/evals/benchmark/data.json
similarity index 100%
rename from GenAIEval/benchmark/data.json
rename to evals/benchmark/data.json
diff --git a/GenAIEval/evaluation/__init__.py b/evals/evaluation/__init__.py
similarity index 100%
rename from GenAIEval/evaluation/__init__.py
rename to evals/evaluation/__init__.py
diff --git a/GenAIEval/evaluation/bigcode_evaluation_harness/__init__.py b/evals/evaluation/bigcode_evaluation_harness/__init__.py
similarity index 100%
rename from GenAIEval/evaluation/bigcode_evaluation_harness/__init__.py
rename to evals/evaluation/bigcode_evaluation_harness/__init__.py
diff --git a/GenAIEval/evaluation/bigcode_evaluation_harness/accuracy.py b/evals/evaluation/bigcode_evaluation_harness/accuracy.py
similarity index 100%
rename from GenAIEval/evaluation/bigcode_evaluation_harness/accuracy.py
rename to evals/evaluation/bigcode_evaluation_harness/accuracy.py
diff --git a/GenAIEval/evaluation/bigcode_evaluation_harness/arguments.py b/evals/evaluation/bigcode_evaluation_harness/arguments.py
similarity index 100%
rename from GenAIEval/evaluation/bigcode_evaluation_harness/arguments.py
rename to evals/evaluation/bigcode_evaluation_harness/arguments.py
diff --git a/GenAIEval/evaluation/bigcode_evaluation_harness/examples/main.py b/evals/evaluation/bigcode_evaluation_harness/examples/main.py
similarity index 90%
rename from GenAIEval/evaluation/bigcode_evaluation_harness/examples/main.py
rename to evals/evaluation/bigcode_evaluation_harness/examples/main.py
index 1b998c04..bef7f494 100644
--- a/GenAIEval/evaluation/bigcode_evaluation_harness/examples/main.py
+++ b/evals/evaluation/bigcode_evaluation_harness/examples/main.py
@@ -14,7 +14,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from GenAIEval.evaluation.bigcode_evaluation_harness import evaluate, setup_parser
+from evals.evaluation.bigcode_evaluation_harness import evaluate, setup_parser
 
 
 def main():
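Both examples/main.py files change only their import line; the rest of each file is the usual parse-then-evaluate entry point. A sketch of that shape, assuming setup_parser() returns the parsed arguments (check the real files before relying on this):

```python
from evals.evaluation.bigcode_evaluation_harness import evaluate, setup_parser


def main():
    args = setup_parser()  # assumption: returns the parsed argparse namespace
    results = evaluate(args)
    print(results)


if __name__ == "__main__":
    main()
```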
diff --git a/GenAIEval/evaluation/lm_evaluation_harness/__init__.py b/evals/evaluation/lm_evaluation_harness/__init__.py
similarity index 100%
rename from GenAIEval/evaluation/lm_evaluation_harness/__init__.py
rename to evals/evaluation/lm_evaluation_harness/__init__.py
diff --git a/GenAIEval/evaluation/lm_evaluation_harness/accuracy.py b/evals/evaluation/lm_evaluation_harness/accuracy.py
similarity index 100%
rename from GenAIEval/evaluation/lm_evaluation_harness/accuracy.py
rename to evals/evaluation/lm_evaluation_harness/accuracy.py
diff --git a/GenAIEval/evaluation/lm_evaluation_harness/arguments.py b/evals/evaluation/lm_evaluation_harness/arguments.py
similarity index 100%
rename from GenAIEval/evaluation/lm_evaluation_harness/arguments.py
rename to evals/evaluation/lm_evaluation_harness/arguments.py
diff --git a/GenAIEval/evaluation/lm_evaluation_harness/examples/main.py b/evals/evaluation/lm_evaluation_harness/examples/main.py
similarity index 90%
rename from GenAIEval/evaluation/lm_evaluation_harness/examples/main.py
rename to evals/evaluation/lm_evaluation_harness/examples/main.py
index 15b23d2a..ee61377e 100644
--- a/GenAIEval/evaluation/lm_evaluation_harness/examples/main.py
+++ b/evals/evaluation/lm_evaluation_harness/examples/main.py
@@ -15,7 +15,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from GenAIEval.evaluation.lm_evaluation_harness import evaluate, setup_parser
+from evals.evaluation.lm_evaluation_harness import evaluate, setup_parser
 
 
 def main():
diff --git a/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/__init__.py b/evals/evaluation/lm_evaluation_harness/lm_eval/__init__.py
similarity index 100%
rename from GenAIEval/evaluation/lm_evaluation_harness/lm_eval/__init__.py
rename to evals/evaluation/lm_evaluation_harness/lm_eval/__init__.py
diff --git a/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/evaluator.py b/evals/evaluation/lm_evaluation_harness/lm_eval/evaluator.py
similarity index 100%
rename from GenAIEval/evaluation/lm_evaluation_harness/lm_eval/evaluator.py
rename to evals/evaluation/lm_evaluation_harness/lm_eval/evaluator.py
diff --git a/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/__init__.py b/evals/evaluation/lm_evaluation_harness/lm_eval/models/__init__.py
similarity index 100%
rename from GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/__init__.py
rename to evals/evaluation/lm_evaluation_harness/lm_eval/models/__init__.py
diff --git a/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py b/evals/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py
similarity index 100%
rename from GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py
rename to evals/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py
diff --git a/setup.py b/setup.py
index 56838559..8f51bda0 100644
--- a/setup.py
+++ b/setup.py
@@ -26,8 +26,8 @@ def parse_requirements(filename):
 setup(
-    name="GenAIEval",
-    version="0.0.0",
+    name="opea_eval",
+    version="0.6",
     author="Intel AISE AIPC Team",
     author_email="haihao.shen@intel.com, feng.tian@intel.com, chang1.wang@intel.com, kaokao.lv@intel.com",
     description="Evaluation and benchmark for Generative AI",
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 3439940c..cc3859dd 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1 +1,2 @@
+bigcode-eval@git+https://github.com/bigcode-project/bigcode-evaluation-harness.git@a1b4a7949a24c8e3ef0d05a01097b2d14ffba56e
 lm-eval==0.4.2
diff --git a/tests/test_bigcode_eval.py b/tests/test_bigcode_eval.py
index d57e8a51..09e3f139 100644
--- a/tests/test_bigcode_eval.py
+++ b/tests/test_bigcode_eval.py
@@ -19,7 +19,7 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from GenAIEval.evaluation.bigcode_evaluation_harness import BigcodeEvalParser, evaluate
+from evals.evaluation.bigcode_evaluation_harness import BigcodeEvalParser, evaluate
 
 
 class TestLMEval(unittest.TestCase):
diff --git a/tests/test_lm_eval.py b/tests/test_lm_eval.py
index c5e49e14..1f8f4f63 100644
--- a/tests/test_lm_eval.py
+++ b/tests/test_lm_eval.py
@@ -19,7 +19,7 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from GenAIEval.evaluation.lm_evaluation_harness import LMEvalParser, evaluate
+from evals.evaluation.lm_evaluation_harness import LMEvalParser, evaluate
 
 
 class TestLMEval(unittest.TestCase):
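Packaging note: after this patch the distribution name (opea_eval, version 0.6) differs from the import package (evals). A quick post-install sanity check, a sketch assuming `pip install .` was run from the repo root on this commit:

```python
# Sanity check: distribution metadata vs. import package after the rename.
from importlib.metadata import version

import evals  # import name, renamed from GenAIEval

print(version("opea_eval"))  # distribution name from setup.py; expect "0.6"
print(evals.__file__)        # confirms the package resolves under evals/
```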