Rename GenAIEval and Docker folder and set version, binary name (#22)
Signed-off-by: changwangss <[email protected]>
Co-authored-by: chensuyue <[email protected]>
changwangss and chensuyue authored May 31, 2024
1 parent 15345cb commit 226e7fd
Showing 29 changed files with 22 additions and 31 deletions.
10 changes: 2 additions & 8 deletions .github/workflows/model_test_cpu.yml
@@ -53,10 +53,6 @@ jobs:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*

- name: Load environment variables
run:
cat ~/actions-runner4/.env >> $GITHUB_ENV

- name: Checkout Repo
uses: actions/checkout@v4
with:
@@ -65,7 +61,7 @@ jobs:
# We need this because GitHub needs to clone the branch to pipeline
- name: Docker Build
run: |
docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
- name: Docker Run
run: |
@@ -74,9 +70,7 @@
docker rm -vf ${{ env.CONTAINER_NAME }} || true
fi
docker run -dit --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} -v /dev/shm:/dev/shm \
-v ${{ github.workspace }}:/GenAIEval \
-e http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" -e https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" \
${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }}
-v ${{ github.workspace }}:/GenAIEval ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }}
- name: Binary build
run: |
2 changes: 1 addition & 1 deletion .github/workflows/model_test_hpu.yml
@@ -61,7 +61,7 @@ jobs:
# We need this because GitHub needs to clone the branch to pipeline
- name: Docker Build
run: |
docker build --target hpu --build-arg REPO_PATH="." -f ${{ github.workspace }}/Docker/hpu.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
docker build --target hpu --build-arg REPO_PATH="." -f ${{ github.workspace }}/docker/hpu.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
- name: Docker Run
run: |
4 changes: 2 additions & 2 deletions .github/workflows/scripts/models/generate_report.sh
@@ -48,7 +48,7 @@ function generate_html_overview {
<body>
<div id="main">
<h1 align="center">ITREX Tests
<h1 align="center">GenAIEval Tests
[ <a href="${RUN_DISPLAY_URL}">Job-${BUILD_NUMBER}</a> ]</h1>
<h1 align="center">Test Status: ${JOB_STATUS}</h1>
<h2>Summary</h2>
@@ -58,7 +58,7 @@ function generate_html_overview {
${Test_Info_Title}
</tr>
<tr>
<td><a href="https://github.com/intel/intel-extension-for-transformers">ITREX</a></td>
<td><a href="https://github.com/opea-project/GenAIEval">GenAIEval</a></td>
${Test_Info}
</tr>
</table>
4 changes: 2 additions & 2 deletions .github/workflows/scripts/models/model_test.sh
@@ -39,9 +39,9 @@ working_dir=""
main() {
case ${tasks} in
"text-generation")
working_dir="/GenAIEval/GenAIEval/evaluation/lm_evaluation_harness/examples";;
working_dir="/GenAIEval/evals/evaluation/lm_evaluation_harness/examples";;
"code-generation")
working_dir="/GenAIEval/GenAIEval/evaluation/bigcode_evaluation_harness/examples";;
working_dir="/GenAIEval/evals/evaluation/bigcode_evaluation_harness/examples";;
*)
echo "Not supported task"; exit 1;;
esac
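The `case` block in model_test.sh above dispatches each task to its examples directory under the renamed `evals` tree. As a minimal sketch of the same mapping — the function name `working_dir_for` is hypothetical, the paths are taken from the diff:

```python
def working_dir_for(task: str) -> str:
    """Map a test task to its examples directory (paths as in the renamed tree)."""
    dirs = {
        "text-generation": "/GenAIEval/evals/evaluation/lm_evaluation_harness/examples",
        "code-generation": "/GenAIEval/evals/evaluation/bigcode_evaluation_harness/examples",
    }
    if task not in dirs:
        # mirrors the script's fallback branch, which prints an error and exits
        raise ValueError(f"Not supported task: {task}")
    return dirs[task]
```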
2 changes: 1 addition & 1 deletion .github/workflows/scripts/unittest/compare_coverage.sh
@@ -20,7 +20,7 @@ coverage_PR_lines_rate=$5
coverage_base_lines_rate=$6
coverage_PR_branches_rate=$7
coverage_base_branches_rate=$8
module_name="GenAIEval"
module_name="evals"
[[ ! -f $coverage_pr_log ]] && exit 1
[[ ! -f $coverage_base_log ]] && exit 1
file_name="./coverage_compare"
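compare_coverage.sh receives the PR and base line/branch coverage rates as positional arguments (`$5`–`$8` above). A hedged sketch of the comparison such a script presumably performs — the function name and pass/fail rule are illustrative, not the script's actual logic:

```python
def coverage_regressed(pr_lines: float, base_lines: float,
                       pr_branches: float, base_branches: float) -> bool:
    """Flag a regression when either the line or branch rate drops below base."""
    return pr_lines < base_lines or pr_branches < base_branches
```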
6 changes: 1 addition & 5 deletions .github/workflows/unittest.yml
@@ -49,9 +49,6 @@ jobs:
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Load environment variables
run:
cat ~/actions-runner4/.env >> $GITHUB_ENV
- name: Checkout Repo
uses: actions/checkout@v4
with:
@@ -61,7 +58,7 @@

- name: Docker Build
run: |
docker build --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
- name: Docker Run
run: |
@@ -70,7 +67,6 @@
docker rm -vf ${{ env.CONTAINER_NAME }} || true
fi
docker run -dit --memory="4g" --memory-reservation="1g" --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} --shm-size="1g" \
-e http_proxy="${{ env.HTTP_PROXY_CONTAINER_RUN }}" -e https_proxy="${{ env.HTTPS_PROXY_CONTAINER_RUN }}" \
-v ${{ github.workspace }}:/GenAIEval ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }}
- name: Install Dependencies
12 changes: 6 additions & 6 deletions README.md
@@ -16,7 +16,7 @@ For evaluating the models on text-generation tasks, we follow the [lm-evaluation
```shell

# pip install --upgrade-strategy eager optimum[habana]
cd GenAIEval/evaluation/lm_evaluation_harness/examples
cd evals/evaluation/lm_evaluation_harness/examples
python main.py \
--model gaudi-hf \
--model_args pretrained=EleutherAI/gpt-j-6B \
@@ -29,7 +29,7 @@ python main.py \
##### CPU
```shell

cd GenAIEval/evaluation/lm_evaluation_harness/examples
cd evals/evaluation/lm_evaluation_harness/examples
python main.py \
--model hf \
--model_args pretrained=EleutherAI/gpt-j-6B \
@@ -39,7 +39,7 @@ python main.py \
```
#### function call usage
```python
from GenAIEval.evaluation.lm_evaluation_harness import LMEvalParser, evaluate
from evals.evaluation.lm_evaluation_harness import LMEvalParser, evaluate

args = LMEvalParser(
model="hf",
@@ -69,7 +69,7 @@ docker run -p 9006:9006 --ipc=host -e MODEL="hf" -e MODEL_ARGS="pretrained=Inte
- set `base_url`, `tokenizer` and `--model genai-hf`

```
cd GenAIEval/evaluation/lm_evaluation_harness/examples
cd evals/evaluation/lm_evaluation_harness/examples
python main.py \
--model genai-hf \
@@ -83,7 +83,7 @@ For evaluating the models on coding tasks or specifically coding LLMs, we follow
#### command line usage

```shell
cd GenAIEval/evaluation/bigcode_evaluation_harness/examples
cd evals/evaluation/bigcode_evaluation_harness/examples
python main.py \
--model "codeparrot/codeparrot-small" \
--tasks "humaneval" \
@@ -93,7 +93,7 @@ python main.py \
```
#### function call usage
```python
from GenAIEval.evaluation.bigcode_evaluation_harness import BigcodeEvalParser, evaluate
from evals.evaluation.bigcode_evaluation_harness import BigcodeEvalParser, evaluate

args = BigcodeEvalParser(
user_model=user_model,
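The README changes above are a mechanical rename of the import root from `GenAIEval` to `evals`. A small illustrative helper (hypothetical, not part of the commit) that applies the same rewrite to an import line:

```python
OLD_PKG = "GenAIEval"
NEW_PKG = "evals"

def migrate_import(stmt: str) -> str:
    """Rewrite an old-style import statement to the renamed package root."""
    return stmt.replace(f"from {OLD_PKG}.", f"from {NEW_PKG}.")
```

Running such a rewrite over the repository's Python files and docs would reproduce the import changes shown in this diff.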
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -14,7 +14,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from GenAIEval.evaluation.bigcode_evaluation_harness import evaluate, setup_parser
from evals.evaluation.bigcode_evaluation_harness import evaluate, setup_parser


def main():
@@ -15,7 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from GenAIEval.evaluation.lm_evaluation_harness import evaluate, setup_parser
from evals.evaluation.lm_evaluation_harness import evaluate, setup_parser


def main():
4 changes: 2 additions & 2 deletions setup.py
@@ -26,8 +26,8 @@ def parse_requirements(filename):


setup(
name="GenAIEval",
version="0.0.0",
name="opea_eval",
version="0.6",
author="Intel AISE AIPC Team",
author_email="[email protected], [email protected], [email protected], [email protected]",
description="Evaluation and benchmark for Generative AI",
1 change: 1 addition & 0 deletions tests/requirements.txt
@@ -1 +1,2 @@
bigcode-eval@git+https://github.com/bigcode-project/bigcode-evaluation-harness.git@a1b4a7949a24c8e3ef0d05a01097b2d14ffba56e
lm-eval==0.4.2
2 changes: 1 addition & 1 deletion tests/test_bigcode_eval.py
@@ -19,7 +19,7 @@

from transformers import AutoModelForCausalLM, AutoTokenizer

from GenAIEval.evaluation.bigcode_evaluation_harness import BigcodeEvalParser, evaluate
from evals.evaluation.bigcode_evaluation_harness import BigcodeEvalParser, evaluate


class TestLMEval(unittest.TestCase):
2 changes: 1 addition & 1 deletion tests/test_lm_eval.py
@@ -19,7 +19,7 @@

from transformers import AutoModelForCausalLM, AutoTokenizer

from GenAIEval.evaluation.lm_evaluation_harness import LMEvalParser, evaluate
from evals.evaluation.lm_evaluation_harness import LMEvalParser, evaluate


class TestLMEval(unittest.TestCase):
