diff --git a/.github/workflows/model_test_cpu.yml b/.github/workflows/model_test_cpu.yml
index ed70411d..498ded08 100644
--- a/.github/workflows/model_test_cpu.yml
+++ b/.github/workflows/model_test_cpu.yml
@@ -53,10 +53,6 @@ jobs:
       - name: Clean Up Working Directory
         run: sudo rm -rf ${{github.workspace}}/*
 
-      - name: Load environment variables
-        run:
-          cat ~/actions-runner4/.env >> $GITHUB_ENV
-
       - name: Checkout out Repo
         uses: actions/checkout@v4
         with:
@@ -65,7 +61,7 @@
           # We need this because GitHub needs to clone the branch to pipeline
       - name: Docker Build
         run: |
-          docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
+          docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
 
       - name: Docker Run
         run: |
@@ -74,9 +70,7 @@
             docker rm -vf ${{ env.CONTAINER_NAME }} || true
           fi
           docker run -dit --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} -v /dev/shm:/dev/shm \
-          -v ${{ github.workspace }}:/GenAIEval \
-          -e http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" -e https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" \
-          ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }}
+          -v ${{ github.workspace }}:/GenAIEval ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }}
 
       - name: Binary build
         run: |
diff --git a/.github/workflows/model_test_hpu.yml b/.github/workflows/model_test_hpu.yml
index 1e6f2316..4a99de9c 100644
--- a/.github/workflows/model_test_hpu.yml
+++ b/.github/workflows/model_test_hpu.yml
@@ -61,7 +61,7 @@ jobs:
           # We need this because GitHub needs to clone the branch to pipeline
       - name: Docker Build
         run: |
-          docker build --target hpu --build-arg REPO_PATH="." -f ${{ github.workspace }}/Docker/hpu.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
+          docker build --target hpu --build-arg REPO_PATH="." -f ${{ github.workspace }}/docker/hpu.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
 
       - name: Docker Run
         run: |
diff --git a/.github/workflows/scripts/models/generate_report.sh b/.github/workflows/scripts/models/generate_report.sh
index 4db273f5..8d5f2c71 100644
--- a/.github/workflows/scripts/models/generate_report.sh
+++ b/.github/workflows/scripts/models/generate_report.sh
@@ -48,7 +48,7 @@ function generate_html_overview {
-            ITREX Tests
+            GenAIEval Tests
             [ Job-${BUILD_NUMBER} ]
             Test Status: ${JOB_STATUS}
             Summary
@@ -58,7 +58,7 @@ function generate_html_overview {
             ${Test_Info_Title}
-            ITREX
+            GenAIEval
             ${Test_Info}
diff --git a/.github/workflows/scripts/models/model_test.sh b/.github/workflows/scripts/models/model_test.sh
index 7d460ac2..92e55090 100644
--- a/.github/workflows/scripts/models/model_test.sh
+++ b/.github/workflows/scripts/models/model_test.sh
@@ -39,9 +39,9 @@ working_dir=""
 
 main() {
     case ${tasks} in
        "text-generation")
-            working_dir="/GenAIEval/GenAIEval/evaluation/lm_evaluation_harness/examples";;
+            working_dir="/GenAIEval/evals/evaluation/lm_evaluation_harness/examples";;
        "code-generation")
-            working_dir="/GenAIEval/GenAIEval/evaluation/bigcode_evaluation_harness/examples";;
+            working_dir="/GenAIEval/evals/evaluation/bigcode_evaluation_harness/examples";;
        *) echo "Not suppotted task"; exit 1;;
    esac
diff --git a/.github/workflows/scripts/unittest/compare_coverage.sh b/.github/workflows/scripts/unittest/compare_coverage.sh
index 88a4e1e5..55b75f44 100644
--- a/.github/workflows/scripts/unittest/compare_coverage.sh
+++ b/.github/workflows/scripts/unittest/compare_coverage.sh
@@ -20,7 +20,7 @@ coverage_PR_lines_rate=$5
 coverage_base_lines_rate=$6
 coverage_PR_branches_rate=$7
 coverage_base_branches_rate=$8
-module_name="GenAIEval"
+module_name="evals"
 [[ ! -f $coverage_pr_log ]] && exit 1
 [[ ! -f $coverage_base_log ]] && exit 1
 file_name="./coverage_compare"
diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
index 4c3807f6..cc4a2712 100644
--- a/.github/workflows/unittest.yml
+++ b/.github/workflows/unittest.yml
@@ -49,9 +49,6 @@ jobs:
     steps:
       - name: Clean Up Working Directory
         run: sudo rm -rf ${{github.workspace}}/*
-      - name: Load environment variables
-        run:
-          cat ~/actions-runner4/.env >> $GITHUB_ENV
       - name: Checkout out Repo
         uses: actions/checkout@v4
         with:
@@ -61,7 +58,7 @@
 
       - name: Docker Build
         run: |
-          docker build --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
+          docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
       - name: Docker Run
         run: |
@@ -70,7 +67,6 @@
             docker rm -vf ${{ env.CONTAINER_NAME }} || true
           fi
           docker run -dit --memory="4g" --memory-reservation="1g" --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} --shm-size="1g" \
-          -e http_proxy="${{ env.HTTP_PROXY_CONTAINER_RUN }}" -e https_proxy="${{ env.HTTPS_PROXY_CONTAINER_RUN }}" \
           -v ${{ github.workspace }}:/GenAIEval ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }}
 
       - name: Install Dependencies
diff --git a/README.md b/README.md
index d4b4e2a3..23838d92 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ For evaluating the models on text-generation tasks, we follow the [lm-evaluation
 ```shell
 # pip install --upgrade-strategy eager optimum[habana]
-cd GenAIEval/evaluation/lm_evaluation_harness/examples
+cd evals/evaluation/lm_evaluation_harness/examples
 python main.py \
     --model gaudi-hf \
     --model_args pretrained=EleutherAI/gpt-j-6B \
@@ -29,7 +29,7 @@ python main.py \
 ##### CPU
 
 ```shell
-cd GenAIEval/evaluation/lm_evaluation_harness/examples
+cd evals/evaluation/lm_evaluation_harness/examples
 python main.py \
     --model hf \
     --model_args pretrained=EleutherAI/gpt-j-6B \
@@ -39,7 +39,7 @@ python main.py \
 ```
 #### function call usage
 ```python
-from GenAIEval.evaluation.lm_evaluation_harness import LMEvalParser, evaluate
+from evals.evaluation.lm_evaluation_harness import LMEvalParser, evaluate
 
 args = LMevalParser(
     model="hf",
@@ -69,7 +69,7 @@ docker run -p 9006:9006 --ipc=host -e MODEL="hf" -e MODEL_ARGS="pretrained=Inte
 - set `base_url`, `tokenizer` and `--model genai-hf`
 
 ```
-cd GenAIEval/evaluation/lm_evaluation_harness/examples
+cd evals/evaluation/lm_evaluation_harness/examples
 
 python main.py \
     --model genai-hf \
@@ -83,7 +83,7 @@ For evaluating the models on coding tasks or specifically coding LLMs, we follow
 #### command line usage
 
 ```shell
-cd GenAIEval/evaluation/bigcode_evaluation_harness/examples
+cd evals/evaluation/bigcode_evaluation_harness/examples
 python main.py \
     --model "codeparrot/codeparrot-small" \
     --tasks "humaneval" \
@@ -93,7 +93,7 @@ python main.py \
 ```
 #### function call usage
 ```python
-from GenAIEval.evaluation.bigcode_evaluation_harness import BigcodeEvalParser, evaluate
+from evals.evaluation.bigcode_evaluation_harness import BigcodeEvalParser, evaluate
 
 args = BigcodeEvalParser(
     user_model=user_model,
diff --git a/Docker/hpu.dockerfile b/docker/hpu.dockerfile
similarity index 100%
rename from Docker/hpu.dockerfile
rename to docker/hpu.dockerfile
diff --git a/GenAIEval/__init__.py b/evals/__init__.py
similarity index 100%
rename from GenAIEval/__init__.py
rename to evals/__init__.py
diff --git a/GenAIEval/benchmark/__init__.py b/evals/benchmark/__init__.py
similarity index 100%
rename from GenAIEval/benchmark/__init__.py
rename to evals/benchmark/__init__.py
diff --git a/GenAIEval/benchmark/chatqna_benchmark.py b/evals/benchmark/chatqna_benchmark.py
similarity index 100%
rename from GenAIEval/benchmark/chatqna_benchmark.py
rename to evals/benchmark/chatqna_benchmark.py
diff --git a/GenAIEval/benchmark/data.json b/evals/benchmark/data.json
similarity index 100%
rename from GenAIEval/benchmark/data.json
rename to evals/benchmark/data.json
diff --git a/GenAIEval/evaluation/__init__.py b/evals/evaluation/__init__.py
similarity index 100%
rename from GenAIEval/evaluation/__init__.py
rename to evals/evaluation/__init__.py
diff --git a/GenAIEval/evaluation/bigcode_evaluation_harness/__init__.py b/evals/evaluation/bigcode_evaluation_harness/__init__.py
similarity index 100%
rename from GenAIEval/evaluation/bigcode_evaluation_harness/__init__.py
rename to evals/evaluation/bigcode_evaluation_harness/__init__.py
diff --git a/GenAIEval/evaluation/bigcode_evaluation_harness/accuracy.py b/evals/evaluation/bigcode_evaluation_harness/accuracy.py
similarity index 100%
rename from GenAIEval/evaluation/bigcode_evaluation_harness/accuracy.py
rename to evals/evaluation/bigcode_evaluation_harness/accuracy.py
diff --git a/GenAIEval/evaluation/bigcode_evaluation_harness/arguments.py b/evals/evaluation/bigcode_evaluation_harness/arguments.py
similarity index 100%
rename from GenAIEval/evaluation/bigcode_evaluation_harness/arguments.py
rename to evals/evaluation/bigcode_evaluation_harness/arguments.py
diff --git a/GenAIEval/evaluation/bigcode_evaluation_harness/examples/main.py b/evals/evaluation/bigcode_evaluation_harness/examples/main.py
similarity index 90%
rename from GenAIEval/evaluation/bigcode_evaluation_harness/examples/main.py
rename to evals/evaluation/bigcode_evaluation_harness/examples/main.py
index 1b998c04..bef7f494 100644
--- a/GenAIEval/evaluation/bigcode_evaluation_harness/examples/main.py
+++ b/evals/evaluation/bigcode_evaluation_harness/examples/main.py
@@ -14,7 +14,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from GenAIEval.evaluation.bigcode_evaluation_harness import evaluate, setup_parser
+from evals.evaluation.bigcode_evaluation_harness import evaluate, setup_parser
 
 
 def main():
diff --git a/GenAIEval/evaluation/lm_evaluation_harness/__init__.py b/evals/evaluation/lm_evaluation_harness/__init__.py
similarity index 100%
rename from GenAIEval/evaluation/lm_evaluation_harness/__init__.py
rename to evals/evaluation/lm_evaluation_harness/__init__.py
diff --git a/GenAIEval/evaluation/lm_evaluation_harness/accuracy.py b/evals/evaluation/lm_evaluation_harness/accuracy.py
similarity index 100%
rename from GenAIEval/evaluation/lm_evaluation_harness/accuracy.py
rename to evals/evaluation/lm_evaluation_harness/accuracy.py
diff --git a/GenAIEval/evaluation/lm_evaluation_harness/arguments.py b/evals/evaluation/lm_evaluation_harness/arguments.py
similarity index 100%
rename from GenAIEval/evaluation/lm_evaluation_harness/arguments.py
rename to evals/evaluation/lm_evaluation_harness/arguments.py
diff --git a/GenAIEval/evaluation/lm_evaluation_harness/examples/main.py b/evals/evaluation/lm_evaluation_harness/examples/main.py
similarity index 90%
rename from GenAIEval/evaluation/lm_evaluation_harness/examples/main.py
rename to evals/evaluation/lm_evaluation_harness/examples/main.py
index 15b23d2a..ee61377e 100644
--- a/GenAIEval/evaluation/lm_evaluation_harness/examples/main.py
+++ b/evals/evaluation/lm_evaluation_harness/examples/main.py
@@ -15,7 +15,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from GenAIEval.evaluation.lm_evaluation_harness import evaluate, setup_parser
+from evals.evaluation.lm_evaluation_harness import evaluate, setup_parser
 
 
 def main():
diff --git a/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/__init__.py b/evals/evaluation/lm_evaluation_harness/lm_eval/__init__.py
similarity index 100%
rename from GenAIEval/evaluation/lm_evaluation_harness/lm_eval/__init__.py
rename to evals/evaluation/lm_evaluation_harness/lm_eval/__init__.py
diff --git a/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/evaluator.py b/evals/evaluation/lm_evaluation_harness/lm_eval/evaluator.py
similarity index 100%
rename from GenAIEval/evaluation/lm_evaluation_harness/lm_eval/evaluator.py
rename to evals/evaluation/lm_evaluation_harness/lm_eval/evaluator.py
diff --git a/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/__init__.py b/evals/evaluation/lm_evaluation_harness/lm_eval/models/__init__.py
similarity index 100%
rename from GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/__init__.py
rename to evals/evaluation/lm_evaluation_harness/lm_eval/models/__init__.py
diff --git a/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py b/evals/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py
similarity index 100%
rename from GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py
rename to evals/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py
diff --git a/setup.py b/setup.py
index 56838559..8f51bda0 100644
--- a/setup.py
+++ b/setup.py
@@ -26,8 +26,8 @@ def parse_requirements(filename):
 
 
 setup(
-    name="GenAIEval",
-    version="0.0.0",
+    name="opea_eval",
+    version="0.6",
     author="Intel AISE AIPC Team",
     author_email="haihao.shen@intel.com, feng.tian@intel.com, chang1.wang@intel.com, kaokao.lv@intel.com",
     description="Evaluation and benchmark for Generative AI",
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 3439940c..cc3859dd 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1 +1,2 @@
+bigcode-eval@git+https://github.com/bigcode-project/bigcode-evaluation-harness.git@a1b4a7949a24c8e3ef0d05a01097b2d14ffba56e
 lm-eval==0.4.2
diff --git a/tests/test_bigcode_eval.py b/tests/test_bigcode_eval.py
index d57e8a51..09e3f139 100644
--- a/tests/test_bigcode_eval.py
+++ b/tests/test_bigcode_eval.py
@@ -19,7 +19,7 @@
 
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from GenAIEval.evaluation.bigcode_evaluation_harness import BigcodeEvalParser, evaluate
+from evals.evaluation.bigcode_evaluation_harness import BigcodeEvalParser, evaluate
 
 
 class TestLMEval(unittest.TestCase):
diff --git a/tests/test_lm_eval.py b/tests/test_lm_eval.py
index c5e49e14..1f8f4f63 100644
--- a/tests/test_lm_eval.py
+++ b/tests/test_lm_eval.py
@@ -19,7 +19,7 @@
 
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from GenAIEval.evaluation.lm_evaluation_harness import LMEvalParser, evaluate
+from evals.evaluation.lm_evaluation_harness import LMEvalParser, evaluate
 
 
 class TestLMEval(unittest.TestCase):