From a3511b06ee8d130064f32825734e54c434129cd5 Mon Sep 17 00:00:00 2001
From: "Lv, Kaokao"
Date: Thu, 30 May 2024 12:30:00 +0800
Subject: [PATCH 01/13] update name.

---
 .../text-generation/lm-eval/self_hosted_hf.py | 79 +++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100644 comps/llms/text-generation/lm-eval/self_hosted_hf.py

diff --git a/comps/llms/text-generation/lm-eval/self_hosted_hf.py b/comps/llms/text-generation/lm-eval/self_hosted_hf.py
new file mode 100644
index 000000000..02fa69dd0
--- /dev/null
+++ b/comps/llms/text-generation/lm-eval/self_hosted_hf.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+from typing import List
+
+import lm_eval.api.registry
+import torch
+from docarray import BaseDoc
+from GenAIEval.evaluation.lm_evaluation_harness.lm_eval.models.huggingface import HFLM, GaudiHFModelAdapter
+
+from comps import ServiceType, opea_microservices, opea_telemetry, register_microservice
+
+lm_eval.api.registry.MODEL_REGISTRY["hf"] = HFLM
+lm_eval.api.registry.MODEL_REGISTRY["gaudi-hf"] = GaudiHFModelAdapter
+
+
+class LLMCompletionDoc(BaseDoc):
+    batched_inputs: List
+    logprobs: int = 10
+    max_tokens: int = 0
+    temperature: float = 0.0
+
+
+model = os.getenv("MODEL", "")
+model_args = os.getenv("MODEL_ARGS", "")
+device = os.getenv("DEVICE", "")
+
+llm = lm_eval.api.registry.get_model(model).create_from_arg_string(
+    model_args,
+    {
+        "batch_size": 1,  # dummy
+        "max_batch_size": None,
+        "device": device,
+    },
+)
+
+
+@register_microservice(
+    name="opea_service@self_hosted_hf",
+    service_type=ServiceType.LLM,
+    endpoint="/v1/completions",
+    host="0.0.0.0",
+    port=9006,
+)
+@opea_telemetry
+def llm_generate(input: LLMCompletionDoc):
+    global llm
+    batched_inputs = torch.tensor(input.batched_inputs, dtype=torch.long, device=llm.device)
+    with torch.no_grad():
+        # TODO, use model.generate.
+        logits = llm.model(batched_inputs).logits
+
+    logits = torch.nn.functional.log_softmax(logits, dim=-1)
+    # Check if per-token argmax is exactly equal to continuation
+    greedy_tokens = logits.argmax(dim=-1)
+    logprobs = torch.gather(logits, 2, batched_inputs[:, 1:].unsqueeze(-1)).squeeze(-1)
+
+    return {
+        "greedy_tokens": greedy_tokens.detach().cpu().tolist(),
+        "logprobs": logprobs.detach().cpu().tolist(),
+        "batched_inputs": input.batched_inputs,
+    }
+
+
+if __name__ == "__main__":
+    opea_microservices["opea_service@self_hosted_hf"].start()

From 61ec1248a0efdf72ffd6830b58a61ab9177c371e Mon Sep 17 00:00:00 2001
From: "Lv, Kaokao"
Date: Thu, 30 May 2024 13:56:00 +0800
Subject: [PATCH 02/13] add requirements.
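The service added in PATCH 01 accepts pre-tokenized batches and returns per-position greedy tokens together with the log-probabilities of the supplied continuation. A minimal client sketch for review purposes (the localhost URL and the token IDs are assumptions for illustration, not part of the patch):

```
# Hypothetical client for the /v1/completions endpoint registered in PATCH 01.
# Assumes the service is reachable locally on the registered port 9006.
import requests

payload = {
    "batched_inputs": [[1, 15043, 3186]],  # placeholder token IDs from the model's own tokenizer
    "logprobs": 10,
    "max_tokens": 0,
    "temperature": 0.0,
}

resp = requests.post("http://localhost:9006/v1/completions", json=payload)
resp.raise_for_status()
out = resp.json()
# greedy_tokens[i] is the argmax prediction for position i + 1;
# logprobs[i] scores batched_inputs[i + 1] under the distribution at position i.
print(out["greedy_tokens"], out["logprobs"])
```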
---
 comps/llms/text-generation/lm-eval/requirements.txt | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 comps/llms/text-generation/lm-eval/requirements.txt

diff --git a/comps/llms/text-generation/lm-eval/requirements.txt b/comps/llms/text-generation/lm-eval/requirements.txt
new file mode 100644
index 000000000..0b9ec9eba
--- /dev/null
+++ b/comps/llms/text-generation/lm-eval/requirements.txt
@@ -0,0 +1,2 @@
+git+https://github.com/bigcode-project/bigcode-evaluation-harness.git@a1b4a7949a24c8e3ef0d05a01097b2d14ffba56e
+lm-eval==0.4.2

From f847357154631429a646aee1733e1384953ea93a Mon Sep 17 00:00:00 2001
From: "Lv, Kaokao"
Date: Thu, 30 May 2024 15:42:13 +0800
Subject: [PATCH 03/13] add dockerfile and readme.

---
 comps/llms/lm-eval/Dockerfile.cpu             | 26 +++++++++++++++++++
 comps/llms/lm-eval/Dockerfile.hpu             | 24 +++++++++++++++++
 comps/llms/lm-eval/README.md                  | 12 +++++++++
 comps/llms/lm-eval/requirements.txt           |  1 +
 .../lm-eval/self_hosted_hf.py                 |  0
 .../text-generation/lm-eval/requirements.txt  |  2 --
 6 files changed, 63 insertions(+), 2 deletions(-)
 create mode 100644 comps/llms/lm-eval/Dockerfile.cpu
 create mode 100644 comps/llms/lm-eval/Dockerfile.hpu
 create mode 100644 comps/llms/lm-eval/README.md
 create mode 100644 comps/llms/lm-eval/requirements.txt
 rename comps/llms/{text-generation => }/lm-eval/self_hosted_hf.py (100%)
 delete mode 100644 comps/llms/text-generation/lm-eval/requirements.txt

diff --git a/comps/llms/lm-eval/Dockerfile.cpu b/comps/llms/lm-eval/Dockerfile.cpu
new file mode 100644
index 000000000..72d6a555e
--- /dev/null
+++ b/comps/llms/lm-eval/Dockerfile.cpu
@@ -0,0 +1,26 @@
+ARG UBUNTU_VER=22.04
+FROM ubuntu:${UBUNTU_VER} as devel
+
+ARG REPO_COMPS=https://github.com/opea-project/GenAIComps.git
+ARG BRANCH=main
+ENV LANG C.UTF-8
+
+RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
+    aspell \
+    aspell-en \
+    build-essential \
+    python3 \
+    python3-pip \
+    python3-dev \
+    python3-distutils \
+    git \
+    vim \
+    wget
+
+RUN git clone --single-branch --branch=${BRANCH} ${REPO_COMPS} /home/user/GenAIComps/ && \
+    cd /home/user/GenAIComps/ && python3 setup.py install && \
+    pip install --no-cache-dir -r /home/user/GenAIComps/comps/llms/lm-eval/requirements.txt
+
+WORKDIR /home/user/GenAIComps/comps/llms/lm-eval/
+
+ENTRYPOINT ["python3", "self_hosted_hf.py"]
diff --git a/comps/llms/lm-eval/Dockerfile.hpu b/comps/llms/lm-eval/Dockerfile.hpu
new file mode 100644
index 000000000..ed147595c
--- /dev/null
+++ b/comps/llms/lm-eval/Dockerfile.hpu
@@ -0,0 +1,24 @@
+FROM vault.habana.ai/gaudi-docker/1.13.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.0:latest as hpu
+
+ENV LANG=en_US.UTF-8
+ENV PYTHONPATH=/root:/usr/lib/habanalabs/
+
+ARG REPO_COMPS=https://github.com/opea-project/GenAIComps.git
+ARG BRANCH=main
+
+RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
+    aspell \
+    aspell-en \
+    build-essential \
+    git \
+    vim \
+    wget
+
+RUN git clone --single-branch --branch=${BRANCH} ${REPO_COMPS} /home/user/GenAIComps/ && \
+    cd /home/user/GenAIComps/ && python setup.py install && \
+    pip install --no-cache-dir --upgrade-strategy eager optimum[habana] && \
+    pip install --no-cache-dir -r /home/user/GenAIComps/comps/llms/lm-eval/requirements.txt
+
+WORKDIR /home/user/GenAIComps/comps/llms/lm-eval/
+
+ENTRYPOINT ["python", "self_hosted_hf.py"]
diff --git a/comps/llms/lm-eval/README.md b/comps/llms/lm-eval/README.md
new file mode 100644
index 000000000..259daefeb
--- /dev/null
+++ b/comps/llms/lm-eval/README.md
@@ -0,0 +1,12 @@
+# LM-Eval Microservice
+
+This microservice, designed for [lm-eval](https://github.com/EleutherAI/lm-evaluation-harness).
+
+
+# CPU service
+
+```
+docker build -f Dockerfile.cpu -t comps:lm-eval .
+```
+
+
diff --git a/comps/llms/lm-eval/requirements.txt b/comps/llms/lm-eval/requirements.txt
new file mode 100644
index 000000000..e068a03a5
--- /dev/null
+++ b/comps/llms/lm-eval/requirements.txt
@@ -0,0 +1 @@
+git+https://github.com/opea-project/GenAIEval.git
diff --git a/comps/llms/text-generation/lm-eval/self_hosted_hf.py b/comps/llms/lm-eval/self_hosted_hf.py
similarity index 100%
rename from comps/llms/text-generation/lm-eval/self_hosted_hf.py
rename to comps/llms/lm-eval/self_hosted_hf.py
diff --git a/comps/llms/text-generation/lm-eval/requirements.txt b/comps/llms/text-generation/lm-eval/requirements.txt
deleted file mode 100644
index 0b9ec9eba..000000000
--- a/comps/llms/text-generation/lm-eval/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-git+https://github.com/bigcode-project/bigcode-evaluation-harness.git@a1b4a7949a24c8e3ef0d05a01097b2d14ffba56e
-lm-eval==0.4.2

From 7bc8ac0592e0150fb60926dec2731d7d306e4465 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 30 May 2024 07:42:34 +0000
Subject: [PATCH 04/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 comps/llms/lm-eval/README.md | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/comps/llms/lm-eval/README.md b/comps/llms/lm-eval/README.md
index 259daefeb..6f6f97fea 100644
--- a/comps/llms/lm-eval/README.md
+++ b/comps/llms/lm-eval/README.md
@@ -2,11 +2,8 @@
 
 This microservice, designed for [lm-eval](https://github.com/EleutherAI/lm-evaluation-harness).
 
-
 # CPU service
 
 ```
 docker build -f Dockerfile.cpu -t comps:lm-eval .
 ```
-
-

From 428c355df82493106464fda7aba3cc8d22e9cb2e Mon Sep 17 00:00:00 2001
From: "Lv, Kaokao"
Date: Thu, 30 May 2024 17:55:19 +0800
Subject: [PATCH 05/13] update requirements.

---
 comps/llms/lm-eval/requirements.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/comps/llms/lm-eval/requirements.txt b/comps/llms/lm-eval/requirements.txt
index e068a03a5..5bd1d9332 100644
--- a/comps/llms/lm-eval/requirements.txt
+++ b/comps/llms/lm-eval/requirements.txt
@@ -1 +1,3 @@
 git+https://github.com/opea-project/GenAIEval.git
+git+https://github.com/bigcode-project/bigcode-evaluation-harness.git@a1b4a7949a24c8e3ef0d05a01097b2d14ffba56e
+lm-eval==0.4.2

From efeece1eff363578afb0331f29bceea6deaef9fb Mon Sep 17 00:00:00 2001
From: "Lv, Kaokao"
Date: Thu, 30 May 2024 17:56:58 +0800
Subject: [PATCH 06/13] update requirements.

---
 comps/llms/lm-eval/README.md | 67 ++++++++++++++++++++++++++++++++++--
 1 file changed, 65 insertions(+), 2 deletions(-)

diff --git a/comps/llms/lm-eval/README.md b/comps/llms/lm-eval/README.md
index 259daefeb..be0017e8d 100644
--- a/comps/llms/lm-eval/README.md
+++ b/comps/llms/lm-eval/README.md
@@ -1,12 +1,75 @@
 # LM-Eval Microservice
 
-This microservice, designed for [lm-eval](https://github.com/EleutherAI/lm-evaluation-harness).
+This microservice, designed for [lm-eval](https://github.com/EleutherAI/lm-evaluation-harness), which can host a seperate llm server to evaluate `lm-eval` tasks.
 
 
-# CPU service
+## CPU service
 
+### build cpu docker
 ```
 docker build -f Dockerfile.cpu -t comps:lm-eval .
+
+```
+
+### start the server
+
+- set the environments `MODEL`, `MODEL_ARGS`, `DEVICE` and start the server
+
+```
+docker run -d -p 9006:9006 --ipc=host -e MODEL="hf" -e MODEL_ARGS="pretrained=Intel/neural-chat-7b-v3-3" -e DEVICE="cpu" comps:lm-eval
+```
+
+### evaluate the model
+
+- set `base_url` and `tokenizer`
+
+```
+git clone https://github.com/opea-project/GenAIEval
+cd GenAIEval
+pip install -e .
+
+cd GenAIEval/evaluation/lm_evaluation_harness/examples
+
+python main.py \
+  --model genai-hf \
+  --model_args "base_url=http://{your_ip}:9006,tokenizer=Intel/neural-chat-7b-v3-3" \
+  --tasks "lambada_openai" \
+  --batch_size 2
+
+```
+
+
+## Gaudi service
+
+### build Gaudi docker
+
+```
+docker build -f Dockerfile.hpu -t comps:lm-eval --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy .
+```
+
+### start the server
+
+- set the environments `MODEL`, `MODEL_ARGS`, `DEVICE` and start the server
+
+```
+docker run -d --runtime=habana -p 9006:9006 --ipc=host -e MODEL="hf" -e MODEL_ARGS="pretrained=Intel/neural-chat-7b-v3-3" -e DEVICE="hpu" comps:lm-eval
+```
+
+### evaluate the model
+
+- set `base_url` and `tokenizer`
+
+```
+git clone https://github.com/opea-project/GenAIEval
+cd GenAIEval
+pip install -e .
+
+cd GenAIEval/evaluation/lm_evaluation_harness/examples
+
+python main.py \
+  --model genai-hf \
+  --model_args "base_url=http://{your_ip}:9006,tokenizer=Intel/neural-chat-7b-v3-3" \
+  --tasks "lambada_openai" \
+  --batch_size 2
+
+```

From 8257ae615a48c08e56a4745a6e9048ded398258c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 30 May 2024 09:58:29 +0000
Subject: [PATCH 07/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 comps/llms/lm-eval/README.md        | 5 ++---
 comps/llms/lm-eval/requirements.txt | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/comps/llms/lm-eval/README.md b/comps/llms/lm-eval/README.md
index be0017e8d..60981a371 100644
--- a/comps/llms/lm-eval/README.md
+++ b/comps/llms/lm-eval/README.md
@@ -1,11 +1,11 @@
 # LM-Eval Microservice
 
-This microservice, designed for [lm-eval](https://github.com/EleutherAI/lm-evaluation-harness), which can host a seperate llm server to evaluate `lm-eval` tasks.
-
+This microservice, designed for [lm-eval](https://github.com/EleutherAI/lm-evaluation-harness), which can host a separate llm server to evaluate `lm-eval` tasks.
 
 ## CPU service
 
 ### build cpu docker
+
 ```
 docker build -f Dockerfile.cpu -t comps:lm-eval .
 
@@ -38,7 +38,6 @@ python main.py \
 
 ```
 
-
 ## Gaudi service
 
 ### build Gaudi docker
diff --git a/comps/llms/lm-eval/requirements.txt b/comps/llms/lm-eval/requirements.txt
index 5bd1d9332..4ad0b80e7 100644
--- a/comps/llms/lm-eval/requirements.txt
+++ b/comps/llms/lm-eval/requirements.txt
@@ -1,3 +1,3 @@
-git+https://github.com/opea-project/GenAIEval.git
 git+https://github.com/bigcode-project/bigcode-evaluation-harness.git@a1b4a7949a24c8e3ef0d05a01097b2d14ffba56e
+git+https://github.com/opea-project/GenAIEval.git
 lm-eval==0.4.2

From bc085753a0b68de3b8122572f080858c6e6b6879 Mon Sep 17 00:00:00 2001
From: "Lv, Kaokao"
Date: Thu, 30 May 2024 18:16:01 +0800
Subject: [PATCH 08/13] update requirements.
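This revision also pins `pydantic==2.7.2`, since the service's request schema (`LLMCompletionDoc`, a docarray `BaseDoc`) relies on pydantic v2 validation. A plain-pydantic stand-in, for illustration only (the class name and example values are hypothetical):

```
from typing import List

from pydantic import BaseModel


# Illustrative stand-in for LLMCompletionDoc; the real class derives from
# docarray's BaseDoc, which builds on the same pydantic validation machinery.
class CompletionRequest(BaseModel):
    batched_inputs: List[List[int]]
    logprobs: int = 10
    max_tokens: int = 0
    temperature: float = 0.0


req = CompletionRequest(batched_inputs=[[1, 2, 3]])
print(req.model_dump())  # model_dump() is the pydantic v2 API, hence the ==2.7.2 pin
```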
---
 comps/llms/lm-eval/Dockerfile.hpu   | 28 ++++++++++++++--------------
 comps/llms/lm-eval/requirements.txt |  1 +
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/comps/llms/lm-eval/Dockerfile.hpu b/comps/llms/lm-eval/Dockerfile.hpu
index ed147595c..38275c39b 100644
--- a/comps/llms/lm-eval/Dockerfile.hpu
+++ b/comps/llms/lm-eval/Dockerfile.hpu
@@ -2,23 +2,23 @@ FROM vault.habana.ai/gaudi-docker/1.13.0/ubuntu22.04/habanalabs/pytorch-installe
 
 ENV LANG=en_US.UTF-8
 ENV PYTHONPATH=/root:/usr/lib/habanalabs/
+ARG REPO_COMPS=https://github.com/lkk12014402/GenAIComps.git
+ARG BRANCH=enable_lm_eval_with_microservice
 
-ARG REPO_COMPS=https://github.com/opea-project/GenAIComps.git
-ARG BRANCH=main
+RUN apt-get update && \
+    apt-get install git git-lfs && \
+    git-lfs install
 
-RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
-    aspell \
-    aspell-en \
-    build-essential \
-    git \
-    vim \
-    wget
+# SHELL ["/bin/bash", "-c"]
+SHELL ["/bin/bash", "--login", "-c"]
 
-RUN git clone --single-branch --branch=${BRANCH} ${REPO_COMPS} /home/user/GenAIComps/ && \
-    cd /home/user/GenAIComps/ && python setup.py install && \
-    pip install --no-cache-dir --upgrade-strategy eager optimum[habana] && \
-    pip install --no-cache-dir -r /home/user/GenAIComps/comps/llms/lm-eval/requirements.txt
+RUN git clone --single-branch --branch=${BRANCH} ${REPO_COMPS} /GenAIComps/ && \
+    cd /GenAIComps/ && \
+    pip install --upgrade-strategy eager optimum[habana] && \
+    pip install -U urllib3 && \
+    pip install -r comps/llms/lm-eval/requirements.txt && \
+    python setup.py install
 
-WORKDIR /home/user/GenAIComps/comps/llms/lm-eval/
+WORKDIR /GenAIComps/comps/llms/lm-eval/
 
 ENTRYPOINT ["python", "self_hosted_hf.py"]
diff --git a/comps/llms/lm-eval/requirements.txt b/comps/llms/lm-eval/requirements.txt
index 5bd1d9332..bbbf8476b 100644
--- a/comps/llms/lm-eval/requirements.txt
+++ b/comps/llms/lm-eval/requirements.txt
@@ -1,3 +1,4 @@
 git+https://github.com/opea-project/GenAIEval.git
 git+https://github.com/bigcode-project/bigcode-evaluation-harness.git@a1b4a7949a24c8e3ef0d05a01097b2d14ffba56e
 lm-eval==0.4.2
+pydantic==2.7.2

From ad18ed786fe4404d9dd6b5f5c39740e77f41b223 Mon Sep 17 00:00:00 2001
From: "Lv, Kaokao"
Date: Thu, 30 May 2024 21:11:19 +0800
Subject: [PATCH 09/13] update requirements.
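For reviewers: the `MODEL_ARGS` environment variable consumed by the service is a comma-separated `key=value` string handed to lm-eval's `create_from_arg_string`. A simplified sketch of that convention (a hypothetical helper; the real parsing lives inside lm-eval and this is only an approximation):

```
# Approximation of how a MODEL_ARGS string becomes constructor kwargs.
def parse_arg_string(arg_string: str) -> dict:
    kwargs = {}
    for pair in arg_string.split(","):
        if not pair:
            continue  # tolerate trailing commas
        key, value = pair.split("=", 1)
        kwargs[key.strip()] = value.strip()
    return kwargs


print(parse_arg_string("pretrained=Intel/neural-chat-7b-v3-3,dtype=bfloat16"))
# -> {'pretrained': 'Intel/neural-chat-7b-v3-3', 'dtype': 'bfloat16'}
```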
---
 comps/llms/lm-eval/Dockerfile.hpu | 24 ----------------
 comps/llms/lm-eval/README.md      | 34 -------------------------
 2 files changed, 58 deletions(-)
 delete mode 100644 comps/llms/lm-eval/Dockerfile.hpu

diff --git a/comps/llms/lm-eval/Dockerfile.hpu b/comps/llms/lm-eval/Dockerfile.hpu
deleted file mode 100644
index 38275c39b..000000000
--- a/comps/llms/lm-eval/Dockerfile.hpu
+++ /dev/null
@@ -1,24 +0,0 @@
-FROM vault.habana.ai/gaudi-docker/1.13.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.0:latest as hpu
-
-ENV LANG=en_US.UTF-8
-ENV PYTHONPATH=/root:/usr/lib/habanalabs/
-ARG REPO_COMPS=https://github.com/lkk12014402/GenAIComps.git
-ARG BRANCH=enable_lm_eval_with_microservice
-
-RUN apt-get update && \
-    apt-get install git git-lfs && \
-    git-lfs install
-
-# SHELL ["/bin/bash", "-c"]
-SHELL ["/bin/bash", "--login", "-c"]
-
-RUN git clone --single-branch --branch=${BRANCH} ${REPO_COMPS} /GenAIComps/ && \
-    cd /GenAIComps/ && \
-    pip install --upgrade-strategy eager optimum[habana] && \
-    pip install -U urllib3 && \
-    pip install -r comps/llms/lm-eval/requirements.txt && \
-    python setup.py install
-
-WORKDIR /GenAIComps/comps/llms/lm-eval/
-
-ENTRYPOINT ["python", "self_hosted_hf.py"]
diff --git a/comps/llms/lm-eval/README.md b/comps/llms/lm-eval/README.md
index 60981a371..a6750236e 100644
--- a/comps/llms/lm-eval/README.md
+++ b/comps/llms/lm-eval/README.md
@@ -38,37 +38,3 @@ python main.py \
   --batch_size 2
 
 ```
-## Gaudi service
-
-### build Gaudi docker
-
-```
-docker build -f Dockerfile.hpu -t comps:lm-eval --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy .
-```
-
-### start the server
-
-- set the environments `MODEL`, `MODEL_ARGS`, `DEVICE` and start the server
-
-```
-docker run -d --runtime=habana -p 9006:9006 --ipc=host -e MODEL="hf" -e MODEL_ARGS="pretrained=Intel/neural-chat-7b-v3-3" -e DEVICE="hpu" comps:lm-eval
-```
-
-### evaluate the model
-
-- set `base_url` and `tokenizer`
-
-```
-git clone https://github.com/opea-project/GenAIEval
-cd GenAIEval
-pip install -e .
-
-cd GenAIEval/evaluation/lm_evaluation_harness/examples
-
-python main.py \
-  --model genai-hf \
-  --model_args "base_url=http://{your_ip}:9006,tokenizer=Intel/neural-chat-7b-v3-3" \
-  --tasks "lambada_openai" \
-  --batch_size 2
-
-```

From 34f72192c62e83edc572b3176f8b3de0feb036c8 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 30 May 2024 13:11:51 +0000
Subject: [PATCH 10/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 comps/llms/lm-eval/README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/comps/llms/lm-eval/README.md b/comps/llms/lm-eval/README.md
index a6750236e..bc55bd27e 100644
--- a/comps/llms/lm-eval/README.md
+++ b/comps/llms/lm-eval/README.md
@@ -37,4 +37,3 @@ python main.py \
   --batch_size 2
 
 ```
-

From 023ef04830b5e91017316ba17c29fe2ed927a2c1 Mon Sep 17 00:00:00 2001
From: "Lv, Kaokao"
Date: Thu, 30 May 2024 21:15:50 +0800
Subject: [PATCH 11/13] update requirements.
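The diff below swaps the raw `llm.model(...).logits` call for lm-eval's `_model_call` wrapper; the logprob extraction around it is unchanged. A standalone sketch of that extraction, with random tensors standing in for real model output:

```
import torch

# logits: [batch, seq_len, vocab] log-probabilities; tokens: [batch, seq_len] input ids.
logits = torch.randn(2, 5, 100).log_softmax(dim=-1)
tokens = torch.randint(0, 100, (2, 5))

# Argmax prediction at each position (position i predicts token i + 1).
greedy_tokens = logits.argmax(dim=-1)

# Gather the log-probability of each actual next token: the index of shape
# [batch, seq_len - 1, 1] selects along the vocab dimension.
logprobs = torch.gather(logits, 2, tokens[:, 1:].unsqueeze(-1)).squeeze(-1)

print(greedy_tokens.shape, logprobs.shape)  # [2, 5] and [2, 4]
```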
---
 comps/llms/lm-eval/self_hosted_hf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comps/llms/lm-eval/self_hosted_hf.py b/comps/llms/lm-eval/self_hosted_hf.py
index 02fa69dd0..d5ba45775 100644
--- a/comps/llms/lm-eval/self_hosted_hf.py
+++ b/comps/llms/lm-eval/self_hosted_hf.py
@@ -61,7 +61,7 @@ def llm_generate(input: LLMCompletionDoc):
     batched_inputs = torch.tensor(input.batched_inputs, dtype=torch.long, device=llm.device)
     with torch.no_grad():
         # TODO, use model.generate.
-        logits = llm.model(batched_inputs).logits
+        logits = llm._model_call(batched_inputs)
 
     logits = torch.nn.functional.log_softmax(logits, dim=-1)
     # Check if per-token argmax is exactly equal to continuation

From ec8886227694eddd14830b07b18a580ea75e965f Mon Sep 17 00:00:00 2001
From: "Lv, Kaokao"
Date: Thu, 30 May 2024 21:28:34 +0800
Subject: [PATCH 12/13] update requirements.

---
 comps/llms/lm-eval/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comps/llms/lm-eval/README.md b/comps/llms/lm-eval/README.md
index bc55bd27e..d30b6fde4 100644
--- a/comps/llms/lm-eval/README.md
+++ b/comps/llms/lm-eval/README.md
@@ -16,7 +16,7 @@ docker build -f Dockerfile.cpu -t comps:lm-eval .
 - set the environments `MODEL`, `MODEL_ARGS`, `DEVICE` and start the server
 
 ```
-docker run -d -p 9006:9006 --ipc=host -e MODEL="hf" -e MODEL_ARGS="pretrained=Intel/neural-chat-7b-v3-3" -e DEVICE="cpu" comps:lm-eval
+docker run -p 9006:9006 --ipc=host -e MODEL="hf" -e MODEL_ARGS="pretrained=Intel/neural-chat-7b-v3-3" -e DEVICE="cpu" comps:lm-eval
 ```
 
 ### evaluate the model

From 87f7ef365f20bb4a1f20c787ed653886069179a2 Mon Sep 17 00:00:00 2001
From: "Lv, Kaokao"
Date: Fri, 31 May 2024 00:24:21 +0800
Subject: [PATCH 13/13] update docker image tag.

---
 comps/llms/lm-eval/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/comps/llms/lm-eval/README.md b/comps/llms/lm-eval/README.md
index d30b6fde4..f0097f3b6 100644
--- a/comps/llms/lm-eval/README.md
+++ b/comps/llms/lm-eval/README.md
@@ -7,7 +7,7 @@ This microservice, designed for [lm-eval](https://github.com/EleutherAI/lm-evalu
 ### build cpu docker
 
 ```
-docker build -f Dockerfile.cpu -t comps:lm-eval .
+docker build -f Dockerfile.cpu -t opea/lm-eval:latest .
 ```
 
@@ -16,7 +16,7 @@
 - set the environments `MODEL`, `MODEL_ARGS`, `DEVICE` and start the server
 
 ```
-docker run -p 9006:9006 --ipc=host -e MODEL="hf" -e MODEL_ARGS="pretrained=Intel/neural-chat-7b-v3-3" -e DEVICE="cpu" comps:lm-eval
+docker run -p 9006:9006 --ipc=host -e MODEL="hf" -e MODEL_ARGS="pretrained=Intel/neural-chat-7b-v3-3" -e DEVICE="cpu" opea/lm-eval:latest
 ```
 
 ### evaluate the model
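Taken together, the series leaves a CPU-only lm-eval microservice published as `opea/lm-eval:latest`. For completeness, this is roughly how a harness-style client turns the service response into lm-eval's `(loglikelihood, is_greedy)` pair for one request; a sketch under the assumption that the first `ctx_len` tokens are context, mirroring lm-eval's loglikelihood convention rather than an API this series adds:

```
def score_continuation(response: dict, ctx_len: int, batch_index: int = 0):
    """Hypothetical helper: reduce one service response to (loglikelihood, is_greedy)."""
    inputs = response["batched_inputs"][batch_index]
    greedy = response["greedy_tokens"][batch_index]
    logprobs = response["logprobs"][batch_index]

    # logprobs[i] scores inputs[i + 1], so the continuation tokens
    # inputs[ctx_len:] are scored by logprobs[ctx_len - 1:].
    loglikelihood = sum(logprobs[ctx_len - 1 :])

    # greedy[i] is the argmax prediction for position i + 1; the continuation
    # counts as greedy iff every predicted token matches the actual one.
    is_greedy = greedy[ctx_len - 1 : len(inputs) - 1] == inputs[ctx_len:]
    return loglikelihood, is_greedy


# Example, reusing a response from the /v1/completions endpoint:
# ll, exact = score_continuation(resp.json(), ctx_len=2)
```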