Skip to content

Commit

Permalink
Added GPU docker image (#7)
Browse files Browse the repository at this point in the history
  • Loading branch information
NeonBohdan authored Aug 16, 2023
2 parents d5da0b2 + 42aa974 commit 951e35a
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 4 deletions.
9 changes: 8 additions & 1 deletion .github/workflows/publish_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,12 @@ jobs:
tag: ${{env.VERSION}}

# Builds and publishes the default (CPU) docker image via the shared
# reusable workflow.
# NOTE(review): pinned to branch FEAT_SupportDockerfileSpec for the
# `dockerfile` input — repoint to @master once that branch is merged.
build_and_publish_docker:
  uses: neongeckocom/.github/.github/workflows/publish_docker.yml@FEAT_SupportDockerfileSpec
  secrets: inherit

# GPU image build. The job ID must differ from the job above: duplicate
# YAML keys are silently collapsed (last one wins), which would prevent
# the default image from ever being built.
build_and_publish_docker_gpu:
  uses: neongeckocom/.github/.github/workflows/publish_docker.yml@FEAT_SupportDockerfileSpec
  secrets: inherit
  with:
    image_name: ${{ github.repository }}-gpu
    dockerfile: ./Dockerfile.gpu
11 changes: 9 additions & 2 deletions .github/workflows/publish_test_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,12 @@ jobs:
publish_pypi: false
# Builds and publishes the default (CPU) test docker image.
# NOTE(review): pinned to branch FEAT_SupportDockerfileSpec for the
# `dockerfile` input — repoint to @master once that branch is merged.
build_and_publish_docker:
  needs: publish_alpha_release
  uses: neongeckocom/.github/.github/workflows/publish_docker.yml@FEAT_SupportDockerfileSpec
  secrets: inherit

# GPU test image build. The job ID must differ from the job above:
# duplicate YAML keys are silently collapsed (last one wins), which
# would prevent the default image from ever being built.
build_and_publish_docker_gpu:
  needs: publish_alpha_release
  uses: neongeckocom/.github/.github/workflows/publish_docker.yml@FEAT_SupportDockerfileSpec
  secrets: inherit
  with:
    image_name: ${{ github.repository }}-gpu
    dockerfile: ./Dockerfile.gpu
15 changes: 15 additions & 0 deletions Dockerfile.gpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# GPU variant of the service image: CUDA-enabled PyTorch base so
# ctranslate2 can run on an NVIDIA GPU inside the container.
FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-devel

LABEL vendor=neon.ai \
      ai.neon.name="neon-llm-fastchat"

# Configuration is read from /config/neon/diana.yaml (mounted at runtime).
# KEY=value form: the space-separated `ENV KEY value` form is legacy and
# deprecated by Docker.
ENV OVOS_CONFIG_BASE_FOLDER=neon \
    OVOS_CONFIG_FILENAME=diana.yaml \
    XDG_CONFIG_HOME=/config

# Overlay any default config/files onto the image root
COPY docker_overlay/ /

WORKDIR /app
COPY . /app
RUN pip install /app

CMD [ "neon-llm-fastchat" ]
21 changes: 20 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,23 @@ For example, if your configuration resides in `~/.config`:
export CONFIG_PATH="/home/${USER}/.config"
docker run -v ${CONFIG_PATH}:/config neon_llm_fastchat
```
> Note: If connecting to a local MQ server, you may need to specify `--network host`
### GPU
System setup
```
# Nvidia Docker
sudo apt install curl
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit
sudo systemctl restart docker
```

Run docker
```shell
export CONFIG_PATH="/home/${USER}/.config"
docker run --gpus device=0 -v ${CONFIG_PATH}:/config neon_llm_fastchat
```
6 changes: 6 additions & 0 deletions neon_llm_fastchat/fastchat.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class FastChat(NeonLLM):

def __init__(self, config):
super().__init__(config)
self.warmup()
self.context_depth = config["context_depth"]
self.max_tokens = config["max_tokens"]
self.num_parallel_processes = config["num_parallel_processes"]
Expand All @@ -61,6 +62,7 @@ def model(self) -> ctranslate2.Translator:
if self._model is None:
repo_path = snapshot_download(repo_id=self.llm_model_name)
self._model = ctranslate2.Translator(model_path=repo_path,
device="auto",
intra_threads=self.num_threads_per_process,
inter_threads=self.num_parallel_processes)
return self._model
Expand All @@ -79,6 +81,10 @@ def _system_prompt(self) -> str:
"geothermal, and biomass. Non-renewable energy sources, on the other hand, " \
"are finite and will eventually be depleted, such as coal, oil, and natural gas.\n"

def warmup(self):
self.tokenizer
self.model

def get_sorted_answer_indexes(self, question: str, answers: List[str]) -> List[int]:
"""
Creates sorted list of answer indexes with respect to order provided in :param answers based on PPL score
Expand Down

0 comments on commit 951e35a

Please sign in to comment.