From a87f6a4ef5838a2b981f027c60620df35b303d2f Mon Sep 17 00:00:00 2001
From: Sachin Shetty <26170834+sachinsshetty@users.noreply.github.com>
Date: Sat, 30 Mar 2024 14:42:46 +0100
Subject: [PATCH] Draft - vLLM setup for agents

---
 README.md                                 |  2 ++
 docs/vllm.md                              | 14 ++++++++++++-
 src/autogen/prompts/agent-llm-config.md   | 20 +++++++++++++++++++
 src/autogen/{ => prompts}/prompt-coder.md |  0
 src/vllm/docker-compose.yml               | 24 +++++++++++++++++++++++
 5 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 src/autogen/prompts/agent-llm-config.md
 rename src/autogen/{ => prompts}/prompt-coder.md (100%)
 create mode 100644 src/vllm/docker-compose.yml

diff --git a/README.md b/README.md
index b441896..eaf482f 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,8 @@ LLM Recipes
 Usage of LLM for Everyday use - v1

+  - Agents : autogen + vllm + gemma
+    - [vLLM setup](https://github.com/slabstech/llm-recipes/blob/main/docs/vllm.md)
   - Agents : autogen + ollama + gemma
     - Setup + Documentation at [docs/2024/agent-code.md](https://github.com/slabstech/llm-recipes/blob/main/docs/2024/agent-code.md)
     - Code examples at [src/autogen](https://github.com/slabstech/llm-recipes/tree/main/src/autogen)
diff --git a/docs/vllm.md b/docs/vllm.md
index f69399f..17a1bbf 100644
--- a/docs/vllm.md
+++ b/docs/vllm.md
@@ -2,6 +2,12 @@ Setup with Vllm
 - Creat account in huggingface > Profile > AccessToken > create new user Access token

+-- Docker Compose
+  - [Compose](https://github.com/slabstech/llm-recipes/blob/main/src/vllm/docker-compose.yml)
+  - Replace the args:
+    - Hugging Face token
+    - Model and --tensor-parallel-size (must match your GPU count)
+-- Docker Setup
 docker run --gpus all \
 -e HF_TOKEN=$HF_TOKEN -p 8000:8000 \
@@ -16,4 +22,10 @@ curl --location 'http://IP:Port/v1/chat/completions' \
 "messages": [
 {"role": "user", "content": "what minimun materials are necessary to build a Seed harvesting robot, show me how to arrange the parts"}
 ]
-    }'
\ No newline at end of file
+    }'
+
+--
+
+References
+  - WSL + Docker + NVIDIA runtime
+  - https://docs.nvidia.com/cuda/wsl-user-guide/index.html#running-cuda
\ No newline at end of file
diff --git a/src/autogen/prompts/agent-llm-config.md b/src/autogen/prompts/agent-llm-config.md
new file mode 100644
index 0000000..ad74a87
--- /dev/null
+++ b/src/autogen/prompts/agent-llm-config.md
@@ -0,0 +1,20 @@
+Configs for agents
+
+- Name
+- Executor : True/False
+- Token_limit : Long
+- Timeout :
+- Level : Low/Medium/High
+- llm_config :
+
+Choose the LLM for an agent's llm_config based on its Level
+- Low : gemma:2b
+- Medium : mistral:7b
+- High : mixtral
+
+Test out the examples
+  - Build out the mind map for autogen features
+  - See how your robotic agents use the configs
+  - Build a benchmark to solve LeetCode problems
+  - Hyperparameter search - for the best models
+
diff --git a/src/autogen/prompt-coder.md b/src/autogen/prompts/prompt-coder.md
similarity index 100%
rename from src/autogen/prompt-coder.md
rename to src/autogen/prompts/prompt-coder.md
diff --git a/src/vllm/docker-compose.yml b/src/vllm/docker-compose.yml
new file mode 100644
index 0000000..d1af7b0
--- /dev/null
+++ b/src/vllm/docker-compose.yml
@@ -0,0 +1,24 @@
+version: '3.7'
+services:
+  vllm:
+    image: vllm/vllm-openai:latest
+    runtime: nvidia
+    environment:
+      - TRANSFORMERS_OFFLINE=1  # offline mode: the model must already be in the mounted cache
+      - HF_DATASETS_OFFLINE=1
+      - HUGGING_FACE_HUB_TOKEN=  # replace with your Hugging Face token
+      - HUGGINGFACE_HUB_CACHE=/models
+    volumes:
+      - "./models:/models"
+      - ~/.cache/huggingface:/root/.cache/huggingface
+    ports:
+      - 8000:8000
+    command: --model google/gemma-2b --tensor-parallel-size 2
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 2  # one GPU per tensor-parallel rank; must match --tensor-parallel-size
+              capabilities: [gpu]
+
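
--

Notes

With the token filled in, `docker compose up` from src/vllm starts an OpenAI-compatible server on port 8000. The curl call in docs/vllm.md can then also be driven from Python; this is a minimal sketch (not part of the diff above), assuming the `openai` client package (v1+) and a server on localhost:

```python
# Query the vLLM OpenAI-compatible server started by the compose file.
# Assumes `pip install openai` and the server listening on localhost:8000.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="dummy")  # vLLM accepts any key by default

response = client.chat.completions.create(
    model="google/gemma-2b",
    messages=[{"role": "user", "content": "What minimum materials are necessary to build a seed-harvesting robot?"}],
)
print(response.choices[0].message.content)
```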
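agent-llm-config.md maps agent Levels to models; one way to wire that into an agent is a helper that builds llm_config from the Level. This is a hypothetical sketch: the config_list format is pyautogen's, the endpoint and the helper name llm_config_for are illustrative, and the model names are taken from the file above:

```python
# Hypothetical helper: pick an agent's LLM from its Level (see agent-llm-config.md).
LEVEL_TO_MODEL = {"low": "gemma:2b", "medium": "mistral:7b", "high": "mixtral"}

def llm_config_for(level: str, timeout: int = 120) -> dict:
    """Build a pyautogen llm_config for an agent of the given Level."""
    return {
        "config_list": [{
            "model": LEVEL_TO_MODEL[level],          # must match a model the server actually serves
            "base_url": "http://localhost:8000/v1",  # vLLM (or ollama's OpenAI-compatible) endpoint
            "api_key": "dummy",
        }],
        "timeout": timeout,
    }

# Example usage with autogen:
#   import autogen
#   coder = autogen.AssistantAgent(name="coder", llm_config=llm_config_for("medium"))
```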