From a87f6a4ef5838a2b981f027c60620df35b303d2f Mon Sep 17 00:00:00 2001
From: Sachin Shetty <26170834+sachinsshetty@users.noreply.github.com>
Date: Sat, 30 Mar 2024 14:42:46 +0100
Subject: [PATCH] Draft - vLLM setup for agents

---
 README.md                                 |  2 ++
 docs/vllm.md                              | 14 ++++++++++++-
 src/autogen/prompts/agent-llm-config.md   | 20 +++++++++++++++++++
 src/autogen/{ => prompts}/prompt-coder.md |  0
 src/vllm/docker-compose.yml               | 24 +++++++++++++++++++++++
 5 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 src/autogen/prompts/agent-llm-config.md
 rename src/autogen/{ => prompts}/prompt-coder.md (100%)
 create mode 100644 src/vllm/docker-compose.yml

diff --git a/README.md b/README.md
index b441896..eaf482f 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,8 @@ LLM Recipes
 Usage of LLM for Everyday use - v1

+  - Agents : autogen + vllm + gemma
+    - [vLLM setup](https://github.com/slabstech/llm-recipes/blob/main/docs/vllm.md)
   - Agents : autogen + ollama + gemma
     - Setup + Documentation at [docs/2024/agent-code.md](https://github.com/slabstech/llm-recipes/blob/main/docs/2024/agent-code.md)
     - Code examples at [src/autogen](https://github.com/slabstech/llm-recipes/tree/main/src/autogen)
diff --git a/docs/vllm.md b/docs/vllm.md
index f69399f..17a1bbf 100644
--- a/docs/vllm.md
+++ b/docs/vllm.md
@@ -2,6 +2,12 @@ Setup with Vllm
 - Creat account in huggingface > Profile > AccessToken > create new user Access token

+-- Docker Compose
+  - [Compose](https://github.com/slabstech/llm-recipes/blob/main/src/vllm/docker-compose.yml)
+  - Replace the args:
+    - Hugging Face token
+    - Model and --tensor-parallel-size (must match your GPU count)
+-- Docker Setup
 docker run --gpus all \
 -e HF_TOKEN=$HF_TOKEN -p 8000:8000 \
@@ -16,4 +22,10 @@ curl --location 'http://IP:Port/v1/chat/completions' \
 "messages": [
 {"role": "user", "content": "what minimun materials are necessary to build a Seed harvesting robot, show me how to arrange the parts"}
 ]
-    }'
\ No newline at end of file
+    }'
+
+--
+
+References
+  - WSL + Docker + NVIDIA runtime
+  - https://docs.nvidia.com/cuda/wsl-user-guide/index.html#running-cuda
\ No newline at end of file
diff --git a/src/autogen/prompts/agent-llm-config.md b/src/autogen/prompts/agent-llm-config.md
new file mode 100644
index 0000000..ad74a87
--- /dev/null
+++ b/src/autogen/prompts/agent-llm-config.md
@@ -0,0 +1,20 @@
+Configs for agents
+
+- Name
+- Executor : True/False
+- Token_limit : Long
+- Timeout :
+- Level : Low/Medium/High
+- llm_config :
+
+Choose the LLM for an agent's llm_config based on its Level
+- Low : gemma:2b
+- Medium : mistral:7b
+- High : mixtral
+
+Test out the examples
+  - Build out the mind map for autogen features
+  - See how your robotic agents use the configs
+  - Build a benchmark to solve LeetCode problems
+  - Hyperparameter search - for the best models
+
diff --git a/src/autogen/prompt-coder.md b/src/autogen/prompts/prompt-coder.md
similarity index 100%
rename from src/autogen/prompt-coder.md
rename to src/autogen/prompts/prompt-coder.md
diff --git a/src/vllm/docker-compose.yml b/src/vllm/docker-compose.yml
new file mode 100644
index 0000000..d1af7b0
--- /dev/null
+++ b/src/vllm/docker-compose.yml
@@ -0,0 +1,24 @@
+version: '3.7'
+services:
+  vllm:
+    image: vllm/vllm-openai:latest
+    runtime: nvidia
+    environment:
+      - TRANSFORMERS_OFFLINE=1  # offline mode: the model must already be in the mounted cache
+      - HF_DATASETS_OFFLINE=1
+      - HUGGING_FACE_HUB_TOKEN=  # replace with your Hugging Face token
+      - HUGGINGFACE_HUB_CACHE=/models
+    volumes:
+      - "./models:/models"
+      - ~/.cache/huggingface:/root/.cache/huggingface
+    ports:
+      - 8000:8000
+    command: --model google/gemma-2b --tensor-parallel-size 2
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 2  # one GPU per tensor-parallel rank; must match --tensor-parallel-size
+              capabilities: [gpu]
+
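
--

Notes

With the token filled in, `docker compose up` from src/vllm starts an OpenAI-compatible server on port 8000. The curl call in docs/vllm.md can then also be driven from Python; this is a minimal sketch (not part of the diff above), assuming the `openai` client package (v1+) and a server on localhost:

```python
# Query the vLLM OpenAI-compatible server started by the compose file.
# Assumes `pip install openai` and the server listening on localhost:8000.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="dummy")  # vLLM accepts any key by default

response = client.chat.completions.create(
    model="google/gemma-2b",
    messages=[{"role": "user", "content": "What minimum materials are necessary to build a seed-harvesting robot?"}],
)
print(response.choices[0].message.content)
```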
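agent-llm-config.md maps agent Levels to models; one way to wire that into an agent is a helper that builds llm_config from the Level. This is a hypothetical sketch: the config_list format is pyautogen's, the endpoint and the helper name llm_config_for are illustrative, and the model names are taken from the file above:

```python
# Hypothetical helper: pick an agent's LLM from its Level (see agent-llm-config.md).
LEVEL_TO_MODEL = {"low": "gemma:2b", "medium": "mistral:7b", "high": "mixtral"}

def llm_config_for(level: str, timeout: int = 120) -> dict:
    """Build a pyautogen llm_config for an agent of the given Level."""
    return {
        "config_list": [{
            "model": LEVEL_TO_MODEL[level],          # must match a model the server actually serves
            "base_url": "http://localhost:8000/v1",  # vLLM (or ollama's OpenAI-compatible) endpoint
            "api_key": "dummy",
        }],
        "timeout": timeout,
    }

# Example usage with autogen:
#   import autogen
#   coder = autogen.AssistantAgent(name="coder", llm_config=llm_config_for("medium"))
```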