diff --git a/README.md b/README.md
index 78061ecce..a79477c91 100644
--- a/README.md
+++ b/README.md
@@ -205,10 +205,14 @@ https://github.com/OpenBMB/AgentVerse/assets/11704492/4d07da68-f942-4205-b558-f1
   - [Simulation](#simulation)
     - [Framework Required Modules](#framework-required-modules)
     - [CLI Example](#cli-example)
-    - [GUI Example (Local)](#gui-example-local)
+    - [GUI Example](#gui-example)
   - [Task-Solving](#task-solving)
     - [Framework Required Modules](#framework-required-modules-1)
     - [CLI Example](#cli-example-1)
+  - [Local Model Support](#local-model-support)
+    - [1. Install the Additional Dependencies](#1-install-the-additional-dependencies)
+    - [2. Launch the Local Server](#2-launch-the-local-server)
+    - [3. Modify the Config File](#3-modify-the-config-file)
 - [AgentVerse Showcases](#agentverse-showcases)
   - [Simulation Showcases](#simulation-showcases)
   - [Task-Solving Showcases](#task-solving-showcases)
@@ -282,7 +286,7 @@ You can create a multi-agent environments provided by us. Using the classroom sc
 agentverse-simulation --task simulation/nlp_classroom_9players
 ```
 
-### GUI Example (Local)
+### GUI Example
 
 We also provide a local website demo for this environment. You can launch it with
 
@@ -338,6 +342,33 @@ We have provided more tasks in `agentverse/tasks/tasksolving/tool_using/` that s
 
 Also, you can take a look at `agentverse/tasks/tasksolving` for more experiments we have done in our paper.
 
+## Local Model Support
+### 1. Install the Additional Dependencies
+If you want to use local models such as LLaMA, you need to install some additional dependencies:
+```bash
+pip install -r requirements_local.txt
+```
+
+### 2. Launch the Local Server
+Then modify `MODEL_PATH` and `MODEL_NAME` in the script as needed, and launch the local server with the following command:
+```bash
+bash scripts/run_local_model_server.sh
+```
+By default, the script launches a service for the Llama 2 7B chat model.
+The `MODEL_NAME` values currently supported by AgentVerse include `llama-2-7b-chat-hf`, `llama-2-13b-chat-hf`, `llama-2-70b-chat-hf`, `vicuna-7b-v1.5`, and `vicuna-13b-v1.5`. If you wish to integrate additional models that are [compatible with FastChat](https://github.com/lm-sys/FastChat/blob/main/docs/model_support.md), you need to:
+1. Add the new `MODEL_NAME` to `LOCAL_LLMS` in `agentverse/llms/__init__.py`.
+2. Add a mapping from the new `MODEL_NAME` to its corresponding Hugging Face identifier in `LOCAL_LLMS_MAPPING`, also in `agentverse/llms/__init__.py`.
+
+### 3. Modify the Config File
+In your config file, set `llm_type` to `local` and `model` to the `MODEL_NAME`. For example:
+```yaml
+llm:
+  llm_type: local
+  model: llama-2-7b-chat-hf
+  ...
+```
+
+You can refer to `agentverse/tasks/tasksolving/commongen/llama-2-7b-chat-hf/config.yaml` for a more detailed example.
 # AgentVerse Showcases
 
diff --git a/agentverse/llms/__init__.py b/agentverse/llms/__init__.py
index 0902d3116..b623e6234 100644
--- a/agentverse/llms/__init__.py
+++ b/agentverse/llms/__init__.py
@@ -1,7 +1,20 @@
 from agentverse.registry import Registry
 
 llm_registry = Registry(name="LLMRegistry")
-LOCAL_LLMS = ["llama-2-7b-chat-hf"]
+LOCAL_LLMS = [
+    "llama-2-7b-chat-hf",
+    "llama-2-13b-chat-hf",
+    "llama-2-70b-chat-hf",
+    "vicuna-7b-v1.5",
+    "vicuna-13b-v1.5",
+]
+LOCAL_LLMS_MAPPING = {
+    "llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf",
+    "llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf",
+    "llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf",
+    "vicuna-7b-v1.5": "lmsys/vicuna-7b-v1.5",
+    "vicuna-13b-v1.5": "lmsys/vicuna-13b-v1.5",
+}
 
 from .base import BaseLLM, BaseChatModel, BaseCompletionModel, LLMResult
 from .openai import OpenAIChat
diff --git a/agentverse/llms/openai.py b/agentverse/llms/openai.py
index 72669fc59..700c826b5 100644
--- a/agentverse/llms/openai.py
+++ b/agentverse/llms/openai.py
@@ -97,7 +97,7 @@ class OpenAIChatArgs(BaseModelArgs):
 
 @llm_registry.register("gpt-35-turbo")
 @llm_registry.register("gpt-3.5-turbo")
 @llm_registry.register("gpt-4")
-@llm_registry.register("llama-2-7b-chat-hf")
+@llm_registry.register("local")
 class OpenAIChat(BaseChatModel):
     args: OpenAIChatArgs = Field(default_factory=OpenAIChatArgs)
diff --git a/agentverse/llms/utils/token_counter.py b/agentverse/llms/utils/token_counter.py
index 8c35b16a3..b594011b7 100644
--- a/agentverse/llms/utils/token_counter.py
+++ b/agentverse/llms/utils/token_counter.py
@@ -4,7 +4,7 @@ from typing import List, Union, Dict
 
 from agentverse.logging import logger
 from agentverse.message import Message
-from agentverse.llms import LOCAL_LLMS
+from agentverse.llms import LOCAL_LLMS, LOCAL_LLMS_MAPPING
 
 
 def count_string_tokens(prompt: str = "", model: str = "gpt-3.5-turbo") -> int:
@@ -27,9 +27,10 @@ def count_message_tokens(
         tokens_per_message = 3
         tokens_per_name = 1
         encoding_model = "gpt-4"
-    elif model in LOCAL_LLMS:
+    elif model.lower() in LOCAL_LLMS or model in LOCAL_LLMS:
         from transformers import AutoTokenizer
-        encoding = AutoTokenizer.from_pretrained(model)
+
+        encoding = AutoTokenizer.from_pretrained(LOCAL_LLMS_MAPPING[model.lower()])
     else:
         raise NotImplementedError(
             f"count_message_tokens() is not implemented for model {model}.\n"
diff --git a/agentverse/tasks/tasksolving/commongen/llama-2-7b-chat-hf/config.yaml b/agentverse/tasks/tasksolving/commongen/llama-2-7b-chat-hf/config.yaml
index 8514b1004..5e3ff2ac5 100644
--- a/agentverse/tasks/tasksolving/commongen/llama-2-7b-chat-hf/config.yaml
+++ b/agentverse/tasks/tasksolving/commongen/llama-2-7b-chat-hf/config.yaml
@@ -96,8 +96,8 @@ agents:
     memory:
       memory_type: chat_history
     llm:
-      llm_type: llama-2-7b-chat-hf
-      model: "llama-2-7b-chat-hf"
+      llm_type: local
+      model: llama-2-7b-chat-hf
       temperature: 0
       max_tokens: 512
     output_parser:
@@ -113,8 +113,8 @@ agents:
     memory:
       memory_type: chat_history
     llm:
-      llm_type: llama-2-7b-chat-hf
-      model: "llama-2-7b-chat-hf"
+      llm_type: local
+      model: llama-2-7b-chat-hf
       temperature: 0
       max_tokens: 1024
     output_parser:
@@ -138,8 +138,8 @@ agents:
     memory:
       memory_type: chat_history
     llm:
-      llm_type: llama-2-7b-chat-hf
-      model: "llama-2-7b-chat-hf"
+      llm_type: local
+      model: llama-2-7b-chat-hf
       temperature: 0
       max_tokens: 1024
     output_parser:
@@ -154,7 +154,7 @@ agents:
     memory:
       memory_type: chat_history
     llm:
-      llm_type: llama-2-7b-chat-hf
+      llm_type: local
       model: llama-2-7b-chat-hf
       temperature: 0
       max_tokens: 1024
@@ -172,7 +172,7 @@ agents:
     memory:
       memory_type: chat_history
     llm:
-      llm_type: llama-2-7b-chat-hf
+      llm_type: local
       model: llama-2-7b-chat-hf
       temperature: 0.3
       max_tokens: 1024
@@ -189,8 +189,8 @@ agents:
     memory:
       memory_type: chat_history
     llm:
-      llm_type: llama-2-7b-chat-hf
-      model: "llama-2-7b-chat-hf"
+      llm_type: local
+      model: llama-2-7b-chat-hf
       temperature: 0
       max_tokens: 1024
     output_parser:
diff --git a/scripts/run_local_model_server.sh b/scripts/run_local_model_server.sh
index 0d16fb901..2ea115506 100644
--- a/scripts/run_local_model_server.sh
+++ b/scripts/run_local_model_server.sh
@@ -1,9 +1,8 @@
-:<
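For a quick end-to-end check of the local-model path described in the README section above, the sketch below first resolves a short `MODEL_NAME` through the new `LOCAL_LLMS_MAPPING` (the same lookup the patched `count_message_tokens()` performs), then sends one chat completion to the FastChat OpenAI-compatible endpoint. Treat it as a sketch under assumptions: `localhost:8000` is FastChat's usual default rather than something confirmed by the (truncated) server script above, and it uses `vicuna-7b-v1.5` because the Llama 2 checkpoints on the Hugging Face Hub are gated.

```python
# Hypothetical smoke test for the local-model setup; not part of this patch.
import requests
from transformers import AutoTokenizer

from agentverse.llms import LOCAL_LLMS_MAPPING

MODEL_NAME = "vicuna-7b-v1.5"  # any key of LOCAL_LLMS_MAPPING

# 1) Resolve the short MODEL_NAME to its Hugging Face identifier -- the same
#    lookup count_message_tokens() performs after this change.
tokenizer = AutoTokenizer.from_pretrained(LOCAL_LLMS_MAPPING[MODEL_NAME.lower()])
print("prompt tokens:", len(tokenizer.encode("Say hello in five words.")))

# 2) Send one chat completion through the OpenAI-compatible endpoint that
#    run_local_model_server.sh is expected to expose (host/port are assumed).
resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": "Say hello in five words."}],
        "temperature": 0,
        "max_tokens": 32,
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```

If both steps succeed, a config that sets `llm_type: local` and `model: vicuna-7b-v1.5` should run end to end, since the `local` type now routes through the same `OpenAIChat` client registered in `agentverse/llms/openai.py`.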