From e66249c5bbbc097b3e7f4855aa01c31e6199d61c Mon Sep 17 00:00:00 2001
From: chenweize1998
Date: Fri, 27 Oct 2023 11:05:26 +0800
Subject: [PATCH] add local model token counter

---
 .github/workflows/test.yml             |  4 ++--
 agentverse/llms/openai.py              |  5 +++--
 agentverse/llms/utils/token_counter.py | 24 ++++++++++++++----------
 agentverse/memory/chat_history.py      | 16 ++++++++--------
 4 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 8e5f9a77f..e593f0609 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -21,10 +21,10 @@ jobs:
         with:
           access_token: ${{ github.token }}
       - uses: actions/checkout@v3
-      - name: Set up Python 3.10
+      - name: Set up Python 3.9
         uses: actions/setup-python@v4
         with:
-          python-version: "3.10"
+          python-version: 3.9
       - name: Upgrade pip
         run: |
           python -m pip install --upgrade pip setuptools wheel
diff --git a/agentverse/llms/openai.py b/agentverse/llms/openai.py
index fb2438e10..cbeb49782 100644
--- a/agentverse/llms/openai.py
+++ b/agentverse/llms/openai.py
@@ -121,10 +121,11 @@ def __init__(self, max_retry: int = 3, **kwargs):
     def send_token_limit(cls, model: str) -> int:
         send_token_limit_dict = {
             "gpt-3.5-turbo": 4096,
-            "gpt-35-turbo": 8192,
+            "gpt-35-turbo": 4096,
+            "gpt-3.5-turbo-16k": 16384,
             "gpt-4": 8192,
+            "gpt-4-32k": 32768,
             "llama-2-7b-chat-hf": 4096,
-            "gpt-3.5-turbo-16k": 16384,
         }
         return send_token_limit_dict[model]
 
diff --git a/agentverse/llms/utils/token_counter.py b/agentverse/llms/utils/token_counter.py
index bf9160700..00f345ce9 100644
--- a/agentverse/llms/utils/token_counter.py
+++ b/agentverse/llms/utils/token_counter.py
@@ -1,7 +1,11 @@
+# Modified from AutoGPT https://github.com/Significant-Gravitas/AutoGPT/blob/release-v0.4.7/autogpt/llm/utils/token_counter.py
+
 import tiktoken
-from typing import List
+from typing import List, Union, Dict
 from agentverse.logging import logger
 from agentverse.message import Message
+from agentverse.llms.openai import LOCAL_LLMS
+from transformers import AutoTokenizer
 
 
 def count_string_tokens(prompt: str = "", model: str = "gpt-3.5-turbo") -> int:
@@ -9,11 +13,8 @@
 
 
 def count_message_tokens(
-    messages: dict | List[dict], model: str = "gpt-3.5-turbo"
+    messages: Union[Dict, List[Dict]], model: str = "gpt-3.5-turbo"
 ) -> int:
-    """
-    https://github.com/Significant-Gravitas/AutoGPT/blob/16e266c65fb4620a1b1397532c503fa426ec191d/autogpts/autogpt/autogpt/core/resource/model_providers/openai.py#L221
-    """
     if isinstance(messages, dict):
         messages = [messages]
 
@@ -27,17 +28,20 @@
         tokens_per_message = 3
         tokens_per_name = 1
         encoding_model = "gpt-4"
+    elif model in LOCAL_LLMS:
+        encoding = AutoTokenizer.from_pretrained(model)
     else:
         raise NotImplementedError(
             f"count_message_tokens() is not implemented for model {model}.\n"
             " See https://github.com/openai/openai-python/blob/main/chatml.md for"
             " information on how messages are converted to tokens."
         )
-    try:
-        encoding = tiktoken.encoding_for_model(encoding_model)
-    except KeyError:
-        logger.warn("Warning: model not found. Using cl100k_base encoding.")
-        encoding = tiktoken.get_encoding("cl100k_base")
+    if model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4"):
+        try:
+            encoding = tiktoken.encoding_for_model(encoding_model)
+        except KeyError:
+            logger.warn("Warning: model not found. Using cl100k_base encoding.")
+            encoding = tiktoken.get_encoding("cl100k_base")
 
     num_tokens = 0
     for message in messages:
diff --git a/agentverse/memory/chat_history.py b/agentverse/memory/chat_history.py
index f2188c065..0b7ae773b 100644
--- a/agentverse/memory/chat_history.py
+++ b/agentverse/memory/chat_history.py
@@ -1,7 +1,7 @@
 import json
 import logging
 import os
-from typing import List, Optional
+from typing import List, Optional, Tuple, Dict
 
 from pydantic import Field
 
@@ -152,8 +152,8 @@ def reset(self) -> None:
         self.messages = []
 
     async def trim_messages(
-        self, current_message_chain: list[dict], model: str, history: List[dict]
-    ) -> tuple[dict, list[dict]]:
+        self, current_message_chain: List[Dict], model: str, history: List[Dict]
+    ) -> Tuple[Dict, List[Dict]]:
         new_messages_not_in_chain = [
             msg for msg in history if msg not in current_message_chain
         ]
@@ -172,7 +172,7 @@
 
     async def update_running_summary(
         self,
-        new_events: list[Message],
+        new_events: List[Message],
         model: str = "gpt-3.5-turbo",
         max_summary_length: Optional[int] = None,
     ) -> dict:
@@ -233,7 +233,7 @@ return self.summary_message()
 
     async def _update_summary_with_batch(
-        self, new_events_batch: list[dict], model: str, max_summary_length: int
+        self, new_events_batch: List[dict], model: str, max_summary_length: int
     ) -> None:
         prompt = self.SUMMARIZATION_PROMPT.format(
             summary=self.summary, new_events=new_events_batch
         )
@@ -254,11 +254,11 @@ def summary_message(self) -> dict:
 
 
 def add_history_upto_token_limit(
-    prompt: list[dict], history: list[dict], t_limit: int, model: str
-) -> list[Message]:
+    prompt: List[dict], history: List[dict], t_limit: int, model: str
+) -> List[Message]:
     limit_reached = False
     current_prompt_length = 0
-    trimmed_messages: list[dict] = []
+    trimmed_messages: List[Dict] = []
    for message in history[::-1]:
         token_to_add = count_message_tokens(message, model)
         if current_prompt_length + token_to_add > t_limit:
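
Reviewer note: the core of this patch is the new dispatch in `count_message_tokens()` — tiktoken for OpenAI chat models, a Hugging Face `AutoTokenizer` for anything listed in `LOCAL_LLMS`. Below is a minimal standalone sketch of that dispatch, not the patched code itself: the `LOCAL_LLMS` contents, the per-message overhead values, and the function name are placeholders for illustration; the real definitions live in `agentverse/llms/openai.py` and the hunks above.

```python
# Sketch only: mirrors the dispatch this patch introduces, under assumptions
# noted inline. Not the exact agentverse implementation.
from typing import Dict, List, Union

import tiktoken
from transformers import AutoTokenizer

LOCAL_LLMS = ["llama-2-7b-chat-hf"]  # placeholder; real list is in agentverse/llms/openai.py


def count_message_tokens_sketch(
    messages: Union[Dict, List[Dict]], model: str = "gpt-3.5-turbo"
) -> int:
    if isinstance(messages, dict):
        messages = [messages]
    if model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4"):
        # OpenAI models: count with tiktoken, plus a fixed per-message overhead.
        encoding = tiktoken.encoding_for_model(model)
        tokens_per_message = 3  # overhead used for gpt-4-style chat formatting
    elif model in LOCAL_LLMS:
        # Local models: HF tokenizers also expose .encode(), so the
        # counting loop below is shared between both paths.
        encoding = AutoTokenizer.from_pretrained(model)
        tokens_per_message = 0  # assumption: no documented chat overhead
    else:
        raise NotImplementedError(f"token counting not implemented for {model}")

    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for value in message.values():
            num_tokens += len(encoding.encode(str(value)))
    return num_tokens
```

One thing worth double-checking in review: in the patched function, `tokens_per_message` and `tokens_per_name` are only assigned on the OpenAI branches, so the counting loop that follows the hunk needs to default or skip them on the `LOCAL_LLMS` path.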
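Similarly, `add_history_upto_token_limit()` in the last hunk walks the history newest-first and keeps messages until the token budget is exceeded. A self-contained sketch of that strategy, with a crude stand-in for `count_message_tokens(message, model)` since the real counter needs a model:

```python
# Sketch of the newest-first trimming strategy from add_history_upto_token_limit.
from typing import Dict, List


def count_tokens(message: Dict) -> int:
    # Stand-in: whitespace "tokens" instead of count_message_tokens(message, model).
    return len(str(message.get("content", "")).split())


def history_upto_token_limit(history: List[Dict], t_limit: int) -> List[Dict]:
    current_prompt_length = 0
    trimmed_messages: List[Dict] = []
    for message in reversed(history):  # newest message first
        token_to_add = count_tokens(message)
        if current_prompt_length + token_to_add > t_limit:
            break  # budget exhausted: drop this message and everything older
        current_prompt_length += token_to_add
        trimmed_messages.insert(0, message)  # restore chronological order
    return trimmed_messages
```

Walking in reverse means the most recent messages survive trimming and older history is dropped first, which fits how the summarization path in `chat_history.py` consumes the result.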