Commit e66249c: add local model token counter

chenweize1998 committed Oct 27, 2023
1 parent f6c76ff

Showing 4 changed files with 27 additions and 22 deletions.
.github/workflows/test.yml: 2 additions & 2 deletions

@@ -21,10 +21,10 @@ jobs:
       with:
         access_token: ${{ github.token }}
     - uses: actions/checkout@v3
-    - name: Set up Python 3.10
+    - name: Set up Python 3.9
       uses: actions/setup-python@v4
       with:
-        python-version: "3.10"
+        python-version: 3.9
     - name: Upgrade pip
       run: |
         python -m pip install --upgrade pip setuptools wheel
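The Python downgrade above goes hand in hand with the typing edits in the files below. Annotations such as dict | List[dict] use PEP 604 union syntax, which only evaluates successfully on Python 3.10+; on 3.9 the module fails with a TypeError at import time, hence the rewrite to typing.Union, Dict, and Tuple. A minimal sketch of the difference (count_messages is an invented name for illustration):

from typing import Dict, List, Union

# Portable on Python 3.9: typing-module Union and generics.
def count_messages(messages: Union[Dict, List[Dict]]) -> int:
    return len(messages) if isinstance(messages, list) else 1

# The PEP 604 spelling fails on 3.9 when the def statement runs:
#     def count_messages(messages: dict | List[dict]) -> int: ...
# TypeError: unsupported operand type(s) for |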
agentverse/llms/openai.py: 3 additions & 2 deletions

@@ -121,10 +121,11 @@ def __init__(self, max_retry: int = 3, **kwargs):
     def send_token_limit(cls, model: str) -> int:
         send_token_limit_dict = {
             "gpt-3.5-turbo": 4096,
-            "gpt-35-turbo": 8192,
+            "gpt-35-turbo": 4096,
+            "gpt-3.5-turbo-16k": 16384,
             "gpt-4": 8192,
             "gpt-4-32k": 32768,
-            "gpt-3.5-turbo-16k": 16384,
+            "llama-2-7b-chat-hf": 4096,
         }
         return send_token_limit_dict[model]

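As a usage note, the table lookup raises KeyError for any model without an entry. A defensive variant is sketched below; the helper name and the 4096-token default are assumptions for illustration, not part of the commit:

def safe_send_token_limit(model: str, default: int = 4096) -> int:
    # Mirrors the dictionary added above; .get() avoids a KeyError
    # for models that have no entry yet.
    send_token_limit_dict = {
        "gpt-3.5-turbo": 4096,
        "gpt-35-turbo": 4096,
        "gpt-3.5-turbo-16k": 16384,
        "gpt-4": 8192,
        "gpt-4-32k": 32768,
        "llama-2-7b-chat-hf": 4096,
    }
    return send_token_limit_dict.get(model, default)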
agentverse/llms/utils/token_counter.py: 14 additions & 10 deletions

@@ -1,19 +1,20 @@
 # Modified from AutoGPT https://github.com/Significant-Gravitas/AutoGPT/blob/release-v0.4.7/autogpt/llm/utils/token_counter.py

 import tiktoken
-from typing import List
+from typing import List, Union, Dict
 from agentverse.logging import logger
 from agentverse.message import Message
+from agentverse.llms.openai import LOCAL_LLMS
+from transformers import AutoTokenizer


 def count_string_tokens(prompt: str = "", model: str = "gpt-3.5-turbo") -> int:
     return len(tiktoken.encoding_for_model(model).encode(prompt))


 def count_message_tokens(
-    messages: dict | List[dict], model: str = "gpt-3.5-turbo"
+    messages: Union[Dict, List[Dict]], model: str = "gpt-3.5-turbo"
 ) -> int:
     """
     https://github.com/Significant-Gravitas/AutoGPT/blob/16e266c65fb4620a1b1397532c503fa426ec191d/autogpts/autogpt/autogpt/core/resource/model_providers/openai.py#L221
     """
     if isinstance(messages, dict):
         messages = [messages]

@@ -27,17 +28,20 @@ def count_message_tokens(
         tokens_per_message = 3
         tokens_per_name = 1
         encoding_model = "gpt-4"
+    elif model in LOCAL_LLMS:
+        encoding = AutoTokenizer.from_pretrained(model)
     else:
         raise NotImplementedError(
             f"count_message_tokens() is not implemented for model {model}.\n"
             " See https://github.com/openai/openai-python/blob/main/chatml.md for"
             " information on how messages are converted to tokens."
         )
-    try:
-        encoding = tiktoken.encoding_for_model(encoding_model)
-    except KeyError:
-        logger.warn("Warning: model not found. Using cl100k_base encoding.")
-        encoding = tiktoken.get_encoding("cl100k_base")
+    if model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4"):
+        try:
+            encoding = tiktoken.encoding_for_model(encoding_model)
+        except KeyError:
+            logger.warn("Warning: model not found. Using cl100k_base encoding.")
+            encoding = tiktoken.get_encoding("cl100k_base")

     num_tokens = 0
     for message in messages:
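The change above gives count_message_tokens two encoding paths: tiktoken for OpenAI models and a Hugging Face tokenizer for models listed in LOCAL_LLMS. A minimal sketch exercising both paths side by side (the Hub id meta-llama/Llama-2-7b-chat-hf is illustrative and gated; the commit itself resolves local model names via LOCAL_LLMS rather than a hard-coded id):

import tiktoken
from transformers import AutoTokenizer

text = "Hello, world!"

# OpenAI path: tiktoken's BPE encoding for the model family.
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
openai_tokens = len(enc.encode(text))

# Local path: the model's own tokenizer, loaded from the Hub or a local
# directory. Note that encode() adds special tokens (e.g. BOS) by
# default, so counts are not directly comparable across tokenizers.
tok = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
local_tokens = len(tok.encode(text))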
agentverse/memory/chat_history.py: 8 additions & 8 deletions

@@ -1,7 +1,7 @@
 import json
 import logging
 import os
-from typing import List, Optional
+from typing import List, Optional, Tuple, Dict

 from pydantic import Field

@@ -152,8 +152,8 @@ def reset(self) -> None:
         self.messages = []

     async def trim_messages(
-        self, current_message_chain: list[dict], model: str, history: List[dict]
-    ) -> tuple[dict, list[dict]]:
+        self, current_message_chain: List[Dict], model: str, history: List[Dict]
+    ) -> Tuple[Dict, List[Dict]]:
         new_messages_not_in_chain = [
             msg for msg in history if msg not in current_message_chain
         ]
@@ -172,7 +172,7 @@ async def trim_messages(

     async def update_running_summary(
         self,
-        new_events: list[Message],
+        new_events: List[Message],
         model: str = "gpt-3.5-turbo",
         max_summary_length: Optional[int] = None,
     ) -> dict:
@@ -233,7 +233,7 @@ async def update_running_summary(
         return self.summary_message()

     async def _update_summary_with_batch(
-        self, new_events_batch: list[dict], model: str, max_summary_length: int
+        self, new_events_batch: List[dict], model: str, max_summary_length: int
     ) -> None:
         prompt = self.SUMMARIZATION_PROMPT.format(
             summary=self.summary, new_events=new_events_batch
@@ -254,11 +254,11 @@ def summary_message(self) -> dict:


 def add_history_upto_token_limit(
-    prompt: list[dict], history: list[dict], t_limit: int, model: str
-) -> list[Message]:
+    prompt: List[dict], history: List[dict], t_limit: int, model: str
+) -> List[Message]:
     limit_reached = False
     current_prompt_length = 0
-    trimmed_messages: list[dict] = []
+    trimmed_messages: List[Dict] = []
     for message in history[::-1]:
         token_to_add = count_message_tokens(message, model)
         if current_prompt_length + token_to_add > t_limit:
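The rest of add_history_upto_token_limit is collapsed above. Below is a self-contained sketch of the likely shape of the loop, modeled on the AutoGPT code this repository adapts; the bookkeeping past the visible lines (inserting into the prompt, collecting the overflow) is an assumption:

from typing import Dict, List

from agentverse.llms.utils.token_counter import count_message_tokens

def add_history_upto_token_limit_sketch(
    prompt: List[Dict], history: List[Dict], t_limit: int, model: str
) -> List[Dict]:
    # Walk history newest-first: messages that still fit the token budget
    # are inserted into the prompt; older ones are returned as "trimmed"
    # so the caller can fold them into a running summary instead.
    limit_reached = False
    current_prompt_length = 0
    trimmed_messages: List[Dict] = []
    insertion_index = len(prompt)
    for message in history[::-1]:
        token_to_add = count_message_tokens(message, model)
        if limit_reached or current_prompt_length + token_to_add > t_limit:
            limit_reached = True
            trimmed_messages.insert(0, message)
        else:
            prompt.insert(insertion_index, message)
            current_prompt_length += token_to_add
    return trimmed_messages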