From e66249c5bbbc097b3e7f4855aa01c31e6199d61c Mon Sep 17 00:00:00 2001
From: chenweize1998
Date: Fri, 27 Oct 2023 11:05:26 +0800
Subject: [PATCH] add local model token counter

---
 .github/workflows/test.yml             |  4 ++--
 agentverse/llms/openai.py              |  5 +++--
 agentverse/llms/utils/token_counter.py | 24 ++++++++++++++----------
 agentverse/memory/chat_history.py      | 16 ++++++++--------
 4 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 8e5f9a77f..e593f0609 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -21,10 +21,10 @@ jobs:
         with:
           access_token: ${{ github.token }}
       - uses: actions/checkout@v3
-      - name: Set up Python 3.10
+      - name: Set up Python 3.9
         uses: actions/setup-python@v4
         with:
-          python-version: "3.10"
+          python-version: 3.9
       - name: Upgrade pip
         run: |
           python -m pip install --upgrade pip setuptools wheel
diff --git a/agentverse/llms/openai.py b/agentverse/llms/openai.py
index fb2438e10..cbeb49782 100644
--- a/agentverse/llms/openai.py
+++ b/agentverse/llms/openai.py
@@ -121,10 +121,11 @@ def __init__(self, max_retry: int = 3, **kwargs):
     def send_token_limit(cls, model: str) -> int:
         send_token_limit_dict = {
             "gpt-3.5-turbo": 4096,
-            "gpt-35-turbo": 8192,
+            "gpt-35-turbo": 4096,
+            "gpt-3.5-turbo-16k": 16384,
             "gpt-4": 8192,
+            "gpt-4-32k": 32768,
             "llama-2-7b-chat-hf": 4096,
-            "gpt-3.5-turbo-16k": 16384,
         }
         return send_token_limit_dict[model]
 
diff --git a/agentverse/llms/utils/token_counter.py b/agentverse/llms/utils/token_counter.py
index bf9160700..00f345ce9 100644
--- a/agentverse/llms/utils/token_counter.py
+++ b/agentverse/llms/utils/token_counter.py
@@ -1,7 +1,11 @@
+# Modified from AutoGPT https://github.com/Significant-Gravitas/AutoGPT/blob/release-v0.4.7/autogpt/llm/utils/token_counter.py
+
 import tiktoken
-from typing import List
+from typing import List, Union, Dict
 from agentverse.logging import logger
 from agentverse.message import Message
+from agentverse.llms.openai import LOCAL_LLMS
+from transformers import AutoTokenizer
 
 
 def count_string_tokens(prompt: str = "", model: str = "gpt-3.5-turbo") -> int:
@@ -9,11 +13,8 @@
 
 
 def count_message_tokens(
-    messages: dict | List[dict], model: str = "gpt-3.5-turbo"
+    messages: Union[Dict, List[Dict]], model: str = "gpt-3.5-turbo"
 ) -> int:
-    """
-    https://github.com/Significant-Gravitas/AutoGPT/blob/16e266c65fb4620a1b1397532c503fa426ec191d/autogpts/autogpt/autogpt/core/resource/model_providers/openai.py#L221
-    """
     if isinstance(messages, dict):
         messages = [messages]
 
@@ -27,17 +28,20 @@
         tokens_per_message = 3
         tokens_per_name = 1
         encoding_model = "gpt-4"
+    elif model in LOCAL_LLMS:
+        encoding = AutoTokenizer.from_pretrained(model)
     else:
         raise NotImplementedError(
             f"count_message_tokens() is not implemented for model {model}.\n"
             " See https://github.com/openai/openai-python/blob/main/chatml.md for"
             " information on how messages are converted to tokens."
         )
-    try:
-        encoding = tiktoken.encoding_for_model(encoding_model)
-    except KeyError:
-        logger.warn("Warning: model not found. Using cl100k_base encoding.")
-        encoding = tiktoken.get_encoding("cl100k_base")
+    if model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4"):
+        try:
+            encoding = tiktoken.encoding_for_model(encoding_model)
+        except KeyError:
+            logger.warn("Warning: model not found. Using cl100k_base encoding.")
+            encoding = tiktoken.get_encoding("cl100k_base")
 
     num_tokens = 0
     for message in messages:
diff --git a/agentverse/memory/chat_history.py b/agentverse/memory/chat_history.py
index f2188c065..0b7ae773b 100644
--- a/agentverse/memory/chat_history.py
+++ b/agentverse/memory/chat_history.py
@@ -1,7 +1,7 @@
 import json
 import logging
 import os
-from typing import List, Optional
+from typing import List, Optional, Tuple, Dict
 
 from pydantic import Field
 
@@ -152,8 +152,8 @@ def reset(self) -> None:
         self.messages = []
 
     async def trim_messages(
-        self, current_message_chain: list[dict], model: str, history: List[dict]
-    ) -> tuple[dict, list[dict]]:
+        self, current_message_chain: List[Dict], model: str, history: List[Dict]
+    ) -> Tuple[Dict, List[Dict]]:
         new_messages_not_in_chain = [
             msg for msg in history if msg not in current_message_chain
         ]
@@ -172,7 +172,7 @@
 
     async def update_running_summary(
         self,
-        new_events: list[Message],
+        new_events: List[Message],
         model: str = "gpt-3.5-turbo",
         max_summary_length: Optional[int] = None,
     ) -> dict:
@@ -233,7 +233,7 @@ return self.summary_message()
 
     async def _update_summary_with_batch(
-        self, new_events_batch: list[dict], model: str, max_summary_length: int
+        self, new_events_batch: List[dict], model: str, max_summary_length: int
     ) -> None:
         prompt = self.SUMMARIZATION_PROMPT.format(
             summary=self.summary, new_events=new_events_batch
         )
@@ -254,11 +254,11 @@ def summary_message(self) -> dict:
 
 
 def add_history_upto_token_limit(
-    prompt: list[dict], history: list[dict], t_limit: int, model: str
-) -> list[Message]:
+    prompt: List[dict], history: List[dict], t_limit: int, model: str
+) -> List[Message]:
     limit_reached = False
     current_prompt_length = 0
-    trimmed_messages: list[dict] = []
+    trimmed_messages: List[Dict] = []
    for message in history[::-1]:
         token_to_add = count_message_tokens(message, model)
         if current_prompt_length + token_to_add > t_limit:
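
Reviewer note: the core of this patch is the new dispatch in `count_message_tokens()` — tiktoken for OpenAI chat models, a Hugging Face `AutoTokenizer` for anything listed in `LOCAL_LLMS`. Below is a minimal standalone sketch of that dispatch, not the patched code itself: the `LOCAL_LLMS` contents, the per-message overhead values, and the function name are placeholders for illustration; the real definitions live in `agentverse/llms/openai.py` and the hunks above.

```python
# Sketch only: mirrors the dispatch this patch introduces, under assumptions
# noted inline. Not the exact agentverse implementation.
from typing import Dict, List, Union

import tiktoken
from transformers import AutoTokenizer

LOCAL_LLMS = ["llama-2-7b-chat-hf"]  # placeholder; real list is in agentverse/llms/openai.py


def count_message_tokens_sketch(
    messages: Union[Dict, List[Dict]], model: str = "gpt-3.5-turbo"
) -> int:
    if isinstance(messages, dict):
        messages = [messages]
    if model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4"):
        # OpenAI models: count with tiktoken, plus a fixed per-message overhead.
        encoding = tiktoken.encoding_for_model(model)
        tokens_per_message = 3  # overhead used for gpt-4-style chat formatting
    elif model in LOCAL_LLMS:
        # Local models: HF tokenizers also expose .encode(), so the
        # counting loop below is shared between both paths.
        encoding = AutoTokenizer.from_pretrained(model)
        tokens_per_message = 0  # assumption: no documented chat overhead
    else:
        raise NotImplementedError(f"token counting not implemented for {model}")

    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for value in message.values():
            num_tokens += len(encoding.encode(str(value)))
    return num_tokens
```

One thing worth double-checking in review: in the patched function, `tokens_per_message` and `tokens_per_name` are only assigned on the OpenAI branches, so the counting loop that follows the hunk needs to default or skip them on the `LOCAL_LLMS` path.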
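Similarly, `add_history_upto_token_limit()` in the last hunk walks the history newest-first and keeps messages until the token budget is exceeded. A self-contained sketch of that strategy, with a crude stand-in for `count_message_tokens(message, model)` since the real counter needs a model:

```python
# Sketch of the newest-first trimming strategy from add_history_upto_token_limit.
from typing import Dict, List


def count_tokens(message: Dict) -> int:
    # Stand-in: whitespace "tokens" instead of count_message_tokens(message, model).
    return len(str(message.get("content", "")).split())


def history_upto_token_limit(history: List[Dict], t_limit: int) -> List[Dict]:
    current_prompt_length = 0
    trimmed_messages: List[Dict] = []
    for message in reversed(history):  # newest message first
        token_to_add = count_tokens(message)
        if current_prompt_length + token_to_add > t_limit:
            break  # budget exhausted: drop this message and everything older
        current_prompt_length += token_to_add
        trimmed_messages.insert(0, message)  # restore chronological order
    return trimmed_messages
```

Walking in reverse means the most recent messages survive trimming and older history is dropped first, which fits how the summarization path in `chat_history.py` consumes the result.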