feat: count tokens in the tools
0xArdi committed Feb 7, 2024
1 parent 3e152f9 commit f13ec8f
Showing 11 changed files with 77 additions and 28 deletions.
32 changes: 8 additions & 24 deletions packages/valory/skills/task_execution/utils/benchmarks.py
@@ -19,30 +19,12 @@
 """Benchmarking for tools."""
 
 import logging
-from typing import Any, Dict, Union
-
-import anthropic
-import tiktoken
-from tiktoken import Encoding
+from typing import Any, Callable, Dict, Union
 
 
 PRICE_NUM_TOKENS = 1000
 
 
-def encoding_for_model(model: str) -> Encoding:
-    """Get the encoding for a model."""
-    return tiktoken.encoding_for_model(model)
-
-
-def count_tokens(text: str, model: str) -> int:
-    """Count the number of tokens in a text."""
-    if "claude" in model:
-        return anthropic.Anthropic().count_tokens(text)
-
-    enc = encoding_for_model(model)
-    return len(enc.encode(text))
-
-
 class TokenCounterCallback:
     """Callback to count the number of tokens used in a generation."""
@@ -73,13 +55,15 @@ def token_to_cost(tokens: int, model: str, tokens_type: str) -> float:
             * TokenCounterCallback.TOKEN_PRICES[model][tokens_type]
         )
 
-    def calculate_cost(self, tokens_type: str, model: str, **kwargs: Any) -> None:
+    def calculate_cost(
+        self, tokens_type: str, model: str, token_counter: Callable, **kwargs: Any
+    ) -> None:
         """Calculate the cost of a generation."""
         # Check if the prompt or the tokens are passed in
         prompt_key = f"{tokens_type}_prompt"
         token_key = f"{tokens_type}_tokens"
         if prompt_key in kwargs:
-            tokens = count_tokens(kwargs[prompt_key], model)
+            tokens = token_counter(kwargs[prompt_key], model)
         elif token_key in kwargs:
             tokens = kwargs[token_key]
         else:
@@ -88,13 +72,13 @@ def calculate_cost(self, tokens_type: str, model: str, **kwargs: Any) -> None:
         self.cost_dict[token_key] += tokens
         self.cost_dict[f"{tokens_type}_cost"] += cost
 
-    def __call__(self, model: str, **kwargs: Any) -> None:
+    def __call__(self, model: str, token_counter: Callable, **kwargs: Any) -> None:
         """Callback to count the number of tokens used in a generation."""
         if model not in list(TokenCounterCallback.TOKEN_PRICES.keys()):
             raise ValueError(f"Model {model} not supported.")
         try:
-            self.calculate_cost("input", model, **kwargs)
-            self.calculate_cost("output", model, **kwargs)
+            self.calculate_cost("input", model, token_counter, **kwargs)
+            self.calculate_cost("output", model, token_counter, **kwargs)
             self.cost_dict["total_tokens"] = (
                 self.cost_dict["input_tokens"] + self.cost_dict["output_tokens"]
             )
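The net effect is a dependency inversion: TokenCounterCallback no longer imports tiktoken or anthropic itself; each tool defines its own count_tokens helper and passes it in as token_counter. A minimal usage sketch of the new convention, not part of the commit: the import path, model name, and prompt are assumptions, the model must be a key in TokenCounterCallback.TOKEN_PRICES, and the callback's default constructor is assumed to initialize its cost_dict.

from tiktoken import encoding_for_model

# Assumed import path, mirroring the package layout above.
from packages.valory.skills.task_execution.utils.benchmarks import TokenCounterCallback


def count_tokens(text: str, model: str) -> int:
    """Count the number of tokens in a text (the helper each OpenAI tool now defines)."""
    enc = encoding_for_model(model)
    return len(enc.encode(text))


counter_callback = TokenCounterCallback()
counter_callback(
    model="gpt-3.5-turbo",                  # must appear in TOKEN_PRICES
    input_prompt="Will it rain tomorrow?",  # counted by calling token_counter(text, model)
    output_tokens=40,                       # already-known counts are passed through as-is
    token_counter=count_tokens,
)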
8 changes: 7 additions & 1 deletion tools/native_transfer_request/native_transfer_request.py
@@ -25,7 +25,7 @@
 from typing import Any, Dict, Optional, Tuple, cast
 
 from openai import OpenAI
-
+from tiktoken import encoding_for_model
 
 client: Optional[OpenAI] = None
 
@@ -47,6 +47,12 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
         client = None
 
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
+
 ENGINE = "gpt-3.5-turbo"
 MAX_TOKENS = 500
7 changes: 7 additions & 0 deletions tools/openai_request/openai_request.py
@@ -21,6 +21,7 @@
 from typing import Any, Dict, Optional, Tuple
 
 from openai import OpenAI
+from tiktoken import encoding_for_model
 
 client: Optional[OpenAI] = None
 
@@ -42,6 +43,12 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
         client = None
 
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
+
 DEFAULT_OPENAI_SETTINGS = {
     "max_tokens": 500,
     "temperature": 0.7,
7 changes: 6 additions & 1 deletion tools/optimization_by_prompting/optimization_by_prompting.py
@@ -37,7 +37,7 @@
 from langchain.llms import OpenAI as OpenAILLM
 from langchain.prompts import PromptTemplate
 from sklearn.metrics import roc_auc_score
-
+from tiktoken import encoding_for_model
 
 client: Optional[OpenAI] = None
 
@@ -58,6 +58,11 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
         client.close()
         client = None
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
 
 # Provide several examples in order to backtest the resulting prompt
 EXAMPLES = """query;event
8 changes: 8 additions & 0 deletions tools/prediction_request/prediction_request.py
@@ -37,6 +37,7 @@
 from spacy.cli import download
 from spacy.lang.en import STOP_WORDS
 from spacy.tokens import Doc, Span
+from tiktoken import encoding_for_model
 
 client: Optional[OpenAI] = None
 
@@ -58,6 +59,11 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
         client.close()
         client = None
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
 
 FrequenciesType = Dict[str, float]
 ScoresType = Dict[Span, float]
@@ -292,6 +298,7 @@ def fetch_additional_information(
             input_tokens=response["usage"]["prompt_tokens"],
             output_tokens=response["usage"]["completion_tokens"],
             model=engine,
+            token_counter=count_tokens,
         )
         return "\n".join(["- " + text for text in texts]), counter_callback
     return "\n".join(["- " + text for text in texts]), None
@@ -421,6 +428,7 @@ def run(**kwargs) -> Tuple[Optional[str], Optional[Dict[str, Any]], Any]:
             input_tokens=response["usage"]["prompt_tokens"],
             output_tokens=response["usage"]["completion_tokens"],
             model=engine,
+            token_counter=count_tokens,
         )
         return response.choices[0].message.content, prediction_prompt, counter_callback
     return response.choices[0].message.content, prediction_prompt, None
7 changes: 7 additions & 0 deletions tools/prediction_request_claude/prediction_request_claude.py
@@ -25,6 +25,7 @@
 from typing import Any, Dict, List, Optional, Tuple, Iterator, Callable
 from itertools import islice
 
+import anthropic
 import requests
 from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
 from bs4 import BeautifulSoup
@@ -125,6 +126,10 @@
 STOP_SEQUENCES = ["```"]
 
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    return anthropic.Anthropic().count_tokens(text)
+
 def search_google(query: str, api_key: str, engine: str, num: int = 3) -> List[str]:
     service = build("customsearch", "v1", developerKey=api_key)
     search = (
@@ -248,6 +253,7 @@ def fetch_additional_information(
             model=engine,
             input_prompt=url_query_prompt,
             output_tokens=40,
+            token_counter=count_tokens,
         )
         return "\n".join(["- " + text for text in texts]), counter_callback
     return "\n".join(["- " + text for text in texts]), None
@@ -300,6 +306,7 @@ def run(**kwargs) -> Tuple[str, Optional[str], Optional[Dict[str, Any]], Any]:
             model=engine,
             input_prompt=prediction_prompt,
             output_prompt=completion.completion,
+            token_counter=count_tokens,
         )
         return completion.completion, prediction_prompt, counter_callback
 
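One detail worth noting in the Claude tool above: its count_tokens keeps the same (text, model) signature as the tiktoken-based helpers even though the Anthropic client's count_tokens ignores the model argument, so every tool's helper satisfies the same callable shape. A sketch of that shared shape (the alias name is an assumption, not from the commit):

from typing import Callable

# Any helper with this shape can be passed as token_counter,
# regardless of which tokenizer backs it.
TokenCounter = Callable[[str, str], int]  # (text, model) -> token count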
6 changes: 6 additions & 0 deletions (file path not captured in this view)
@@ -38,6 +38,7 @@
 import tiktoken
 
 from dateutil import parser
+from tiktoken import encoding_for_model
 
 client: Optional[OpenAI] = None
 
@@ -59,6 +60,11 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
         client.close()
         client = None
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
 
 
 NUM_URLS_EXTRACT = 5
10 changes: 9 additions & 1 deletion tools/prediction_request_sme/prediction_request_sme.py
@@ -30,7 +30,7 @@
 import requests
 from bs4 import BeautifulSoup
 from googleapiclient.discovery import build
-
+from tiktoken import encoding_for_model
 
 client: Optional[OpenAI] = None
 
@@ -53,6 +53,11 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
         client = None
 
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
 
 NUM_URLS_EXTRACT = 5
 DEFAULT_NUM_WORDS: Dict[str, Optional[int]] = defaultdict(lambda: 300)
@@ -310,6 +315,7 @@ def fetch_additional_information(
             input_tokens=response["usage"]["prompt_tokens"],
             output_tokens=response["usage"]["completion_tokens"],
             model=engine,
+            token_counter=count_tokens,
         )
         return "\n".join(["- " + text for text in texts]), counter_callback
     return "\n".join(["- " + text for text in texts]), None
@@ -343,6 +349,7 @@ def get_sme_role(
             output_tokens=response["usage"]["completion_tokens"],
             total_tokens=response["usage"]["total_tokens"],
             model=engine,
+            token_counter=count_tokens,
         )
         return sme["sme"], sme["sme_introduction"], counter_callback
     return sme["sme"], sme["sme_introduction"], None
@@ -424,6 +431,7 @@ def run(**kwargs) -> Tuple[str, Optional[str], Optional[Dict[str, Any]], Any]:
             input_tokens=response["usage"]["prompt_tokens"],
             output_tokens=response["usage"]["completion_tokens"],
             model=engine,
+            token_counter=count_tokens,
         )
         return response.choices[0].message.content, prediction_prompt, counter_callback
     return response.choices[0].message.content, prediction_prompt, None
6 changes: 6 additions & 0 deletions (file path not captured in this view)
@@ -36,6 +36,7 @@
 import traceback
 
 from dateutil import parser
+from tiktoken import encoding_for_model
 from tqdm import tqdm
 from sentence_transformers import SentenceTransformer, util
 
@@ -61,6 +62,11 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
         client = None
 
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
 
 NUM_URLS_EXTRACT = 5
 MAX_TOTAL_TOKENS_CHAT_COMPLETION = 4096  # Set the limit for cost efficiency
6 changes: 6 additions & 0 deletions tools/sme_generation_request/sme_generation_request.py
@@ -3,6 +3,7 @@
 from typing import Any, Dict, Generator, List, Optional, Tuple
 
 from openai import OpenAI
+from tiktoken import encoding_for_model
 
 client: Optional[OpenAI] = None
 
@@ -24,6 +25,11 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
         client.close()
         client = None
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
 
 DEFAULT_OPENAI_SETTINGS = {
     "max_tokens": 500,
8 changes: 7 additions & 1 deletion tools/stability_ai_request/stabilityai_request.py
@@ -24,7 +24,7 @@
 from typing import Any, Dict, Optional, Tuple
 
 import requests
-
+from tiktoken import encoding_for_model
 
 DEFAULT_STABILITYAI_SETTINGS = {
     "cfg_scale": 7,
@@ -54,6 +54,12 @@
 ALLOWED_TOOLS = [PREFIX + value for value in ENGINES["picture"]]
 
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
+
 class FinishReason(Enum):
     """The finish reasons of the API."""
 
