feat: count tokens in the tools
0xArdi committed Feb 7, 2024
1 parent 3e152f9 commit f13ec8f
Showing 11 changed files with 77 additions and 28 deletions.
32 changes: 8 additions & 24 deletions packages/valory/skills/task_execution/utils/benchmarks.py
@@ -19,30 +19,12 @@
 """Benchmarking for tools."""
 
 import logging
-from typing import Any, Dict, Union
-
-import anthropic
-import tiktoken
-from tiktoken import Encoding
+from typing import Any, Callable, Dict, Union
 
 
 PRICE_NUM_TOKENS = 1000
 
 
-def encoding_for_model(model: str) -> Encoding:
-    """Get the encoding for a model."""
-    return tiktoken.encoding_for_model(model)
-
-
-def count_tokens(text: str, model: str) -> int:
-    """Count the number of tokens in a text."""
-    if "claude" in model:
-        return anthropic.Anthropic().count_tokens(text)
-
-    enc = encoding_for_model(model)
-    return len(enc.encode(text))
-
-
 class TokenCounterCallback:
     """Callback to count the number of tokens used in a generation."""
@@ -73,13 +55,15 @@ def token_to_cost(tokens: int, model: str, tokens_type: str) -> float:
             * TokenCounterCallback.TOKEN_PRICES[model][tokens_type]
         )
 
-    def calculate_cost(self, tokens_type: str, model: str, **kwargs: Any) -> None:
+    def calculate_cost(
+        self, tokens_type: str, model: str, token_counter: Callable, **kwargs: Any
+    ) -> None:
         """Calculate the cost of a generation."""
         # Check if the prompt or the tokens are passed in
         prompt_key = f"{tokens_type}_prompt"
         token_key = f"{tokens_type}_tokens"
         if prompt_key in kwargs:
-            tokens = count_tokens(kwargs[prompt_key], model)
+            tokens = token_counter(kwargs[prompt_key], model)
         elif token_key in kwargs:
             tokens = kwargs[token_key]
         else:
@@ -88,13 +72,13 @@ def calculate_cost(self, tokens_type: str, model: str, **kwargs: Any) -> None:
         self.cost_dict[token_key] += tokens
         self.cost_dict[f"{tokens_type}_cost"] += cost
 
-    def __call__(self, model: str, **kwargs: Any) -> None:
+    def __call__(self, model: str, token_counter: Callable, **kwargs: Any) -> None:
         """Callback to count the number of tokens used in a generation."""
         if model not in list(TokenCounterCallback.TOKEN_PRICES.keys()):
             raise ValueError(f"Model {model} not supported.")
         try:
-            self.calculate_cost("input", model, **kwargs)
-            self.calculate_cost("output", model, **kwargs)
+            self.calculate_cost("input", model, token_counter, **kwargs)
+            self.calculate_cost("output", model, token_counter, **kwargs)
             self.cost_dict["total_tokens"] = (
                 self.cost_dict["input_tokens"] + self.cost_dict["output_tokens"]
             )
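The net effect is a dependency inversion: TokenCounterCallback no longer imports tiktoken or anthropic itself; each tool defines its own count_tokens helper and passes it in as token_counter. A minimal usage sketch of the new convention, not part of the commit: the import path, model name, and prompt are assumptions, the model must be a key in TokenCounterCallback.TOKEN_PRICES, and the callback's default constructor is assumed to initialize its cost_dict.

from tiktoken import encoding_for_model

# Assumed import path, mirroring the package layout above.
from packages.valory.skills.task_execution.utils.benchmarks import TokenCounterCallback


def count_tokens(text: str, model: str) -> int:
    """Count the number of tokens in a text (the helper each OpenAI tool now defines)."""
    enc = encoding_for_model(model)
    return len(enc.encode(text))


counter_callback = TokenCounterCallback()
counter_callback(
    model="gpt-3.5-turbo",                  # must appear in TOKEN_PRICES
    input_prompt="Will it rain tomorrow?",  # counted by calling token_counter(text, model)
    output_tokens=40,                       # already-known counts are passed through as-is
    token_counter=count_tokens,
)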
8 changes: 7 additions & 1 deletion tools/native_transfer_request/native_transfer_request.py
@@ -25,7 +25,7 @@
 from typing import Any, Dict, Optional, Tuple, cast
 
 from openai import OpenAI
-
+from tiktoken import encoding_for_model
 
 client: Optional[OpenAI] = None
 
@@ -47,6 +47,12 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
         client = None
 
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
+
 ENGINE = "gpt-3.5-turbo"
 MAX_TOKENS = 500
7 changes: 7 additions & 0 deletions tools/openai_request/openai_request.py
@@ -21,6 +21,7 @@
 from typing import Any, Dict, Optional, Tuple
 
 from openai import OpenAI
+from tiktoken import encoding_for_model
 
 client: Optional[OpenAI] = None
 
@@ -42,6 +43,12 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
         client = None
 
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
+
 DEFAULT_OPENAI_SETTINGS = {
     "max_tokens": 500,
     "temperature": 0.7,
7 changes: 6 additions & 1 deletion tools/optimization_by_prompting/optimization_by_prompting.py
@@ -37,7 +37,7 @@
 from langchain.llms import OpenAI as OpenAILLM
 from langchain.prompts import PromptTemplate
 from sklearn.metrics import roc_auc_score
-
+from tiktoken import encoding_for_model
 
 client: Optional[OpenAI] = None
 
@@ -58,6 +58,11 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
         client.close()
         client = None
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
 
 # Provide several examples in order to backtest the resulting prompt
 EXAMPLES = """query;event
8 changes: 8 additions & 0 deletions tools/prediction_request/prediction_request.py
@@ -37,6 +37,7 @@
 from spacy.cli import download
 from spacy.lang.en import STOP_WORDS
 from spacy.tokens import Doc, Span
+from tiktoken import encoding_for_model
 
 client: Optional[OpenAI] = None
 
@@ -58,6 +59,11 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
         client.close()
         client = None
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
 
 FrequenciesType = Dict[str, float]
 ScoresType = Dict[Span, float]
@@ -292,6 +298,7 @@ def fetch_additional_information(
             input_tokens=response["usage"]["prompt_tokens"],
             output_tokens=response["usage"]["completion_tokens"],
             model=engine,
+            token_counter=count_tokens,
         )
         return "\n".join(["- " + text for text in texts]), counter_callback
     return "\n".join(["- " + text for text in texts]), None
@@ -421,6 +428,7 @@ def run(**kwargs) -> Tuple[Optional[str], Optional[Dict[str, Any]], Any]:
             input_tokens=response["usage"]["prompt_tokens"],
             output_tokens=response["usage"]["completion_tokens"],
             model=engine,
+            token_counter=count_tokens,
         )
         return response.choices[0].message.content, prediction_prompt, counter_callback
     return response.choices[0].message.content, prediction_prompt, None
7 changes: 7 additions & 0 deletions tools/prediction_request_claude/prediction_request_claude.py
@@ -25,6 +25,7 @@
 from typing import Any, Dict, List, Optional, Tuple, Iterator, Callable
 from itertools import islice
 
+import anthropic
 import requests
 from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
 from bs4 import BeautifulSoup
@@ -125,6 +126,10 @@
 STOP_SEQUENCES = ["```"]
 
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    return anthropic.Anthropic().count_tokens(text)
+
 def search_google(query: str, api_key: str, engine: str, num: int = 3) -> List[str]:
     service = build("customsearch", "v1", developerKey=api_key)
     search = (
@@ -248,6 +253,7 @@ def fetch_additional_information(
             model=engine,
             input_prompt=url_query_prompt,
             output_tokens=40,
+            token_counter=count_tokens,
         )
         return "\n".join(["- " + text for text in texts]), counter_callback
     return "\n".join(["- " + text for text in texts]), None
@@ -300,6 +306,7 @@ def run(**kwargs) -> Tuple[str, Optional[str], Optional[Dict[str, Any]], Any]:
             model=engine,
             input_prompt=prediction_prompt,
             output_prompt=completion.completion,
+            token_counter=count_tokens,
         )
         return completion.completion, prediction_prompt, counter_callback
 
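One detail worth noting in the Claude tool above: its count_tokens keeps the same (text, model) signature as the tiktoken-based helpers even though the Anthropic client's count_tokens ignores the model argument, so every tool's helper satisfies the same callable shape. A sketch of that shared shape (the alias name is an assumption, not from the commit):

from typing import Callable

# Any helper with this shape can be passed as token_counter,
# regardless of which tokenizer backs it.
TokenCounter = Callable[[str, str], int]  # (text, model) -> token count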
6 changes: 6 additions & 0 deletions (file path not captured in this view)
@@ -38,6 +38,7 @@
 import tiktoken
 
 from dateutil import parser
+from tiktoken import encoding_for_model
 
 client: Optional[OpenAI] = None
 
@@ -59,6 +60,11 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
         client.close()
         client = None
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
 
 
 NUM_URLS_EXTRACT = 5
10 changes: 9 additions & 1 deletion tools/prediction_request_sme/prediction_request_sme.py
@@ -30,7 +30,7 @@
 import requests
 from bs4 import BeautifulSoup
 from googleapiclient.discovery import build
-
+from tiktoken import encoding_for_model
 
 client: Optional[OpenAI] = None
 
@@ -53,6 +53,11 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
         client = None
 
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
 
 NUM_URLS_EXTRACT = 5
 DEFAULT_NUM_WORDS: Dict[str, Optional[int]] = defaultdict(lambda: 300)
@@ -310,6 +315,7 @@ def fetch_additional_information(
             input_tokens=response["usage"]["prompt_tokens"],
             output_tokens=response["usage"]["completion_tokens"],
             model=engine,
+            token_counter=count_tokens,
         )
         return "\n".join(["- " + text for text in texts]), counter_callback
     return "\n".join(["- " + text for text in texts]), None
@@ -343,6 +349,7 @@ def get_sme_role(
             output_tokens=response["usage"]["completion_tokens"],
             total_tokens=response["usage"]["total_tokens"],
             model=engine,
+            token_counter=count_tokens,
         )
         return sme["sme"], sme["sme_introduction"], counter_callback
     return sme["sme"], sme["sme_introduction"], None
@@ -424,6 +431,7 @@ def run(**kwargs) -> Tuple[str, Optional[str], Optional[Dict[str, Any]], Any]:
             input_tokens=response["usage"]["prompt_tokens"],
             output_tokens=response["usage"]["completion_tokens"],
             model=engine,
+            token_counter=count_tokens,
         )
         return response.choices[0].message.content, prediction_prompt, counter_callback
     return response.choices[0].message.content, prediction_prompt, None
6 changes: 6 additions & 0 deletions (file path not captured in this view)
@@ -36,6 +36,7 @@
 import traceback
 
 from dateutil import parser
+from tiktoken import encoding_for_model
 from tqdm import tqdm
 from sentence_transformers import SentenceTransformer, util
 
@@ -61,6 +62,11 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
         client = None
 
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
 
 NUM_URLS_EXTRACT = 5
 MAX_TOTAL_TOKENS_CHAT_COMPLETION = 4096  # Set the limit for cost efficiency
6 changes: 6 additions & 0 deletions tools/sme_generation_request/sme_generation_request.py
@@ -3,6 +3,7 @@
 from typing import Any, Dict, Generator, List, Optional, Tuple
 
 from openai import OpenAI
+from tiktoken import encoding_for_model
 
 client: Optional[OpenAI] = None
 
@@ -24,6 +25,11 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
         client.close()
         client = None
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
 
 DEFAULT_OPENAI_SETTINGS = {
     "max_tokens": 500,
8 changes: 7 additions & 1 deletion tools/stability_ai_request/stabilityai_request.py
@@ -24,7 +24,7 @@
 from typing import Any, Dict, Optional, Tuple
 
 import requests
-
+from tiktoken import encoding_for_model
 
 DEFAULT_STABILITYAI_SETTINGS = {
     "cfg_scale": 7,
@@ -54,6 +54,12 @@
 ALLOWED_TOOLS = [PREFIX + value for value in ENGINES["picture"]]
 
 
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
+
 class FinishReason(Enum):
     """The finish reasons of the API."""
 
