Merge pull request #161 from valory-xyz/feat/count-token-in-toosl
feat: count tokens in the tools
0xArdi authored Feb 8, 2024
2 parents e952e14 + 93622e6 commit 53cd069
Showing 15 changed files with 83 additions and 34 deletions.
6 changes: 3 additions & 3 deletions packages/packages.json
@@ -2,13 +2,13 @@
"dev": {
"connection/valory/websocket_client/0.1.0": "bafybeiflmystocxaqblhpzqlcop2vkhsknpzjx2jomohomaxamwskeokzm",
"skill/valory/contract_subscription/0.1.0": "bafybeicyugrkx5glat4p4ezwf6i7oduh26eycfie6ftd4uxrknztzl3ik4",
"agent/valory/mech/0.1.0": "bafybeiepdstu23qid2pcrugkpwtibh7jhlshmnmclwwbgjmefio27cp4im",
"agent/valory/mech/0.1.0": "bafybeibwlyxqtitnfgmt2liuygv75pydlvih23e2ilwpr6xlffac5flyse",
"skill/valory/mech_abci/0.1.0": "bafybeieimp7xzxcnbzsuunf2xkcy5juulhmzsmkq2v3sw3o3lgssb53cnu",
"contract/valory/agent_mech/0.1.0": "bafybeiepxumywg6z2zapqzc3bg3iey23cmlgjzxisqox5j74o5i2texr5e",
"service/valory/mech/0.1.0": "bafybeifzkrmgejdce5nvp7s63dpvthde6wn6etolesh4dmf5pno7jplzcy",
"service/valory/mech/0.1.0": "bafybeicw64tjmcx6fbffwhpgdabmnxfwbuk6cycwrcu4me3hplcleu4mze",
"protocol/valory/acn_data_share/0.1.0": "bafybeih5ydonnvrwvy2ygfqgfabkr47s4yw3uqxztmwyfprulwfsoe7ipq",
"skill/valory/task_submission_abci/0.1.0": "bafybeib4m2bwgchloqss3wotsx4rz7qqkwydaesiqkls2zq7zbtp6jtpsi",
"skill/valory/task_execution/0.1.0": "bafybeicthrgfdv6q56htrsradow445smojjk2zqqizm4cdxyfxfor22vyy",
"skill/valory/task_execution/0.1.0": "bafybeieercgbjemdjiovecetxadurwil26cs2swleupmbgc4py2rg6e2kq",
"contract/valory/agent_registry/0.1.0": "bafybeiargayav6yiztdnwzejoejstcx4idssch2h4f5arlgtzj3tgsgfmu",
"protocol/valory/websocket_client/0.1.0": "bafybeih43mnztdv3v2hetr2k3gezg7d3yj4ur7cxdvcyaqhg65e52s5sf4",
"skill/valory/websocket_client/0.1.0": "bafybeidwntmkk4b2ixq5454ycbkknclqx7a6vpn7aqpm2nw3duszqrxvta",
2 changes: 1 addition & 1 deletion packages/valory/agents/mech/aea-config.yaml
@@ -42,7 +42,7 @@ skills:
- valory/registration_abci:0.1.0:bafybeic2ynseiak7jpta7jfwuqwyp453b4p7lolr4wihxmpn633uekv5am
- valory/reset_pause_abci:0.1.0:bafybeidzajbe3erygeh2xbd6lrjv7nsptznjuzrt24ykgvhgotdeyhfnba
- valory/subscription_abci:0.1.0:bafybeigaxq7m2dqv2huhg5jvb4jx3rysqwvvjj2xhojow3t3zzuwq2k4ie
-- valory/task_execution:0.1.0:bafybeicthrgfdv6q56htrsradow445smojjk2zqqizm4cdxyfxfor22vyy
+- valory/task_execution:0.1.0:bafybeieercgbjemdjiovecetxadurwil26cs2swleupmbgc4py2rg6e2kq
- valory/task_submission_abci:0.1.0:bafybeib4m2bwgchloqss3wotsx4rz7qqkwydaesiqkls2zq7zbtp6jtpsi
- valory/termination_abci:0.1.0:bafybeie4zvjfxvdu7qrulmur3chpjz3kpj5m4bjsxvpk4gvj5zbyyayfaa
- valory/transaction_settlement_abci:0.1.0:bafybeiaefgqbs7zsn5xe5kdwrujj7ivygkn3ujpw6crnvi3knvxw75qmja
2 changes: 1 addition & 1 deletion packages/valory/services/mech/service.yaml
@@ -7,7 +7,7 @@ license: Apache-2.0
fingerprint:
README.md: bafybeif7ia4jdlazy6745ke2k2x5yoqlwsgwr6sbztbgqtwvs3ndm2p7ba
fingerprint_ignore_patterns: []
-agent: valory/mech:0.1.0:bafybeiepdstu23qid2pcrugkpwtibh7jhlshmnmclwwbgjmefio27cp4im
+agent: valory/mech:0.1.0:bafybeibwlyxqtitnfgmt2liuygv75pydlvih23e2ilwpr6xlffac5flyse
number_of_agents: 4
deployment:
agent:
2 changes: 1 addition & 1 deletion packages/valory/skills/task_execution/skill.yaml
@@ -12,7 +12,7 @@ fingerprint:
handlers.py: bafybeidbt5ezj74cgfogk3w4uw4si2grlnk5g54veyumw7g5yh6gdscywu
models.py: bafybeihgclxctyltuehj2f4fzj26edptqugrrm4phd6ovuulezrqot6qo4
utils/__init__.py: bafybeiccdijaigu6e5p2iruwo5mkk224o7ywedc7nr6xeu5fpmhjqgk24e
-utils/benchmarks.py: bafybeihdutp44ds4cupszbd34gsmcw6fsdda2tzkh5b27fpg65ejbpdvdm
+utils/benchmarks.py: bafybeibdwt4svz24ahok4x4h2rpeotlmlmvifccd27oizsz5bjwj6dqree
utils/ipfs.py: bafybeidinbdqkidix44ibz5hug7inkcbijooag53gr5mtbaa72tk335uqq
utils/task.py: bafybeieuziu7owtk543z3umgmayhjh67klftk7vrhz24l6rlaii5lvkqh4
fingerprint_ignore_patterns: []
32 changes: 8 additions & 24 deletions packages/valory/skills/task_execution/utils/benchmarks.py
@@ -19,30 +19,12 @@
"""Benchmarking for tools."""

import logging
-from typing import Any, Dict, Union
-
-import anthropic
-import tiktoken
-from tiktoken import Encoding
+from typing import Any, Callable, Dict, Union


PRICE_NUM_TOKENS = 1000


-def encoding_for_model(model: str) -> Encoding:
-    """Get the encoding for a model."""
-    return tiktoken.encoding_for_model(model)
-
-
-def count_tokens(text: str, model: str) -> int:
-    """Count the number of tokens in a text."""
-    if "claude" in model:
-        return anthropic.Anthropic().count_tokens(text)
-
-    enc = encoding_for_model(model)
-    return len(enc.encode(text))


class TokenCounterCallback:
    """Callback to count the number of tokens used in a generation."""

@@ -73,13 +55,15 @@ def token_to_cost(tokens: int, model: str, tokens_type: str) -> float:
            * TokenCounterCallback.TOKEN_PRICES[model][tokens_type]
        )

-    def calculate_cost(self, tokens_type: str, model: str, **kwargs: Any) -> None:
+    def calculate_cost(
+        self, tokens_type: str, model: str, token_counter: Callable, **kwargs: Any
+    ) -> None:
        """Calculate the cost of a generation."""
        # Check whether a prompt or a token count was passed in
        prompt_key = f"{tokens_type}_prompt"
        token_key = f"{tokens_type}_tokens"
        if prompt_key in kwargs:
-            tokens = count_tokens(kwargs[prompt_key], model)
+            tokens = token_counter(kwargs[prompt_key], model)
        elif token_key in kwargs:
            tokens = kwargs[token_key]
        else:
@@ -88,13 +72,13 @@ def calculate_cost(self, tokens_type: str, model: str, **kwargs: Any) -> None:
        self.cost_dict[token_key] += tokens
        self.cost_dict[f"{tokens_type}_cost"] += cost

-    def __call__(self, model: str, **kwargs: Any) -> None:
+    def __call__(self, model: str, token_counter: Callable, **kwargs: Any) -> None:
        """Callback to count the number of tokens used in a generation."""
        if model not in list(TokenCounterCallback.TOKEN_PRICES.keys()):
            raise ValueError(f"Model {model} not supported.")
        try:
-            self.calculate_cost("input", model, **kwargs)
-            self.calculate_cost("output", model, **kwargs)
+            self.calculate_cost("input", model, token_counter, **kwargs)
+            self.calculate_cost("output", model, token_counter, **kwargs)
            self.cost_dict["total_tokens"] = (
                self.cost_dict["input_tokens"] + self.cost_dict["output_tokens"]
            )
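With this change, benchmarks.py no longer imports tiktoken or anthropic itself: each tool injects its own counting function. A minimal sketch of the resulting wiring, assuming a TokenCounterCallback instance supplied by the calling skill; the helper names, prompt strings, and model name are illustrative, not taken from this diff:

from typing import Callable

from tiktoken import encoding_for_model


def count_tokens(text: str, model: str) -> int:
    """Per-tool counter, as the OpenAI-based tools below define it."""
    enc = encoding_for_model(model)
    return len(enc.encode(text))


def record_usage(
    counter_callback: Callable, model: str, prompt: str, completion: str
) -> None:
    """Inject the tool's counter; calculate_cost tokenizes *_prompt kwargs with it."""
    counter_callback(
        model=model,  # must be a key in TokenCounterCallback.TOKEN_PRICES
        token_counter=count_tokens,
        input_prompt=prompt,
        output_prompt=completion,
    )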
8 changes: 7 additions & 1 deletion tools/native_transfer_request/native_transfer_request.py
@@ -25,7 +25,7 @@
from typing import Any, Dict, Optional, Tuple, cast

from openai import OpenAI
-
+from tiktoken import encoding_for_model

client: Optional[OpenAI] = None

@@ -47,6 +47,12 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
        client = None


+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))


ENGINE = "gpt-3.5-turbo"
MAX_TOKENS = 500
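A quick usage sketch for the helper above; the prompt string and the 4096-token context window are illustrative assumptions, not values from this tool:

prompt = "Transfer 0.01 xDAI to the given address."
# The prompt plus the completion budget must fit the model's context window.
if count_tokens(prompt, ENGINE) + MAX_TOKENS > 4096:
    raise ValueError("Prompt too long for the configured completion budget.")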
7 changes: 7 additions & 0 deletions tools/openai_request/openai_request.py
@@ -21,6 +21,7 @@
from typing import Any, Dict, Optional, Tuple

from openai import OpenAI
+from tiktoken import encoding_for_model

client: Optional[OpenAI] = None

@@ -42,6 +43,12 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
        client = None


+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))


DEFAULT_OPENAI_SETTINGS = {
    "max_tokens": 500,
    "temperature": 0.7,
7 changes: 6 additions & 1 deletion tools/optimization_by_prompting/optimization_by_prompting.py
@@ -37,7 +37,7 @@
from langchain.llms import OpenAI as OpenAILLM
from langchain.prompts import PromptTemplate
from sklearn.metrics import roc_auc_score
-
+from tiktoken import encoding_for_model

client: Optional[OpenAI] = None

@@ -58,6 +58,11 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
        client.close()
        client = None

+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))


# Provide several examples in order to backtest the resulted prompt
EXAMPLES = """query;event
8 changes: 8 additions & 0 deletions tools/prediction_request/prediction_request.py
@@ -37,6 +37,7 @@
from spacy.cli import download
from spacy.lang.en import STOP_WORDS
from spacy.tokens import Doc, Span
+from tiktoken import encoding_for_model

client: Optional[OpenAI] = None

@@ -58,6 +59,11 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
        client.close()
        client = None

+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))


FrequenciesType = Dict[str, float]
ScoresType = Dict[Span, float]
@@ -292,6 +298,7 @@ def fetch_additional_information(
            input_tokens=response.usage.prompt_tokens,
            output_tokens=response.usage.completion_tokens,
            model=engine,
+            token_counter=count_tokens,
        )
        return "\n".join(["- " + text for text in texts]), counter_callback
    return "\n".join(["- " + text for text in texts]), None
@@ -421,6 +428,7 @@ def run(**kwargs) -> Tuple[Optional[str], Optional[Dict[str, Any]], Any]:
            input_tokens=response.usage.prompt_tokens,
            output_tokens=response.usage.completion_tokens,
            model=engine,
+            token_counter=count_tokens,
        )
        return response.choices[0].message.content, prediction_prompt, counter_callback
    return response.choices[0].message.content, prediction_prompt, None
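Once run() returns, the accumulated usage can be read back from the callback. A sketch assuming the cost_dict keys maintained in benchmarks.py above, with kwargs as the mech skill would normally supply them:

result, prediction_prompt, counter_callback = run(**kwargs)
if counter_callback is not None:
    # Keys updated by TokenCounterCallback.calculate_cost and __call__.
    print(counter_callback.cost_dict["input_tokens"])
    print(counter_callback.cost_dict["output_tokens"])
    print(counter_callback.cost_dict["total_tokens"])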
7 changes: 7 additions & 0 deletions tools/prediction_request_claude/prediction_request_claude.py
@@ -25,6 +25,7 @@
from typing import Any, Dict, List, Optional, Tuple, Iterator, Callable
from itertools import islice

+import anthropic
import requests
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
from bs4 import BeautifulSoup
@@ -125,6 +126,10 @@
STOP_SEQUENCES = ["```"]


+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    return anthropic.Anthropic().count_tokens(text)
+
def search_google(query: str, api_key: str, engine: str, num: int = 3) -> List[str]:
    service = build("customsearch", "v1", developerKey=api_key)
    search = (
@@ -248,6 +253,7 @@ def fetch_additional_information(
            model=engine,
            input_prompt=url_query_prompt,
            output_tokens=40,
+            token_counter=count_tokens,
        )
        return "\n".join(["- " + text for text in texts]), counter_callback
    return "\n".join(["- " + text for text in texts]), None
@@ -300,6 +306,7 @@ def run(**kwargs) -> Tuple[str, Optional[str], Optional[Dict[str, Any]], Any]:
            model=engine,
            input_prompt=prediction_prompt,
            output_prompt=completion.completion,
+            token_counter=count_tokens,
        )
        return completion.completion, prediction_prompt, counter_callback

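Note that this Claude counter ignores its model argument; the parameter exists only so the tool matches the (text, model) signature that TokenCounterCallback invokes as token_counter(kwargs[prompt_key], model). A minimal illustration, with a hypothetical prompt and model name:

# What the callback effectively does with the injected counter when an
# "input_prompt" kwarg is supplied:
kwargs = {"input_prompt": "Will the event happen by March?"}
tokens = count_tokens(kwargs["input_prompt"], "claude-2")  # model arg unused here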
(additional changed file; filename not shown in this capture)
@@ -38,6 +38,7 @@
import tiktoken

from dateutil import parser
+from tiktoken import encoding_for_model

client: Optional[OpenAI] = None

@@ -59,6 +60,11 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
        client.close()
        client = None

+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))



NUM_URLS_EXTRACT = 5
10 changes: 9 additions & 1 deletion tools/prediction_request_sme/prediction_request_sme.py
@@ -30,7 +30,7 @@
import requests
from bs4 import BeautifulSoup
from googleapiclient.discovery import build
-
+from tiktoken import encoding_for_model

client: Optional[OpenAI] = None

@@ -53,6 +53,11 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
        client = None


+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))


NUM_URLS_EXTRACT = 5
DEFAULT_NUM_WORDS: Dict[str, Optional[int]] = defaultdict(lambda: 300)
@@ -310,6 +315,7 @@ def fetch_additional_information(
            input_tokens=response.usage.prompt_tokens,
            output_tokens=response.usage.completion_tokens,
            model=engine,
+            token_counter=count_tokens,
        )
        return "\n".join(["- " + text for text in texts]), counter_callback
    return "\n".join(["- " + text for text in texts]), None
@@ -343,6 +349,7 @@ def get_sme_role(
            output_tokens=response.usage.completion_tokens,
            total_tokens=response.usage.total_tokens,
            model=engine,
+            token_counter=count_tokens,
        )
        return sme["sme"], sme["sme_introduction"], counter_callback
    return sme["sme"], sme["sme_introduction"], None
@@ -424,6 +431,7 @@ def run(**kwargs) -> Tuple[str, Optional[str], Optional[Dict[str, Any]], Any]:
            input_tokens=response.usage.prompt_tokens,
            output_tokens=response.usage.completion_tokens,
            model=engine,
+            token_counter=count_tokens,
        )
        return response.choices[0].message.content, prediction_prompt, counter_callback
    return response.choices[0].message.content, prediction_prompt, None
(additional changed file; filename not shown in this capture)
@@ -36,6 +36,7 @@
import traceback

from dateutil import parser
+from tiktoken import encoding_for_model
from tqdm import tqdm
from sentence_transformers import SentenceTransformer, util

@@ -61,6 +62,11 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
        client = None


+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))


NUM_URLS_EXTRACT = 5
MAX_TOTAL_TOKENS_CHAT_COMPLETION = 4096 # Set the limit for cost efficiency
6 changes: 6 additions & 0 deletions tools/sme_generation_request/sme_generation_request.py
@@ -3,6 +3,7 @@
from typing import Any, Dict, Generator, List, Optional, Tuple

from openai import OpenAI
+from tiktoken import encoding_for_model

client: Optional[OpenAI] = None

@@ -24,6 +25,11 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
        client.close()
        client = None

+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))


DEFAULT_OPENAI_SETTINGS = {
    "max_tokens": 500,
8 changes: 7 additions & 1 deletion tools/stability_ai_request/stabilityai_request.py
@@ -24,7 +24,7 @@
from typing import Any, Dict, Optional, Tuple

import requests
-
+from tiktoken import encoding_for_model

DEFAULT_STABILITYAI_SETTINGS = {
"cfg_scale": 7,
@@ -54,6 +54,12 @@
ALLOWED_TOOLS = [PREFIX + value for value in ENGINES["picture"]]


+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))


class FinishReason(Enum):
    """The finish reasons of the API."""

