From db3144982aa26b87a9bdfb692b4fbedfdf8a14d5 Mon Sep 17 00:00:00 2001 From: Teo Musatoiu <156829031+teomusatoiu@users.noreply.github.com> Date: Tue, 8 Oct 2024 10:22:11 +0100 Subject: [PATCH] update for 4o-mini (#1421) --- .../How_to_count_tokens_with_tiktoken.ipynb | 316 ++++++++++++++---- 1 file changed, 259 insertions(+), 57 deletions(-) diff --git a/examples/How_to_count_tokens_with_tiktoken.ipynb b/examples/How_to_count_tokens_with_tiktoken.ipynb index 362b26705c..19ce768921 100644 --- a/examples/How_to_count_tokens_with_tiktoken.ipynb +++ b/examples/How_to_count_tokens_with_tiktoken.ipynb @@ -22,20 +22,21 @@ "\n", "| Encoding name | OpenAI models |\n", "|-------------------------|-----------------------------------------------------|\n", - "| `cl100k_base` | `gpt-4`, `gpt-3.5-turbo`, `text-embedding-ada-002`, `text-embedding-3-small`, `text-embedding-3-large` |\n", + "| `o200k_base` | `gpt-4o`, `gpt-4o-mini` |\n", + "| `cl100k_base` | `gpt-4-turbo`, `gpt-4`, `gpt-3.5-turbo`, `text-embedding-ada-002`, `text-embedding-3-small`, `text-embedding-3-large` |\n", "| `p50k_base` | Codex models, `text-davinci-002`, `text-davinci-003`|\n", "| `r50k_base` (or `gpt2`) | GPT-3 models like `davinci` |\n", "\n", "You can retrieve the encoding for a model using `tiktoken.encoding_for_model()` as follows:\n", "```python\n", - "encoding = tiktoken.encoding_for_model('gpt-3.5-turbo')\n", + "encoding = tiktoken.encoding_for_model('gpt-4o-mini')\n", "```\n", "\n", "Note that `p50k_base` overlaps substantially with `r50k_base`, and for non-code applications, they will usually give the same tokens.\n", "\n", "## Tokenizer libraries by language\n", "\n", - "For `cl100k_base` and `p50k_base` encodings:\n", + "For `o200k_base`, `cl100k_base` and `p50k_base` encodings:\n", "- Python: [tiktoken](https://github.com/openai/tiktoken/blob/main/README.md)\n", "- .NET / C#: [SharpToken](https://github.com/dmitry-brazhenko/SharpToken), [TiktokenSharp](https://github.com/aiqinxuancai/TiktokenSharp)\n", "- Java: [jtokkit](https://github.com/knuddelsgmbh/jtokkit)\n", @@ -71,12 +72,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], "source": [ - "%pip install --upgrade tiktoken\n", - "%pip install --upgrade openai" + "%pip install --upgrade tiktoken -q\n", + "%pip install --upgrade openai -q" ] }, { @@ -89,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -131,7 +147,7 @@ 
"metadata": {}, "outputs": [], "source": [ - "encoding = tiktoken.encoding_for_model(\"gpt-3.5-turbo\")" + "encoding = tiktoken.encoding_for_model(\"gpt-4o-mini\")" ] }, { @@ -159,7 +175,7 @@ { "data": { "text/plain": [ - "[83, 1609, 5963, 374, 2294, 0]" + "[83, 8251, 2488, 382, 2212, 0]" ] }, "execution_count": 5, @@ -168,7 +184,7 @@ } ], "source": [ - "encoding.encode(\"tiktoken is great!\")\n" + "encoding.encode(\"tiktoken is great!\")" ] }, { @@ -189,7 +205,7 @@ " \"\"\"Returns the number of tokens in a text string.\"\"\"\n", " encoding = tiktoken.get_encoding(encoding_name)\n", " num_tokens = len(encoding.encode(string))\n", - " return num_tokens\n" + " return num_tokens" ] }, { @@ -209,7 +225,7 @@ } ], "source": [ - "num_tokens_from_string(\"tiktoken is great!\", \"cl100k_base\")\n" + "num_tokens_from_string(\"tiktoken is great!\", \"o200k_base\")" ] }, { @@ -245,7 +261,7 @@ } ], "source": [ - "encoding.decode([83, 1609, 5963, 374, 2294, 0])\n" + "encoding.decode([83, 8251, 2488, 382, 2212, 0])" ] }, { @@ -272,7 +288,7 @@ { "data": { "text/plain": [ - "[b't', b'ik', b'token', b' is', b' great', b'!']" + "[b't', b'ikt', b'oken', b' is', b' great', b'!']" ] }, "execution_count": 9, @@ -281,7 +297,7 @@ } ], "source": [ - "[encoding.decode_single_token_bytes(token) for token in [83, 1609, 5963, 374, 2294, 0]]\n" + "[encoding.decode_single_token_bytes(token) for token in [83, 8251, 2488, 382, 2212, 0]]\n" ] }, { @@ -313,7 +329,7 @@ " # print the example string\n", " print(f'\\nExample string: \"{example_string}\"')\n", " # for each encoding, print the # of tokens, the token integers, and the token bytes\n", - " for encoding_name in [\"r50k_base\", \"p50k_base\", \"cl100k_base\"]:\n", + " for encoding_name in [\"r50k_base\", \"p50k_base\", \"cl100k_base\", \"o200k_base\"]:\n", " encoding = tiktoken.get_encoding(encoding_name)\n", " token_integers = encoding.encode(example_string)\n", " num_tokens = len(token_integers)\n", @@ -321,8 +337,7 @@ " print()\n", " print(f\"{encoding_name}: {num_tokens} tokens\")\n", " print(f\"token integers: {token_integers}\")\n", - " print(f\"token bytes: {token_bytes}\")\n", - " " + " print(f\"token bytes: {token_bytes}\")" ] }, { @@ -347,12 +362,16 @@ "\n", "cl100k_base: 6 tokens\n", "token integers: [519, 85342, 34500, 479, 8997, 2191]\n", - "token bytes: [b'ant', b'idis', b'establish', b'ment', b'arian', b'ism']\n" + "token bytes: [b'ant', b'idis', b'establish', b'ment', b'arian', b'ism']\n", + "\n", + "o200k_base: 6 tokens\n", + "token integers: [493, 129901, 376, 160388, 21203, 2367]\n", + "token bytes: [b'ant', b'idis', b'est', b'ablishment', b'arian', b'ism']\n" ] } ], "source": [ - "compare_encodings(\"antidisestablishmentarianism\")\n" + "compare_encodings(\"antidisestablishmentarianism\")" ] }, { @@ -377,12 +396,16 @@ "\n", "cl100k_base: 7 tokens\n", "token integers: [17, 489, 220, 17, 284, 220, 19]\n", + "token bytes: [b'2', b' +', b' ', b'2', b' =', b' ', b'4']\n", + "\n", + "o200k_base: 7 tokens\n", + "token integers: [17, 659, 220, 17, 314, 220, 19]\n", "token bytes: [b'2', b' +', b' ', b'2', b' =', b' ', b'4']\n" ] } ], "source": [ - "compare_encodings(\"2 + 2 = 4\")\n" + "compare_encodings(\"2 + 2 = 4\")" ] }, { @@ -407,12 +430,16 @@ "\n", "cl100k_base: 9 tokens\n", "token integers: [33334, 45918, 243, 21990, 9080, 33334, 62004, 16556, 78699]\n", - "token bytes: [b'\\xe3\\x81\\x8a', b'\\xe8\\xaa', b'\\x95', b'\\xe7\\x94\\x9f', b'\\xe6\\x97\\xa5', b'\\xe3\\x81\\x8a', b'\\xe3\\x82\\x81', b'\\xe3\\x81\\xa7', b'\\xe3\\x81\\xa8\\xe3\\x81\\x86']\n" 
+ "token bytes: [b'\\xe3\\x81\\x8a', b'\\xe8\\xaa', b'\\x95', b'\\xe7\\x94\\x9f', b'\\xe6\\x97\\xa5', b'\\xe3\\x81\\x8a', b'\\xe3\\x82\\x81', b'\\xe3\\x81\\xa7', b'\\xe3\\x81\\xa8\\xe3\\x81\\x86']\n", + "\n", + "o200k_base: 8 tokens\n", + "token integers: [8930, 9697, 243, 128225, 8930, 17693, 4344, 48669]\n", + "token bytes: [b'\\xe3\\x81\\x8a', b'\\xe8\\xaa', b'\\x95', b'\\xe7\\x94\\x9f\\xe6\\x97\\xa5', b'\\xe3\\x81\\x8a', b'\\xe3\\x82\\x81', b'\\xe3\\x81\\xa7', b'\\xe3\\x81\\xa8\\xe3\\x81\\x86']\n" ] } ], "source": [ - "compare_encodings(\"お誕生日おめでとう\")\n" + "compare_encodings(\"お誕生日おめでとう\")" ] }, { @@ -422,9 +449,9 @@ "source": [ "## 6. Counting tokens for chat completions API calls\n", "\n", - "ChatGPT models like `gpt-3.5-turbo` and `gpt-4` use tokens in the same way as older completions models, but because of their message-based formatting, it's more difficult to count how many tokens will be used by a conversation.\n", + "ChatGPT models like `gpt-4o-mini` and `gpt-4` use tokens in the same way as older completions models, but because of their message-based formatting, it's more difficult to count how many tokens will be used by a conversation.\n", "\n", - "Below is an example function for counting tokens for messages passed to `gpt-3.5-turbo` or `gpt-4`.\n", + "Below is an example function for counting tokens for messages passed to `gpt-3.5-turbo`, `gpt-4`, `gpt-4o` and `gpt-4o-mini`.\n", "\n", "Note that the exact way that tokens are counted from messages may change from model to model. Consider the counts from the function below an estimate, not a timeless guarantee.\n", "\n", @@ -433,33 +460,37 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ - "def num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0613\"):\n", + "def num_tokens_from_messages(messages, model=\"gpt-4o-mini-2024-07-18\"):\n", " \"\"\"Return the number of tokens used by a list of messages.\"\"\"\n", " try:\n", " encoding = tiktoken.encoding_for_model(model)\n", " except KeyError:\n", - " print(\"Warning: model not found. Using cl100k_base encoding.\")\n", - " encoding = tiktoken.get_encoding(\"cl100k_base\")\n", + " print(\"Warning: model not found. Using o200k_base encoding.\")\n", + " encoding = tiktoken.get_encoding(\"o200k_base\")\n", " if model in {\n", - " \"gpt-3.5-turbo-0613\",\n", - " \"gpt-3.5-turbo-16k-0613\",\n", + " \"gpt-3.5-turbo-0125\",\n", " \"gpt-4-0314\",\n", " \"gpt-4-32k-0314\",\n", " \"gpt-4-0613\",\n", " \"gpt-4-32k-0613\",\n", + " \"gpt-4o-mini-2024-07-18\",\n", + " \"gpt-4o-2024-08-06\"\n", " }:\n", " tokens_per_message = 3\n", " tokens_per_name = 1\n", - " elif model == \"gpt-3.5-turbo-0301\":\n", - " tokens_per_message = 4 # every message follows <|start|>{role/name}\\n{content}<|end|>\\n\n", - " tokens_per_name = -1 # if there's a name, the role is omitted\n", " elif \"gpt-3.5-turbo\" in model:\n", - " print(\"Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.\")\n", - " return num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0613\")\n", + " print(\"Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0125.\")\n", + " return num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0125\")\n", + " elif \"gpt-4o-mini\" in model:\n", + " print(\"Warning: gpt-4o-mini may update over time. 
Returning num tokens assuming gpt-4o-mini-2024-07-18.\")\n", + " return num_tokens_from_messages(messages, model=\"gpt-4o-mini-2024-07-18\")\n", + " elif \"gpt-4o\" in model:\n", + " print(\"Warning: gpt-4o and gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-2024-08-06.\")\n", + " return num_tokens_from_messages(messages, model=\"gpt-4o-2024-08-06\")\n", " elif \"gpt-4\" in model:\n", " print(\"Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.\")\n", " return num_tokens_from_messages(messages, model=\"gpt-4-0613\")\n", @@ -480,27 +511,15 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "gpt-3.5-turbo-0301\n", - "127 prompt tokens counted by num_tokens_from_messages().\n", - "127 prompt tokens counted by the OpenAI API.\n", - "\n", - "gpt-3.5-turbo-0613\n", - "129 prompt tokens counted by num_tokens_from_messages().\n", - "129 prompt tokens counted by the OpenAI API.\n", - "\n", "gpt-3.5-turbo\n", - "Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.\n", - "129 prompt tokens counted by num_tokens_from_messages().\n", - "129 prompt tokens counted by the OpenAI API.\n", - "\n", - "gpt-4-0314\n", + "Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0125.\n", "129 prompt tokens counted by num_tokens_from_messages().\n", "129 prompt tokens counted by the OpenAI API.\n", "\n", @@ -512,6 +531,16 @@ "Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.\n", "129 prompt tokens counted by num_tokens_from_messages().\n", "129 prompt tokens counted by the OpenAI API.\n", + "\n", + "gpt-4o\n", + "Warning: gpt-4o and gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-2024-08-06.\n", + "124 prompt tokens counted by num_tokens_from_messages().\n", + "124 prompt tokens counted by the OpenAI API.\n", + "\n", + "gpt-4o-mini\n", + "Warning: gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-mini-2024-07-18.\n", + "124 prompt tokens counted by num_tokens_from_messages().\n", + "124 prompt tokens counted by the OpenAI API.\n", "\n" ] } @@ -556,12 +585,11 @@ "]\n", "\n", "for model in [\n", - " \"gpt-3.5-turbo-0301\",\n", - " \"gpt-3.5-turbo-0613\",\n", " \"gpt-3.5-turbo\",\n", - " \"gpt-4-0314\",\n", " \"gpt-4-0613\",\n", " \"gpt-4\",\n", + " \"gpt-4o\",\n", + " \"gpt-4o-mini\"\n", " ]:\n", " print(model)\n", " # example token count from the function defined above\n", @@ -575,12 +603,186 @@ " print()\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. Counting tokens for chat completions with tool calls\n", + "\n", + "Next, we will look at how to apply these calculations to messages that may contain function calls. This is not immediately trivial due to the formatting of the tools themselves.\n", + "\n", + "Below is an example function for counting tokens for messages that contain tools, passed to `gpt-3.5-turbo`, `gpt-4`, `gpt-4o` and `gpt-4o-mini`."
+ ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "def num_tokens_for_tools(functions, messages, model):\n", + " \n", + " # Initialize function settings to 0\n", + " func_init = 0\n", + " prop_init = 0\n", + " prop_key = 0\n", + " enum_init = 0\n", + " enum_item = 0\n", + " func_end = 0\n", + " \n", + " if model in [\n", + " \"gpt-4o\",\n", + " \"gpt-4o-mini\"\n", + " ]:\n", + " \n", + " # Set function settings for the above models\n", + " func_init = 7\n", + " prop_init = 3\n", + " prop_key = 3\n", + " enum_init = -3\n", + " enum_item = 3\n", + " func_end = 12\n", + " elif model in [\n", + " \"gpt-3.5-turbo\",\n", + " \"gpt-4\"\n", + " ]:\n", + " # Set function settings for the above models\n", + " func_init = 10\n", + " prop_init = 3\n", + " prop_key = 3\n", + " enum_init = -3\n", + " enum_item = 3\n", + " func_end = 12\n", + " else:\n", + " raise NotImplementedError(\n", + " f\"\"\"num_tokens_for_tools() is not implemented for model {model}.\"\"\"\n", + " )\n", + " \n", + " try:\n", + " encoding = tiktoken.encoding_for_model(model)\n", + " except KeyError:\n", + " print(\"Warning: model not found. Using o200k_base encoding.\")\n", + " encoding = tiktoken.get_encoding(\"o200k_base\")\n", + " \n", + " func_token_count = 0\n", + " if len(functions) > 0:\n", + " for f in functions:\n", + " func_token_count += func_init # Add tokens for start of each function\n", + " function = f[\"function\"]\n", + " f_name = function[\"name\"]\n", + " f_desc = function[\"description\"]\n", + " if f_desc.endswith(\".\"):\n", + " f_desc = f_desc[:-1]\n", + " line = f_name + \":\" + f_desc\n", + " func_token_count += len(encoding.encode(line)) # Add tokens for set name and description\n", + " if len(function[\"parameters\"][\"properties\"]) > 0:\n", + " func_token_count += prop_init # Add tokens for start of each property\n", + " for key in list(function[\"parameters\"][\"properties\"].keys()):\n", + " func_token_count += prop_key # Add tokens for each set property\n", + " p_name = key\n", + " p_type = function[\"parameters\"][\"properties\"][key][\"type\"]\n", + " p_desc = function[\"parameters\"][\"properties\"][key][\"description\"]\n", + " if \"enum\" in function[\"parameters\"][\"properties\"][key].keys():\n", + " func_token_count += enum_init # Add tokens if property has enum list\n", + " for item in function[\"parameters\"][\"properties\"][key][\"enum\"]:\n", + " func_token_count += enum_item\n", + " func_token_count += len(encoding.encode(item))\n", + " if p_desc.endswith(\".\"):\n", + " p_desc = p_desc[:-1]\n", + " line = f\"{p_name}:{p_type}:{p_desc}\"\n", + " func_token_count += len(encoding.encode(line))\n", + " func_token_count += func_end\n", + " \n", + " messages_token_count = num_tokens_from_messages(messages, model)\n", + " total_tokens = messages_token_count + func_token_count\n", + " \n", + " return total_tokens" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "gpt-3.5-turbo\n", + "Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0125.\n", + "105 prompt tokens counted by num_tokens_for_tools().\n", + "105 prompt tokens counted by the OpenAI API.\n", + "\n", + "gpt-4\n", + "Warning: gpt-4 may update over time. 
Returning num tokens assuming gpt-4-0613.\n", + "105 prompt tokens counted by num_tokens_for_tools().\n", + "105 prompt tokens counted by the OpenAI API.\n", + "\n", + "gpt-4o\n", + "Warning: gpt-4o and gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-2024-08-06.\n", + "101 prompt tokens counted by num_tokens_for_tools().\n", + "101 prompt tokens counted by the OpenAI API.\n", + "\n", + "gpt-4o-mini\n", + "Warning: gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-mini-2024-07-18.\n", + "101 prompt tokens counted by num_tokens_for_tools().\n", + "101 prompt tokens counted by the OpenAI API.\n", + "\n" + ] + } + ], + "source": [ + "tools = [\n", + " {\n", + " \"type\": \"function\",\n", + " \"function\": {\n", + " \"name\": \"get_current_weather\",\n", + " \"description\": \"Get the current weather in a given location\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"location\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The city and state, e.g. San Francisco, CA\",\n", + " },\n", + " \"unit\": {\"type\": \"string\", \n", + " \"description\": \"The unit of temperature to return\",\n", + " \"enum\": [\"celsius\", \"fahrenheit\"]},\n", + " },\n", + " \"required\": [\"location\"],\n", + " },\n", + " }\n", + " }\n", + "]\n", + "\n", + "example_messages = [\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": \"You are a helpful assistant that can answer to questions about the weather.\",\n", + " },\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"What's the weather like in San Francisco?\",\n", + " },\n", + "]\n", + "\n", + "for model in [\n", + " \"gpt-3.5-turbo\",\n", + " \"gpt-4\",\n", + " \"gpt-4o\",\n", + " \"gpt-4o-mini\"\n", + " ]:\n", + " print(model)\n", + " # example token count from the function defined above\n", + " print(f\"{num_tokens_for_tools(tools, example_messages, model)} prompt tokens counted by num_tokens_for_tools().\")\n", + " # example token count from the OpenAI API\n", + " response = client.chat.completions.create(model=model,\n", + " messages=example_messages,\n", + " tools=tools,\n", + " temperature=0)\n", + " print(f'{response.usage.prompt_tokens} prompt tokens counted by the OpenAI API.')\n", + " print()" + ] } ], "metadata": { @@ -599,7 +801,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.7" }, "vscode": { "interpreter": {
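
The central change in this patch is the mapping of `gpt-4o` and `gpt-4o-mini` to the new `o200k_base` encoding. That mapping can be confirmed outside the notebook with tiktoken's public API. A minimal sketch, assuming tiktoken >= 0.7.0 (the first release that ships `o200k_base`):

```python
import tiktoken

# The patch documents gpt-4o and gpt-4o-mini as o200k_base models;
# encoding_for_model() resolves the mapping directly.
for model in ("gpt-4o", "gpt-4o-mini", "gpt-4", "gpt-3.5-turbo"):
    encoding = tiktoken.encoding_for_model(model)
    print(f"{model} -> {encoding.name}")
# Expected: o200k_base, o200k_base, cl100k_base, cl100k_base

# The same text generally maps to different token IDs (and often different
# counts) across encodings, which is why the notebook re-runs its examples.
text = "tiktoken is great!"
for name in ("cl100k_base", "o200k_base"):
    enc = tiktoken.get_encoding(name)
    print(f"{name}: {len(enc.encode(text))} tokens -> {enc.encode(text)}")
```

For `"tiktoken is great!"` both encodings happen to produce 6 tokens, matching the notebook's output, but the token integers differ (`[83, 1609, 5963, 374, 2294, 0]` under `cl100k_base` versus `[83, 8251, 2488, 382, 2212, 0]` under `o200k_base`).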
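
The constants inside the patched `num_tokens_from_messages()` can also be cross-checked by hand, with no API call: each message contributes `tokens_per_message` (3) plus the encoded `role` and `content` values, and every reply is primed with 3 extra tokens. A minimal sketch of that arithmetic for the `gpt-4o-mini-2024-07-18` accounting (messages with no `name` field, so the `tokens_per_name` adjustment is skipped):

```python
import tiktoken

encoding = tiktoken.encoding_for_model("gpt-4o-mini")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]

# Re-derive the count by hand: 3 tokens priming the assistant's reply,
# then 3 tokens per message plus the encoded role/content strings.
num_tokens = 3  # every reply is primed with <|start|>assistant<|message|>
for message in messages:
    num_tokens += 3  # tokens_per_message
    for value in message.values():
        num_tokens += len(encoding.encode(value))

# Should match num_tokens_from_messages(messages, "gpt-4o-mini-2024-07-18")
print(num_tokens)
```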