From 752d8aa6a376ffdd69d075dd47f47e6d36672258 Mon Sep 17 00:00:00 2001 From: Huanghe Date: Sat, 10 Aug 2024 16:47:47 -0500 Subject: [PATCH] Reset API&bug fixes&benchmark --- README.md | 2 + benchmarks/address.json | 24 ++++++ benchmarks/{json.py => exllamav2_json.py} | 20 ++--- benchmarks/linkedlist.json | 24 ++++++ benchmarks/orders.json | 24 ++++++ benchmarks/result.md | 27 +++++++ benchmarks/utils.py | 49 ++++++++++++ benchmarks/vllm_json.py | 92 ++++++++++++++++++++++ benchmarks/vllm_json_bench.txt | 6 ++ pyproject.toml | 2 +- src/formatron/integrations/exllamav2.py | 3 + src/formatron/integrations/transformers.py | 5 ++ src/formatron/integrations/vllm.py | 9 +++ 13 files changed, 271 insertions(+), 16 deletions(-) create mode 100644 benchmarks/address.json rename benchmarks/{json.py => exllamav2_json.py} (83%) create mode 100644 benchmarks/linkedlist.json create mode 100644 benchmarks/orders.json create mode 100644 benchmarks/result.md create mode 100644 benchmarks/utils.py create mode 100644 benchmarks/vllm_json.py create mode 100644 benchmarks/vllm_json_bench.txt diff --git a/README.md b/README.md index 858b1b32..b36f2aec 100644 --- a/README.md +++ b/README.md @@ -191,6 +191,8 @@ Check out integration examples in the [tests](https://github.com/Dan-wanna-M/for You may also want to check the minimum compatible version in [pyproject.toml](https://github.com/Dan-wanna-M/formatron/blob/master/pyproject.toml). ## API Reference Check out the API reference [here](https://dan-wanna-m.github.io/formatron/). +## Benchmark +Check out the benchmark [here](benchmarks/result.md). 
## What Formatron Won't Do ### Implement an End-to-End Inference Pipeline Every library related to large language models(LLM) must consider that LLMs diff --git a/benchmarks/address.json b/benchmarks/address.json new file mode 100644 index 00000000..a953ac8a --- /dev/null +++ b/benchmarks/address.json @@ -0,0 +1,24 @@ +{ + "sentences": [ + "I work at 1209 Maple Avenue, Boston, Massachusetts, USA, postal code 02139.", + "My friend's shop is located at 2312 Pine Street, San Francisco, California, 94115.", + "The office is at 789 Oak Lane, Los Angeles, California, 90001.", + "We just moved to 4321 Elm Road, Austin, Texas, 73301.", + "My school is at 5678 Birch Street, New York, New York, 10001.", + "The library is at 980 Cedar Avenue, Seattle, Washington, 98101.", + "Our warehouse is at 312 Spruce Drive, Miami, Florida, 33101.", + "The café is on 654 Pine Lane, Chicago, Illinois, 60601.", + "My apartment is at 876 Maple Street, Denver, Colorado, 80201.", + "The headquarters is at 1122 Cedar Boulevard, Dallas, Texas, 75001.", + "I used to live at 3456 Walnut Avenue, San Diego, California, 92101.", + "Their office is at 7890 Ash Street, Portland, Oregon, 97201.", + "The restaurant is located at 234 Oak Avenue, Atlanta, Georgia, 30301.", + "We had a meeting at 1234 Birch Boulevard, Philadelphia, Pennsylvania, 19101.", + "The clinic is at 567 Maple Road, Phoenix, Arizona, 85001.", + "My favorite bookstore is at 678 Oak Drive, Charlotte, North Carolina, 28201.", + "The park is near 910 Cedar Lane, Las Vegas, Nevada, 89101.", + "Our hotel is at 8765 Pine Avenue, Nashville, Tennessee, 37201.", + "The museum is located at 432 Elm Boulevard, San Antonio, Texas, 78201.", + "I attended a conference at 6543 Maple Street, Orlando, Florida, 32801." 
+ ] +} diff --git a/benchmarks/json.py b/benchmarks/exllamav2_json.py similarity index 83% rename from benchmarks/json.py rename to benchmarks/exllamav2_json.py index bad94b45..e01532ba 100644 --- a/benchmarks/json.py +++ b/benchmarks/exllamav2_json.py @@ -2,23 +2,12 @@ from exllamav2 import ExLlamaV2, ExLlamaV2Config, ExLlamaV2Cache, ExLlamaV2Tokenizer from exllamav2.generator import ExLlamaV2DynamicGenerator -from typing import Optional - from formatron.formatter import FormatterBuilder from formatron.grammar_generators.json_generator import JsonGenerator from formatron.integrations.exllamav2 import create_formatter_filter - from formatron.schemas.pydantic import ClassSchema -class Address(ClassSchema): - street: str - city: str - state: Optional[str] = None - postal_code: str - country: str - - def create_exllamav2_6bpw_llama3_8b(): model_dir = "../tests/local_assets/Llama-3-8B-exl2/" config = ExLlamaV2Config(model_dir) @@ -37,15 +26,16 @@ def create_exllamav2_6bpw_llama3_8b(): return generator, exllama_filter -def test_address_exllamav2(): +def address_exllamav2(): prompt = f"""{system_prompt}I live in 5033 Broccoli street, Houston, Texas, the United States with postal\ - code 66004<|eot_id|><|start_header_id|>assistant<|end_header_id|>""" + code 66004<|eot_id|><|start_header_id|>assistant<|end_header_id|> Sure! 
Here is the json: """ output = generator.generate( prompt=prompt, - max_new_tokens=200, + max_new_tokens=100, add_bos=False, filters=[exllama_filter] ) + # exllama_filter.reset() if __name__ == '__main__': @@ -55,4 +45,4 @@ def test_address_exllamav2(): Extract information into json format: """ generator, exllama_filter = create_exllamav2_6bpw_llama3_8b() - print(f"Test_address_exllamav2: {timeit(test_address_exllamav2, number=1000, globals=globals())/1000} seconds") + print(f"address_exllamav2: {timeit(address_exllamav2, number=1000, globals=globals())/1000} seconds") diff --git a/benchmarks/linkedlist.json b/benchmarks/linkedlist.json new file mode 100644 index 00000000..d2b07f9a --- /dev/null +++ b/benchmarks/linkedlist.json @@ -0,0 +1,24 @@ +{ + "sentences": [ + "LinkedList: 2->6->8", + "LinkedList: 5->3->7->1->9", + "LinkedList: 4->2->9", + "LinkedList: 7->1->8->3->5", + "LinkedList: 3->4->6", + "LinkedList: 9->5->2->8->7->1", + "LinkedList: 1->7->3", + "LinkedList: 6->9->1->5", + "LinkedList: 8->2", + "LinkedList: 5->1->3->6->4", + "LinkedList: 4->7->9->2", + "LinkedList: 3->8->1", + "LinkedList: 2->4->7->9", + "LinkedList: 7->5->1->8->6", + "LinkedList: 6->3->4", + "LinkedList: 9->2->5->7->3->1", + "LinkedList: 1->8->6", + "LinkedList: 5->7->4->2", + "LinkedList: 3->6->9", + "LinkedList: 8->4->2->7->5" + ] +} diff --git a/benchmarks/orders.json b/benchmarks/orders.json new file mode 100644 index 00000000..7a88dc66 --- /dev/null +++ b/benchmarks/orders.json @@ -0,0 +1,24 @@ +{ + "orders":[ + "The order with ID 54321 was placed by Jane Smith, a loyal customer with ID 9876. Jane, who resides at 456 Oak Avenue, Metropolis, and can be reached at 555-6789, has accumulated 200 loyalty points and is currently an active customer. The order consists of two items: the first item is a product with ID 201, variant 1, with a quantity of 1 at a price of $49.99; the second item is a product with ID 202, variant 4, with a quantity of 3 at a price of $10.99 each. 
The total amount for the order comes to $82.96. The current status of this order is Shipped.", + "The order with ID 67890 was placed by Robert Brown, a loyal customer with ID 2468. Robert, who resides at 789 Pine Road, Gotham, and can be reached at 555-2468, has accumulated 75 loyalty points and is currently an active customer. The order consists of four items: the first item is a product with ID 301, variant 3, with a quantity of 2 at a price of $25.99 each; the second item is a product with ID 302, variant 6, with a quantity of 1 at a price of $19.99; the third item is a product with ID 303, variant 8, with a quantity of 5 at a price of $5.99 each; the fourth item is a product with ID 304, variant 2, with a quantity of 1 at a price of $35.00. The total amount for the order comes to $159.90. The current status of this order is Delivered.", + "The order with ID 13579 was placed by Emily Davis, a loyal customer with ID 1357. Emily, who resides at 321 Birch Lane, Star City, and can be reached at 555-1357, has accumulated 120 loyalty points and is currently an active customer. The order consists of three items: the first item is a product with ID 401, variant 4, with a quantity of 3 at a price of $15.99 each; the second item is a product with ID 402, variant 7, with a quantity of 2 at a price of $12.50 each; the third item is a product with ID 403, variant 1, with a quantity of 1 at a price of $20.00. The total amount for the order comes to $97.46. The current status of this order is In Transit.", + "The order with ID 24680 was placed by Michael Johnson, a loyal customer with ID 3698. Michael, who resides at 654 Cedar Street, Central City, and can be reached at 555-3698, has accumulated 90 loyalty points and is currently an active customer. The order consists of two items: the first item is a product with ID 501, variant 2, with a quantity of 1 at a price of $39.99; the second item is a product with ID 502, variant 9, with a quantity of 4 at a price of $9.99 each. 
The total amount for the order comes to $79.95. The current status of this order is Processing.", + "The order with ID 98765 was placed by Sarah White, a loyal customer with ID 1470. Sarah, who resides at 789 Willow Way, Coast City, and can be reached at 555-1470, has accumulated 180 loyalty points and is currently an active customer. The order consists of five items: the first item is a product with ID 601, variant 6, with a quantity of 2 at a price of $19.99 each; the second item is a product with ID 602, variant 3, with a quantity of 1 at a price of $22.50; the third item is a product with ID 603, variant 5, with a quantity of 4 at a price of $7.49 each; the fourth item is a product with ID 604, variant 8, with a quantity of 1 at a price of $50.00; the fifth item is a product with ID 605, variant 2, with a quantity of 2 at a price of $14.99 each. The total amount for the order comes to $181.92. The current status of this order is Delivered.", + "The order with ID 11223 was placed by James Clark, a loyal customer with ID 2046. James, who resides at 456 Maple Boulevard, Smallville, and can be reached at 555-2046, has accumulated 95 loyalty points and is currently an active customer. The order consists of two items: the first item is a product with ID 701, variant 7, with a quantity of 1 at a price of $29.99; the second item is a product with ID 702, variant 4, with a quantity of 3 at a price of $8.99 each. The total amount for the order comes to $56.96. The current status of this order is Shipped.", + "The order with ID 33445 was placed by Linda Wilson, a loyal customer with ID 1836. Linda, who resides at 123 Palm Drive, Riverdale, and can be reached at 555-1836, has accumulated 110 loyalty points and is currently an active customer. 
The order consists of four items: the first item is a product with ID 801, variant 1, with a quantity of 2 at a price of $17.99 each; the second item is a product with ID 802, variant 5, with a quantity of 1 at a price of $14.99; the third item is a product with ID 803, variant 3, with a quantity of 4 at a price of $6.49 each; the fourth item is a product with ID 804, variant 2, with a quantity of 1 at a price of $25.00. The total amount for the order comes to $102.93. The current status of this order is In Transit.", + "The order with ID 55667 was placed by Steven Miller, a loyal customer with ID 1624. Steven, who resides at 789 Redwood Court, Blüdhaven, and can be reached at 555-1624, has accumulated 130 loyalty points and is currently an active customer. The order consists of three items: the first item is a product with ID 901, variant 8, with a quantity of 3 at a price of $12.99 each; the second item is a product with ID 902, variant 2, with a quantity of 2 at a price of $16.50 each; the third item is a product with ID 903, variant 7, with a quantity of 1 at a price of $19.99. The total amount for the order comes to $91.45. The current status of this order is Processing.", + "The order with ID 77889 was placed by Patricia Taylor, a loyal customer with ID 1410. Patricia, who resides at 456 Aspen Grove, Fawcett City, and can be reached at 555-1410, has accumulated 140 loyalty points and is currently an active customer. The order consists of two items: the first item is a product with ID 1001, variant 9, with a quantity of 1 at a price of $44.99; the second item is a product with ID 1002, variant 6, with a quantity of 4 at a price of $11.99 each. The total amount for the order comes to $92.95. The current status of this order is Delivered.", + "The order with ID 99001 was placed by Charles Anderson, a loyal customer with ID 1208. 
Charles, who resides at 123 Cherry Circle, Keystone City, and can be reached at 555-1208, has accumulated 65 loyalty points and is currently an active customer. The order consists of four items: the first item is a product with ID 1101, variant 5, with a quantity of 2 at a price of $13.99 each; the second item is a product with ID 1102, variant 1, with a quantity of 1 at a price of $18.99; the third item is a product with ID 1103, variant 4, with a quantity of 3 at a price of $9.99 each; the fourth item is a product with ID 1104, variant 7, with a quantity of 2 at a price of $21.00 each. The total amount for the order comes to $136.93. The current status of this order is In Transit.", + "The order with ID 11223 was placed by Deborah Lee, a loyal customer with ID 2057. Deborah, who resides at 456 Palm Street, Starling City, and can be reached at 555-2057, has accumulated 105 loyalty points and is currently an active customer. The order consists of three items: the first item is a product with ID 1201, variant 6, with a quantity of 1 at a price of $22.99; the second item is a product with ID 1202, variant 3, with a quantity of 2 at a price of $10.50 each; the third item is a product with ID 1203, variant 8, with a quantity of 4 at a price of $8.99 each. The total amount for the order comes to $88.94. The current status of this order is Shipped.", + "The order with ID 33445 was placed by Kevin Martin, a loyal customer with ID 1867. Kevin, who resides at 789 Oak Circle, Coast City, and can be reached at 555-1867, has accumulated 150 loyalty points and is currently an active customer. The order consists of two items: the first item is a product with ID 1301, variant 2, with a quantity of 3 at a price of $15.99 each; the second item is a product with ID 1302, variant 4, with a quantity of 1 at a price of $28.50. The total amount for the order comes to $76.47. 
The current status of this order is Delivered.", + "The order with ID 55667 was placed by Laura Moore, a loyal customer with ID 1679. Laura, who resides at 123 Birch Boulevard, Blüdhaven, and can be reached at 555-1679, has accumulated 95 loyalty points and is currently an active customer. The order consists of four items: the first item is a product with ID 1401, variant 7, with a quantity of 2 at a price of $19.99 each; the second item is a product with ID 1402, variant 9, with a quantity of 3 at a price of $7.50 each; the third item is a product with ID 1403, variant 5, with a quantity of 1 at a price of $24.99; the fourth item is a product with ID 1404, variant 1, with a quantity of 2 at a price of $15.00 each. The total amount for the order comes to $117.97. The current status of this order is In Transit.", + "The order with ID 77889 was placed by Donald Martinez, a loyal customer with ID 1498. Donald, who resides at 456 Cedar Drive, Riverdale, and can be reached at 555-1498, has accumulated 125 loyalty points and is currently an active customer. The order consists of three items: the first item is a product with ID 1501, variant 8, with a quantity of 1 at a price of $34.99; the second item is a product with ID 1502, variant 2, with a quantity of 4 at a price of $9.99 each; the third item is a product with ID 1503, variant 3, with a quantity of 2 at a price of $14.50 each. The total amount for the order comes to $108.94. The current status of this order is Processing.", + "The order with ID 99001 was placed by Jessica Hall, a loyal customer with ID 1207. Jessica, who resides at 789 Pine Court, Gotham, and can be reached at 555-1207, has accumulated 135 loyalty points and is currently an active customer. The order consists of two items: the first item is a product with ID 1601, variant 3, with a quantity of 1 at a price of $45.99; the second item is a product with ID 1602, variant 6, with a quantity of 3 at a price of $12.99 each. 
The total amount for the order comes to $84.96. The current status of this order is Delivered.", + "The order with ID 11223 was placed by Brian Robinson, a loyal customer with ID 2058. Brian, who resides at 123 Maple Street, Metropolis, and can be reached at 555-2058, has accumulated 85 loyalty points and is currently an active customer. The order consists of four items: the first item is a product with ID 1701, variant 4, with a quantity of 2 at a price of $11.99 each; the second item is a product with ID 1702, variant 7, with a quantity of 1 at a price of $32.50; the third item is a product with ID 1703, variant 1, with a quantity of 3 at a price of $9.49 each; the fourth item is a product with ID 1704, variant 8, with a quantity of 2 at a price of $14.99 each. The total amount for the order comes to $123.43. The current status of this order is Shipped.", + "The order with ID 33445 was placed by Kimberly Garcia, a loyal customer with ID 1870. Kimberly, who resides at 456 Oak Avenue, Star City, and can be reached at 555-1870, has accumulated 160 loyalty points and is currently an active customer. The order consists of three items: the first item is a product with ID 1801, variant 9, with a quantity of 1 at a price of $27.99; the second item is a product with ID 1802, variant 6, with a quantity of 2 at a price of $18.50 each; the third item is a product with ID 1803, variant 4, with a quantity of 4 at a price of $8.99 each. The total amount for the order comes to $100.45. The current status of this order is In Transit.", + "The order with ID 55667 was placed by Kenneth Harris, a loyal customer with ID 1680. Kenneth, who resides at 789 Palm Way, Central City, and can be reached at 555-1680, has accumulated 115 loyalty points and is currently an active customer. 
The order consists of two items: the first item is a product with ID 1901, variant 2, with a quantity of 3 at a price of $14.99 each; the second item is a product with ID 1902, variant 8, with a quantity of 1 at a price of $29.50. The total amount for the order comes to $74.47. The current status of this order is Delivered.", + "The order with ID 77889 was placed by Lisa Clark, a loyal customer with ID 1509. Lisa, who resides at 123 Pine Avenue, Smallville, and can be reached at 555-1509, has accumulated 140 loyalty points and is currently an active customer. The order consists of three items: the first item is a product with ID 2001, variant 3, with a quantity of 1 at a price of $19.99; the second item is a product with ID 2002, variant 5, with a quantity of 4 at a price of $8.99 each; the third item is a product with ID 2003, variant 7, with a quantity of 2 at a price of $12.50 each. The total amount for the order comes to $84.94. The current status of this order is Processing.", + "The order with ID 99001 was placed by Daniel King, a loyal customer with ID 1219. Daniel, who resides at 789 Cedar Lane, Keystone City, and can be reached at 555-1219, has accumulated 165 loyalty points and is currently an active customer. The order consists of four items: the first item is a product with ID 2101, variant 4, with a quantity of 2 at a price of $11.99 each; the second item is a product with ID 2102, variant 9, with a quantity of 1 at a price of $37.50; the third item is a product with ID 2103, variant 1, with a quantity of 3 at a price of $9.99 each; the fourth item is a product with ID 2104, variant 8, with a quantity of 2 at a price of $14.50 each. The total amount for the order comes to $131.93. The current status of this order is Shipped." 
+ ] +} \ No newline at end of file diff --git a/benchmarks/result.md b/benchmarks/result.md new file mode 100644 index 00000000..d93a7504 --- /dev/null +++ b/benchmarks/result.md @@ -0,0 +1,27 @@ +# Benchmark +This benchmark is far from comprehensive and more benchmarks will be added in the +near future. +## Benchmark Setting +CPU: AMD EPYC 7513 32-Core Processor + +GPU: NVIDIA RTX A5000 +## Schemas +To summarize, `address` is a plain JSON schema, `linkedlist` is recursive, +and `order` is a JSON schema that includes other nested schemas. +You can find their definitions in `utils.py`. +## Why warm up? +`formatron` uses lazy caching, +so the first run is typically about 15% slower than subsequent runs. +Performing a warm-up run allows us to better measure latency under realistic workloads, +where a few schemas are created but many requests are made. + +We also plan to add the "first-run" benchmark, which will measure the time taken from +schema creation to the end of the first run. +## vllm +Default vllm settings are used. 
+ +| model | schema | constrained(with warm-up) / tps | unconstrained / tps | +|-----------------|-----------------|---------------------------------|---------------------| +| Llama3-8B(fp16) | address_json | 40.72 | 42.02 | +| Llama3-8B(fp16) | linkedlist_json | 40.57 | 41.95 | +| Llama3-8B(fp16) | order_json | 40.10 | 41.56 | diff --git a/benchmarks/utils.py b/benchmarks/utils.py new file mode 100644 index 00000000..0a7ab6b3 --- /dev/null +++ b/benchmarks/utils.py @@ -0,0 +1,49 @@ +from typing import Optional + +from attr import dataclass +from formatron.schemas.pydantic import ClassSchema + + +class Address(ClassSchema): + street: str + city: str + state: Optional[str] = None + postal_code: str + country: str + +class LinkedList(ClassSchema): + value: int + next: Optional["LinkedList"] + +class OrderItem(ClassSchema): + product_id: int + variant_id: int + quantity: int + price: float + +class Customer(ClassSchema): + id: int + name: str + phone: str + address: str + loyalty_points: int = 0 + is_active: bool = True + +class Order(ClassSchema): + id: int + customer: Customer + items: list[OrderItem] + total_amount: float + status: str + +@dataclass +class BenchResult: + t1:int + s1:float + t2:int + s2:float + +@dataclass +class Context: + index:int + tokens:int \ No newline at end of file diff --git a/benchmarks/vllm_json.py b/benchmarks/vllm_json.py new file mode 100644 index 00000000..5d56c934 --- /dev/null +++ b/benchmarks/vllm_json.py @@ -0,0 +1,92 @@ +import json +import typing +from timeit import timeit + +from formatron.grammar_generators.json_generator import JsonGenerator +from vllm import LLM, SamplingParams + +from utils import Order +from utils import LinkedList +from utils import BenchResult, Context +from utils import Address +from formatter import FormatterBuilder +from integrations.vllm import create_formatters_logits_processor, FormattersLogitsProcessor + +def execute(): + prompts = [ + 
f"{system_prompt}{inputs[context.index]}<|eot_id|><|start_header_id|>assistant<|end_header_id|>", + ] + context.index+=1 + outputs = llm.generate(prompts, sampling_params) + context.tokens += len(outputs[0].outputs[0].token_ids) + + l = sampling_params.logits_processors + if l and isinstance(l[0], FormattersLogitsProcessor): + l[0].reset() + + +def get_vllm_address(): + f = FormatterBuilder() + f.append_line(f"```json\n{f.schema(Address, JsonGenerator(), capture_name='json')}```") + logits_processor = create_formatters_logits_processor(llm, [f]) + sampling_params = SamplingParams(temperature=0.8, top_p=0.95,max_tokens=100, logits_processors=[logits_processor]) + return sampling_params + +def get_vllm_linkedlist(): + f = FormatterBuilder() + f.append_line(f"```json\n{f.schema(LinkedList, JsonGenerator(), capture_name='json')}```") + logits_processor = create_formatters_logits_processor(llm, [f]) + sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=100, logits_processors=[logits_processor]) + return sampling_params + +def get_vllm_order(): + f = FormatterBuilder() + f.append_line(f"```json\n{f.schema(Order, JsonGenerator(), capture_name='json')}```") + logits_processor = create_formatters_logits_processor(llm, [f]) + sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=256, logits_processors=[logits_processor]) + return sampling_params + +def warm_up(f): + f() + context.index = 0 + context.tokens = 0 + +def log(func_name:str, data:BenchResult,f): + a = f"{func_name} generated {data.t1} tokens with {data.t1 / data.s1} tps (with warm up)\n" + b = (f"{func_name} unconstrained generated {data.t2} tokens with" + f" {data.t2 / data.s2} tps\n") + print(a) + print(b) + f.writelines([a,b]) + +def bench(result:BenchResult, context:Context,func, bench_name:str, f): + context.index = 0 + context.tokens = 0 + result.s1 = (timeit(func, setup=lambda: warm_up(func), number=len(inputs))) + result.t1 = context.tokens + context.index = 0 + 
context.tokens = 0 + sampling_params.logits_processors = [] + result.s2 = (timeit(func, number=len(inputs))) + result.t2 = context.tokens + log(bench_name, result, f) + +if __name__ == "__main__": + system_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|> + + You are a helpful AI assistant for information extraction.<|eot_id|><|start_header_id|>user<|end_header_id|> + + Extract information into json format: """ + llm = LLM(model="NurtureAI/Meta-Llama-3-8B-Instruct-32k", max_model_len=4096) + sampling_params = get_vllm_address() + data = BenchResult(0, 0, 0, 0) + context = Context(0, 0) + inputs = json.load(open("address.json"))["sentences"] + with open("vllm_json_bench.txt", "w") as f: + bench(data, context,execute, "vllm_address", f) + sampling_params = get_vllm_linkedlist() + inputs = json.load(open("linkedlist.json"))["sentences"] + bench(data, context, execute, "linkedlist", f) + sampling_params = get_vllm_order() + inputs = json.load(open("orders.json"))["orders"] + bench(data, context, execute, "orders", f) \ No newline at end of file diff --git a/benchmarks/vllm_json_bench.txt b/benchmarks/vllm_json_bench.txt new file mode 100644 index 00000000..510ba070 --- /dev/null +++ b/benchmarks/vllm_json_bench.txt @@ -0,0 +1,6 @@ +vllm_address generated 1032 tokens with 40.715563518001495 tps (with warm up) +vllm_address unconstrained generated 1500 tokens with 42.01920504622742 tps +linkedlist generated 1262 tokens with 40.568280943625346 tps (with warm up) +linkedlist unconstrained generated 1337 tokens with 41.946869871332254 tps +orders generated 4271 tokens with 40.09933827742811 tps (with warm up) +orders unconstrained generated 4589 tokens with 41.564089177085926 tps diff --git a/pyproject.toml b/pyproject.toml index bfc2a000..364dbea9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ authors = [ ] description = "Formatron empowers everyone to control the output format of language models with minimal overhead." 
readme = "README.md" -dependencies = ["pydantic>=2","kbnf>=0.2.4"] +dependencies = ["pydantic>=2","kbnf>=0.2.7"] license = {file = "LICENSE"} keywords = ["deep learning", "language model", "guided generation", "structured generation","constrained decoding"] requires-python = ">=3.10" diff --git a/src/formatron/integrations/exllamav2.py b/src/formatron/integrations/exllamav2.py index 2e13eccc..47f5ba6d 100644 --- a/src/formatron/integrations/exllamav2.py +++ b/src/formatron/integrations/exllamav2.py @@ -67,6 +67,9 @@ def begin(self, prefix_str: str) -> None: prompt = prefix_str.encode("utf-8") self._formatter.accept_bytes(prompt) + def reset(self)->None: + self._formatter.reset() + def feed(self, token: int): self._formatter.accept_token(token) diff --git a/src/formatron/integrations/transformers.py b/src/formatron/integrations/transformers.py index dcc1fcbe..0bc3364b 100644 --- a/src/formatron/integrations/transformers.py +++ b/src/formatron/integrations/transformers.py @@ -61,6 +61,11 @@ def __init__(self, formatters: typing.Sequence[Formatter], eos_token_id: int, f"Number of formatters({len(formatters)}) must match number of configs({len(configs)})" self.configs = configs + def reset(self)->None: + self._last_input_id_length = None + for f in self._formatters: + f.reset() + @property def formatters_captures(self)->list[dict[str,typing.Any]]: return [f.captures for f in self._formatters] diff --git a/src/formatron/integrations/vllm.py b/src/formatron/integrations/vllm.py index 4c40fc42..d36665bb 100644 --- a/src/formatron/integrations/vllm.py +++ b/src/formatron/integrations/vllm.py @@ -5,6 +5,7 @@ import typing import kbnf +import torch from vllm import LLM from config import EngineGenerationConfig @@ -34,6 +35,12 @@ def __init__(self, formatters: typing.Sequence[Formatter], eos_token_id: int, def formatters_captures(self) -> list[dict[str, typing.Any]]: return [f.captures for f in self._formatters] + def reset(self)->None: + for f in self._formatters: + 
f.reset() + self._to_next_batch_step() + self._last_input_id_length = 0 + def _to_next_batch_step(self): self._iter = zip(self._formatters, self._configs) self._debug_counter = 0 @@ -58,11 +65,13 @@ def __call__(self, prompt, generated_tokens, logits): self._to_next_batch_step() result = next(self._iter) self._last_input_id_length += 1 + formatter, _ = result if len(generated_tokens) != 0: # accept new token input_id = generated_tokens[-1] if input_id != self._eos_token_id: formatter.accept_token(input_id) + if formatter.is_completed(): logits[:] = float("-inf") logits[self._eos_token_id] = 0.0