From 752d8aa6a376ffdd69d075dd47f47e6d36672258 Mon Sep 17 00:00:00 2001
From: Huanghe <xs28@rice.edu>
Date: Sat, 10 Aug 2024 16:47:47 -0500
Subject: [PATCH] Reset API&bug fixes&benchmark

---
 README.md                                  |  2 +
 benchmarks/address.json                    | 24 ++++++
 benchmarks/{json.py => exllamav2_json.py}  | 20 ++---
 benchmarks/linkedlist.json                 | 24 ++++++
 benchmarks/orders.json                     | 24 ++++++
 benchmarks/result.md                       | 27 +++++++
 benchmarks/utils.py                        | 49 ++++++++++++
 benchmarks/vllm_json.py                    | 92 ++++++++++++++++++++++
 benchmarks/vllm_json_bench.txt             |  6 ++
 pyproject.toml                             |  2 +-
 src/formatron/integrations/exllamav2.py    |  3 +
 src/formatron/integrations/transformers.py |  5 ++
 src/formatron/integrations/vllm.py         |  9 +++
 13 files changed, 271 insertions(+), 16 deletions(-)
 create mode 100644 benchmarks/address.json
 rename benchmarks/{json.py => exllamav2_json.py} (83%)
 create mode 100644 benchmarks/linkedlist.json
 create mode 100644 benchmarks/orders.json
 create mode 100644 benchmarks/result.md
 create mode 100644 benchmarks/utils.py
 create mode 100644 benchmarks/vllm_json.py
 create mode 100644 benchmarks/vllm_json_bench.txt

diff --git a/README.md b/README.md
index 858b1b32..b36f2aec 100644
--- a/README.md
+++ b/README.md
@@ -191,6 +191,8 @@ Check out integration examples in the [tests](https://github.com/Dan-wanna-M/for
 You may also want to check the minimum compatible version in [pyproject.toml](https://github.com/Dan-wanna-M/formatron/blob/master/pyproject.toml).
 ## API Reference
 Check out the API reference [here](https://dan-wanna-m.github.io/formatron/).
+## Benchmark
+Check out the benchmark [here](benchmarks/result.md).
 ## What Formatron Won't Do
 ### Implement an End-to-End Inference Pipeline
 Every library related to large language models(LLM) must consider that LLMs
diff --git a/benchmarks/address.json b/benchmarks/address.json
new file mode 100644
index 00000000..a953ac8a
--- /dev/null
+++ b/benchmarks/address.json
@@ -0,0 +1,24 @@
+{
+  "sentences": [
+    "I work at 1209 Maple Avenue, Boston, Massachusetts, USA, postal code 02139.",
+    "My friend's shop is located at 2312 Pine Street, San Francisco, California, 94115.",
+    "The office is at 789 Oak Lane, Los Angeles, California, 90001.",
+    "We just moved to 4321 Elm Road, Austin, Texas, 73301.",
+    "My school is at 5678 Birch Street, New York, New York, 10001.",
+    "The library is at 980 Cedar Avenue, Seattle, Washington, 98101.",
+    "Our warehouse is at 312 Spruce Drive, Miami, Florida, 33101.",
+    "The café is on 654 Pine Lane, Chicago, Illinois, 60601.",
+    "My apartment is at 876 Maple Street, Denver, Colorado, 80201.",
+    "The headquarters is at 1122 Cedar Boulevard, Dallas, Texas, 75001.",
+    "I used to live at 3456 Walnut Avenue, San Diego, California, 92101.",
+    "Their office is at 7890 Ash Street, Portland, Oregon, 97201.",
+    "The restaurant is located at 234 Oak Avenue, Atlanta, Georgia, 30301.",
+    "We had a meeting at 1234 Birch Boulevard, Philadelphia, Pennsylvania, 19101.",
+    "The clinic is at 567 Maple Road, Phoenix, Arizona, 85001.",
+    "My favorite bookstore is at 678 Oak Drive, Charlotte, North Carolina, 28201.",
+    "The park is near 910 Cedar Lane, Las Vegas, Nevada, 89101.",
+    "Our hotel is at 8765 Pine Avenue, Nashville, Tennessee, 37201.",
+    "The museum is located at 432 Elm Boulevard, San Antonio, Texas, 78201.",
+    "I attended a conference at 6543 Maple Street, Orlando, Florida, 32801."
+  ]
+}
diff --git a/benchmarks/json.py b/benchmarks/exllamav2_json.py
similarity index 83%
rename from benchmarks/json.py
rename to benchmarks/exllamav2_json.py
index bad94b45..e01532ba 100644
--- a/benchmarks/json.py
+++ b/benchmarks/exllamav2_json.py
@@ -2,23 +2,12 @@
 
 from exllamav2 import ExLlamaV2, ExLlamaV2Config, ExLlamaV2Cache, ExLlamaV2Tokenizer
 from exllamav2.generator import ExLlamaV2DynamicGenerator
-from typing import Optional
-
 from formatron.formatter import FormatterBuilder
 from formatron.grammar_generators.json_generator import JsonGenerator
 from formatron.integrations.exllamav2 import create_formatter_filter
-
 from formatron.schemas.pydantic import ClassSchema
 
 
-class Address(ClassSchema):
-    street: str
-    city: str
-    state: Optional[str] = None
-    postal_code: str
-    country: str
-
-
 def create_exllamav2_6bpw_llama3_8b():
     model_dir = "../tests/local_assets/Llama-3-8B-exl2/"
     config = ExLlamaV2Config(model_dir)
@@ -37,15 +26,16 @@ def create_exllamav2_6bpw_llama3_8b():
     return generator, exllama_filter
 
 
-def test_address_exllamav2():
+def address_exllamav2():
     prompt = f"""{system_prompt}I live in 5033 Broccoli street, Houston, Texas, the United States with postal\
- code 66004<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
+ code 66004<|eot_id|><|start_header_id|>assistant<|end_header_id|> Sure! Here is the json: """
     output = generator.generate(
         prompt=prompt,
-        max_new_tokens=200,
+        max_new_tokens=100,
         add_bos=False,
         filters=[exllama_filter]
     )
+    # exllama_filter.reset()
 
 
 if __name__ == '__main__':
@@ -55,4 +45,4 @@ def test_address_exllamav2():
 
 Extract information into json format: """
     generator, exllama_filter = create_exllamav2_6bpw_llama3_8b()
-    print(f"Test_address_exllamav2: {timeit(test_address_exllamav2, number=1000, globals=globals())/1000} seconds")
+    print(f"address_exllamav2: {timeit(address_exllamav2, number=1000, globals=globals())/1000} seconds")
diff --git a/benchmarks/linkedlist.json b/benchmarks/linkedlist.json
new file mode 100644
index 00000000..d2b07f9a
--- /dev/null
+++ b/benchmarks/linkedlist.json
@@ -0,0 +1,24 @@
+{
+  "sentences": [
+    "LinkedList: 2->6->8",
+    "LinkedList: 5->3->7->1->9",
+    "LinkedList: 4->2->9",
+    "LinkedList: 7->1->8->3->5",
+    "LinkedList: 3->4->6",
+    "LinkedList: 9->5->2->8->7->1",
+    "LinkedList: 1->7->3",
+    "LinkedList: 6->9->1->5",
+    "LinkedList: 8->2",
+    "LinkedList: 5->1->3->6->4",
+    "LinkedList: 4->7->9->2",
+    "LinkedList: 3->8->1",
+    "LinkedList: 2->4->7->9",
+    "LinkedList: 7->5->1->8->6",
+    "LinkedList: 6->3->4",
+    "LinkedList: 9->2->5->7->3->1",
+    "LinkedList: 1->8->6",
+    "LinkedList: 5->7->4->2",
+    "LinkedList: 3->6->9",
+    "LinkedList: 8->4->2->7->5"
+  ]
+}
diff --git a/benchmarks/orders.json b/benchmarks/orders.json
new file mode 100644
index 00000000..7a88dc66
--- /dev/null
+++ b/benchmarks/orders.json
@@ -0,0 +1,24 @@
+{
+  "orders":[
+    "The order with ID 54321 was placed by Jane Smith, a loyal customer with ID 9876. Jane, who resides at 456 Oak Avenue, Metropolis, and can be reached at 555-6789, has accumulated 200 loyalty points and is currently an active customer. The order consists of two items: the first item is a product with ID 201, variant 1, with a quantity of 1 at a price of $49.99; the second item is a product with ID 202, variant 4, with a quantity of 3 at a price of $10.99 each. The total amount for the order comes to $82.96. The current status of this order is Shipped.",
+    "The order with ID 67890 was placed by Robert Brown, a loyal customer with ID 2468. Robert, who resides at 789 Pine Road, Gotham, and can be reached at 555-2468, has accumulated 75 loyalty points and is currently an active customer. The order consists of four items: the first item is a product with ID 301, variant 3, with a quantity of 2 at a price of $25.99 each; the second item is a product with ID 302, variant 6, with a quantity of 1 at a price of $19.99; the third item is a product with ID 303, variant 8, with a quantity of 5 at a price of $5.99 each; the fourth item is a product with ID 304, variant 2, with a quantity of 1 at a price of $35.00. The total amount for the order comes to $159.90. The current status of this order is Delivered.",
+    "The order with ID 13579 was placed by Emily Davis, a loyal customer with ID 1357. Emily, who resides at 321 Birch Lane, Star City, and can be reached at 555-1357, has accumulated 120 loyalty points and is currently an active customer. The order consists of three items: the first item is a product with ID 401, variant 4, with a quantity of 3 at a price of $15.99 each; the second item is a product with ID 402, variant 7, with a quantity of 2 at a price of $12.50 each; the third item is a product with ID 403, variant 1, with a quantity of 1 at a price of $20.00. The total amount for the order comes to $97.46. The current status of this order is In Transit.",
+    "The order with ID 24680 was placed by Michael Johnson, a loyal customer with ID 3698. Michael, who resides at 654 Cedar Street, Central City, and can be reached at 555-3698, has accumulated 90 loyalty points and is currently an active customer. The order consists of two items: the first item is a product with ID 501, variant 2, with a quantity of 1 at a price of $39.99; the second item is a product with ID 502, variant 9, with a quantity of 4 at a price of $9.99 each. The total amount for the order comes to $79.95. The current status of this order is Processing.",
+    "The order with ID 98765 was placed by Sarah White, a loyal customer with ID 1470. Sarah, who resides at 789 Willow Way, Coast City, and can be reached at 555-1470, has accumulated 180 loyalty points and is currently an active customer. The order consists of five items: the first item is a product with ID 601, variant 6, with a quantity of 2 at a price of $19.99 each; the second item is a product with ID 602, variant 3, with a quantity of 1 at a price of $22.50; the third item is a product with ID 603, variant 5, with a quantity of 4 at a price of $7.49 each; the fourth item is a product with ID 604, variant 8, with a quantity of 1 at a price of $50.00; the fifth item is a product with ID 605, variant 2, with a quantity of 2 at a price of $14.99 each. The total amount for the order comes to $181.92. The current status of this order is Delivered.",
+    "The order with ID 11223 was placed by James Clark, a loyal customer with ID 2046. James, who resides at 456 Maple Boulevard, Smallville, and can be reached at 555-2046, has accumulated 95 loyalty points and is currently an active customer. The order consists of two items: the first item is a product with ID 701, variant 7, with a quantity of 1 at a price of $29.99; the second item is a product with ID 702, variant 4, with a quantity of 3 at a price of $8.99 each. The total amount for the order comes to $56.96. The current status of this order is Shipped.",
+    "The order with ID 33445 was placed by Linda Wilson, a loyal customer with ID 1836. Linda, who resides at 123 Palm Drive, Riverdale, and can be reached at 555-1836, has accumulated 110 loyalty points and is currently an active customer. The order consists of four items: the first item is a product with ID 801, variant 1, with a quantity of 2 at a price of $17.99 each; the second item is a product with ID 802, variant 5, with a quantity of 1 at a price of $14.99; the third item is a product with ID 803, variant 3, with a quantity of 4 at a price of $6.49 each; the fourth item is a product with ID 804, variant 2, with a quantity of 1 at a price of $25.00. The total amount for the order comes to $102.93. The current status of this order is In Transit.",
+    "The order with ID 55667 was placed by Steven Miller, a loyal customer with ID 1624. Steven, who resides at 789 Redwood Court, Blüdhaven, and can be reached at 555-1624, has accumulated 130 loyalty points and is currently an active customer. The order consists of three items: the first item is a product with ID 901, variant 8, with a quantity of 3 at a price of $12.99 each; the second item is a product with ID 902, variant 2, with a quantity of 2 at a price of $16.50 each; the third item is a product with ID 903, variant 7, with a quantity of 1 at a price of $19.99. The total amount for the order comes to $91.45. The current status of this order is Processing.",
+    "The order with ID 77889 was placed by Patricia Taylor, a loyal customer with ID 1410. Patricia, who resides at 456 Aspen Grove, Fawcett City, and can be reached at 555-1410, has accumulated 140 loyalty points and is currently an active customer. The order consists of two items: the first item is a product with ID 1001, variant 9, with a quantity of 1 at a price of $44.99; the second item is a product with ID 1002, variant 6, with a quantity of 4 at a price of $11.99 each. The total amount for the order comes to $92.95. The current status of this order is Delivered.",
+    "The order with ID 99001 was placed by Charles Anderson, a loyal customer with ID 1208. Charles, who resides at 123 Cherry Circle, Keystone City, and can be reached at 555-1208, has accumulated 65 loyalty points and is currently an active customer. The order consists of four items: the first item is a product with ID 1101, variant 5, with a quantity of 2 at a price of $13.99 each; the second item is a product with ID 1102, variant 1, with a quantity of 1 at a price of $18.99; the third item is a product with ID 1103, variant 4, with a quantity of 3 at a price of $9.99 each; the fourth item is a product with ID 1104, variant 7, with a quantity of 2 at a price of $21.00 each. The total amount for the order comes to $136.93. The current status of this order is In Transit.",
+    "The order with ID 11223 was placed by Deborah Lee, a loyal customer with ID 2057. Deborah, who resides at 456 Palm Street, Starling City, and can be reached at 555-2057, has accumulated 105 loyalty points and is currently an active customer. The order consists of three items: the first item is a product with ID 1201, variant 6, with a quantity of 1 at a price of $22.99; the second item is a product with ID 1202, variant 3, with a quantity of 2 at a price of $10.50 each; the third item is a product with ID 1203, variant 8, with a quantity of 4 at a price of $8.99 each. The total amount for the order comes to $88.94. The current status of this order is Shipped.",
+    "The order with ID 33445 was placed by Kevin Martin, a loyal customer with ID 1867. Kevin, who resides at 789 Oak Circle, Coast City, and can be reached at 555-1867, has accumulated 150 loyalty points and is currently an active customer. The order consists of two items: the first item is a product with ID 1301, variant 2, with a quantity of 3 at a price of $15.99 each; the second item is a product with ID 1302, variant 4, with a quantity of 1 at a price of $28.50. The total amount for the order comes to $76.47. The current status of this order is Delivered.",
+    "The order with ID 55667 was placed by Laura Moore, a loyal customer with ID 1679. Laura, who resides at 123 Birch Boulevard, Blüdhaven, and can be reached at 555-1679, has accumulated 95 loyalty points and is currently an active customer. The order consists of four items: the first item is a product with ID 1401, variant 7, with a quantity of 2 at a price of $19.99 each; the second item is a product with ID 1402, variant 9, with a quantity of 3 at a price of $7.50 each; the third item is a product with ID 1403, variant 5, with a quantity of 1 at a price of $24.99; the fourth item is a product with ID 1404, variant 1, with a quantity of 2 at a price of $15.00 each. The total amount for the order comes to $117.97. The current status of this order is In Transit.",
+    "The order with ID 77889 was placed by Donald Martinez, a loyal customer with ID 1498. Donald, who resides at 456 Cedar Drive, Riverdale, and can be reached at 555-1498, has accumulated 125 loyalty points and is currently an active customer. The order consists of three items: the first item is a product with ID 1501, variant 8, with a quantity of 1 at a price of $34.99; the second item is a product with ID 1502, variant 2, with a quantity of 4 at a price of $9.99 each; the third item is a product with ID 1503, variant 3, with a quantity of 2 at a price of $14.50 each. The total amount for the order comes to $108.94. The current status of this order is Processing.",
+    "The order with ID 99001 was placed by Jessica Hall, a loyal customer with ID 1207. Jessica, who resides at 789 Pine Court, Gotham, and can be reached at 555-1207, has accumulated 135 loyalty points and is currently an active customer. The order consists of two items: the first item is a product with ID 1601, variant 3, with a quantity of 1 at a price of $45.99; the second item is a product with ID 1602, variant 6, with a quantity of 3 at a price of $12.99 each. The total amount for the order comes to $84.96. The current status of this order is Delivered.",
+    "The order with ID 11223 was placed by Brian Robinson, a loyal customer with ID 2058. Brian, who resides at 123 Maple Street, Metropolis, and can be reached at 555-2058, has accumulated 85 loyalty points and is currently an active customer. The order consists of four items: the first item is a product with ID 1701, variant 4, with a quantity of 2 at a price of $11.99 each; the second item is a product with ID 1702, variant 7, with a quantity of 1 at a price of $32.50; the third item is a product with ID 1703, variant 1, with a quantity of 3 at a price of $9.49 each; the fourth item is a product with ID 1704, variant 8, with a quantity of 2 at a price of $14.99 each. The total amount for the order comes to $123.43. The current status of this order is Shipped.",
+    "The order with ID 33445 was placed by Kimberly Garcia, a loyal customer with ID 1870. Kimberly, who resides at 456 Oak Avenue, Star City, and can be reached at 555-1870, has accumulated 160 loyalty points and is currently an active customer. The order consists of three items: the first item is a product with ID 1801, variant 9, with a quantity of 1 at a price of $27.99; the second item is a product with ID 1802, variant 6, with a quantity of 2 at a price of $18.50 each; the third item is a product with ID 1803, variant 4, with a quantity of 4 at a price of $8.99 each. The total amount for the order comes to $100.45. The current status of this order is In Transit.",
+    "The order with ID 55667 was placed by Kenneth Harris, a loyal customer with ID 1680. Kenneth, who resides at 789 Palm Way, Central City, and can be reached at 555-1680, has accumulated 115 loyalty points and is currently an active customer. The order consists of two items: the first item is a product with ID 1901, variant 2, with a quantity of 3 at a price of $14.99 each; the second item is a product with ID 1902, variant 8, with a quantity of 1 at a price of $29.50. The total amount for the order comes to $74.47. The current status of this order is Delivered.",
+    "The order with ID 77889 was placed by Lisa Clark, a loyal customer with ID 1509. Lisa, who resides at 123 Pine Avenue, Smallville, and can be reached at 555-1509, has accumulated 140 loyalty points and is currently an active customer. The order consists of three items: the first item is a product with ID 2001, variant 3, with a quantity of 1 at a price of $19.99; the second item is a product with ID 2002, variant 5, with a quantity of 4 at a price of $8.99 each; the third item is a product with ID 2003, variant 7, with a quantity of 2 at a price of $12.50 each. The total amount for the order comes to $84.94. The current status of this order is Processing.",
+    "The order with ID 99001 was placed by Daniel King, a loyal customer with ID 1219. Daniel, who resides at 789 Cedar Lane, Keystone City, and can be reached at 555-1219, has accumulated 165 loyalty points and is currently an active customer. The order consists of four items: the first item is a product with ID 2101, variant 4, with a quantity of 2 at a price of $11.99 each; the second item is a product with ID 2102, variant 9, with a quantity of 1 at a price of $37.50; the third item is a product with ID 2103, variant 1, with a quantity of 3 at a price of $9.99 each; the fourth item is a product with ID 2104, variant 8, with a quantity of 2 at a price of $14.50 each. The total amount for the order comes to $131.93. The current status of this order is Shipped."
+  ]
+}
\ No newline at end of file
diff --git a/benchmarks/result.md b/benchmarks/result.md
new file mode 100644
index 00000000..d93a7504
--- /dev/null
+++ b/benchmarks/result.md
@@ -0,0 +1,27 @@
+# Benchmark
+This benchmark is far from comprehensive and more benchmarks will be added in the
+near future. 
+## Benchmark Setting
+CPU: AMD EPYC 7513 32-Core Processor
+
+GPU: NVIDIA RTX A5000
+## Schemas
+To summarize, `address` is a plain JSON schema, `linkedlist` is recursive,
+and `order` is a JSON schema that includes other nested schemas.
+You can find their definitions in `utils.py`.
+## Why warm up?
+`formatron` uses lazy caching,
+so the first run is typically about 15% slower than subsequent runs.
+Performing a warm-up run allows us to better measure latency under realistic workloads,
+where a few schemas are created but many requests are made.
+
+We also plan to add a "first-run" benchmark, which will measure the time taken from
+schema creation to the end of the first run.
+## vllm
+Default vllm settings are used.
+
+| model           | schema          | constrained(with warm-up) / tps | unconstrained / tps |
+|-----------------|-----------------|---------------------------------|---------------------|
+| Llama3-8B(fp16) | address_json    | 40.72                           | 42.02               |
+| Llama3-8B(fp16) | linkedlist_json | 40.57                           | 41.95               |
+| Llama3-8B(fp16) | order_json      | 40.10                           | 41.56               |
diff --git a/benchmarks/utils.py b/benchmarks/utils.py
new file mode 100644
index 00000000..0a7ab6b3
--- /dev/null
+++ b/benchmarks/utils.py
@@ -0,0 +1,49 @@
+from typing import Optional
+
+from attr import dataclass
+from formatron.schemas.pydantic import ClassSchema
+
+
+class Address(ClassSchema):
+    street: str
+    city: str
+    state: Optional[str] = None
+    postal_code: str
+    country: str
+
+class LinkedList(ClassSchema):
+    value: int
+    next: Optional["LinkedList"]
+
+class OrderItem(ClassSchema):
+    product_id: int
+    variant_id: int
+    quantity: int
+    price: float
+
+class Customer(ClassSchema):
+    id: int
+    name: str
+    phone: str
+    address: str
+    loyalty_points: int = 0
+    is_active: bool = True
+
+class Order(ClassSchema):
+    id: int
+    customer: Customer
+    items: list[OrderItem]
+    total_amount: float
+    status: str
+
+@dataclass
+class BenchResult:
+    t1:int
+    s1:float
+    t2:int
+    s2:float
+
+@dataclass
+class Context:
+    index:int
+    tokens:int
\ No newline at end of file
diff --git a/benchmarks/vllm_json.py b/benchmarks/vllm_json.py
new file mode 100644
index 00000000..5d56c934
--- /dev/null
+++ b/benchmarks/vllm_json.py
@@ -0,0 +1,92 @@
+import json
+import typing
+from timeit import timeit
+
+from formatron.grammar_generators.json_generator import JsonGenerator
+from vllm import LLM, SamplingParams
+
+from utils import Order
+from utils import LinkedList
+from utils import BenchResult, Context
+from utils import Address
+from formatter import FormatterBuilder
+from integrations.vllm import create_formatters_logits_processor, FormattersLogitsProcessor
+
+def execute():
+    prompts = [
+        f"{system_prompt}{inputs[context.index]}<|eot_id|><|start_header_id|>assistant<|end_header_id|>",
+    ]
+    context.index+=1
+    outputs = llm.generate(prompts, sampling_params)
+    context.tokens += len(outputs[0].outputs[0].token_ids)
+
+    l = sampling_params.logits_processors
+    if l and isinstance(l[0], FormattersLogitsProcessor):
+        l[0].reset()
+
+
+def get_vllm_address():
+    f = FormatterBuilder()
+    f.append_line(f"```json\n{f.schema(Address, JsonGenerator(), capture_name='json')}```")
+    logits_processor = create_formatters_logits_processor(llm, [f])
+    sampling_params = SamplingParams(temperature=0.8, top_p=0.95,max_tokens=100, logits_processors=[logits_processor])
+    return sampling_params
+
+def get_vllm_linkedlist():
+    f = FormatterBuilder()
+    f.append_line(f"```json\n{f.schema(LinkedList, JsonGenerator(), capture_name='json')}```")
+    logits_processor = create_formatters_logits_processor(llm, [f])
+    sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=100, logits_processors=[logits_processor])
+    return sampling_params
+
+def get_vllm_order():
+    f = FormatterBuilder()
+    f.append_line(f"```json\n{f.schema(Order, JsonGenerator(), capture_name='json')}```")
+    logits_processor = create_formatters_logits_processor(llm, [f])
+    sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=256, logits_processors=[logits_processor])
+    return sampling_params
+
+def warm_up(f):
+    f()
+    context.index = 0
+    context.tokens = 0
+
+def log(func_name:str, data:BenchResult,f):
+    a = f"{func_name} generated {data.t1} tokens with {data.t1 / data.s1} tps (with warm up)\n"
+    b = (f"{func_name} unconstrained generated {data.t2} tokens with"
+          f" {data.t2 / data.s2} tps\n")
+    print(a)
+    print(b)
+    f.writelines([a,b])
+
+def bench(result:BenchResult, context:Context,func, bench_name:str, f):
+    context.index = 0
+    context.tokens = 0
+    result.s1 = (timeit(func, setup=lambda: warm_up(func), number=len(inputs)))
+    result.t1 = context.tokens
+    context.index = 0
+    context.tokens = 0
+    sampling_params.logits_processors = []
+    result.s2 = (timeit(func, number=len(inputs)))
+    result.t2 = context.tokens
+    log(bench_name, result, f)
+
+if __name__ == "__main__":
+    system_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
+
+    You are a helpful AI assistant for information extraction.<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+    Extract information into json format: """
+    llm = LLM(model="NurtureAI/Meta-Llama-3-8B-Instruct-32k", max_model_len=4096)
+    sampling_params = get_vllm_address()
+    data = BenchResult(0, 0, 0, 0)
+    context = Context(0, 0)
+    inputs = json.load(open("address.json"))["sentences"]
+    with open("vllm_json_bench.txt", "w") as f:
+        bench(data, context,execute, "vllm_address", f)
+        sampling_params = get_vllm_linkedlist()
+        inputs = json.load(open("linkedlist.json"))["sentences"]
+        bench(data, context, execute, "linkedlist", f)
+        sampling_params = get_vllm_order()
+        inputs = json.load(open("orders.json"))["orders"]
+        bench(data, context, execute, "orders", f)
\ No newline at end of file
diff --git a/benchmarks/vllm_json_bench.txt b/benchmarks/vllm_json_bench.txt
new file mode 100644
index 00000000..510ba070
--- /dev/null
+++ b/benchmarks/vllm_json_bench.txt
@@ -0,0 +1,6 @@
+vllm_address generated 1032 tokens with 40.715563518001495 tps (with warm up)
+vllm_address unconstrained generated 1500 tokens with 42.01920504622742 tps
+linkedlist generated 1262 tokens with 40.568280943625346 tps (with warm up)
+linkedlist unconstrained generated 1337 tokens with 41.946869871332254 tps
+orders generated 4271 tokens with 40.09933827742811 tps (with warm up)
+orders unconstrained generated 4589 tokens with 41.564089177085926 tps
diff --git a/pyproject.toml b/pyproject.toml
index bfc2a000..364dbea9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,7 @@ authors = [
 ]
 description = "Formatron empowers everyone to control the output format of language models with minimal overhead."
 readme = "README.md"
-dependencies = ["pydantic>=2","kbnf>=0.2.4"]
+dependencies = ["pydantic>=2","kbnf>=0.2.7"]
 license = {file = "LICENSE"}
 keywords = ["deep learning", "language model", "guided generation", "structured generation","constrained decoding"]
 requires-python = ">=3.10"
diff --git a/src/formatron/integrations/exllamav2.py b/src/formatron/integrations/exllamav2.py
index 2e13eccc..47f5ba6d 100644
--- a/src/formatron/integrations/exllamav2.py
+++ b/src/formatron/integrations/exllamav2.py
@@ -67,6 +67,9 @@ def begin(self, prefix_str: str) -> None:
             prompt = prefix_str.encode("utf-8")
             self._formatter.accept_bytes(prompt)
 
+    def reset(self)->None:
+        self._formatter.reset()
+
     def feed(self, token: int):
         self._formatter.accept_token(token)
 
diff --git a/src/formatron/integrations/transformers.py b/src/formatron/integrations/transformers.py
index dcc1fcbe..0bc3364b 100644
--- a/src/formatron/integrations/transformers.py
+++ b/src/formatron/integrations/transformers.py
@@ -61,6 +61,11 @@ def __init__(self, formatters: typing.Sequence[Formatter], eos_token_id: int,
             f"Number of formatters({len(formatters)}) must match number of configs({len(configs)})"
         self.configs = configs
 
+    def reset(self)->None:
+        self._last_input_id_length = None
+        for f in self._formatters:
+            f.reset()
+
     @property
     def formatters_captures(self)->list[dict[str,typing.Any]]:
         return [f.captures for f in self._formatters]
diff --git a/src/formatron/integrations/vllm.py b/src/formatron/integrations/vllm.py
index 4c40fc42..d36665bb 100644
--- a/src/formatron/integrations/vllm.py
+++ b/src/formatron/integrations/vllm.py
@@ -5,6 +5,7 @@
 import typing
 
 import kbnf
+import torch
 from vllm import LLM
 
 from config import EngineGenerationConfig
@@ -34,6 +35,12 @@ def __init__(self, formatters: typing.Sequence[Formatter], eos_token_id: int,
     def formatters_captures(self) -> list[dict[str, typing.Any]]:
         return [f.captures for f in self._formatters]
 
+    def reset(self)->None:
+        for f in self._formatters:
+            f.reset()
+        self._to_next_batch_step()
+        self._last_input_id_length = 0
+
     def _to_next_batch_step(self):
         self._iter = zip(self._formatters, self._configs)
         self._debug_counter = 0
@@ -58,11 +65,13 @@ def __call__(self, prompt, generated_tokens, logits):
             self._to_next_batch_step()
             result = next(self._iter)
             self._last_input_id_length += 1
+
         formatter, _ = result
         if len(generated_tokens) != 0:  # accept new token
             input_id = generated_tokens[-1]
             if input_id != self._eos_token_id:
                 formatter.accept_token(input_id)
+
         if formatter.is_completed():
             logits[:] = float("-inf")
             logits[self._eos_token_id] = 0.0