From 0836a9317f82c15b9948cae06aea841b024109e4 Mon Sep 17 00:00:00 2001
From: DocShotgun <126566557+DocShotgun@users.noreply.github.com>
Date: Sat, 23 Nov 2024 10:27:37 -0800
Subject: [PATCH 1/7] Grammar: Initial Formatron regex and JSON schema
 implementation

* Replace LMFE's regex and JSON schema filters with Formatron's
* Remove Outlines EBNF filter in preparation for Formatron KBNF filter
* TODO: Implement Formatron KBNF filter
---
 backends/exllamav2/grammar.py | 156 ++++++++--------------------------
 backends/exllamav2/model.py   |   2 +-
 pyproject.toml                |   3 +-
 3 files changed, 39 insertions(+), 122 deletions(-)

diff --git a/backends/exllamav2/grammar.py b/backends/exllamav2/grammar.py
index 3ad2f44a..9c274915 100644
--- a/backends/exllamav2/grammar.py
+++ b/backends/exllamav2/grammar.py
@@ -1,110 +1,20 @@
 import traceback
 from exllamav2 import ExLlamaV2, ExLlamaV2Tokenizer
-from exllamav2.generator.filters import ExLlamaV2Filter, ExLlamaV2PrefixFilter
-from lmformatenforcer import (
-    JsonSchemaParser,
-    RegexParser,
-    TokenEnforcer,
-    CharacterLevelParser,
-)
-from lmformatenforcer.integrations.exllamav2 import (
-    build_token_enforcer_tokenizer_data,
-)
+from exllamav2.generator.filters import ExLlamaV2Filter
 from loguru import logger
 from typing import List
-from functools import lru_cache
-
-
-class OutlinesTokenizerWrapper:
-    """Wrapper for Outlines tokenizer"""
-
-    def __init__(self, tokenizer):
-        self.tokenizer = tokenizer
-        id_to_piece = self.tokenizer.get_id_to_piece_list()
-        self.vocabulary = {piece: idx for idx, piece in enumerate(id_to_piece)}
-        self.eos_token_id = self.tokenizer.eos_token_id
-        self.eos_token = id_to_piece[self.tokenizer.eos_token_id]
-        self.special_tokens = list(self.tokenizer.extended_id_to_piece.keys())
-
-    def convert_token_to_string(self, token):
-        return token
-
-    def decode(self, tokens):
-        s = ""
-        id_to_piece = self.tokenizer.get_id_to_piece_list()
-        for t in tokens:
-            s += id_to_piece[t]
-        return s
-
-
-class ExLlamaV2EbnfFilter(ExLlamaV2Filter):
-    """Filter class for context-free grammar via outlines"""
-
-    def __init__(self, model, tokenizer, grammar):
-        from outlines.fsm.fsm import CFGFSM
-
-        super().__init__(model, tokenizer)
-
-        self.wrapped_tokenizer = OutlinesTokenizerWrapper(tokenizer)
-        self.fsm = CFGFSM(grammar, self.wrapped_tokenizer)
-        self.state = self.fsm.first_state
-
-    def begin(self, prefix_str=""):
-        self.state = self.fsm.first_state
-
-    def feed(self, token):
-        self.state = self.fsm.next_state(self.state, token.item())
-
-    def next(self):
-        return self.fsm.allowed_token_ids(self.state), set()
-
-    def use_background_worker(self):
-        return True
-
-
-@lru_cache(10)
-def _get_lmfe_tokenizer_data(tokenizer: ExLlamaV2Tokenizer):
-    return build_token_enforcer_tokenizer_data(tokenizer)
-
-
-class ExLlamaV2TokenEnforcerFilter(ExLlamaV2Filter):
-    """Filter class for LMFE"""
-
-    token_sequence: List[int]
-
-    def __init__(
-        self,
-        model: ExLlamaV2,
-        tokenizer: ExLlamaV2Tokenizer,
-        character_level_parser: CharacterLevelParser,
-    ):
-        super().__init__(model, tokenizer)
-        tokenizer_data = _get_lmfe_tokenizer_data(tokenizer)
-        self.token_enforcer = TokenEnforcer(tokenizer_data, character_level_parser)
-        self.token_sequence = []
-
-    def begin(self, prefix_str: str):
-        self.token_sequence = []
-
-    def feed(self, token):
-        self.token_sequence.append(int(token[0][0]))
-
-    def next(self):
-        allowed_tokens = self.token_enforcer.get_allowed_tokens(self.token_sequence)
-        if not hasattr(self, "allow_return_type_list"):
-            return set(allowed_tokens), set()
-        else:
-            return sorted(allowed_tokens), []
-
-    def use_background_worker(self):
-        return True
+from formatron.formatter import FormatterBuilder
+from formatron.schemas import json_schema
+from formatron.integrations.exllamav2 import create_formatter_filter
 
 
 def clear_grammar_func_cache():
     """Flush tokenizer_data cache to avoid holding references to
     tokenizers after unloading a model"""
 
-    _get_lmfe_tokenizer_data.cache_clear()
+    # TODO: Unsure if this is needed with formatron
+    pass
 
 
 class ExLlamaV2Grammar:
@@ -117,7 +27,7 @@ def __init__(self):
 
     def add_json_schema_filter(
         self,
-        json_schema: dict,
+        schema: dict,
         model: ExLlamaV2,
         tokenizer: ExLlamaV2Tokenizer,
     ):
@@ -125,7 +35,16 @@ def add_json_schema_filter(
 
         # Create the parser
         try:
-            schema_parser = JsonSchemaParser(json_schema)
+            # Add fields required by formatron if not present
+            if "$id" not in schema:
+                schema["$id"] = "https://example.com/example.json"
+            if "$schema" not in schema:
+                schema["$schema"] = "http://json-schema.org/draft-07/schema#"
+
+            # Validate schema and create formatter
+            schema = json_schema.create_schema(schema)
+            f = FormatterBuilder()
+            f.append_line(f"{f.json(schema)}")
         except Exception:
             traceback.print_exc()
             logger.error(
@@ -135,14 +54,10 @@ def add_json_schema_filter(
 
             return
 
-        # Allow JSON objects or JSON arrays at the top level
-        json_prefixes = ["[", "{"]
-
-        lmfilter = ExLlamaV2TokenEnforcerFilter(model, tokenizer, schema_parser)
-        prefix_filter = ExLlamaV2PrefixFilter(model, tokenizer, json_prefixes)
+        lmfilter = create_formatter_filter(model, tokenizer, f)
 
         # Append the filters
-        self.filters.extend([lmfilter, prefix_filter])
+        self.filters.append(lmfilter)
 
     def add_regex_filter(
         self,
@@ -154,7 +69,9 @@ def add_regex_filter(
 
         # Create the parser
         try:
-            pattern_parser = RegexParser(pattern)
+            # Validate regex and create formatter
+            f = FormatterBuilder()
+            f.append_line(f"{f.regex(pattern)}")
         except Exception:
             traceback.print_exc()
             logger.error(
@@ -164,32 +81,33 @@ def add_regex_filter(
 
             return
 
-        lmfilter = ExLlamaV2TokenEnforcerFilter(model, tokenizer, pattern_parser)
+        lmfilter = create_formatter_filter(model, tokenizer, f)
 
         # Append the filters
         self.filters.append(lmfilter)
 
-    def add_ebnf_filter(
+    def add_kbnf_filter(
         self,
-        ebnf_string: str,
+        kbnf_string: str,
         model: ExLlamaV2,
         tokenizer: ExLlamaV2Tokenizer,
     ):
-        """
-        Add an EBNF grammar filter.
-        Possibly replace outlines with an in-house solution in the future.
-        """
+        """Adds an ExllamaV2 filter based on KBNF grammar."""
+
         # Create the parser
         try:
-            ebnf_filter = ExLlamaV2EbnfFilter(model, tokenizer, ebnf_string)
-        except ImportError:
+            # Validate KBNF and create formatter
+            f = FormatterBuilder()
+            # TODO: Implement this
+        except Exception:
             logger.error(
-                "Skipping EBNF parsing because Outlines is not installed.\n"
-                "Please run the following command in your environment "
-                "to install extra packages:\n"
-                "pip install -U .[extras]"
+                "Skipping because the KBNF string couldn't be parsed. "
+                "Please read the above error for more information."
             )
 
             return
 
-        self.filters.append(ebnf_filter)
+        lmfilter = create_formatter_filter(model, tokenizer, f)
+
+        # Append the filters
+        self.filters.append(lmfilter)
diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py
index ff11531a..50cef42c 100644
--- a/backends/exllamav2/model.py
+++ b/backends/exllamav2/model.py
@@ -1194,7 +1194,7 @@ async def generate_gen(
         # Add EBNF filter if it exists
         grammar_string = unwrap(kwargs.get("grammar_string"))
         if grammar_string:
-            grammar_handler.add_ebnf_filter(grammar_string, self.model, self.tokenizer)
+            grammar_handler.add_kbnf_filter(grammar_string, self.model, self.tokenizer)
 
         # Set banned strings
         banned_strings: List[str] = unwrap(kwargs.get("banned_strings"), [])
diff --git a/pyproject.toml b/pyproject.toml
index de782b73..efa1d760 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,7 @@ dependencies = [
     "sse-starlette",
     "packaging",
     "tokenizers",
-    "lm-format-enforcer >= 0.9.6",
+    "formatron",
     "aiofiles",
     "aiohttp",
     "async_lru",
@@ -53,7 +53,6 @@ dependencies = [
 
 [project.optional-dependencies]
 extras = [
     # Heavy dependencies that aren't for everyday use
-    "outlines",
     "infinity-emb",
     "sentence-transformers",
 ]
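
Note on the patch above: both structured-output paths now go through Formatron's FormatterBuilder. The flow can be sketched standalone roughly as follows, using only the calls the patch itself makes; the toy schema is illustrative, and the final filter step assumes a loaded ExLlamaV2 model and tokenizer:

    from formatron.formatter import FormatterBuilder
    from formatron.schemas import json_schema

    # Formatron wants "$id" and "$schema" present; the patch injects
    # placeholder values when the caller omits them.
    schema = {
        "$id": "https://example.com/example.json",
        "$schema": "http://json-schema.org/draft-07/schema#",
        "type": "object",
        "properties": {"answer": {"type": "string"}},
        "required": ["answer"],
    }

    validated = json_schema.create_schema(schema)  # raises if the schema is invalid
    f = FormatterBuilder()
    f.append_line(f"{f.json(validated)}")  # constrain output to one line of JSON

    # With a loaded model this becomes an ExLlamaV2 filter:
    # lmfilter = create_formatter_filter(model, tokenizer, f)

The LMFE version needed a separate ExLlamaV2PrefixFilter to force a leading "[" or "{"; the schema-driven Formatron grammar already constrains the opening token, which is presumably why the prefix filter is dropped.
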
" @@ -111,3 +117,19 @@ def add_kbnf_filter( # Append the filters self.filters.append(lmfilter) + + +class CustomExtractor(NonterminalExtractor): + def __init__(self, nonterminal: str, kbnf_string: str): + super().__init__(nonterminal) + self.kbnf_string = kbnf_string + + # Fails without an extract function defined + # No idea what it does or why it's needed, but this seems to work + # TODO: Figure out how to do this properly + def extract(self, input_str: str) -> typing.Optional[tuple[str, typing.Any]]: + return input_str[len(input_str) :], input_str[: len(input_str)] + + @property + def kbnf_definition(self) -> str: + return self.kbnf_string.replace("start", self.nonterminal) From 8f209efb99c0aa06f23098e0a47ffc3216fc1a64 Mon Sep 17 00:00:00 2001 From: DocShotgun <126566557+DocShotgun@users.noreply.github.com> Date: Sun, 24 Nov 2024 10:44:45 -0800 Subject: [PATCH 3/7] Grammar: Clean up KBNF implementation * Also remove empty cache clear function --- backends/exllamav2/grammar.py | 30 +++++++++++------------------- backends/exllamav2/model.py | 3 --- 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/backends/exllamav2/grammar.py b/backends/exllamav2/grammar.py index 9621e3f8..9a6b5204 100644 --- a/backends/exllamav2/grammar.py +++ b/backends/exllamav2/grammar.py @@ -1,22 +1,14 @@ import traceback import typing -from exllamav2 import ExLlamaV2, ExLlamaV2Tokenizer -from exllamav2.generator.filters import ExLlamaV2Filter -from loguru import logger from typing import List +from exllamav2 import ExLlamaV2, ExLlamaV2Tokenizer +from exllamav2.generator.filters import ExLlamaV2Filter +from formatron.extractor import NonterminalExtractor from formatron.formatter import FormatterBuilder -from formatron.schemas import json_schema from formatron.integrations.exllamav2 import create_formatter_filter -from formatron.extractor import NonterminalExtractor - - -def clear_grammar_func_cache(): - """Flush tokenizer_data cache to avoid holding references to - tokenizers after unloading a model""" - - # TODO: Unsure if this is needed with formatron - pass +from formatron.schemas import json_schema +from loguru import logger class ExLlamaV2Grammar: @@ -102,7 +94,7 @@ def add_kbnf_filter( f = FormatterBuilder() f.append_line( f"{f.extractor( - lambda nonterminal: CustomExtractor(nonterminal, kbnf_string) + lambda nonterminal: CFGExtractor(nonterminal, kbnf_string) )}" ) except Exception: @@ -119,16 +111,16 @@ def add_kbnf_filter( self.filters.append(lmfilter) -class CustomExtractor(NonterminalExtractor): +class CFGExtractor(NonterminalExtractor): + """Extractor class for KBNF context-free grammar""" + def __init__(self, nonterminal: str, kbnf_string: str): super().__init__(nonterminal) self.kbnf_string = kbnf_string - # Fails without an extract function defined - # No idea what it does or why it's needed, but this seems to work - # TODO: Figure out how to do this properly + # Return the entire input string as the extracted string def extract(self, input_str: str) -> typing.Optional[tuple[str, typing.Any]]: - return input_str[len(input_str) :], input_str[: len(input_str)] + return "", input_str @property def kbnf_definition(self) -> str: diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py index 50cef42c..64ed5b90 100644 --- a/backends/exllamav2/model.py +++ b/backends/exllamav2/model.py @@ -833,9 +833,6 @@ async def unload(self, loras_only: bool = False, **kwargs): # Wait for other jobs to finish await self.wait_for_jobs(kwargs.get("skip_wait")) - # Delete references held in the 
From 8f209efb99c0aa06f23098e0a47ffc3216fc1a64 Mon Sep 17 00:00:00 2001
From: DocShotgun <126566557+DocShotgun@users.noreply.github.com>
Date: Sun, 24 Nov 2024 10:44:45 -0800
Subject: [PATCH 3/7] Grammar: Clean up KBNF implementation

* Also remove empty cache clear function
---
 backends/exllamav2/grammar.py | 30 +++++++++++-------------------
 backends/exllamav2/model.py   |  3 ---
 2 files changed, 11 insertions(+), 22 deletions(-)

diff --git a/backends/exllamav2/grammar.py b/backends/exllamav2/grammar.py
index 9621e3f8..9a6b5204 100644
--- a/backends/exllamav2/grammar.py
+++ b/backends/exllamav2/grammar.py
@@ -1,22 +1,14 @@
 import traceback
 import typing
-from exllamav2 import ExLlamaV2, ExLlamaV2Tokenizer
-from exllamav2.generator.filters import ExLlamaV2Filter
-from loguru import logger
 from typing import List
 
+from exllamav2 import ExLlamaV2, ExLlamaV2Tokenizer
+from exllamav2.generator.filters import ExLlamaV2Filter
+from formatron.extractor import NonterminalExtractor
 from formatron.formatter import FormatterBuilder
-from formatron.schemas import json_schema
 from formatron.integrations.exllamav2 import create_formatter_filter
-from formatron.extractor import NonterminalExtractor
-
-
-def clear_grammar_func_cache():
-    """Flush tokenizer_data cache to avoid holding references to
-    tokenizers after unloading a model"""
-
-    # TODO: Unsure if this is needed with formatron
-    pass
+from formatron.schemas import json_schema
+from loguru import logger
 
 
 class ExLlamaV2Grammar:
@@ -102,7 +94,7 @@ def add_kbnf_filter(
             f = FormatterBuilder()
             f.append_line(
                 f"{f.extractor(
-                    lambda nonterminal: CustomExtractor(nonterminal, kbnf_string)
+                    lambda nonterminal: CFGExtractor(nonterminal, kbnf_string)
                 )}"
             )
         except Exception:
@@ -119,16 +111,16 @@ def add_kbnf_filter(
         self.filters.append(lmfilter)
 
 
-class CustomExtractor(NonterminalExtractor):
+class CFGExtractor(NonterminalExtractor):
+    """Extractor class for KBNF context-free grammar"""
+
     def __init__(self, nonterminal: str, kbnf_string: str):
         super().__init__(nonterminal)
         self.kbnf_string = kbnf_string
 
-    # Fails without an extract function defined
-    # No idea what it does or why it's needed, but this seems to work
-    # TODO: Figure out how to do this properly
+    # Return the entire input string as the extracted string
     def extract(self, input_str: str) -> typing.Optional[tuple[str, typing.Any]]:
-        return input_str[len(input_str) :], input_str[: len(input_str)]
+        return "", input_str
 
     @property
     def kbnf_definition(self) -> str:
diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py
index 50cef42c..64ed5b90 100644
--- a/backends/exllamav2/model.py
+++ b/backends/exllamav2/model.py
@@ -833,9 +833,6 @@ async def unload(self, loras_only: bool = False, **kwargs):
         # Wait for other jobs to finish
         await self.wait_for_jobs(kwargs.get("skip_wait"))
 
-        # Delete references held in the grammar module
-        clear_grammar_func_cache()
-
         # Clear the image embedding cache
         clear_image_embedding_cache()

From 6f2dc2ea99bcb9b430d27ec08047353fc0c5f3cd Mon Sep 17 00:00:00 2001
From: DocShotgun <126566557+DocShotgun@users.noreply.github.com>
Date: Sun, 24 Nov 2024 11:35:45 -0800
Subject: [PATCH 4/7] Grammar: Fix syntax, lint

---
 backends/exllamav2/grammar.py | 5 ++---
 backends/exllamav2/model.py   | 1 -
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/backends/exllamav2/grammar.py b/backends/exllamav2/grammar.py
index 9a6b5204..62f50a31 100644
--- a/backends/exllamav2/grammar.py
+++ b/backends/exllamav2/grammar.py
@@ -93,9 +93,8 @@ def add_kbnf_filter(
             # Validate KBNF and create formatter
             f = FormatterBuilder()
             f.append_line(
-                f"{f.extractor(
-                    lambda nonterminal: CFGExtractor(nonterminal, kbnf_string)
-                )}"
+                f"""{f.extractor(lambda nonterminal:
+                    CFGExtractor(nonterminal, kbnf_string))}"""
             )
         except Exception:
             logger.error(
diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py
index 64ed5b90..7a5324f3 100644
--- a/backends/exllamav2/model.py
+++ b/backends/exllamav2/model.py
@@ -39,7 +39,6 @@
 
 from backends.exllamav2.grammar import (
     ExLlamaV2Grammar,
-    clear_grammar_func_cache,
 )
 from backends.exllamav2.utils import (
     exllama_disabled_flash_attn,
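
The syntax fix in patch 4 is a Python-version issue rather than a logic change: before Python 3.12 (PEP 701), a replacement field in a single- or double-quoted f-string cannot contain a newline, so the multi-line lambda introduced in patch 2 is a SyntaxError on 3.10 and 3.11. Triple-quoted f-strings may span lines anywhere, including inside the braces, which is what the rewrite relies on. A minimal illustration:

    # SyntaxError before Python 3.12: the replacement field spans lines
    # text = f"{(
    #     1 + 2
    # )}"

    # Accepted on all supported versions: triple quotes permit the newlines
    text = f"""{(
        1 + 2
    )}"""
    assert text == "3"
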
From 3c4211c963494618efc5253049444f588ddd118a Mon Sep 17 00:00:00 2001
From: kingbri <8082010+bdashore3@users.noreply.github.com>
Date: Mon, 2 Dec 2024 15:10:20 -0500
Subject: [PATCH 5/7] Dependencies: Ensure updated kbnf

Signed-off-by: kingbri <8082010+bdashore3@users.noreply.github.com>
---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index efa1d760..b27f4532 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,6 +27,7 @@ dependencies = [
     "packaging",
     "tokenizers",
     "formatron",
+    "kbnf>=0.4.1",
     "aiofiles",
     "aiohttp",
     "async_lru",

From 7f899734c012ed51b776be767f4f89736109afdf Mon Sep 17 00:00:00 2001
From: DocShotgun <126566557+DocShotgun@users.noreply.github.com>
Date: Thu, 5 Dec 2024 21:36:37 -0800
Subject: [PATCH 6/7] Grammar: Cache the engine vocabulary

* Avoid rebuilding the KBNF engine vocabulary on every grammar-enabled request
---
 backends/exllamav2/grammar.py | 40 +++++++++++++++++++++++++++++++----
 backends/exllamav2/model.py   |  4 ++++
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/backends/exllamav2/grammar.py b/backends/exllamav2/grammar.py
index 62f50a31..47c5ed58 100644
--- a/backends/exllamav2/grammar.py
+++ b/backends/exllamav2/grammar.py
@@ -1,12 +1,14 @@
 import traceback
 import typing
+from functools import lru_cache
 from typing import List
 
+import torch
 from exllamav2 import ExLlamaV2, ExLlamaV2Tokenizer
 from exllamav2.generator.filters import ExLlamaV2Filter
 from formatron.extractor import NonterminalExtractor
 from formatron.formatter import FormatterBuilder
-from formatron.integrations.exllamav2 import create_formatter_filter
+from formatron.integrations.exllamav2 import FormatterFilter, create_engine_vocabulary
 from formatron.schemas import json_schema
 from loguru import logger
@@ -48,7 +50,7 @@ def add_json_schema_filter(
 
             return
 
-        lmfilter = create_formatter_filter(model, tokenizer, f)
+        lmfilter = _create_formatter_filter(model, tokenizer, f)
 
         # Append the filters
         self.filters.append(lmfilter)
@@ -75,7 +77,7 @@ def add_regex_filter(
 
             return
 
-        lmfilter = create_formatter_filter(model, tokenizer, f)
+        lmfilter = _create_formatter_filter(model, tokenizer, f)
 
         # Append the filters
         self.filters.append(lmfilter)
@@ -104,7 +106,7 @@ def add_kbnf_filter(
 
             return
 
-        lmfilter = create_formatter_filter(model, tokenizer, f)
+        lmfilter = _create_formatter_filter(model, tokenizer, f)
 
         # Append the filters
         self.filters.append(lmfilter)
@@ -124,3 +126,33 @@ def extract(self, input_str: str) -> typing.Optional[tuple[str, typing.Any]]:
     @property
     def kbnf_definition(self) -> str:
         return self.kbnf_string.replace("start", self.nonterminal)
+
+
+@lru_cache(1)
+def _create_cached_engine_vocabulary(tokenizer: ExLlamaV2Tokenizer):
+    """Build and cache engine vocabulary on first grammar run"""
+
+    return create_engine_vocabulary(tokenizer)
+
+
+def _create_formatter_filter(
+    model: ExLlamaV2, tokenizer: ExLlamaV2Tokenizer, formatter_builder: FormatterBuilder
+) -> ExLlamaV2Filter:
+    """
+    Create a formatter filter for the ExLlamaV2 engine.
+    Minimalist clone of formatron.integrations.exllamav2.create_formatter_filter
+    with lru_cache enabled for engine vocabulary
+    """
+
+    vocab = _create_cached_engine_vocabulary(tokenizer)
+    f = formatter_builder.build(
+        vocab, lambda tokens: tokenizer.decode(torch.tensor(tokens))
+    )
+    return FormatterFilter(model, tokenizer, f)
+
+
+def clear_grammar_func_cache():
+    """Flush tokenizer_data cache to avoid holding references to
+    tokenizers after unloading a model"""
+
+    _create_cached_engine_vocabulary.cache_clear()
diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py
index 7a5324f3..50cef42c 100644
--- a/backends/exllamav2/model.py
+++ b/backends/exllamav2/model.py
@@ -39,6 +39,7 @@
 
 from backends.exllamav2.grammar import (
     ExLlamaV2Grammar,
+    clear_grammar_func_cache,
 )
 from backends.exllamav2.utils import (
     exllama_disabled_flash_attn,
@@ -832,6 +833,9 @@ async def unload(self, loras_only: bool = False, **kwargs):
         # Wait for other jobs to finish
         await self.wait_for_jobs(kwargs.get("skip_wait"))
 
+        # Delete references held in the grammar module
+        clear_grammar_func_cache()
+
         # Clear the image embedding cache
         clear_image_embedding_cache()
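
Patch 6 trades a little memory for per-request latency: building the KBNF engine vocabulary involves walking the tokenizer's entire vocabulary, so it is now computed once per loaded model and reused, with lru_cache(1) keying the single cached entry on the tokenizer object. Because lru_cache keeps a strong reference to its key, the unload path must call clear_grammar_func_cache(), or the tokenizer could never be garbage-collected. The same pattern in isolation (get_vocabulary and the toy tuple tokenizer are illustrative stand-ins, not the real API):

    from functools import lru_cache

    @lru_cache(1)  # one entry, keyed on the tokenizer object
    def get_vocabulary(tokenizer):
        print("building vocabulary...")  # stands in for create_engine_vocabulary
        return {piece: idx for idx, piece in enumerate(tokenizer)}

    toy_tokenizer = ("<s>", "</s>", "hello")
    get_vocabulary(toy_tokenizer)  # prints: building vocabulary...
    get_vocabulary(toy_tokenizer)  # cache hit; the vocabulary is not rebuilt

    get_vocabulary.cache_clear()   # on model unload: release the reference
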
"exllamav2 @ https://github.com/turboderp/exllamav2/releases/download/v0.2.5/exllamav2-0.2.5+cu121.torch2.5.0-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.11'", - "exllamav2 @ https://github.com/turboderp/exllamav2/releases/download/v0.2.5/exllamav2-0.2.5+cu121.torch2.5.0-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.10'", + "exllamav2 @ https://github.com/turboderp/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.5.0-cp312-cp312-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.12'", + "exllamav2 @ https://github.com/turboderp/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.5.0-cp311-cp311-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.11'", + "exllamav2 @ https://github.com/turboderp/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.5.0-cp310-cp310-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.10'", + "exllamav2 @ https://github.com/turboderp/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.5.0-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.12'", + "exllamav2 @ https://github.com/turboderp/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.5.0-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.11'", + "exllamav2 @ https://github.com/turboderp/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.5.0-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.10'", # Windows FA2 from https://github.com/bdashore3/flash-attention/releases "flash_attn @ https://github.com/bdashore3/flash-attention/releases/download/v2.7.0.post2/flash_attn-2.7.0.post2+cu124torch2.5.1cxx11abiFALSE-cp312-cp312-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.12'", @@ -99,9 +99,9 @@ amd = [ "torch @ https://download.pytorch.org/whl/rocm6.0/torch-2.4.1%2Brocm6.0-cp310-cp310-linux_x86_64.whl ; python_version == '3.10'", # Exl2 - "exllamav2 @ https://github.com/turboderp/exllamav2/releases/download/v0.2.5/exllamav2-0.2.5+rocm6.1.torch2.4.0-cp312-cp312-linux_x86_64.whl ; python_version == '3.12'", - "exllamav2 @ https://github.com/turboderp/exllamav2/releases/download/v0.2.5/exllamav2-0.2.5+rocm6.1.torch2.4.0-cp311-cp311-linux_x86_64.whl ; python_version == '3.11'", - "exllamav2 @ https://github.com/turboderp/exllamav2/releases/download/v0.2.5/exllamav2-0.2.5+rocm6.1.torch2.4.0-cp310-cp310-linux_x86_64.whl ; python_version == '3.10'", + "exllamav2 @ https://github.com/turboderp/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+rocm6.1.torch2.4.0-cp312-cp312-linux_x86_64.whl ; python_version == '3.12'", + "exllamav2 @ https://github.com/turboderp/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+rocm6.1.torch2.4.0-cp311-cp311-linux_x86_64.whl ; python_version == '3.11'", + "exllamav2 @ https://github.com/turboderp/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+rocm6.1.torch2.4.0-cp310-cp310-linux_x86_64.whl ; python_version == '3.10'", ] # MARK: Ruff options