diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py
index 047cea27..2da49e41 100644
--- a/backends/exllamav2/model.py
+++ b/backends/exllamav2/model.py
@@ -399,19 +399,7 @@ def progress(loaded_modules: int, total_modules: int)
         ExLlamaV2Tokenizer.extended_id_to_piece = {}
         ExLlamaV2Tokenizer.extended_piece_to_id = {}
 
-        try:
-            self.tokenizer = ExLlamaV2Tokenizer(self.config)
-        except AssertionError as exc:
-            if "HF tokenizer" in str(exc):
-                raise ImportError(
-                    "Could not create ExllamaV2's tokenizer for this model "
-                    "because tokenizers is not installed.\n"
-                    "Please run the following command in your environment "
-                    "to install extra packages:\n"
-                    "pip install -U .[extras]"
-                ) from exc
-            else:
-                raise exc
+        self.tokenizer = ExLlamaV2Tokenizer(self.config)
 
         # Calculate autosplit reserve for all GPUs
         gpu_count = torch.cuda.device_count()
diff --git a/pyproject.toml b/pyproject.toml
index c4e0459d..00cda211 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,6 +25,7 @@ dependencies = [
     "loguru",
     "sse-starlette",
     "packaging",
+    "tokenizers",
 ]
 
 [project.urls]
@@ -32,7 +33,6 @@ dependencies = [
 
 [project.optional-dependencies]
 extras = [
-    "tokenizers",
     "outlines",
     "lm-format-enforcer",
 ]
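
For reference, a minimal sketch of the load path this patch leaves behind, assuming the intent is that "tokenizers" becomes a hard dependency installed with the base package, so the AssertionError-to-ImportError fallback is no longer reachable. The standalone setup and model path below are hypothetical; in the actual backend the equivalent call sits inside the model class:

# Minimal sketch, assuming "tokenizers" is now always installed as a core
# dependency, so ExLlamaV2Tokenizer can be constructed unconditionally.
from exllamav2 import ExLlamaV2Config, ExLlamaV2Tokenizer

config = ExLlamaV2Config()
config.model_dir = "/models/my-exl2-model"  # hypothetical model directory
config.prepare()

# No try/except guard needed: the previous error message pointing users to
# `pip install -U .[extras]` is obsolete once tokenizers ships with the base
# install, matching the simplified line in the diff above.
tokenizer = ExLlamaV2Tokenizer(config)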