fix models.mlxlm whitespace prefix handling

dottxt-ai · Jun 23, 2024 · 566fe3c
1 parent f20c774
commit 566fe3c
Show file tree

Hide file tree

Showing 3 changed files with 17 additions and 4 deletions.
diff --git a/outlines/models/mlxlm.py b/outlines/models/mlxlm.py
@@ -106,13 +106,20 @@ def stream(
         # https://github.com/ml-explore/mlx-examples/blob/4872727/llms/mlx_lm/utils.py#L267
         prompt_tokens = mx.array(self.mlx_tokenizer.encode(prompts))
 
+        detokenizer = self.mlx_tokenizer.detokenizer
+        detokenizer.reset()
+
         for (token, prob), n in zip(
             self.generate_step(prompt_tokens, **generate_kwargs),
             range(max_tokens),
         ):
             if token == self.tokenizer.eos_token_id:
                 break
-            yield self.tokenizer.decode([token])[0]
+            detokenizer.add_token(token)
+            yield detokenizer.last_segment
+
+        detokenizer.finalize()
+        yield detokenizer.last_segment
 
     def generate_step(
         self,

diff --git a/tests/generate/conftest.py b/tests/generate/conftest.py
@@ -17,7 +17,7 @@ def pytest_collection_modifyitems(config, items):
         for item in items:
             if "model_fixture" in item.fixturenames:
                 model_param = item.callspec.params.get("model_fixture", None)
-                if model_param == "model_mlxlm":
+                if str(model_param).startswith("model_mlxlm"):  # tolerate None
                     item.add_marker(skip_marker)
 
 

diff --git a/tests/generate/test_generate.py b/tests/generate/test_generate.py
@@ -19,14 +19,19 @@ def model_mlxlm(tmp_path_factory):
     return models.mlxlm("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit")
 
 
+@pytest.fixture(scope="session")
+def model_mlxlm_phi3(tmp_path_factory):
+    return models.mlxlm("mlx-community/Phi-3-mini-4k-instruct-4bit")
+
+
 @pytest.fixture(scope="session")
 def model_transformers(tmp_path_factory):
     return models.transformers("Locutusque/TinyMistral-248M-v2-Instruct", device="cpu")
 
 
 @pytest.mark.parametrize(
     "model_fixture",
-    ("model_llamacpp", "model_mlxlm", "model_transformers"),
+    ("model_llamacpp", "model_mlxlm", "model_transformers", "model_mlxlm_phi3"),
 )
 def test_generate_text(request, model_fixture):
     model = request.getfixturevalue(model_fixture)
@@ -37,11 +42,12 @@ def test_generate_text(request, model_fixture):
 
 @pytest.mark.parametrize(
     "model_fixture",
-    ("model_llamacpp", "model_mlxlm", "model_transformers"),
+    ("model_llamacpp", "model_mlxlm", "model_transformers", "model_mlxlm_phi3"),
 )
 @pytest.mark.parametrize(
     "pattern",
     (
+        "a b c d e",  # test model tokenizer whitespace prefix handling
         "[0-9]",
         "abc*",
         "\\+?[1-9][0-9]{7,14}",