vllm-project · simon-mo · Nov 1, 2024 · Oct 31, 2024 · Oct 31, 2024
diff --git a/docs/source/serving/compatibility_matrix.rst b/docs/source/serving/compatibility_matrix.rst
@@ -283,7 +283,7 @@ Feature x Feature
      - ✅
      - ✅
      - ✅
-     - `✗ <https://github.com/vllm-project/vllm/issues/8985>`__ 
+     - `✗ <https://github.com/vllm-project/vllm/issues/9893>`__ 
      - ?
      - ✅
      - ✅

@@ -35,3 +35,23 @@ async def test_out_of_vocab_token_ids():
                                             prompt=[999999],
                                             max_tokens=5,
                                             temperature=0.0)
+
+
+@pytest.mark.asyncio
+async def test_reject_multistep_with_guided_decoding():
+    model_name = "gpt2"
+    server_args = ["--enforce-eager", "--num-scheduler-steps", "8"]
+    with RemoteOpenAIServer(model_name, server_args) as remote_server:
+        client = remote_server.get_async_client()
+
+        with pytest.raises(openai.BadRequestError,
+                           match=re.compile(
+                               '.*Guided decoding .* multi-step decoding.*')):
+            await client.completions.create(
+                model=model_name,
+                prompt="Hello",
+                max_tokens=5,
+                temperature=0.0,
+                extra_body={"response_format": {
+                    "type": "json_object"
+                }})
@@ -829,6 +829,13 @@ def add_request(
             raise ValueError(f"Got priority {priority} but "
                              "Priority scheduling is not enabled.")
 
+        if isinstance(params, SamplingParams) \
+            and (params.guided_decoding or params.logits_processors) \
+            and self.scheduler_config.num_scheduler_steps > 1:
+            raise ValueError(
+                "Guided decoding and logits processors are not supported "
+                "in multi-step decoding")
+
         if arrival_time is None:
             arrival_time = time.time()
 

diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py
@@ -485,8 +485,8 @@ def __repr__(self) -> str:
             f"skip_special_tokens={self.skip_special_tokens}, "
             "spaces_between_special_tokens="
             f"{self.spaces_between_special_tokens}, "
-            f"truncate_prompt_tokens={self.truncate_prompt_tokens}), "
-            f"guided_decoding={self.guided_decoding}")
+            f"truncate_prompt_tokens={self.truncate_prompt_tokens}, "
+            f"guided_decoding={self.guided_decoding})")
 
 
 class BeamSearchParams(
-Original file line number
+Diff line change
@@ Expand Up / @@ -283,7 +283,7 @@ Feature x Feature @@
          - ✅
          - ✅
          - ✅
-         - `✗ <https://github.com/vllm-project/vllm/issues/8985>`__
+         - `✗ <https://github.com/vllm-project/vllm/issues/9893>`__
          - ?
          - ✅
          - ✅
@@ Expand Down @@