add link to online server api and move to instruct models
Signed-off-by: ismael-dm <[email protected]>
ismael-dm committed Nov 2, 2024
1 parent 9914dcf commit ffc8a71
Showing 3 changed files with 11 additions and 9 deletions.
10 changes: 6 additions & 4 deletions docs/source/models/structured_outputs.rst
@@ -21,6 +21,8 @@ The following parameters are supported, which must be added as extra parameters:
- ``guided_whitespace_pattern``: used to override the default whitespace pattern for guided json decoding.
- ``guided_decoding_backend``: used to select the guided decoding backend to use.

You can see the complete list of supported parameters on the `OpenAI Compatible Server </../serving/openai_compatible_server.html>`_ page.

Now let's see an example for each of the cases, starting with the ``guided_choice``, as it's the easiest one:

.. code-block:: python
@@ -32,7 +34,7 @@ Now let's see an example for each of the cases, starting with the ``guided_choi
)
completion = client.chat.completions.create(
-model="Qwen/Qwen2.5-3B",
+model="Qwen/Qwen2.5-3B-Instruct",
messages=[
{"role": "user", "content": "Classify this sentiment: vLLM is wonderful!"}
],
@@ -46,7 +48,7 @@ The next example shows how to use the ``guided_regex``. The idea is to generate
.. code-block:: python
completion = client.chat.completions.create(
-model="Qwen/Qwen2.5-3B",
+model="Qwen/Qwen2.5-3B-Instruct",
messages=[
{
"role": "user",
@@ -86,7 +88,7 @@ The next example shows how to use the ``guided_json`` parameter with a Pydantic
json_schema = CarDescription.model_json_schema()
completion = client.chat.completions.create(
-model="Qwen/Qwen2.5-3B",
+model="Qwen/Qwen2.5-3B-Instruct",
messages=[
{
"role": "user",
@@ -122,7 +124,7 @@ It works by using a context free EBNF grammar, which for example we can use to d
"""
completion = client.chat.completions.create(
-model="Qwen/Qwen2.5-3B",
+model="Qwen/Qwen2.5-3B-Instruct",
messages=[
{
"role": "user",
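The ``guided_json`` hunks in this file reference a ``CarDescription`` Pydantic model whose body the diff truncates. As a minimal, self-contained sketch of what that schema construction looks like locally (the field names follow the prompt in the example, "brand, model and car_type"; the ``str`` types are assumptions, since the class body is not shown here):

```python
from pydantic import BaseModel


class CarDescription(BaseModel):
    # Field names mirror the example prompt ("brand, model and car_type");
    # the exact field types are assumptions, as the diff truncates the class.
    brand: str
    model: str
    car_type: str


# This is the schema the example passes via extra_body={"guided_json": ...}.
json_schema = CarDescription.model_json_schema()
print(sorted(json_schema["properties"]))  # ['brand', 'car_type', 'model']
```

Passing this schema as the ``guided_json`` constraint forces the server to emit JSON that validates against it, so the response can be parsed directly back into ``CarDescription``.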
2 changes: 1 addition & 1 deletion examples/offline_inference_structured_outputs.py
@@ -5,7 +5,7 @@
from vllm import LLM, SamplingParams
from vllm.sampling_params import GuidedDecodingParams

-llm = LLM(model="Qwen/Qwen2.5-3B", max_model_len=100)
+llm = LLM(model="Qwen/Qwen2.5-3B-Instruct", max_model_len=100)

# Guided decoding by Choice (list of possible options)
guided_decoding_params = GuidedDecodingParams(choice=["Positive", "Negative"])
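The offline example above builds a ``GuidedDecodingParams(choice=[...])`` constraint, which guarantees the completion is exactly one of the listed strings. A tiny local sketch of that invariant, with no model or server involved (the sample outputs below are assumptions for illustration):

```python
# Guided decoding by choice restricts the generated text to one of the
# allowed options; this helper checks that invariant on a finished output.
ALLOWED_CHOICES = ["Positive", "Negative"]


def is_valid_choice(output: str, allowed: list[str]) -> bool:
    """Return True if the generated text is exactly one of the allowed options."""
    return output in allowed


print(is_valid_choice("Positive", ALLOWED_CHOICES))  # True
print(is_valid_choice("neutral", ALLOWED_CHOICES))   # False
```

With guided decoding enabled, this check can never fail for server output; it is useful mainly as a sanity assertion in downstream code that also accepts unconstrained models.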
8 changes: 4 additions & 4 deletions examples/openai_chat_completion_structured_outputs.py
@@ -10,7 +10,7 @@

# Guided decoding by Choice (list of possible options)
completion = client.chat.completions.create(
-model="Qwen/Qwen2.5-3B",
+model="Qwen/Qwen2.5-3B-Instruct",
messages=[{
"role": "user",
"content": "Classify this sentiment: vLLM is wonderful!"
@@ -25,7 +25,7 @@
"[email protected]\n")

completion = client.chat.completions.create(
-model="Qwen/Qwen2.5-3B",
+model="Qwen/Qwen2.5-3B-Instruct",
messages=[{
"role": "user",
"content": prompt,
@@ -57,7 +57,7 @@ class CarDescription(BaseModel):
prompt = ("Generate a JSON with the brand, model and car_type of"
"the most iconic car from the 90's")
completion = client.chat.completions.create(
-model="Qwen/Qwen2.5-3B",
+model="Qwen/Qwen2.5-3B-Instruct",
messages=[{
"role": "user",
"content": prompt,
@@ -84,7 +84,7 @@ class CarDescription(BaseModel):
prompt = ("Generate an SQL query to show the 'username' and 'email'"
"from the 'users' table.")
completion = client.chat.completions.create(
-model="Qwen/Qwen2.5-3B",
+model="Qwen/Qwen2.5-3B-Instruct",
messages=[{
"role": "user",
"content": prompt,
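This file also contains a ``guided_regex`` example whose actual pattern the diff truncates, so the pattern below is an assumption for illustration. Guided regex decoding guarantees the completion matches the pattern in full, which can be checked locally without a server:

```python
import re

# This email-style pattern is an ASSUMED stand-in for the truncated
# guided_regex constraint in the example; any regex works the same way.
PATTERN = r"\w+@\w+\.com"


def satisfies_regex(output: str, pattern: str) -> bool:
    """Return True if the whole generated output matches the guiding pattern."""
    return re.fullmatch(pattern, output) is not None


print(satisfies_regex("user@example.com", PATTERN))  # True
print(satisfies_regex("not an email", PATTERN))      # False
```

Note the use of ``re.fullmatch`` rather than ``re.search``: guided decoding constrains the entire completion, not just a substring of it.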
