From c2ae3f1f0543e0b7a6089779044da63ed60cd966 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gabriel=20Mart=C3=ADn=20Bl=C3=A1zquez?=
 <gmartinbdev@gmail.com>
Date: Wed, 18 Dec 2024 11:34:55 +0100
Subject: [PATCH] Fix `vLLM` unload logic when model is `None` (#1080)

---
 src/distilabel/models/llms/_dummy.py          | 70 -------------------
 src/distilabel/models/llms/vllm.py            |  3 +
 .../integration/test_generator_and_sampler.py | 26 ++++++-
 tests/unit/models/llms/test_vllm.py           |  4 +-
 .../tasks/structured_outputs/test_outlines.py |  3 +
 5 files changed, 34 insertions(+), 72 deletions(-)
 delete mode 100644 src/distilabel/models/llms/_dummy.py

diff --git a/src/distilabel/models/llms/_dummy.py b/src/distilabel/models/llms/_dummy.py
deleted file mode 100644
index de89356d0f..0000000000
--- a/src/distilabel/models/llms/_dummy.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# Copyright 2023-present, Argilla, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import TYPE_CHECKING, Any, List
-
-from distilabel.models.llms.base import LLM, AsyncLLM
-from distilabel.models.mixins.magpie import MagpieChatTemplateMixin
-
-if TYPE_CHECKING:
-    from distilabel.models.llms.typing import GenerateOutput
-    from distilabel.steps.tasks.typing import FormattedInput
-
-
-class DummyAsyncLLM(AsyncLLM):
-    structured_output: Any = None
-
-    def load(self) -> None:
-        pass
-
-    @property
-    def model_name(self) -> str:
-        return "test"
-
-    async def agenerate(  # type: ignore
-        self, input: "FormattedInput", num_generations: int = 1
-    ) -> "GenerateOutput":
-        return ["output" for _ in range(num_generations)]
-
-
-class DummySyncLLM(LLM):
-    structured_output: Any = None
-
-    def load(self) -> None:
-        super().load()
-
-    @property
-    def model_name(self) -> str:
-        return "test"
-
-    def generate(  # type: ignore
-        self, inputs: "FormattedInput", num_generations: int = 1
-    ) -> "GenerateOutput":
-        return [["output" for _ in range(num_generations)] for _ in range(len(inputs))]
-
-
-class DummyMagpieLLM(LLM, MagpieChatTemplateMixin):
-    def load(self) -> None:
-        pass
-
-    @property
-    def model_name(self) -> str:
-        return "test"
-
-    def generate(
-        self, inputs: List["FormattedInput"], num_generations: int = 1, **kwargs: Any
-    ) -> List["GenerateOutput"]:
-        return [
-            ["Hello Magpie" for _ in range(num_generations)] for _ in range(len(inputs))
-        ]
diff --git a/src/distilabel/models/llms/vllm.py b/src/distilabel/models/llms/vllm.py
index 9a75cd47c2..401bc66d09 100644
--- a/src/distilabel/models/llms/vllm.py
+++ b/src/distilabel/models/llms/vllm.py
@@ -224,6 +224,9 @@ def unload(self) -> None:
         super().unload()
 
     def _cleanup_vllm_model(self) -> None:
+        if self._model is None:
+            return
+
         import torch  # noqa
         from vllm.distributed.parallel_state import (
             destroy_distributed_environment,
diff --git a/tests/integration/test_generator_and_sampler.py b/tests/integration/test_generator_and_sampler.py
index cdbeb5703a..5c53346f48 100644
--- a/tests/integration/test_generator_and_sampler.py
+++ b/tests/integration/test_generator_and_sampler.py
@@ -12,12 +12,36 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from distilabel.models.llms._dummy import DummyAsyncLLM
+from typing import TYPE_CHECKING, Any
+
+from distilabel.models.llms.base import AsyncLLM
 from distilabel.pipeline import Pipeline
 from distilabel.steps import CombineOutputs, LoadDataFromDicts
 from distilabel.steps.generators.data_sampler import DataSampler
 from distilabel.steps.tasks import TextGeneration
 
+if TYPE_CHECKING:
+    from distilabel.typing import FormattedInput, GenerateOutput
+
+
+class DummyAsyncLLM(AsyncLLM):
+    structured_output: Any = None
+
+    def load(self) -> None:
+        pass
+
+    @property
+    def model_name(self) -> str:
+        return "test"
+
+    async def agenerate(  # type: ignore
+        self, input: "FormattedInput", num_generations: int = 1
+    ) -> "GenerateOutput":
+        return {
+            "generations": ["output" for _ in range(num_generations)],
+            "statistics": {},
+        }
+
 
 def get_pipeline():
     with Pipeline() as pipe:
diff --git a/tests/unit/models/llms/test_vllm.py b/tests/unit/models/llms/test_vllm.py
index 6babb6232c..2230186bf3 100644
--- a/tests/unit/models/llms/test_vllm.py
+++ b/tests/unit/models/llms/test_vllm.py
@@ -105,7 +105,9 @@ class Animal(BaseModel):
 class TestvLLM:
     @pytest.mark.parametrize(
         "multi_structured_output",
-        (True, False),
+        # TODO:  uncomment once with update our code to work with `outlines>0.1.0`
+        # (True, False),
+        (False,),
     )
     @pytest.mark.parametrize(
         "num_generations, expected_result",
diff --git a/tests/unit/steps/tasks/structured_outputs/test_outlines.py b/tests/unit/steps/tasks/structured_outputs/test_outlines.py
index fc6f9a2f7c..e4eb2025c8 100644
--- a/tests/unit/steps/tasks/structured_outputs/test_outlines.py
+++ b/tests/unit/steps/tasks/structured_outputs/test_outlines.py
@@ -100,6 +100,9 @@ class DummyUserTest(BaseModel):
 }
 
 
+@pytest.mark.skip(
+    reason="won't work until we update our code to work with `outlines>0.1.0`"
+)
 class TestOutlinesIntegration:
     @pytest.mark.parametrize(
         "format, schema, prompt",