From 5e93c1f1c4dcce4b3cf208679e52b18f582ed8bb Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Thu, 29 Aug 2024 16:40:48 +0300 Subject: [PATCH 01/19] =?UTF-8?q?=F0=9F=9A=9A=20Better=20naming=20is=20pro?= =?UTF-8?q?vided?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * backend/test_openai_backend.py -> backend/test_openai.py * backend/test_deepsparse_backend.py -> backend/test_deepsparse.py --- .../backend/{test_deepsparse_backend.py => test_deepsparse.py} | 0 tests/unit/backend/{test_openai_backend.py => test_openai.py} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/unit/backend/{test_deepsparse_backend.py => test_deepsparse.py} (100%) rename tests/unit/backend/{test_openai_backend.py => test_openai.py} (100%) diff --git a/tests/unit/backend/test_deepsparse_backend.py b/tests/unit/backend/test_deepsparse.py similarity index 100% rename from tests/unit/backend/test_deepsparse_backend.py rename to tests/unit/backend/test_deepsparse.py diff --git a/tests/unit/backend/test_openai_backend.py b/tests/unit/backend/test_openai.py similarity index 100% rename from tests/unit/backend/test_openai_backend.py rename to tests/unit/backend/test_openai.py From cea679e9ec6b2439566c0bdbb775d281a7ed81cc Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Fri, 30 Aug 2024 13:22:38 +0300 Subject: [PATCH 02/19] =?UTF-8?q?=E2=9C=A8=20vllm=20backend=20integration?= =?UTF-8?q?=20is=20added=20not=20tested?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 5 +- src/guidellm/backend/__init__.py | 16 ++++ src/guidellm/backend/base.py | 2 +- src/guidellm/backend/vllm/__init__.py | 21 +++++ src/guidellm/backend/vllm/backend.py | 122 ++++++++++++++++++++++++++ tests/unit/backend/test_vllm.py | 49 +++++++++++ 6 files changed, 213 insertions(+), 2 deletions(-) create mode 100644 src/guidellm/backend/vllm/__init__.py create mode 100644 src/guidellm/backend/vllm/backend.py create mode 100644 tests/unit/backend/test_vllm.py diff --git a/pyproject.toml b/pyproject.toml index 942c1cd..62bb64b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,9 @@ dev = [ deepsparse = [ "deepsparse; python_version < '3.12'", ] +vllm = [ + "vllm", +] [project.entry-points.console_scripts] @@ -108,7 +111,7 @@ exclude = ["venv", ".tox"] follow_imports = 'silent' [[tool.mypy.overrides]] -module = ["deepsparse.*", "transformers.*"] +module = ["deepsparse.*", "transformers.*", "vllm.*"] ignore_missing_imports=true diff --git a/src/guidellm/backend/__init__.py b/src/guidellm/backend/__init__.py index b6d1b9d..4498498 100644 --- a/src/guidellm/backend/__init__.py +++ b/src/guidellm/backend/__init__.py @@ -1,3 +1,19 @@ from .base import Backend, BackendEngine, BackendEnginePublic, GenerativeResponse +<<<<<<< HEAD __all__ = ["Backend", "BackendEngine", "BackendEnginePublic", "GenerativeResponse"] +======= +from .deepsparse.backend import DeepsparseBackend +from .openai import OpenAIBackend +from .vllm.backend import VllmBackend + +__all__ = [ + "Backend", + "BackendEngine", + "BackendEnginePublic", + "GenerativeResponse", + "OpenAIBackend", + "DeepsparseBackend", + "VllmBackend", +] +>>>>>>> 8a8e2ff (✨ vllm backend integration is added) diff --git a/src/guidellm/backend/base.py b/src/guidellm/backend/base.py index 010cdd2..8500369 100644 --- a/src/guidellm/backend/base.py +++ b/src/guidellm/backend/base.py @@ -15,7 +15,7 @@ __all__ = ["Backend", "BackendEngine", "BackendEnginePublic", "GenerativeResponse"] 
-BackendEnginePublic = Literal["openai_server", "deepsparse"] +BackendEnginePublic = Literal["openai_server", "deepsparse", "vllm"] BackendEngine = Union[BackendEnginePublic, Literal["test"]] diff --git a/src/guidellm/backend/vllm/__init__.py b/src/guidellm/backend/vllm/__init__.py new file mode 100644 index 0000000..e9da0f4 --- /dev/null +++ b/src/guidellm/backend/vllm/__init__.py @@ -0,0 +1,21 @@ +""" +This package encapsulates the "vLLM Backend" implementation. +ref: https://github.com/vllm-project/vllm + +The `vllm` package supports Python3.8..Python3.11, +when the `guidellm` start from Python3.8. + +Safe range of versions is Python3.8..Python3.11 +for the vLLM Backend implementation. +""" + +from guidellm.utils import check_python_version, module_is_available + +# Ensure that python is in valid range +check_python_version(min_version="3.8", max_version="3.11") + +# Ensure that vllm is installed +module_is_available( + module="vllm", + helper=("`vllm` package is not available. Try run: `pip install -e '.[vllm]'`"), +) diff --git a/src/guidellm/backend/vllm/backend.py b/src/guidellm/backend/vllm/backend.py new file mode 100644 index 0000000..db5180b --- /dev/null +++ b/src/guidellm/backend/vllm/backend.py @@ -0,0 +1,122 @@ +from typing import Any, AsyncGenerator, Dict, List, Optional + +from loguru import logger +from vllm import LLM, CompletionOutput, SamplingParams + +from guidellm.backend import Backend, GenerativeResponse +from guidellm.config import settings +from guidellm.core import TextGenerationRequest + + +@Backend.register(backend_type="deepsparse") +class VllmBackend(Backend): + """ + An vLLM Backend implementation for the generative AI result. + """ + + def __init__(self, model: Optional[str] = None, **request_args): + super().__init__( + type_="vllm", + model=self._get_model(model), + target="not used", + ) + + self._request_args: Dict[str, Any] = request_args + self.llm = LLM(self._model) + + logger.info(f"vLLM Backend uses model '{self._model}'") + + def _get_model(self, model_from_cli: Optional[str] = None) -> str: + """Provides the model by the next priority list: + 1. from function argument (comes from CLI) + 1. from environment variable + 2. `self.default_model` from `self.available_models` + """ + + if model_from_cli is not None: + return model_from_cli + elif settings.llm_model is not None: + logger.info( + "Using vLLM model from environment variable: " f"{settings.llm_model}" + ) + return settings.llm_model + else: + logger.info(f"Using default vLLM model: {self.default_model}") + return self.default_model + + async def make_request( + self, request: TextGenerationRequest + ) -> AsyncGenerator[GenerativeResponse, None]: + """ + Make a request to the vLLM Python API client. + + :param request: The result request to submit. + :type request: TextGenerationRequest + :return: An iterator over the generative responses. 
+ :rtype: Iterator[GenerativeResponse] + """ + + logger.debug(f"Making request to vLLM backend with prompt: {request.prompt}") + + token_count = 0 + request_args = { + **self._request_args, + "inputs": [request.prompt], + "sampling_params": SamplingParams(max_tokens=request.output_token_count), + } + + final_response = GenerativeResponse( + type_="final", + prompt=request.prompt, + prompt_token_count=request.prompt_token_count, + output_token_count=token_count, + ) + + if not (result := self.llm.generate(**request_args)): + yield final_response + return + + try: + generations: List[CompletionOutput] = result[0].outputs + except IndexError: + yield final_response + return + + for generation in generations: + if not (token := generation.text): + yield GenerativeResponse( + type_="final", + prompt=request.prompt, + prompt_token_count=request.prompt_token_count, + output_token_count=token_count, + ) + break + else: + token_count += 1 + yield GenerativeResponse( + type_="token_iter", + add_token=token, + prompt=request.prompt, + prompt_token_count=request.prompt_token_count, + output_token_count=token_count, + ) + + def available_models(self) -> List[str]: + """ + Get the available models for the backend. + + ref: https://docs.vllm.ai/en/v0.4.1/models/supported_models.html + + :return: A list of available models. + :rtype: List[str] + """ + + return [ + "mistralai/Mistral-7B-Instruct-v0.3", + "meta-llama/Meta-Llama-3-8B-Instruct", + ] + + def _token_count(self, text: str) -> int: + token_count = len(text.split()) + logger.debug(f"Token count for text '{text}': {token_count}") + return token_count diff --git a/tests/unit/backend/test_vllm.py b/tests/unit/backend/test_vllm.py new file mode 100644 index 0000000..13be052 --- /dev/null +++ b/tests/unit/backend/test_vllm.py @@ -0,0 +1,49 @@ +from typing import Dict, List, cast + +import pytest +from vllm import LLM + +from guidellm.backend import Backend, VllmBackend + + +@pytest.fixture(autouse=True) +def mock_vllm_llm(mocker): + llm = LLM( + model="facebook/opt-125m", + max_num_batched_tokens=4096, + tensor_parallel_size=1, + gpu_memory_utilization=0.10, + enforce_eager=True, + ) + + return mocker.patch("vllm.LLM", return_value=llm) + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + "create_payload", + [ + {}, + {"model": "test/custom_llm"}, + ], +) +def test_backend_creation(create_payload: Dict): + """Test the "Deepspaarse Backend" class + with defaults and custom input parameters. 
+ """ + + backends: List[VllmBackend] = cast( + List[VllmBackend], + [ + Backend.create("vllm", **create_payload), + VllmBackend(**create_payload), + ], + ) + + for backend in backends: + assert backend.llm + ( + backend.model == custom_model + if (custom_model := create_payload.get("model")) + else backend.default_model + ) From 440d4beb1dc372530dba0b35367877d905b12335 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Fri, 30 Aug 2024 13:52:08 +0300 Subject: [PATCH 03/19] =?UTF-8?q?=E2=9C=85=20vllm=20tests=20are=20skipped?= =?UTF-8?q?=20if=20platform=20is=20not=20Linux?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/guidellm/backend/__init__.py | 16 ---------- src/guidellm/backend/openai.py | 2 +- src/guidellm/backend/vllm/__init__.py | 9 ++++-- src/guidellm/backend/vllm/backend.py | 2 +- tests/unit/backend/test_deepsparse.py | 7 +++++ tests/unit/backend/test_vllm.py | 42 +++++++++++++++++++-------- 6 files changed, 46 insertions(+), 32 deletions(-) diff --git a/src/guidellm/backend/__init__.py b/src/guidellm/backend/__init__.py index 4498498..b6d1b9d 100644 --- a/src/guidellm/backend/__init__.py +++ b/src/guidellm/backend/__init__.py @@ -1,19 +1,3 @@ from .base import Backend, BackendEngine, BackendEnginePublic, GenerativeResponse -<<<<<<< HEAD __all__ = ["Backend", "BackendEngine", "BackendEnginePublic", "GenerativeResponse"] -======= -from .deepsparse.backend import DeepsparseBackend -from .openai import OpenAIBackend -from .vllm.backend import VllmBackend - -__all__ = [ - "Backend", - "BackendEngine", - "BackendEnginePublic", - "GenerativeResponse", - "OpenAIBackend", - "DeepsparseBackend", - "VllmBackend", -] ->>>>>>> 8a8e2ff (✨ vllm backend integration is added) diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py index b5cbc12..8a12c92 100644 --- a/src/guidellm/backend/openai.py +++ b/src/guidellm/backend/openai.py @@ -10,7 +10,7 @@ __all__ = ["OpenAIBackend"] -@Backend.register("openai_server") +@Backend.register(backend_type="openai_server") class OpenAIBackend(Backend): """ An OpenAI backend implementation for generative AI results. diff --git a/src/guidellm/backend/vllm/__init__.py b/src/guidellm/backend/vllm/__init__.py index e9da0f4..d3b06b4 100644 --- a/src/guidellm/backend/vllm/__init__.py +++ b/src/guidellm/backend/vllm/__init__.py @@ -1,5 +1,6 @@ """ This package encapsulates the "vLLM Backend" implementation. + ref: https://github.com/vllm-project/vllm The `vllm` package supports Python3.8..Python3.11, @@ -7,15 +8,19 @@ Safe range of versions is Python3.8..Python3.11 for the vLLM Backend implementation. + +In the end ensure that the `vllm` package is installed. """ from guidellm.utils import check_python_version, module_is_available -# Ensure that python is in valid range check_python_version(min_version="3.8", max_version="3.11") -# Ensure that vllm is installed module_is_available( module="vllm", helper=("`vllm` package is not available. 
Try run: `pip install -e '.[vllm]'`"), ) + +from .backend import VllmBackend # noqa: E402 + +__all__ = ["VllmBackend"] diff --git a/src/guidellm/backend/vllm/backend.py b/src/guidellm/backend/vllm/backend.py index db5180b..fd99b28 100644 --- a/src/guidellm/backend/vllm/backend.py +++ b/src/guidellm/backend/vllm/backend.py @@ -8,7 +8,7 @@ from guidellm.core import TextGenerationRequest -@Backend.register(backend_type="deepsparse") +@Backend.register(backend_type="vllm") class VllmBackend(Backend): """ An vLLM Backend implementation for the generative AI result. diff --git a/tests/unit/backend/test_deepsparse.py b/tests/unit/backend/test_deepsparse.py index 58e5761..cac49ca 100644 --- a/tests/unit/backend/test_deepsparse.py +++ b/tests/unit/backend/test_deepsparse.py @@ -1,3 +1,10 @@ +""" +This module includes unit tests for the Deepsparse backend. + +Notes: tests from this module are going to be skipped in case + the Python version is >= 3.12 according to the deepsparse limitation. +""" + import sys from typing import Any, Dict, Generator, List, Optional diff --git a/tests/unit/backend/test_vllm.py b/tests/unit/backend/test_vllm.py index 13be052..de432f6 100644 --- a/tests/unit/backend/test_vllm.py +++ b/tests/unit/backend/test_vllm.py @@ -1,14 +1,35 @@ -from typing import Dict, List, cast +""" +This module includes unit tests for the vLLM backend. + +Notes: tests from this module are going to be skipped in case + the rimtime platform is not a Linux / WSL according to vllm documentation. +""" + +import importlib +import sys +from typing import Dict import pytest -from vllm import LLM -from guidellm.backend import Backend, VllmBackend +from guidellm.backend import Backend + +pytestmark = pytest.mark.skipif( + sys.platform != "linux", + reason="Unsupported Platform. Try using Linux or WSL instead.", +) + + +@pytest.fixture(scope="module") +def backend_class(): + from guidellm.backend.vllm import VllmBackend + + return VllmBackend @pytest.fixture(autouse=True) def mock_vllm_llm(mocker): - llm = LLM( + module = importlib.import_module("vllm") + llm = module.LLM( model="facebook/opt-125m", max_num_batched_tokens=4096, tensor_parallel_size=1, @@ -27,18 +48,15 @@ def mock_vllm_llm(mocker): {"model": "test/custom_llm"}, ], ) -def test_backend_creation(create_payload: Dict): +def test_backend_creation(create_payload: Dict, backend_class): """Test the "Deepspaarse Backend" class with defaults and custom input parameters. 
""" - backends: List[VllmBackend] = cast( - List[VllmBackend], - [ - Backend.create("vllm", **create_payload), - VllmBackend(**create_payload), - ], - ) + backends = [ + Backend.create("vllm", **create_payload), + backend_class(**create_payload), + ] for backend in backends: assert backend.llm From 1a715a676b51f62d9d3474389bbdd6004e1d11c1 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Fri, 30 Aug 2024 13:55:34 +0300 Subject: [PATCH 04/19] =?UTF-8?q?=F0=9F=93=8C=20vLLM=20python=20version=20?= =?UTF-8?q?is=20increased=20to=203.12?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/guidellm/backend/vllm/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/guidellm/backend/vllm/__init__.py b/src/guidellm/backend/vllm/__init__.py index d3b06b4..b4f0504 100644 --- a/src/guidellm/backend/vllm/__init__.py +++ b/src/guidellm/backend/vllm/__init__.py @@ -14,7 +14,7 @@ from guidellm.utils import check_python_version, module_is_available -check_python_version(min_version="3.8", max_version="3.11") +check_python_version(min_version="3.8", max_version="3.12") module_is_available( module="vllm", From 14f4c42d20a73e8619ccb69e96637be86ae9afbf Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Fri, 30 Aug 2024 14:02:39 +0300 Subject: [PATCH 05/19] =?UTF-8?q?=F0=9F=93=8C=20`vllm`=20dependency=20is?= =?UTF-8?q?=20skipped=20if=20platform=20IS=20NOT=20Linux?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 2 +- tox.ini | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 62bb64b..407260a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,7 +75,7 @@ deepsparse = [ "deepsparse; python_version < '3.12'", ] vllm = [ - "vllm", + "vllm; sys_platform == 'linux'", ] diff --git a/tox.ini b/tox.ini index 40611c5..76b04cc 100644 --- a/tox.ini +++ b/tox.ini @@ -6,7 +6,7 @@ env_list = py38,py39,py310,py311,py312 [testenv] description = Run all tests deps = - .[dev,deepsparse] + .[dev,deepsparse,vllm] commands = pytest tests/ {posargs} @@ -14,7 +14,7 @@ commands = [testenv:test-unit] description = Run unit tests deps = - .[dev,deepsparse] + .[dev,deepsparse,vllm] commands = python -m pytest tests/unit {posargs} @@ -22,7 +22,7 @@ commands = [testenv:test-integration] description = Run integration tests deps = - .[dev,deepsparse] + .[dev,deepsparse,vllm] commands = python -m pytest tests/integration {posargs} @@ -30,7 +30,7 @@ commands = [testenv:test-e2e] description = Run end-to-end tests deps = - .[dev,deepsparse] + .[dev,deepsparse,vllm] commands = python -m pytest tests/e2e {posargs} From 6e3087027217bf66e405fbc98a8b819eb2fa46b1 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Mon, 2 Sep 2024 08:58:33 +0300 Subject: [PATCH 06/19] =?UTF-8?q?=F0=9F=9A=A7=20WIP?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 20 +++++++++ tests/unit/backend/test_vllm.py | 78 ++++++++++++++++++++++++++++++--- 2 files changed, 92 insertions(+), 6 deletions(-) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..61aaac4 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,20 @@ +FROM --platform=linux/amd64 python:3.8-slim + +# Environment variables +ENV PYTHONUNBUFFERED=1 + +RUN apt-get update \ + # dependencies for building Python packages && cleaning up unused files + && apt-get install -y build-essential \ + libcurl4-openssl-dev 
libssl-dev \ + && rm -rf /var/lib/apt/lists/* + + +# Python dependencies +RUN pip install --upgrade pip setuptools + +WORKDIR /app/ + +COPY ./ ./ + +RUN pip install -e '.[dev,deepsparse,vllm]' diff --git a/tests/unit/backend/test_vllm.py b/tests/unit/backend/test_vllm.py index de432f6..148c37c 100644 --- a/tests/unit/backend/test_vllm.py +++ b/tests/unit/backend/test_vllm.py @@ -7,16 +7,18 @@ import importlib import sys -from typing import Dict +from typing import Dict, List import pytest from guidellm.backend import Backend +from guidellm.config import reload_settings +from guidellm.core import TextGenerationRequest -pytestmark = pytest.mark.skipif( - sys.platform != "linux", - reason="Unsupported Platform. Try using Linux or WSL instead.", -) +# pytestmark = pytest.mark.skipif( +# sys.platform != "linux", +# reason="Unsupported Platform. Try using Linux or WSL instead.", +# ) @pytest.fixture(scope="module") @@ -29,7 +31,7 @@ def backend_class(): @pytest.fixture(autouse=True) def mock_vllm_llm(mocker): module = importlib.import_module("vllm") - llm = module.LLM( + llm = getattr(module, "LLM")( model="facebook/opt-125m", max_num_batched_tokens=4096, tensor_parallel_size=1, @@ -65,3 +67,67 @@ def test_backend_creation(create_payload: Dict, backend_class): if (custom_model := create_payload.get("model")) else backend.default_model ) + + +@pytest.mark.smoke() +def test_backend_model_from_env(mocker, backend_class): + mocker.patch.dict( + "os.environ", + {"GUIDELLM__LLM_MODEL": "test_backend_model_from_env"}, + ) + + reload_settings() + + backends = [Backend.create("vllm"), backend_class()] + + for backend in backends: + assert backend.model == "test_backend_model_from_env" + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + "text_generation_request_create_payload", + [ + {"prompt": "Test prompt"}, + {"prompt": "Test prompt", "output_token_count": 20}, + ], +) +@pytest.mark.asyncio() +async def test_make_request( + text_generation_request_create_payload: Dict, backend_class +): + backend = backend_class() + + output_tokens: List[str] = [] + async for response in backend.make_request( + request=TextGenerationRequest(**text_generation_request_create_payload) + ): + if response.add_token: + output_tokens.append(response.add_token) + assert "".join(output_tokens) == "".join( + generation.text for generation in backend.pipeline._generations + ) + + if max_tokens := text_generation_request_create_payload.get("output_token_count"): + assert len(backend.pipeline._generations) == max_tokens + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("text_generation_request_create_payload", "error"), + [ + ({"prompt": "Test prompt"}, ValueError), + ], +) +@pytest.mark.asyncio() +async def test_make_request_invalid_request_payload( + text_generation_request_create_payload: Dict, error, backend_class +): + backend = backend_class() + with pytest.raises(error): + [ + respnose + async for respnose in backend.make_request( + request=TextGenerationRequest(**text_generation_request_create_payload) + ) + ] From b0c0acb733ab98516c101a23c8434324da53516a Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Mon, 2 Sep 2024 23:25:24 +0300 Subject: [PATCH 07/19] =?UTF-8?q?=F0=9F=9A=A7=20WIP?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/guidellm/backend/vllm/__init__.py | 8 ++-- tests/dummy/__init__.py | 2 +- tests/dummy/data/__init__.py | 3 +- tests/dummy/vllm.py | 65 +++++++++++++++++++++++++++ tests/unit/backend/test_vllm.py | 9 +--- 5 files changed, 74 
insertions(+), 13 deletions(-) create mode 100644 tests/dummy/vllm.py diff --git a/src/guidellm/backend/vllm/__init__.py b/src/guidellm/backend/vllm/__init__.py index b4f0504..18040a7 100644 --- a/src/guidellm/backend/vllm/__init__.py +++ b/src/guidellm/backend/vllm/__init__.py @@ -16,10 +16,10 @@ check_python_version(min_version="3.8", max_version="3.12") -module_is_available( - module="vllm", - helper=("`vllm` package is not available. Try run: `pip install -e '.[vllm]'`"), -) +# module_is_available( +# module="vllm", +# helper=("`vllm` package is not available. Try run: `pip install -e '.[vllm]'`"), +# ) from .backend import VllmBackend # noqa: E402 diff --git a/tests/dummy/__init__.py b/tests/dummy/__init__.py index a0cccdb..dc04765 100644 --- a/tests/dummy/__init__.py +++ b/tests/dummy/__init__.py @@ -5,4 +5,4 @@ test.dummy.data.openai_completion_factory - openai.types.Completion test factory """ -from . import data, services # noqa: F401 +from . import data, services, vllm # noqa: F401 diff --git a/tests/dummy/data/__init__.py b/tests/dummy/data/__init__.py index 95a2c94..4e5b820 100644 --- a/tests/dummy/data/__init__.py +++ b/tests/dummy/data/__init__.py @@ -1,3 +1,4 @@ +from . import vllm from .openai import openai_completion_factory, openai_model_factory -__all__ = ["openai_completion_factory", "openai_model_factory"] +__all__ = ["openai_completion_factory", "openai_model_factory", "vllm"] diff --git a/tests/dummy/vllm.py b/tests/dummy/vllm.py new file mode 100644 index 0000000..7874c43 --- /dev/null +++ b/tests/dummy/vllm.py @@ -0,0 +1,65 @@ +""" +This module includes data models factories for the `vllm` 3-rd party package +""" + +import random +from functools import partial +from typing import List, Optional + +from pydantic import BaseModel, ConfigDict, Field + +from guidellm.utils import random_strings + +__all__ = ["TestLLM", "CompletionOutput"] + + +class CompletionOutput(BaseModel): + """Test interface of `vllm.CompletionOutput`.""" + + text: str + + +class SamplingParams(BaseModel): + """Test interface of `vllm.SamplingParams`.""" + + max_tokens: int + + +class TestLLM(BaseModel): + """Test interface of `vllm.LLM`. + + Args: + _outputs_number(int | None): the number of generated tokens per output. + Should be used only for testing purposes. + Default: randint(10..20) + + """ + + model_config = ConfigDict( + extra="allow", + validate_assignment=True, + arbitrary_types_allowed=True, + from_attributes=True, + ) + + model: str + max_num_batched_tokens: int + + _outputs_number: int = Field(default_factory=partial(random.randint, 10, 20)) + + def _generate_completion_outputs(self, max_tokens: int) -> List[CompletionOutput]: + self._outputs_number = random.randint(10, 20) + + return [ + CompletionOutput(text=text) + for text in random_strings( + min_chars=0, max_chars=max_tokens, n=self._outputs_number + ) + ] + + def generate( + self, inputs: List[str], sampling_params: SamplingParams + ) -> Optional[List[List[CompletionOutput]]]: + return [ + self._generate_completion_outputs(max_tokens=sampling_params.max_tokens) + ] diff --git a/tests/unit/backend/test_vllm.py b/tests/unit/backend/test_vllm.py index 148c37c..a77e5fd 100644 --- a/tests/unit/backend/test_vllm.py +++ b/tests/unit/backend/test_vllm.py @@ -5,8 +5,6 @@ the rimtime platform is not a Linux / WSL according to vllm documentation. 
""" -import importlib -import sys from typing import Dict, List import pytest @@ -14,6 +12,7 @@ from guidellm.backend import Backend from guidellm.config import reload_settings from guidellm.core import TextGenerationRequest +from tests import dummy # pytestmark = pytest.mark.skipif( # sys.platform != "linux", @@ -30,13 +29,9 @@ def backend_class(): @pytest.fixture(autouse=True) def mock_vllm_llm(mocker): - module = importlib.import_module("vllm") - llm = getattr(module, "LLM")( + llm = dummy.vllm.TestLLM( model="facebook/opt-125m", max_num_batched_tokens=4096, - tensor_parallel_size=1, - gpu_memory_utilization=0.10, - enforce_eager=True, ) return mocker.patch("vllm.LLM", return_value=llm) From 9f431ea8beb81091e13229bf82ede6174b1cba3c Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Mon, 2 Sep 2024 23:35:01 +0300 Subject: [PATCH 08/19] dummy.data.openai is removed --- tests/dummy/__init__.py | 3 -- tests/dummy/data/__init__.py | 4 --- tests/dummy/data/openai.py | 54 ------------------------------------ tests/dummy/vllm.py | 3 +- 4 files changed, 2 insertions(+), 62 deletions(-) delete mode 100644 tests/dummy/data/openai.py diff --git a/tests/dummy/__init__.py b/tests/dummy/__init__.py index dc04765..216b702 100644 --- a/tests/dummy/__init__.py +++ b/tests/dummy/__init__.py @@ -1,8 +1,5 @@ """ The tests.dummy package package represents dummy data factories and test services. - -test.dummy.data.openai_model_factory - openai.types.Model test factory -test.dummy.data.openai_completion_factory - openai.types.Completion test factory """ from . import data, services, vllm # noqa: F401 diff --git a/tests/dummy/data/__init__.py b/tests/dummy/data/__init__.py index 4e5b820..e69de29 100644 --- a/tests/dummy/data/__init__.py +++ b/tests/dummy/data/__init__.py @@ -1,4 +0,0 @@ -from . import vllm -from .openai import openai_completion_factory, openai_model_factory - -__all__ = ["openai_completion_factory", "openai_model_factory", "vllm"] diff --git a/tests/dummy/data/openai.py b/tests/dummy/data/openai.py deleted file mode 100644 index 6e16865..0000000 --- a/tests/dummy/data/openai.py +++ /dev/null @@ -1,54 +0,0 @@ -""" -This module includes data models factories for openai 3-rd party package -""" - -import random -import string -import time -import uuid -from typing import Generator - -from openai.types import Completion, Model - - -def words(n: int = 1) -> Generator[str, None, None]: - for _ in range(n): - yield "".join( - random.choice(string.ascii_letters) for _ in range(random.randint(3, 10)) - ) - - -def openai_completion_factory( - n: int = 3, - **kwargs, -) -> Generator[Completion, None, None]: - """ - The factory that yields the openai Completion instance. - """ - - for i in range(1, n + 1): - payload = { - "id": str(uuid.uuid4()), - "choices": [], - "stop": not i < n, - "content": " ".join(words(random.randint(3, 10))) if i < n else "", - "object": "text_completion", - "model": "mock-model", - "created": int(time.time()), - } - payload.update(kwargs) - - yield Completion(**payload) # type: ignore - - -def openai_model_factory(n: int = 3) -> Generator[Model, None, None]: - """ - The factory that yields the random openai Model instance. 
- """ - for _ in range(n): - yield Model( - id=str(uuid.uuid4()), - created=int(time.time()), - object="model", - owned_by="neuralmagic", - ) diff --git a/tests/dummy/vllm.py b/tests/dummy/vllm.py index 7874c43..6428b7e 100644 --- a/tests/dummy/vllm.py +++ b/tests/dummy/vllm.py @@ -45,7 +45,8 @@ class TestLLM(BaseModel): model: str max_num_batched_tokens: int - _outputs_number: int = Field(default_factory=partial(random.randint, 10, 20)) + # NOTE: This value is used only for testing purposes + outputs_number: int = Field(default_factory=partial(random.randint, 10, 20)) def _generate_completion_outputs(self, max_tokens: int) -> List[CompletionOutput]: self._outputs_number = random.randint(10, 20) From 832b316ebbe9a1703dcbb6076e769e4f09112d9d Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Wed, 4 Sep 2024 13:55:36 +0300 Subject: [PATCH 09/19] WIP Docker tests --- ' | 26 ++++ .dockerignore | 14 ++ DEVELOPING.md | 19 ++- Dockerfile | 6 +- requirements.txt | 184 +++++++++++++++++++++++++++ src/guidellm/backend/vllm/backend.py | 4 +- 6 files changed, 247 insertions(+), 6 deletions(-) create mode 100644 ' create mode 100644 .dockerignore create mode 100644 requirements.txt diff --git a/' b/' new file mode 100644 index 0000000..bce016a --- /dev/null +++ b/' @@ -0,0 +1,26 @@ +BUILDPLATFORM=linux/amd64 +PYTHONPATH=/Users/dmytroparfeniuk/Projects/neuralmagic/vllm/ + +# commands +# https://huggingface.co/datasets/openai/openai_humaneval +alias act="source .tox/py39/bin/activate" +alias install="python -m pip install -e '.[dev,deepsparse]'" +alias run='python -m src.guidellm.main --data=openai_humaneval --max-requests=1 --max-seconds=20 --rate-type=constant --rate=1.0 --backend=deepsparse --model=' +alias e2e='python -m pytest -vvv -s tests/e2e' + +alias test='docker run --rm --env-file='.env' -v ./:/app/guidellm guidellm:latest pytest -s -vvv tests/unit/backend/test_vllm.py::test_backend_creation' + +alias unit='python -m pytest -vvv -s tests/unit' +alias fix='python -m ruff check --fix' +alias check='python -m ruff check . && python -m mypy --check-untyped-defs && python -m mdformat --check README.md DEVELOPING.md docs/ src/ tests/' +alias types='python -m mypy --check-untyped-defs' +alias clean='rm -rf build dist .mypy_cache .pytest_cache .tox .ruff_cache .coverage && find . -type f- name ".pyc" | xargs rm && find . -type d -name "__pycache__" -exec rm -r {} + && rm-rf *.egg-info' + +# Application +GUIDELLM__LOGGING__CONSOLE_LOG_LEVEL=WARNING + +# OpenAI +# GUIDELLM__OPENAI__BASE_URL=http://127.0.0.1:8080 +GUIDELLM__OPENAI__BASE_URL=http://192.168.50.36:7070 +GUIDELLM__OPENAI__API_KEY=invalid + diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..5002a2b --- /dev/null +++ b/.dockerignore @@ -0,0 +1,14 @@ +venv*/ +Dockerfile +.gitignore +.env +.git +.github/ +.ruff_cache/ +.pre-commit-config.yaml +docs/ +*.md +LICENSE +MANIFEST.in +__pycache__/ +*.egg-info/ diff --git a/DEVELOPING.md b/DEVELOPING.md index b230366..f19079a 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -33,13 +33,30 @@ cd guidellm pip install -e .[dev] ``` -If you work with `deepsparse` backend, etc it has some other software limitations. In order to install dependencies for the specific backend, run: +In case of working with `deepsparse` backend, etc it has some other software limitations. 
In order to install dependencies for the specific backend, run: ```sh pip install -e .[deepsparse] # or pip install -e '.[deepsparse]' ``` +In case of working with `vllm` backend, etc it has some other software limitations. In order to install dependencies for the specific backend, run: + +```sh +pip install -e .[vllm] +# or pip install -e '.[vllm]' +``` + +According to the [installation guide](https://docs.vllm.ai/en/v0.4.0.post1/getting_started/installation.html) `vllm` is supported only on **Linux**. It means that running the application and tests will fail. + +Workaround with Docker: +```sh +cd guidellm/ +docker build -t guidellm:latest . +docker run -v ./:./ guidellm:latest python -m pytest -s -v src/unit/backend/test_vllm.py +``` + + ## Project Structure The project follows a standard Python project structure: diff --git a/Dockerfile b/Dockerfile index 61aaac4..c880287 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,9 +12,9 @@ RUN apt-get update \ # Python dependencies RUN pip install --upgrade pip setuptools +COPY requirements.txt ./ +RUN pip install -r requirements.txt -WORKDIR /app/ +WORKDIR /app/ COPY ./ ./ - -RUN pip install -e '.[dev,deepsparse,vllm]' diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..4a05ce7 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,184 @@ +aiohappyeyeballs==2.4.0 +aiohttp==3.10.5 +aiosignal==1.3.1 +alabaster==0.7.13 +annotated-types==0.7.0 +anyio==4.4.0 +async-timeout==4.0.3 +attrs==24.2.0 +audioread==3.0.1 +babel==2.16.0 +cachetools==5.5.0 +certifi==2024.8.30 +cffi==1.17.0 +cfgv==3.4.0 +chardet==5.2.0 +charset-normalizer==3.3.2 +click==8.1.7 +cloudpickle==3.0.0 +colorama==0.4.6 +coverage==7.6.1 +datasets==2.21.0 +decorator==5.1.1 +dill==0.3.8 +diskcache==5.6.3 +distlib==0.3.8 +distro==1.9.0 +docutils==0.20.1 +exceptiongroup==1.2.2 +fastapi==0.112.2 +filelock==3.15.4 +frozenlist==1.4.1 +fsspec==2024.6.1 +ftfy==6.2.3 +gguf==0.9.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.2 +huggingface-hub==0.24.6 +identify==2.6.0 +idna==3.8 +imagesize==1.4.1 +importlib_metadata==8.4.0 +importlib_resources==6.4.4 +iniconfig==2.0.0 +interegular==0.3.3 +Jinja2==3.1.4 +jiter==0.5.0 +joblib==1.4.2 +jsonschema==4.23.0 +jsonschema-specifications==2023.12.1 +lark==1.2.2 +lazy_loader==0.4 +librosa==0.10.2.post1 +linkify-it-py==2.0.3 +llvmlite==0.41.1 +lm-format-enforcer==0.10.6 +loguru==0.7.2 +markdown-it-py==3.0.0 +MarkupSafe==2.1.5 +mdformat==0.7.17 +mdformat-gfm==0.3.6 +mdformat_footnote==0.1.1 +mdformat_frontmatter==2.0.8 +mdformat_tables==1.0.0 +mdit-py-plugins==0.4.1 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +msgspec==0.18.6 +multidict==6.0.5 +multiprocess==0.70.16 +mypy==1.10.1 +mypy-extensions==1.0.0 +nest-asyncio==1.6.0 +networkx==3.1 +nodeenv==1.9.1 +numba==0.58.1 +numpy==1.24.4 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==9.1.0.70 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==12.560.30 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.6.68 +nvidia-nvtx-cu12==12.1.105 +openai==1.43.0 +outlines==0.0.46 +packaging==24.1 +pandas==2.0.3 +pillow==10.4.0 +pkgutil_resolve_name==1.3.10 +platformdirs==4.2.2 +pluggy==1.5.0 +pooch==1.8.2 +pre-commit==3.5.0 +prometheus-fastapi-instrumentator==7.0.0 +prometheus_client==0.20.0 +protobuf==5.28.0 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pyairports==2.1.1 +pyarrow==17.0.0 
+pycountry==24.6.1 +pycparser==2.22 +pydantic==2.8.2 +pydantic-settings==2.4.0 +pydantic_core==2.20.1 +Pygments==2.18.0 +pyproject-api==1.7.1 +pytest==8.2.2 +pytest-asyncio==0.23.8 +pytest-cov==5.0.0 +pytest-mock==3.14.0 +pytest-rerunfailures==14.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +pytz==2024.1 +PyYAML==6.0.2 +pyzmq==26.2.0 +ray==2.10.0 +referencing==0.35.1 +regex==2024.7.24 +requests==2.32.3 +requests-mock==1.12.1 +rich==13.8.0 +rpds-py==0.20.0 +ruamel.yaml==0.18.6 +ruamel.yaml.clib==0.2.8 +ruff==0.5.7 +safetensors==0.4.4 +scikit-learn==1.3.2 +scipy==1.10.1 +sentencepiece==0.2.0 +six==1.16.0 +sniffio==1.3.1 +snowballstemmer==2.2.0 +soundfile==0.12.1 +soxr==0.3.7 +Sphinx==7.1.2 +sphinxcontrib-applehelp==1.0.4 +sphinxcontrib-devhelp==1.0.2 +sphinxcontrib-htmlhelp==2.0.1 +sphinxcontrib-jsmath==1.0.1 +sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-serializinghtml==1.1.5 +starlette==0.38.4 +sympy==1.13.2 +threadpoolctl==3.5.0 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +torch==2.4.0 +torchvision==0.19.0 +tox==4.16.0 +tqdm==4.66.5 +transformers==4.44.2 +triton==3.0.0 +types-click==7.1.8 +types-PyYAML==6.0.12.20240808 +types-requests==2.32.0.20240712 +types-toml==0.10.8.20240310 +typing_extensions==4.12.2 +tzdata==2024.1 +uc-micro-py==1.0.3 +urllib3==2.2.2 +uvicorn==0.30.6 +uvloop==0.20.0 +virtualenv==20.26.3 +vllm==0.5.5 +vllm-flash-attn==2.6.1 +watchfiles==0.24.0 +wcwidth==0.2.13 +websockets==13.0.1 +xformers==0.0.27.post2 +xxhash==3.5.0 +yarl==1.9.7 +zipp==3.20.1 diff --git a/src/guidellm/backend/vllm/backend.py b/src/guidellm/backend/vllm/backend.py index fd99b28..434c12f 100644 --- a/src/guidellm/backend/vllm/backend.py +++ b/src/guidellm/backend/vllm/backend.py @@ -15,15 +15,15 @@ class VllmBackend(Backend): """ def __init__(self, model: Optional[str] = None, **request_args): + self._request_args: Dict[str, Any] = request_args + super().__init__( type_="vllm", model=self._get_model(model), target="not used", ) - self._request_args: Dict[str, Any] = request_args self.llm = LLM(self._model) - logger.info(f"vLLM Backend uses model '{self._model}'") def _get_model(self, model_from_cli: Optional[str] = None) -> str: From 3d8c80f0f985caef465fda8dd76c384088263cf1 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Wed, 4 Sep 2024 21:17:14 +0300 Subject: [PATCH 10/19] removed tmp file --- ' | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 ' diff --git a/' b/' deleted file mode 100644 index bce016a..0000000 --- a/' +++ /dev/null @@ -1,26 +0,0 @@ -BUILDPLATFORM=linux/amd64 -PYTHONPATH=/Users/dmytroparfeniuk/Projects/neuralmagic/vllm/ - -# commands -# https://huggingface.co/datasets/openai/openai_humaneval -alias act="source .tox/py39/bin/activate" -alias install="python -m pip install -e '.[dev,deepsparse]'" -alias run='python -m src.guidellm.main --data=openai_humaneval --max-requests=1 --max-seconds=20 --rate-type=constant --rate=1.0 --backend=deepsparse --model=' -alias e2e='python -m pytest -vvv -s tests/e2e' - -alias test='docker run --rm --env-file='.env' -v ./:/app/guidellm guidellm:latest pytest -s -vvv tests/unit/backend/test_vllm.py::test_backend_creation' - -alias unit='python -m pytest -vvv -s tests/unit' -alias fix='python -m ruff check --fix' -alias check='python -m ruff check . 
&& python -m mypy --check-untyped-defs && python -m mdformat --check README.md DEVELOPING.md docs/ src/ tests/' -alias types='python -m mypy --check-untyped-defs' -alias clean='rm -rf build dist .mypy_cache .pytest_cache .tox .ruff_cache .coverage && find . -type f- name ".pyc" | xargs rm && find . -type d -name "__pycache__" -exec rm -r {} + && rm-rf *.egg-info' - -# Application -GUIDELLM__LOGGING__CONSOLE_LOG_LEVEL=WARNING - -# OpenAI -# GUIDELLM__OPENAI__BASE_URL=http://127.0.0.1:8080 -GUIDELLM__OPENAI__BASE_URL=http://192.168.50.36:7070 -GUIDELLM__OPENAI__API_KEY=invalid - From 7836d452a27dd4ba53da519c248d54c004071479 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Fri, 6 Sep 2024 14:59:30 +0300 Subject: [PATCH 11/19] Dockerfile remove COPY --- Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c880287..ae9335c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,4 +17,3 @@ RUN pip install -r requirements.txt WORKDIR /app/ -COPY ./ ./ From defe53d41d746e70b3887d8c78cb774beb78e91b Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Mon, 9 Sep 2024 12:45:15 +0300 Subject: [PATCH 12/19] =?UTF-8?q?=F0=9F=9A=A7=20WIP?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 2 +- src/guidellm/backend/base.py | 1 + src/guidellm/backend/vllm/backend.py | 12 ++++----- tests/dummy/vllm.py | 1 + tests/unit/backend/test_vllm.py | 39 ++++++++++++++++++---------- 5 files changed, 34 insertions(+), 21 deletions(-) diff --git a/Dockerfile b/Dockerfile index ae9335c..779d835 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,6 +2,7 @@ FROM --platform=linux/amd64 python:3.8-slim # Environment variables ENV PYTHONUNBUFFERED=1 +ENV PYTHONPATH=/app/guidellm/src/ RUN apt-get update \ # dependencies for building Python packages && cleaning up unused files @@ -15,5 +16,4 @@ RUN pip install --upgrade pip setuptools COPY requirements.txt ./ RUN pip install -r requirements.txt - WORKDIR /app/ diff --git a/src/guidellm/backend/base.py b/src/guidellm/backend/base.py index 8500369..becf1b2 100644 --- a/src/guidellm/backend/base.py +++ b/src/guidellm/backend/base.py @@ -228,6 +228,7 @@ async def submit(self, request: TextGenerationRequest) -> TextGenerationResult: result.start(request.prompt) received_final = False + breakpoint() # TODO: remove async for response in self.make_request(request): logger.debug("Received response: {}", response) if response.type_ == "token_iter": diff --git a/src/guidellm/backend/vllm/backend.py b/src/guidellm/backend/vllm/backend.py index 434c12f..2899311 100644 --- a/src/guidellm/backend/vllm/backend.py +++ b/src/guidellm/backend/vllm/backend.py @@ -14,16 +14,13 @@ class VllmBackend(Backend): An vLLM Backend implementation for the generative AI result. 
""" - def __init__(self, model: Optional[str] = None, **request_args): + def __init__(self, model: str = settings.llm_model, **request_args): + _model = self._get_model(model) self._request_args: Dict[str, Any] = request_args + self.llm = LLM(_model) - super().__init__( - type_="vllm", - model=self._get_model(model), - target="not used", - ) + super().__init__(type_="vllm", model=_model, target="not used") - self.llm = LLM(self._model) logger.info(f"vLLM Backend uses model '{self._model}'") def _get_model(self, model_from_cli: Optional[str] = None) -> str: @@ -72,6 +69,7 @@ async def make_request( output_token_count=token_count, ) + breakpoint() # TODO: remove if not (result := self.llm.generate(**request_args)): yield final_response return diff --git a/tests/dummy/vllm.py b/tests/dummy/vllm.py index 6428b7e..e82f9cc 100644 --- a/tests/dummy/vllm.py +++ b/tests/dummy/vllm.py @@ -61,6 +61,7 @@ def _generate_completion_outputs(self, max_tokens: int) -> List[CompletionOutput def generate( self, inputs: List[str], sampling_params: SamplingParams ) -> Optional[List[List[CompletionOutput]]]: + breakpoint() # TODO: remove return [ self._generate_completion_outputs(max_tokens=sampling_params.max_tokens) ] diff --git a/tests/unit/backend/test_vllm.py b/tests/unit/backend/test_vllm.py index a77e5fd..fc2bc50 100644 --- a/tests/unit/backend/test_vllm.py +++ b/tests/unit/backend/test_vllm.py @@ -5,19 +5,20 @@ the rimtime platform is not a Linux / WSL according to vllm documentation. """ -from typing import Dict, List +import sys +from typing import Callable, Dict, List, Optional import pytest from guidellm.backend import Backend -from guidellm.config import reload_settings +from guidellm.config import reload_settings, settings from guidellm.core import TextGenerationRequest from tests import dummy -# pytestmark = pytest.mark.skipif( -# sys.platform != "linux", -# reason="Unsupported Platform. Try using Linux or WSL instead.", -# ) +pytestmark = pytest.mark.skipif( + sys.platform != "linux", + reason="Unsupported Platform. Try using Linux or WSL instead.", +) @pytest.fixture(scope="module") @@ -28,13 +29,23 @@ def backend_class(): @pytest.fixture(autouse=True) -def mock_vllm_llm(mocker): - llm = dummy.vllm.TestLLM( - model="facebook/opt-125m", - max_num_batched_tokens=4096, - ) +def vllm_patch_factory(mocker) -> Callable[[str], dummy.vllm.TestLLM]: + """ + Skip VLLM initializer due to external calls. + Replace VllmBackend.llm object with mock representation. + """ - return mocker.patch("vllm.LLM", return_value=llm) + def inner(model: Optional[str] = None, max_tokens: Optional[int] = None): + + return mocker.patch( + "vllm.LLM.__new__", + return_value=dummy.vllm.TestLLM( + model=model or settings.llm_model, + max_num_batched_tokens=max_tokens or 4096, + ), + ) + + return inner @pytest.mark.smoke() @@ -45,11 +56,13 @@ def mock_vllm_llm(mocker): {"model": "test/custom_llm"}, ], ) -def test_backend_creation(create_payload: Dict, backend_class): +def test_backend_creation(create_payload: Dict, backend_class, vllm_patch_factory): """Test the "Deepspaarse Backend" class with defaults and custom input parameters. 
""" + vllm_patch_factory(model=create_payload.get("model")) + backends = [ Backend.create("vllm", **create_payload), backend_class(**create_payload), From 907135848ac43a6cb295cc365f2c4bf62a1617a4 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Mon, 9 Sep 2024 20:40:59 +0300 Subject: [PATCH 13/19] dockerfile is improved --- Dockerfile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 779d835..d9c0bbe 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,10 +4,14 @@ FROM --platform=linux/amd64 python:3.8-slim ENV PYTHONUNBUFFERED=1 ENV PYTHONPATH=/app/guidellm/src/ -RUN apt-get update \ +RUN : \ + && apt-get update \ # dependencies for building Python packages && cleaning up unused files - && apt-get install -y build-essential \ - libcurl4-openssl-dev libssl-dev \ + && apt-get install -y --no-insatll-recommend \ + build-essential \ + libcurl4-openssl-dev \ + libssl-dev \ + && apt-get clean \ && rm -rf /var/lib/apt/lists/* From 59e4cc655db25bb8a59327a1bda982e420c95973 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Mon, 9 Sep 2024 20:41:21 +0300 Subject: [PATCH 14/19] tests are comlete --- src/guidellm/backend/base.py | 1 - src/guidellm/backend/vllm/backend.py | 16 +++++------ tests/dummy/vllm.py | 40 +++++++++++++++++---------- tests/unit/backend/test_deepsparse.py | 2 +- tests/unit/backend/test_vllm.py | 23 +++++++++++---- 5 files changed, 52 insertions(+), 30 deletions(-) diff --git a/src/guidellm/backend/base.py b/src/guidellm/backend/base.py index becf1b2..8500369 100644 --- a/src/guidellm/backend/base.py +++ b/src/guidellm/backend/base.py @@ -228,7 +228,6 @@ async def submit(self, request: TextGenerationRequest) -> TextGenerationResult: result.start(request.prompt) received_final = False - breakpoint() # TODO: remove async for response in self.make_request(request): logger.debug("Received response: {}", response) if response.type_ == "token_iter": diff --git a/src/guidellm/backend/vllm/backend.py b/src/guidellm/backend/vllm/backend.py index 2899311..f161055 100644 --- a/src/guidellm/backend/vllm/backend.py +++ b/src/guidellm/backend/vllm/backend.py @@ -14,7 +14,7 @@ class VllmBackend(Backend): An vLLM Backend implementation for the generative AI result. """ - def __init__(self, model: str = settings.llm_model, **request_args): + def __init__(self, model: Optional[str] = None, **request_args): _model = self._get_model(model) self._request_args: Dict[str, Any] = request_args self.llm = LLM(_model) @@ -69,7 +69,6 @@ async def make_request( output_token_count=token_count, ) - breakpoint() # TODO: remove if not (result := self.llm.generate(**request_args)): yield final_response return @@ -82,12 +81,6 @@ async def make_request( for generation in generations: if not (token := generation.text): - yield GenerativeResponse( - type_="final", - prompt=request.prompt, - prompt_token_count=request.prompt_token_count, - output_token_count=token_count, - ) break else: token_count += 1 @@ -99,6 +92,13 @@ async def make_request( output_token_count=token_count, ) + yield GenerativeResponse( + type_="final", + prompt=request.prompt, + prompt_token_count=request.prompt_token_count, + output_token_count=token_count, + ) + def available_models(self) -> List[str]: """ Get the available models for the backend. 
diff --git a/tests/dummy/vllm.py b/tests/dummy/vllm.py index e82f9cc..a7b2052 100644 --- a/tests/dummy/vllm.py +++ b/tests/dummy/vllm.py @@ -3,8 +3,7 @@ """ import random -from functools import partial -from typing import List, Optional +from typing import Generator, List, Optional from pydantic import BaseModel, ConfigDict, Field @@ -25,6 +24,10 @@ class SamplingParams(BaseModel): max_tokens: int +class CompletionOutputs(BaseModel): + outputs: List[CompletionOutput] + + class TestLLM(BaseModel): """Test interface of `vllm.LLM`. @@ -32,6 +35,8 @@ class TestLLM(BaseModel): _outputs_number(int | None): the number of generated tokens per output. Should be used only for testing purposes. Default: randint(10..20) + _generations: dynamic representation of generated responses + from deepsparse interface. """ @@ -45,23 +50,28 @@ class TestLLM(BaseModel): model: str max_num_batched_tokens: int - # NOTE: This value is used only for testing purposes - outputs_number: int = Field(default_factory=partial(random.randint, 10, 20)) + def _generate_completion_outputs( + self, max_tokens: int + ) -> Generator[CompletionOutputs, None, None]: - def _generate_completion_outputs(self, max_tokens: int) -> List[CompletionOutput]: - self._outputs_number = random.randint(10, 20) + # NOTE: This value is used only for testing purposes + self._expected_outputs: List[CompletionOutput] = [] - return [ - CompletionOutput(text=text) - for text in random_strings( - min_chars=0, max_chars=max_tokens, n=self._outputs_number - ) - ] + for text in random_strings( + min_chars=0, max_chars=random.randint(10, 20), n=max_tokens + ): + instance = CompletionOutput(text=text) + self._expected_outputs.append(instance) + + yield instance def generate( self, inputs: List[str], sampling_params: SamplingParams - ) -> Optional[List[List[CompletionOutput]]]: - breakpoint() # TODO: remove + ) -> List[CompletionOutputs]: return [ - self._generate_completion_outputs(max_tokens=sampling_params.max_tokens) + CompletionOutputs( + outputs=self._generate_completion_outputs( + max_tokens=sampling_params.max_tokens + ) + ) ] diff --git a/tests/unit/backend/test_deepsparse.py b/tests/unit/backend/test_deepsparse.py index cac49ca..0d667ea 100644 --- a/tests/unit/backend/test_deepsparse.py +++ b/tests/unit/backend/test_deepsparse.py @@ -45,7 +45,7 @@ class TestTextGenerationPipeline: Method `__call__` allows to mock the result object that comes from `deepsparse.pipeline.Pipeline()` so everything is encapsulated right here. - :param self._generation: dynamic representation of generated responses + :param self._generations: dynamic representation of generated responses from deepsparse interface. """ diff --git a/tests/unit/backend/test_vllm.py b/tests/unit/backend/test_vllm.py index fc2bc50..b8adbe4 100644 --- a/tests/unit/backend/test_vllm.py +++ b/tests/unit/backend/test_vllm.py @@ -21,18 +21,21 @@ ) -@pytest.fixture(scope="module") +@pytest.fixture(scope="session") def backend_class(): from guidellm.backend.vllm import VllmBackend return VllmBackend -@pytest.fixture(autouse=True) +@pytest.fixture() def vllm_patch_factory(mocker) -> Callable[[str], dummy.vllm.TestLLM]: """ Skip VLLM initializer due to external calls. Replace VllmBackend.llm object with mock representation. + + This vllm patch is injected into each test automatically. If you need + to override the Mock object - use this fixture. 
""" def inner(model: Optional[str] = None, max_tokens: Optional[int] = None): @@ -48,6 +51,15 @@ def inner(model: Optional[str] = None, max_tokens: Optional[int] = None): return inner +@pytest.fixture(autouse=True) +def vllm_auto_patch(vllm_patch_factory): + """ + Automatically patch the ``vllm.LLM`` with defaults. + """ + + return vllm_patch_factory() + + @pytest.mark.smoke() @pytest.mark.parametrize( "create_payload", @@ -96,7 +108,7 @@ def test_backend_model_from_env(mocker, backend_class): @pytest.mark.parametrize( "text_generation_request_create_payload", [ - {"prompt": "Test prompt"}, + # {"prompt": "Test prompt"}, {"prompt": "Test prompt", "output_token_count": 20}, ], ) @@ -112,12 +124,13 @@ async def test_make_request( ): if response.add_token: output_tokens.append(response.add_token) + assert "".join(output_tokens) == "".join( - generation.text for generation in backend.pipeline._generations + generation.text for generation in getattr(backend.llm, "_expected_outputs") ) if max_tokens := text_generation_request_create_payload.get("output_token_count"): - assert len(backend.pipeline._generations) == max_tokens + assert len(getattr(backend.llm, "_expected_outputs")) == max_tokens @pytest.mark.smoke() From bd76806a62cded71ad6a89be694c86255a94b57a Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Mon, 9 Sep 2024 20:41:33 +0300 Subject: [PATCH 15/19] docker testing guide is added --- DEVELOPING.md | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/DEVELOPING.md b/DEVELOPING.md index f19079a..aa6c7a9 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -50,13 +50,13 @@ pip install -e .[vllm] According to the [installation guide](https://docs.vllm.ai/en/v0.4.0.post1/getting_started/installation.html) `vllm` is supported only on **Linux**. It means that running the application and tests will fail. Workaround with Docker: + ```sh cd guidellm/ docker build -t guidellm:latest . docker run -v ./:./ guidellm:latest python -m pytest -s -v src/unit/backend/test_vllm.py ``` - ## Project Structure The project follows a standard Python project structure: @@ -180,6 +180,21 @@ The end-to-end tests are located in the `tests/e2e` directory. To run the end-to tox -e test-e2e ``` +### Running unsopported tests + +Some of the test might be not supported on your system (_for instance `vllm` is not supported on MacOS yet_). In order to run them on Linux Operating System you might use technologies like **WSL** on Windows, or **Docker** on Windows or MacOS. + +In order to run under the Docker just run the command below: + +```sh +docker build -t guidellm:latest --shm-size=1024m . +docker run --rm --env-file .env -v ./:/app/guidellm -it guidellm:latest pytest tests/ +``` + +Using `--shm-size=1024m` is recommended due to potential local Docker configuration. Increase or decrease this value depending on your needs. + +
+ ## Formatting, Linting, and Type Checking ### Running Quality Checks (Linting) From 92c88192bf4a4f2d50eef1879e9a6f0c9a66213a Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Mon, 9 Sep 2024 21:05:34 +0300 Subject: [PATCH 16/19] =?UTF-8?q?=F0=9F=92=9A=20Code=20quality=20is=20prov?= =?UTF-8?q?ided?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/guidellm/backend/vllm/__init__.py | 8 ++++---- src/guidellm/backend/vllm/backend.py | 2 ++ tests/dummy/vllm.py | 12 +++++++----- tests/unit/backend/test_vllm.py | 6 +++--- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/guidellm/backend/vllm/__init__.py b/src/guidellm/backend/vllm/__init__.py index 18040a7..b4f0504 100644 --- a/src/guidellm/backend/vllm/__init__.py +++ b/src/guidellm/backend/vllm/__init__.py @@ -16,10 +16,10 @@ check_python_version(min_version="3.8", max_version="3.12") -# module_is_available( -# module="vllm", -# helper=("`vllm` package is not available. Try run: `pip install -e '.[vllm]'`"), -# ) +module_is_available( + module="vllm", + helper=("`vllm` package is not available. Try run: `pip install -e '.[vllm]'`"), +) from .backend import VllmBackend # noqa: E402 diff --git a/src/guidellm/backend/vllm/backend.py b/src/guidellm/backend/vllm/backend.py index f161055..a048db4 100644 --- a/src/guidellm/backend/vllm/backend.py +++ b/src/guidellm/backend/vllm/backend.py @@ -19,6 +19,8 @@ def __init__(self, model: Optional[str] = None, **request_args): self._request_args: Dict[str, Any] = request_args self.llm = LLM(_model) + # NOTE: Must be after all the parameters since ``self.llm`` is going to be used + # by ``make_request`` within ``Backend.test_connection()`` super().__init__(type_="vllm", model=_model, target="not used") logger.info(f"vLLM Backend uses model '{self._model}'") diff --git a/tests/dummy/vllm.py b/tests/dummy/vllm.py index a7b2052..f86e61d 100644 --- a/tests/dummy/vllm.py +++ b/tests/dummy/vllm.py @@ -3,9 +3,9 @@ """ import random -from typing import Generator, List, Optional +from typing import Generator, List -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, ConfigDict from guidellm.utils import random_strings @@ -52,7 +52,7 @@ class TestLLM(BaseModel): def _generate_completion_outputs( self, max_tokens: int - ) -> Generator[CompletionOutputs, None, None]: + ) -> Generator[CompletionOutput, None, None]: # NOTE: This value is used only for testing purposes self._expected_outputs: List[CompletionOutput] = [] @@ -70,8 +70,10 @@ def generate( ) -> List[CompletionOutputs]: return [ CompletionOutputs( - outputs=self._generate_completion_outputs( - max_tokens=sampling_params.max_tokens + outputs=list( + self._generate_completion_outputs( + max_tokens=sampling_params.max_tokens + ) ) ) ] diff --git a/tests/unit/backend/test_vllm.py b/tests/unit/backend/test_vllm.py index b8adbe4..ccc0e87 100644 --- a/tests/unit/backend/test_vllm.py +++ b/tests/unit/backend/test_vllm.py @@ -108,7 +108,7 @@ def test_backend_model_from_env(mocker, backend_class): @pytest.mark.parametrize( "text_generation_request_create_payload", [ - # {"prompt": "Test prompt"}, + {"prompt": "Test prompt"}, {"prompt": "Test prompt", "output_token_count": 20}, ], ) @@ -126,11 +126,11 @@ async def test_make_request( output_tokens.append(response.add_token) assert "".join(output_tokens) == "".join( - generation.text for generation in getattr(backend.llm, "_expected_outputs") + generation.text for generation in backend.llm._expected_outputs ) if 
max_tokens := text_generation_request_create_payload.get("output_token_count"):
-        assert len(getattr(backend.llm, "_expected_outputs")) == max_tokens
+        assert len(backend.llm._expected_outputs) == max_tokens
 
 
 @pytest.mark.smoke()

From e25be2ffe842aef587e19776e0c8a243754ff987 Mon Sep 17 00:00:00 2001
From: Dmytro Parfeniuk
Date: Mon, 9 Sep 2024 21:44:48 +0300
Subject: [PATCH 17/19] Dockerfile improved. Removed unused parts

---
 .dockerignore | 1 +
 DEVELOPING.md | 6 +-
 Dockerfile | 17 +++-
 requirements.txt | 184 -----------------------------------------------------
 4 files changed, 13 insertions(+), 195 deletions(-)
 delete mode 100644 requirements.txt

diff --git a/.dockerignore b/.dockerignore
index 5002a2b..35514f9 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -12,3 +12,4 @@ LICENSE
 MANIFEST.in
 __pycache__/
 *.egg-info/
+*log

diff --git a/DEVELOPING.md b/DEVELOPING.md
index aa6c7a9..b65bc09 100644
--- a/DEVELOPING.md
+++ b/DEVELOPING.md
@@ -187,12 +187,10 @@ Some of the tests might not be supported on your system (_for instance, `vllm`
 
 To run the tests under Docker, run the command below:
 
 ```sh
-docker build -t guidellm:latest --shm-size=1024m .
-docker run --rm --env-file .env -v ./:/app/guidellm -it guidellm:latest pytest tests/
+docker build -t guidellm:latest .
+docker run --rm --env-file .env guidellm:latest pytest tests/
 ```
 
-Using `--shm-size=1024m` is recommended because Docker's default shared-memory size can be too small for these tests. Increase or decrease this value depending on your needs.
-
## Formatting, Linting, and Type Checking diff --git a/Dockerfile b/Dockerfile index d9c0bbe..2b88eee 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,17 +7,20 @@ ENV PYTHONPATH=/app/guidellm/src/ RUN : \ && apt-get update \ # dependencies for building Python packages && cleaning up unused files - && apt-get install -y --no-insatll-recommend \ + && apt-get install -y \ build-essential \ libcurl4-openssl-dev \ libssl-dev \ && apt-get clean \ - && rm -rf /var/lib/apt/lists/* + && rm -rf /var/lib/apt/lists/* \ + && pip install --upgrade \ + pip \ + setuptools -# Python dependencies -RUN pip install --upgrade pip setuptools -COPY requirements.txt ./ -RUN pip install -r requirements.txt +WORKDIR /app + +# Install project dependencies +COPY ./ ./ +RUN pip install -e .[dev,deepsparse,vllm] -WORKDIR /app/ diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 4a05ce7..0000000 --- a/requirements.txt +++ /dev/null @@ -1,184 +0,0 @@ -aiohappyeyeballs==2.4.0 -aiohttp==3.10.5 -aiosignal==1.3.1 -alabaster==0.7.13 -annotated-types==0.7.0 -anyio==4.4.0 -async-timeout==4.0.3 -attrs==24.2.0 -audioread==3.0.1 -babel==2.16.0 -cachetools==5.5.0 -certifi==2024.8.30 -cffi==1.17.0 -cfgv==3.4.0 -chardet==5.2.0 -charset-normalizer==3.3.2 -click==8.1.7 -cloudpickle==3.0.0 -colorama==0.4.6 -coverage==7.6.1 -datasets==2.21.0 -decorator==5.1.1 -dill==0.3.8 -diskcache==5.6.3 -distlib==0.3.8 -distro==1.9.0 -docutils==0.20.1 -exceptiongroup==1.2.2 -fastapi==0.112.2 -filelock==3.15.4 -frozenlist==1.4.1 -fsspec==2024.6.1 -ftfy==6.2.3 -gguf==0.9.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.2 -huggingface-hub==0.24.6 -identify==2.6.0 -idna==3.8 -imagesize==1.4.1 -importlib_metadata==8.4.0 -importlib_resources==6.4.4 -iniconfig==2.0.0 -interegular==0.3.3 -Jinja2==3.1.4 -jiter==0.5.0 -joblib==1.4.2 -jsonschema==4.23.0 -jsonschema-specifications==2023.12.1 -lark==1.2.2 -lazy_loader==0.4 -librosa==0.10.2.post1 -linkify-it-py==2.0.3 -llvmlite==0.41.1 -lm-format-enforcer==0.10.6 -loguru==0.7.2 -markdown-it-py==3.0.0 -MarkupSafe==2.1.5 -mdformat==0.7.17 -mdformat-gfm==0.3.6 -mdformat_footnote==0.1.1 -mdformat_frontmatter==2.0.8 -mdformat_tables==1.0.0 -mdit-py-plugins==0.4.1 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -msgspec==0.18.6 -multidict==6.0.5 -multiprocess==0.70.16 -mypy==1.10.1 -mypy-extensions==1.0.0 -nest-asyncio==1.6.0 -networkx==3.1 -nodeenv==1.9.1 -numba==0.58.1 -numpy==1.24.4 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==9.1.0.70 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==12.560.30 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.6.68 -nvidia-nvtx-cu12==12.1.105 -openai==1.43.0 -outlines==0.0.46 -packaging==24.1 -pandas==2.0.3 -pillow==10.4.0 -pkgutil_resolve_name==1.3.10 -platformdirs==4.2.2 -pluggy==1.5.0 -pooch==1.8.2 -pre-commit==3.5.0 -prometheus-fastapi-instrumentator==7.0.0 -prometheus_client==0.20.0 -protobuf==5.28.0 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pyairports==2.1.1 -pyarrow==17.0.0 -pycountry==24.6.1 -pycparser==2.22 -pydantic==2.8.2 -pydantic-settings==2.4.0 -pydantic_core==2.20.1 -Pygments==2.18.0 -pyproject-api==1.7.1 -pytest==8.2.2 -pytest-asyncio==0.23.8 -pytest-cov==5.0.0 -pytest-mock==3.14.0 -pytest-rerunfailures==14.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -pytz==2024.1 -PyYAML==6.0.2 -pyzmq==26.2.0 -ray==2.10.0 -referencing==0.35.1 
-regex==2024.7.24 -requests==2.32.3 -requests-mock==1.12.1 -rich==13.8.0 -rpds-py==0.20.0 -ruamel.yaml==0.18.6 -ruamel.yaml.clib==0.2.8 -ruff==0.5.7 -safetensors==0.4.4 -scikit-learn==1.3.2 -scipy==1.10.1 -sentencepiece==0.2.0 -six==1.16.0 -sniffio==1.3.1 -snowballstemmer==2.2.0 -soundfile==0.12.1 -soxr==0.3.7 -Sphinx==7.1.2 -sphinxcontrib-applehelp==1.0.4 -sphinxcontrib-devhelp==1.0.2 -sphinxcontrib-htmlhelp==2.0.1 -sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.3 -sphinxcontrib-serializinghtml==1.1.5 -starlette==0.38.4 -sympy==1.13.2 -threadpoolctl==3.5.0 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -torch==2.4.0 -torchvision==0.19.0 -tox==4.16.0 -tqdm==4.66.5 -transformers==4.44.2 -triton==3.0.0 -types-click==7.1.8 -types-PyYAML==6.0.12.20240808 -types-requests==2.32.0.20240712 -types-toml==0.10.8.20240310 -typing_extensions==4.12.2 -tzdata==2024.1 -uc-micro-py==1.0.3 -urllib3==2.2.2 -uvicorn==0.30.6 -uvloop==0.20.0 -virtualenv==20.26.3 -vllm==0.5.5 -vllm-flash-attn==2.6.1 -watchfiles==0.24.0 -wcwidth==0.2.13 -websockets==13.0.1 -xformers==0.0.27.post2 -xxhash==3.5.0 -yarl==1.9.7 -zipp==3.20.1 From 809694cd107f8359ae43a80af37a34d5b351723f Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Tue, 10 Sep 2024 08:55:13 +0300 Subject: [PATCH 18/19] =?UTF-8?q?=E2=9C=85=20tests=20are=20fixed?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/guidellm/utils/progress.py | 8 +++++--- tests/dummy/vllm.py | 10 ++++++---- tests/unit/backend/test_deepsparse.py | 18 +++++++----------- tests/unit/backend/test_vllm.py | 25 ++++++++++++++----------- 4 files changed, 32 insertions(+), 29 deletions(-) diff --git a/src/guidellm/utils/progress.py b/src/guidellm/utils/progress.py index 5c7a845..5ae8416 100644 --- a/src/guidellm/utils/progress.py +++ b/src/guidellm/utils/progress.py @@ -162,9 +162,11 @@ def update_benchmark( total=completed_total, completed=completed_count if not completed else completed_total, req_per_sec=(f"{req_per_sec:.2f}" if req_per_sec else "#.##"), - start_time_str=datetime.fromtimestamp(start_time).strftime("%H:%M:%S") - if start_time - else "--:--:--", + start_time_str=( + datetime.fromtimestamp(start_time).strftime("%H:%M:%S") + if start_time + else "--:--:--" + ), ) logger.debug( "Updated benchmark task at index {}: {}% complete", diff --git a/tests/dummy/vllm.py b/tests/dummy/vllm.py index f86e61d..2210b80 100644 --- a/tests/dummy/vllm.py +++ b/tests/dummy/vllm.py @@ -3,7 +3,7 @@ """ import random -from typing import Generator, List +from typing import Generator, List, Optional from pydantic import BaseModel, ConfigDict @@ -21,7 +21,7 @@ class CompletionOutput(BaseModel): class SamplingParams(BaseModel): """Test interface of `vllm.SamplingParams`.""" - max_tokens: int + max_tokens: Optional[int] = 16 class CompletionOutputs(BaseModel): @@ -51,14 +51,16 @@ class TestLLM(BaseModel): max_num_batched_tokens: int def _generate_completion_outputs( - self, max_tokens: int + self, max_tokens: Optional[int] ) -> Generator[CompletionOutput, None, None]: # NOTE: This value is used only for testing purposes self._expected_outputs: List[CompletionOutput] = [] for text in random_strings( - min_chars=0, max_chars=random.randint(10, 20), n=max_tokens + min_chars=5, + max_chars=random.randint(10, 20), + n=max_tokens or random.randint(10, 20), ): instance = CompletionOutput(text=text) self._expected_outputs.append(instance) diff --git a/tests/unit/backend/test_deepsparse.py b/tests/unit/backend/test_deepsparse.py index 
0d667ea..244db47 100644 --- a/tests/unit/backend/test_deepsparse.py +++ b/tests/unit/backend/test_deepsparse.py @@ -6,7 +6,7 @@ """ import sys -from typing import Any, Dict, Generator, List, Optional +from typing import Any, Generator, List, Optional import pytest from pydantic import BaseModel @@ -96,7 +96,7 @@ def mock_deepsparse_pipeline(mocker): {"model": "test/custom_llm"}, ], ) -def test_backend_creation(create_payload: Dict, backend_class): +def test_backend_creation(create_payload, backend_class): """Test the "Deepspaarse Backend" class with defaults and custom input parameters. """ @@ -139,9 +139,7 @@ def test_backend_model_from_env(mocker, backend_class): ], ) @pytest.mark.asyncio() -async def test_make_request( - text_generation_request_create_payload: Dict, backend_class -): +async def test_make_request(text_generation_request_create_payload, backend_class): backend = backend_class() output_tokens: List[str] = [] @@ -160,23 +158,21 @@ async def test_make_request( @pytest.mark.smoke() @pytest.mark.parametrize( - ("text_generation_request_create_payload", "error"), + ("text_generation_request", "error"), [ ( - {"prompt": "Test prompt", "output_token_count": -1}, + TextGenerationRequest(prompt="Test prompt", output_token_count=-1), ValueError, ), ], ) @pytest.mark.asyncio() async def test_make_request_invalid_request_payload( - text_generation_request_create_payload: Dict, error, backend_class + text_generation_request, error, backend_class ): backend = backend_class() with pytest.raises(error): [ respnose - async for respnose in backend.make_request( - request=TextGenerationRequest(**text_generation_request_create_payload) - ) + async for respnose in backend.make_request(request=text_generation_request) ] diff --git a/tests/unit/backend/test_vllm.py b/tests/unit/backend/test_vllm.py index ccc0e87..16e83b0 100644 --- a/tests/unit/backend/test_vllm.py +++ b/tests/unit/backend/test_vllm.py @@ -6,7 +6,7 @@ """ import sys -from typing import Callable, Dict, List, Optional +from typing import Callable, List, Optional import pytest @@ -68,7 +68,7 @@ def vllm_auto_patch(vllm_patch_factory): {"model": "test/custom_llm"}, ], ) -def test_backend_creation(create_payload: Dict, backend_class, vllm_patch_factory): +def test_backend_creation(create_payload, backend_class, vllm_patch_factory): """Test the "Deepspaarse Backend" class with defaults and custom input parameters. 
""" @@ -113,9 +113,7 @@ def test_backend_model_from_env(mocker, backend_class): ], ) @pytest.mark.asyncio() -async def test_make_request( - text_generation_request_create_payload: Dict, backend_class -): +async def test_make_request(text_generation_request_create_payload, backend_class): backend = backend_class() output_tokens: List[str] = [] @@ -135,20 +133,25 @@ async def test_make_request( @pytest.mark.smoke() @pytest.mark.parametrize( - ("text_generation_request_create_payload", "error"), + ("text_generation_request", "error"), [ - ({"prompt": "Test prompt"}, ValueError), + ( + TextGenerationRequest(prompt="Test prompt", output_token_count=-1), + ValueError, + ), + ( + TextGenerationRequest(prompt="Test prompt", output_token_count=0), + ValueError, + ), ], ) @pytest.mark.asyncio() async def test_make_request_invalid_request_payload( - text_generation_request_create_payload: Dict, error, backend_class + text_generation_request, error, backend_class ): backend = backend_class() with pytest.raises(error): [ respnose - async for respnose in backend.make_request( - request=TextGenerationRequest(**text_generation_request_create_payload) - ) + async for respnose in backend.make_request(request=text_generation_request) ] From 78b78ed5d730c158e0b72d46eba31aa4e56264ba Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Tue, 10 Sep 2024 10:43:00 +0300 Subject: [PATCH 19/19] =?UTF-8?q?=F0=9F=90=B3=20--platform=20is=20removed?= =?UTF-8?q?=20from=20Dockerfile?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DEVELOPING.md | 2 +- Dockerfile | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/DEVELOPING.md b/DEVELOPING.md index b65bc09..dbb3ed6 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -187,7 +187,7 @@ Some of the test might be not supported on your system (_for instance `vllm` is In order to run under the Docker just run the command below: ```sh -docker build -t guidellm:latest . +docker build --platform linux/amd64 --tag guidellm:latest . docker run --rm --env-file .env guidellm:latest pytest tests/ ``` diff --git a/Dockerfile b/Dockerfile index 2b88eee..5db54bd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,6 @@ FROM --platform=linux/amd64 python:3.8-slim # Environment variables ENV PYTHONUNBUFFERED=1 -ENV PYTHONPATH=/app/guidellm/src/ RUN : \ && apt-get update \ @@ -17,10 +16,8 @@ RUN : \ pip \ setuptools - WORKDIR /app # Install project dependencies COPY ./ ./ RUN pip install -e .[dev,deepsparse,vllm] -