diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..35514f9
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,15 @@
+venv*/
+Dockerfile
+.gitignore
+.env
+.git
+.github/
+.ruff_cache/
+.pre-commit-config.yaml
+docs/
+*.md
+LICENSE
+MANIFEST.in
+__pycache__/
+*.egg-info/
+*log
diff --git a/DEVELOPING.md b/DEVELOPING.md
index b230366..dbb3ed6 100644
--- a/DEVELOPING.md
+++ b/DEVELOPING.md
@@ -33,13 +33,30 @@ cd guidellm
 pip install -e .[dev]
 ```
 
-If you work with `deepsparse` backend, etc it has some other software limitations. In order to install dependencies for the specific backend, run:
+If you work with the `deepsparse` backend, it has additional software requirements. To install the dependencies for that specific backend, run:
 
 ```sh
 pip install -e .[deepsparse]
 # or pip install -e '.[deepsparse]'
 ```
 
+If you work with the `vllm` backend, it also has additional software requirements. To install the dependencies for that specific backend, run:
+
+```sh
+pip install -e .[vllm]
+# or pip install -e '.[vllm]'
+```
+
+According to the [installation guide](https://docs.vllm.ai/en/v0.4.0.post1/getting_started/installation.html), `vllm` is supported only on **Linux**, which means that running the application and its tests on other platforms will fail.
+
+A workaround is to use Docker:
+
+```sh
+cd guidellm/
+docker build -t guidellm:latest .
+docker run --rm guidellm:latest python -m pytest -s -v tests/unit/backend/test_vllm.py
+```
+
 ## Project Structure
 
 The project follows a standard Python project structure:
@@ -163,6 +180,19 @@ The end-to-end tests are located in the `tests/e2e` directory. To run the end-to-
 tox -e test-e2e
 ```
 
+### Running unsupported tests
+
+Some tests might not be supported on your system (_for instance, `vllm` is not supported on macOS yet_). To run them on Linux you can use technologies such as **WSL** on Windows, or **Docker** on Windows or macOS.
+
+To run them under Docker, run the commands below:
+
+```sh
+docker build --platform linux/amd64 --tag guidellm:latest .
+docker run --rm --env-file .env guidellm:latest pytest tests/
+```
+
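+### Manually exercising the vLLM backend
+
+For a quick manual smoke check outside of `pytest`, the snippet below drives the new backend directly. This is only a minimal sketch: it assumes the `vllm` extra is installed on a Linux machine and that the example model (taken from `VllmBackend.available_models()`) can be downloaded and loaded locally.
+
+```python
+import asyncio
+
+from guidellm.backend.vllm import VllmBackend
+from guidellm.core import TextGenerationRequest
+
+
+async def main() -> None:
+    # Loads the chosen model into vLLM; any model accepted by vLLM should work here.
+    backend = VllmBackend(model="mistralai/Mistral-7B-Instruct-v0.3")
+    request = TextGenerationRequest(prompt="Hello, world!", output_token_count=16)
+
+    # ``make_request`` yields token-by-token responses followed by a final summary.
+    async for response in backend.make_request(request=request):
+        if response.type_ == "token_iter" and response.add_token:
+            print(response.add_token, end="")
+
+
+asyncio.run(main())
+```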
+
+
 ## Formatting, Linting, and Type Checking
 
 ### Running Quality Checks (Linting)
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..5db54bd
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,23 @@
+FROM --platform=linux/amd64 python:3.8-slim
+
+# Environment variables
+ENV PYTHONUNBUFFERED=1
+
+RUN : \
+    && apt-get update \
+    # dependencies for building Python packages && cleaning up unused files
+    && apt-get install -y \
+        build-essential \
+        libcurl4-openssl-dev \
+        libssl-dev \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* \
+    && pip install --upgrade \
+        pip \
+        setuptools
+
+WORKDIR /app
+
+# Install project dependencies
+COPY ./ ./
+RUN pip install -e .[dev,deepsparse,vllm]
diff --git a/pyproject.toml b/pyproject.toml
index 942c1cd..407260a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,6 +74,9 @@ dev = [
 deepsparse = [
     "deepsparse; python_version < '3.12'",
 ]
+vllm = [
+    "vllm; sys_platform == 'linux'",
+]
 
 
 [project.entry-points.console_scripts]
@@ -108,7 +111,7 @@ exclude = ["venv", ".tox"]
 follow_imports = 'silent'
 
 [[tool.mypy.overrides]]
-module = ["deepsparse.*", "transformers.*"]
+module = ["deepsparse.*", "transformers.*", "vllm.*"]
 ignore_missing_imports=true
 
 
diff --git a/src/guidellm/backend/base.py b/src/guidellm/backend/base.py
index 010cdd2..8500369 100644
--- a/src/guidellm/backend/base.py
+++ b/src/guidellm/backend/base.py
@@ -15,7 +15,7 @@
 
 __all__ = ["Backend", "BackendEngine", "BackendEnginePublic", "GenerativeResponse"]
 
-BackendEnginePublic = Literal["openai_server", "deepsparse"]
+BackendEnginePublic = Literal["openai_server", "deepsparse", "vllm"]
 BackendEngine = Union[BackendEnginePublic, Literal["test"]]
 
 
diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py
index b5cbc12..8a12c92 100644
--- a/src/guidellm/backend/openai.py
+++ b/src/guidellm/backend/openai.py
@@ -10,7 +10,7 @@
 __all__ = ["OpenAIBackend"]
 
 
-@Backend.register("openai_server")
+@Backend.register(backend_type="openai_server")
 class OpenAIBackend(Backend):
     """
     An OpenAI backend implementation for generative AI results.
diff --git a/src/guidellm/backend/vllm/__init__.py b/src/guidellm/backend/vllm/__init__.py
new file mode 100644
index 0000000..b4f0504
--- /dev/null
+++ b/src/guidellm/backend/vllm/__init__.py
@@ -0,0 +1,26 @@
+"""
+This package encapsulates the "vLLM Backend" implementation.
+
+ref: https://github.com/vllm-project/vllm
+
+The `vllm` package supports Python 3.8 through Python 3.11,
+while `guidellm` itself starts from Python 3.8.
+
+The safe range of versions for the vLLM Backend implementation
+is therefore Python 3.8..Python 3.11.
+
+Finally, ensure that the `vllm` package is installed.
+"""
+
+from guidellm.utils import check_python_version, module_is_available
+
+check_python_version(min_version="3.8", max_version="3.12")
+
+module_is_available(
+    module="vllm",
+    helper=("`vllm` package is not available. Try: `pip install -e '.[vllm]'`"),
+)
+
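+# NOTE: this import intentionally comes after the guards above, since importing
+# ``.backend`` pulls in the ``vllm`` package itself; that is also why E402
+# (module level import not at top of file) is silenced on the import line.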
+from .backend import VllmBackend  # noqa: E402
+
+__all__ = ["VllmBackend"]
diff --git a/src/guidellm/backend/vllm/backend.py b/src/guidellm/backend/vllm/backend.py
new file mode 100644
index 0000000..a048db4
--- /dev/null
+++ b/src/guidellm/backend/vllm/backend.py
@@ -0,0 +1,122 @@
+from typing import Any, AsyncGenerator, Dict, List, Optional
+
+from loguru import logger
+from vllm import LLM, CompletionOutput, SamplingParams
+
+from guidellm.backend import Backend, GenerativeResponse
+from guidellm.config import settings
+from guidellm.core import TextGenerationRequest
+
+
+@Backend.register(backend_type="vllm")
+class VllmBackend(Backend):
+    """
+    A vLLM Backend implementation for generative AI results.
+    """
+
+    def __init__(self, model: Optional[str] = None, **request_args):
+        _model = self._get_model(model)
+        self._request_args: Dict[str, Any] = request_args
+        self.llm = LLM(_model)
+
+        # NOTE: Must be after all the parameters since ``self.llm`` is going to be used
+        # by ``make_request`` within ``Backend.test_connection()``
+        super().__init__(type_="vllm", model=_model, target="not used")
+
+        logger.info(f"vLLM Backend uses model '{_model}'")
+
+    def _get_model(self, model_from_cli: Optional[str] = None) -> str:
+        """Provides the model according to the following priority list:
+        1. from the function argument (comes from the CLI)
+        2. from the environment variable
+        3. `self.default_model` from `self.available_models`
+        """
+
+        if model_from_cli is not None:
+            return model_from_cli
+        elif settings.llm_model is not None:
+            logger.info(
+                "Using vLLM model from environment variable: " f"{settings.llm_model}"
+            )
+            return settings.llm_model
+        else:
+            logger.info(f"Using default vLLM model: {self.default_model}")
+            return self.default_model
+
+    async def make_request(
+        self, request: TextGenerationRequest
+    ) -> AsyncGenerator[GenerativeResponse, None]:
+        """
+        Make a request to the vLLM Python API client.
+
+        :param request: The result request to submit.
+        :type request: TextGenerationRequest
+        :return: An iterator over the generative responses.
+        :rtype: Iterator[GenerativeResponse]
+        """
+
+        logger.debug(f"Making request to vLLM backend with prompt: {request.prompt}")
+
+        token_count = 0
+        request_args = {
+            **self._request_args,
+            "inputs": [request.prompt],
+            "sampling_params": SamplingParams(max_tokens=request.output_token_count),
+        }
+
+        final_response = GenerativeResponse(
+            type_="final",
+            prompt=request.prompt,
+            prompt_token_count=request.prompt_token_count,
+            output_token_count=token_count,
+        )
+
+        if not (result := self.llm.generate(**request_args)):
+            yield final_response
+            return
+
+        try:
+            generations: List[CompletionOutput] = result[0].outputs
+        except IndexError:
+            yield final_response
+            return
+
+        for generation in generations:
+            if not (token := generation.text):
+                break
+            else:
+                token_count += 1
+                yield GenerativeResponse(
+                    type_="token_iter",
+                    add_token=token,
+                    prompt=request.prompt,
+                    prompt_token_count=request.prompt_token_count,
+                    output_token_count=token_count,
+                )
+
+        yield GenerativeResponse(
+            type_="final",
+            prompt=request.prompt,
+            prompt_token_count=request.prompt_token_count,
+            output_token_count=token_count,
+        )
+
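+    # NOTE: the list below is a small, hand-picked set of example models rather than
+    # an exhaustive catalogue; see the supported-models page referenced in the
+    # docstring for the full set accepted by vLLM.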
+    def available_models(self) -> List[str]:
+        """
+        Get the available models for the backend.
+
+        ref: https://docs.vllm.ai/en/v0.4.1/models/supported_models.html
+
+        :return: A list of available models.
+        :rtype: List[str]
+        """
+
+        return [
+            "mistralai/Mistral-7B-Instruct-v0.3",
+            "meta-llama/Meta-Llama-3-8B-Instruct",
+        ]
+
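+    # NOTE: a rough, whitespace-based approximation of the token count, not a
+    # tokenizer-accurate value.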
- """ - for _ in range(n): - yield Model( - id=str(uuid.uuid4()), - created=int(time.time()), - object="model", - owned_by="neuralmagic", - ) diff --git a/tests/dummy/vllm.py b/tests/dummy/vllm.py new file mode 100644 index 0000000..2210b80 --- /dev/null +++ b/tests/dummy/vllm.py @@ -0,0 +1,81 @@ +""" +This module includes data models factories for the `vllm` 3-rd party package +""" + +import random +from typing import Generator, List, Optional + +from pydantic import BaseModel, ConfigDict + +from guidellm.utils import random_strings + +__all__ = ["TestLLM", "CompletionOutput"] + + +class CompletionOutput(BaseModel): + """Test interface of `vllm.CompletionOutput`.""" + + text: str + + +class SamplingParams(BaseModel): + """Test interface of `vllm.SamplingParams`.""" + + max_tokens: Optional[int] = 16 + + +class CompletionOutputs(BaseModel): + outputs: List[CompletionOutput] + + +class TestLLM(BaseModel): + """Test interface of `vllm.LLM`. + + Args: + _outputs_number(int | None): the number of generated tokens per output. + Should be used only for testing purposes. + Default: randint(10..20) + _generations: dynamic representation of generated responses + from deepsparse interface. + + """ + + model_config = ConfigDict( + extra="allow", + validate_assignment=True, + arbitrary_types_allowed=True, + from_attributes=True, + ) + + model: str + max_num_batched_tokens: int + + def _generate_completion_outputs( + self, max_tokens: Optional[int] + ) -> Generator[CompletionOutput, None, None]: + + # NOTE: This value is used only for testing purposes + self._expected_outputs: List[CompletionOutput] = [] + + for text in random_strings( + min_chars=5, + max_chars=random.randint(10, 20), + n=max_tokens or random.randint(10, 20), + ): + instance = CompletionOutput(text=text) + self._expected_outputs.append(instance) + + yield instance + + def generate( + self, inputs: List[str], sampling_params: SamplingParams + ) -> List[CompletionOutputs]: + return [ + CompletionOutputs( + outputs=list( + self._generate_completion_outputs( + max_tokens=sampling_params.max_tokens + ) + ) + ) + ] diff --git a/tests/unit/backend/test_deepsparse_backend.py b/tests/unit/backend/test_deepsparse.py similarity index 85% rename from tests/unit/backend/test_deepsparse_backend.py rename to tests/unit/backend/test_deepsparse.py index 58e5761..244db47 100644 --- a/tests/unit/backend/test_deepsparse_backend.py +++ b/tests/unit/backend/test_deepsparse.py @@ -1,5 +1,12 @@ +""" +This module includes unit tests for the Deepsparse backend. + +Notes: tests from this module are going to be skipped in case + the Python version is >= 3.12 according to the deepsparse limitation. +""" + import sys -from typing import Any, Dict, Generator, List, Optional +from typing import Any, Generator, List, Optional import pytest from pydantic import BaseModel @@ -38,7 +45,7 @@ class TestTextGenerationPipeline: Method `__call__` allows to mock the result object that comes from `deepsparse.pipeline.Pipeline()` so everything is encapsulated right here. - :param self._generation: dynamic representation of generated responses + :param self._generations: dynamic representation of generated responses from deepsparse interface. """ @@ -89,7 +96,7 @@ def mock_deepsparse_pipeline(mocker): {"model": "test/custom_llm"}, ], ) -def test_backend_creation(create_payload: Dict, backend_class): +def test_backend_creation(create_payload, backend_class): """Test the "Deepspaarse Backend" class with defaults and custom input parameters. 
""" @@ -132,9 +139,7 @@ def test_backend_model_from_env(mocker, backend_class): ], ) @pytest.mark.asyncio() -async def test_make_request( - text_generation_request_create_payload: Dict, backend_class -): +async def test_make_request(text_generation_request_create_payload, backend_class): backend = backend_class() output_tokens: List[str] = [] @@ -153,23 +158,21 @@ async def test_make_request( @pytest.mark.smoke() @pytest.mark.parametrize( - ("text_generation_request_create_payload", "error"), + ("text_generation_request", "error"), [ ( - {"prompt": "Test prompt", "output_token_count": -1}, + TextGenerationRequest(prompt="Test prompt", output_token_count=-1), ValueError, ), ], ) @pytest.mark.asyncio() async def test_make_request_invalid_request_payload( - text_generation_request_create_payload: Dict, error, backend_class + text_generation_request, error, backend_class ): backend = backend_class() with pytest.raises(error): [ respnose - async for respnose in backend.make_request( - request=TextGenerationRequest(**text_generation_request_create_payload) - ) + async for respnose in backend.make_request(request=text_generation_request) ] diff --git a/tests/unit/backend/test_openai_backend.py b/tests/unit/backend/test_openai.py similarity index 100% rename from tests/unit/backend/test_openai_backend.py rename to tests/unit/backend/test_openai.py diff --git a/tests/unit/backend/test_vllm.py b/tests/unit/backend/test_vllm.py new file mode 100644 index 0000000..16e83b0 --- /dev/null +++ b/tests/unit/backend/test_vllm.py @@ -0,0 +1,157 @@ +""" +This module includes unit tests for the vLLM backend. + +Notes: tests from this module are going to be skipped in case + the rimtime platform is not a Linux / WSL according to vllm documentation. +""" + +import sys +from typing import Callable, List, Optional + +import pytest + +from guidellm.backend import Backend +from guidellm.config import reload_settings, settings +from guidellm.core import TextGenerationRequest +from tests import dummy + +pytestmark = pytest.mark.skipif( + sys.platform != "linux", + reason="Unsupported Platform. Try using Linux or WSL instead.", +) + + +@pytest.fixture(scope="session") +def backend_class(): + from guidellm.backend.vllm import VllmBackend + + return VllmBackend + + +@pytest.fixture() +def vllm_patch_factory(mocker) -> Callable[[str], dummy.vllm.TestLLM]: + """ + Skip VLLM initializer due to external calls. + Replace VllmBackend.llm object with mock representation. + + This vllm patch is injected into each test automatically. If you need + to override the Mock object - use this fixture. + """ + + def inner(model: Optional[str] = None, max_tokens: Optional[int] = None): + + return mocker.patch( + "vllm.LLM.__new__", + return_value=dummy.vllm.TestLLM( + model=model or settings.llm_model, + max_num_batched_tokens=max_tokens or 4096, + ), + ) + + return inner + + +@pytest.fixture(autouse=True) +def vllm_auto_patch(vllm_patch_factory): + """ + Automatically patch the ``vllm.LLM`` with defaults. + """ + + return vllm_patch_factory() + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + "create_payload", + [ + {}, + {"model": "test/custom_llm"}, + ], +) +def test_backend_creation(create_payload, backend_class, vllm_patch_factory): + """Test the "Deepspaarse Backend" class + with defaults and custom input parameters. 
+ """ + + vllm_patch_factory(model=create_payload.get("model")) + + backends = [ + Backend.create("vllm", **create_payload), + backend_class(**create_payload), + ] + + for backend in backends: + assert backend.llm + ( + backend.model == custom_model + if (custom_model := create_payload.get("model")) + else backend.default_model + ) + + +@pytest.mark.smoke() +def test_backend_model_from_env(mocker, backend_class): + mocker.patch.dict( + "os.environ", + {"GUIDELLM__LLM_MODEL": "test_backend_model_from_env"}, + ) + + reload_settings() + + backends = [Backend.create("vllm"), backend_class()] + + for backend in backends: + assert backend.model == "test_backend_model_from_env" + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + "text_generation_request_create_payload", + [ + {"prompt": "Test prompt"}, + {"prompt": "Test prompt", "output_token_count": 20}, + ], +) +@pytest.mark.asyncio() +async def test_make_request(text_generation_request_create_payload, backend_class): + backend = backend_class() + + output_tokens: List[str] = [] + async for response in backend.make_request( + request=TextGenerationRequest(**text_generation_request_create_payload) + ): + if response.add_token: + output_tokens.append(response.add_token) + + assert "".join(output_tokens) == "".join( + generation.text for generation in backend.llm._expected_outputs + ) + + if max_tokens := text_generation_request_create_payload.get("output_token_count"): + assert len(backend.llm._expected_outputs) == max_tokens + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("text_generation_request", "error"), + [ + ( + TextGenerationRequest(prompt="Test prompt", output_token_count=-1), + ValueError, + ), + ( + TextGenerationRequest(prompt="Test prompt", output_token_count=0), + ValueError, + ), + ], +) +@pytest.mark.asyncio() +async def test_make_request_invalid_request_payload( + text_generation_request, error, backend_class +): + backend = backend_class() + with pytest.raises(error): + [ + respnose + async for respnose in backend.make_request(request=text_generation_request) + ] diff --git a/tox.ini b/tox.ini index 40611c5..76b04cc 100644 --- a/tox.ini +++ b/tox.ini @@ -6,7 +6,7 @@ env_list = py38,py39,py310,py311,py312 [testenv] description = Run all tests deps = - .[dev,deepsparse] + .[dev,deepsparse,vllm] commands = pytest tests/ {posargs} @@ -14,7 +14,7 @@ commands = [testenv:test-unit] description = Run unit tests deps = - .[dev,deepsparse] + .[dev,deepsparse,vllm] commands = python -m pytest tests/unit {posargs} @@ -22,7 +22,7 @@ commands = [testenv:test-integration] description = Run integration tests deps = - .[dev,deepsparse] + .[dev,deepsparse,vllm] commands = python -m pytest tests/integration {posargs} @@ -30,7 +30,7 @@ commands = [testenv:test-e2e] description = Run end-to-end tests deps = - .[dev,deepsparse] + .[dev,deepsparse,vllm] commands = python -m pytest tests/e2e {posargs}