Gaudi: add CI #3160

Status: Open · wants to merge 19 commits into main

4 changes: 2 additions & 2 deletions .github/workflows/build.yaml

@@ -129,9 +129,9 @@ jobs:
 export label_extension="-gaudi"
 export docker_volume="/mnt/cache"
 export docker_devices=""
-export runs_on="ubuntu-latest"
+export runs_on="itac-bm-emr-gaudi3-dell-8gaudi"
 export platform=""
-export extra_pytest=""
+export extra_pytest="--gaudi"
 export target=""
 esac
 echo $dockerfile
9 changes: 6 additions & 3 deletions backends/gaudi/Makefile

@@ -50,11 +50,14 @@ local-dev-install: install-dependencies

 # In order to run the integration tests, you need to first build the image (make -C backends/gaudi image)
 run-integration-tests:
-	pip install -U pip uv
-	uv pip install -r ${root_dir}/backends/gaudi/server/integration-tests/requirements.txt
 	DOCKER_VOLUME=${root_dir}/data \
 	HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \
-	uv run pytest --durations=0 -sv ${root_dir}/backends/gaudi/server/integration-tests
+	pytest --durations=0 -s -vv ${root_dir}/integration-tests --gaudi
+
+run-integration-tests-with-all-models:
+	DOCKER_VOLUME=${root_dir}/data \
+	HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \
+	pytest --durations=0 -s -vv ${root_dir}/integration-tests --gaudi --gaudi-all-models

 # This is used to capture the expected outputs for the integration tests offering an easy way to add more models to the integration tests
 capture-expected-outputs-for-integration-tests:
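
The fixture changes further down replace reading `HF_TOKEN` from the environment with `huggingface_hub.get_token()`. A quick sketch of the resolution order, as I understand `huggingface_hub`'s behavior (not part of this PR's diff):

```python
import huggingface_hub

# get_token() first honors the HF_TOKEN environment variable (as exported by
# the Makefile targets above) and then falls back to the token cached by
# `huggingface-cli login` (~/.cache/huggingface/token), so both setups work.
token = huggingface_hub.get_token()
print("token found:", token is not None)
```
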
12 changes: 11 additions & 1 deletion backends/gaudi/README.md

@@ -99,16 +99,26 @@ curl 127.0.0.1:8080/generate \

 ### Integration tests

+Install the dependencies:
+```bash
+pip install -r integration-tests/requirements.txt
+```
+
 To run the integration tests, you need to first build the image:
 ```bash
 make -C backends/gaudi image
 ```

-Then run the following command to run the integration tests:
+Then run the following command to run the integration tests (CI tests):
 ```bash
 make -C backends/gaudi run-integration-tests
 ```

+To run the integration tests with all models, you can run the following command:
+```bash
+make -C backends/gaudi run-integration-tests-with-all-models
+```
+
 To capture the expected outputs for the integration tests, you can run the following command:
 ```bash
 make -C backends/gaudi capture-expected-outputs-for-integration-tests
2 changes: 0 additions & 2 deletions backends/gaudi/server/integration-tests/pytest.ini

This file was deleted.

7 changes: 0 additions & 7 deletions backends/gaudi/server/integration-tests/requirements.txt

This file was deleted.

32 changes: 31 additions & 1 deletion integration-tests/conftest.py

@@ -1,4 +1,8 @@
-pytest_plugins = ["fixtures.neuron.service", "fixtures.neuron.export_models"]
+pytest_plugins = [
+    "fixtures.neuron.service",
+    "fixtures.neuron.export_models",
+    "fixtures.gaudi.service",
+]
 # ruff: noqa: E402
 from _pytest.fixtures import SubRequest
 from huggingface_hub.inference._generated.types.chat_completion import (
@@ -68,6 +72,15 @@ def pytest_addoption(parser):
     parser.addoption(
         "--neuron", action="store_true", default=False, help="run neuron tests"
     )
+    parser.addoption(
+        "--gaudi", action="store_true", default=False, help="run gaudi tests"
+    )
+    parser.addoption(
+        "--gaudi-all-models",
+        action="store_true",
+        default=False,
+        help="Run tests for all models instead of just the default subset",
+    )


 def pytest_configure(config):
@@ -84,6 +97,22 @@ def skip_release(item):
                item.add_marker(pytest.mark.skip(reason="need --release option to run"))

        selectors.append(skip_release)
+
+    if config.getoption("--gaudi"):
+
+        def skip_not_gaudi(item):
+            if "gaudi" not in item.keywords:
+                item.add_marker(pytest.mark.skip(reason="incompatible with --gaudi option"))
+
+        selectors.append(skip_not_gaudi)
+    else:
+
+        def skip_gaudi(item):
+            if "gaudi" in item.keywords:
+                item.add_marker(pytest.mark.skip(reason="requires --gaudi to run"))
+
+        selectors.append(skip_gaudi)
+
     if config.getoption("--neuron"):

         def skip_not_neuron(item):
@@ -100,6 +129,7 @@ def skip_neuron(item):
                item.add_marker(pytest.mark.skip(reason="requires --neuron to run"))

        selectors.append(skip_neuron)
+
    for item in items:
        for selector in selectors:
            selector(item)
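
For context, the selectors above key off a `gaudi` marker on collected tests. A minimal sketch of a test module that opts in (hypothetical, not part of this PR's diff):

```python
import pytest

# Every test in this module carries the "gaudi" keyword: with --gaudi these
# tests run and everything else is skipped; without it, skip_gaudi skips them.
pytestmark = pytest.mark.gaudi


def test_smoke():
    # Placeholder body; the real tests drive a launched TGI container.
    assert True
```
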
Changes to the Gaudi integration-test service fixtures (file path not shown in this view; the `fixtures.gaudi.service` plugin entry above suggests integration-tests/fixtures/gaudi/service.py):

@@ -14,15 +14,22 @@
 import pytest
 from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError
 from docker.errors import NotFound
-from loguru import logger
-from test_model import TEST_CONFIGS
-from text_generation import AsyncClient
-from text_generation.types import Response
+import logging
+from huggingface_hub import AsyncInferenceClient, TextGenerationOutput
+import huggingface_hub

+logging.basicConfig(
+    level=logging.INFO,
+    format="{asctime} | {levelname: <8} | {name}:{funcName}:{lineno} - {message}",
+    style="{",
+    stream=sys.stdout,
+)
+logger = logging.getLogger(__file__)
+
 # Use the latest image from the local docker build
 DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "tgi-gaudi")
 DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", None)
-HF_TOKEN = os.getenv("HF_TOKEN", None)
+HF_TOKEN = huggingface_hub.get_token()

 assert (
     HF_TOKEN is not None
@@ -48,12 +54,6 @@
     "cap_add": ["sys_nice"],
 }

-logger.add(
-    sys.stderr,
-    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
-    level="INFO",
-)
-

 def stream_container_logs(container, test_name):
     """Stream container logs in a separate thread."""
@@ -69,9 +69,15 @@ def stream_container_logs(container, test_name):
         logger.error(f"Error streaming container logs: {str(e)}")


+class TestClient(AsyncInferenceClient):
+    def __init__(self, service_name: str, base_url: str):
+        super().__init__(model=base_url)
+        self.service_name = service_name
+
+
 class LauncherHandle:
-    def __init__(self, port: int):
-        self.client = AsyncClient(f"http://localhost:{port}", timeout=3600)
+    def __init__(self, service_name: str, port: int):
+        self.client = TestClient(service_name, f"http://localhost:{port}")

     def _inner_health(self):
         raise NotImplementedError
@@ -87,7 +93,7 @@ async def health(self, timeout: int = 60):
                 raise RuntimeError("Launcher crashed")

             try:
-                await self.client.generate("test")
+                await self.client.text_generation("test", max_new_tokens=1)
                 elapsed = time.time() - start_time
                 logger.info(f"Health check passed after {elapsed:.1f}s")
                 return
@@ -111,7 +117,8 @@ async def health(self, timeout: int = 60):

 class ContainerLauncherHandle(LauncherHandle):
     def __init__(self, docker_client, container_name, port: int):
-        super(ContainerLauncherHandle, self).__init__(port)
+        service_name = container_name  # Use container name as service name
+        super(ContainerLauncherHandle, self).__init__(service_name, port)
         self.docker_client = docker_client
         self.container_name = container_name
@@ -132,7 +139,8 @@ def _inner_health(self) -> bool:

 class ProcessLauncherHandle(LauncherHandle):
     def __init__(self, process, port: int):
-        super(ProcessLauncherHandle, self).__init__(port)
+        service_name = "process"  # Use generic name for process launcher
+        super(ProcessLauncherHandle, self).__init__(service_name, port)
         self.process = process

     def _inner_health(self) -> bool:
@@ -151,11 +159,13 @@ def data_volume():


 @pytest.fixture(scope="module")
-def launcher(data_volume):
+def gaudi_launcher():
     @contextlib.contextmanager
     def docker_launcher(
         model_id: str,
         test_name: str,
+        tgi_args: List[str] = None,
+        env_config: dict = None,
     ):
         logger.info(
             f"Starting docker launcher for model {model_id} and test {test_name}"
@@ -183,32 +193,40 @@ def get_free_port():
             )
             container.stop()
             container.wait()
+            container.remove()
+            logger.info(f"Removed existing container {container_name}")
         except NotFound:
             pass
         except Exception as e:
             logger.error(f"Error handling existing container: {str(e)}")

-        model_name = next(
-            name for name, cfg in TEST_CONFIGS.items() if cfg["model_id"] == model_id
-        )

-        tgi_args = TEST_CONFIGS[model_name]["args"].copy()
+        if tgi_args is None:
+            tgi_args = []
+        else:
+            tgi_args = tgi_args.copy()

         env = BASE_ENV.copy()

         # Add model_id to env
         env["MODEL_ID"] = model_id

-        # Add env config that is definied in the fixture parameter
-        if "env_config" in TEST_CONFIGS[model_name]:
-            env.update(TEST_CONFIGS[model_name]["env_config"].copy())
+        # Add env config that is defined in the fixture parameter
+        if env_config is not None:
+            env.update(env_config.copy())

-        volumes = [f"{DOCKER_VOLUME}:/data"]
+        volumes = []
+        if DOCKER_VOLUME:
+            volumes = [f"{DOCKER_VOLUME}:/data"]
+        logger.debug(f"Using volume {volumes}")

         try:
-            logger.debug(f"Using command {tgi_args}")
             logger.info(f"Creating container with name {container_name}")
+
+            logger.debug(f"Using environment {env}")
+            logger.debug(f"Using volumes {volumes}")
+            logger.debug(f"HABANA_RUN_ARGS {HABANA_RUN_ARGS}")

             # Log equivalent docker run command for debugging, this is not actually executed
             container = client.containers.run(
                 DOCKER_IMAGE,
@@ -271,15 +289,16 @@ def get_free_port():


 @pytest.fixture(scope="module")
-def generate_load():
+def gaudi_generate_load():
     async def generate_load_inner(
-        client: AsyncClient, prompt: str, max_new_tokens: int, n: int
-    ) -> List[Response]:
+        client: AsyncInferenceClient, prompt: str, max_new_tokens: int, n: int
+    ) -> List[TextGenerationOutput]:
         try:
             futures = [
-                client.generate(
+                client.text_generation(
                     prompt,
                     max_new_tokens=max_new_tokens,
+                    details=True,
                     decoder_input_details=True,
                 )
                 for _ in range(n)
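
To make the renamed fixtures concrete, a minimal usage sketch follows. It assumes `docker_launcher` still yields a launcher handle as in the pre-move fixture; the model id, server args, and env overrides are illustrative, not taken from this PR:

```python
import asyncio

import pytest


@pytest.mark.gaudi
def test_generate_under_load(gaudi_launcher, gaudi_generate_load):
    # Launch a TGI container for an illustrative model and wait for health.
    with gaudi_launcher(
        model_id="meta-llama/Llama-3.1-8B-Instruct",  # illustrative model id
        test_name="llama-smoke",
        tgi_args=["--max-total-tokens", "2048"],  # illustrative server args
        env_config={"LOG_LEVEL": "info"},  # illustrative env override
    ) as handle:
        asyncio.run(handle.health(timeout=600))
        # Send four concurrent text_generation requests via the handle's client.
        responses = asyncio.run(
            gaudi_generate_load(handle.client, "What is Deep Learning?", 32, 4)
        )
        assert len(responses) == 4
```
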
Changes to a test utility that imports the shared model configs (file path not shown in this view):

@@ -3,7 +3,7 @@
 from typing import Dict, Any, Generator

 import pytest
-from test_model import TEST_CONFIGS
+from test_gaudi_generate import TEST_CONFIGS

 UNKNOWN_CONFIGS = {
     name: config
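
For reference, the launcher previously read the `model_id`, `args`, and `env_config` keys out of `TEST_CONFIGS`. A hypothetical entry with that shape (the real dictionary now lives in `test_gaudi_generate.py` and may differ):

```python
# Hypothetical TEST_CONFIGS entry, inferred from the keys the old launcher
# read (model_id, args, env_config); illustrative values only.
TEST_CONFIGS = {
    "llama3.1-8b": {
        "model_id": "meta-llama/Llama-3.1-8B-Instruct",
        "args": ["--max-input-tokens", "512", "--max-total-tokens", "1024"],
        "env_config": {"PT_HPU_LAZY_MODE": "1"},
    },
}
```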