Skip to content

Commit

Permalink
[CI] workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
KuilongCui committed Sep 12, 2024
1 parent d174866 commit 3e840db
Show file tree
Hide file tree
Showing 50 changed files with 768 additions and 52 deletions.
44 changes: 44 additions & 0 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Benchmark workflow: runs ./tools/bench_test.sh on the self-hosted runner for
# pushes to main and for pull requests targeting main, then posts the contents
# of performance.txt as a pull-request comment.
name: BENCH

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  # Runs the cancel-workflow-action with all_but_latest enabled before any
  # benchmark work starts.
  cancel_previous_workflows:
    runs-on: [self-hosted]
    timeout-minutes: 3
    steps:
      # NOTE(review): the action reference was mangled by e-mail obfuscation in
      # the page this was recovered from; confirm the pinned version tag.
      - uses: styfle/cancel-workflow-action@0.12.1
        with:
          all_but_latest: true

  bench_tests:
    needs: cancel_previous_workflows
    runs-on: [self-hosted]
    timeout-minutes: 60
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      # Kill any container still running on the runner before the test starts.
      - name: Kill Running Containers
        run: |
          [[ -n $(docker ps -q) ]] && docker kill $(docker ps -q) || echo "No running containers to kill."
      - name: Build And Test
        run: ./tools/bench_test.sh
      - name: Create comment from file
        # context.issue.number only exists for pull_request events; on a push
        # to main there is nothing to comment on and the API call would fail.
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const filePath = 'performance.txt';
            const commentBody = fs.readFileSync(filePath, 'utf8');
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: commentBody
            });
31 changes: 31 additions & 0 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# End-to-end test workflow: runs ./tools/e2e_test.sh on the self-hosted runner
# for pushes to main and for pull requests targeting main.
name: E2E

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  # Runs the cancel-workflow-action with all_but_latest enabled before any
  # test work starts.
  cancel_previous_workflows:
    runs-on: [self-hosted]
    timeout-minutes: 3
    steps:
      # NOTE(review): the action reference was mangled by e-mail obfuscation in
      # the page this was recovered from; confirm the pinned version tag.
      - uses: styfle/cancel-workflow-action@0.12.1
        with:
          all_but_latest: true

  e2e_tests:
    needs: cancel_previous_workflows
    runs-on: [self-hosted]
    timeout-minutes: 60
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      # Kill any container still running on the runner before the test starts.
      - name: Kill Running Containers
        run: |
          [[ -n $(docker ps -q) ]] && docker kill $(docker ps -q) || echo "No running containers to kill."
      - name: Build And Test
        run: ./tools/e2e_test.sh
44 changes: 44 additions & 0 deletions .github/workflows/migration.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Migration test workflow: runs ./tools/migration_test.sh on the self-hosted
# runner for pushes to main and for pull requests targeting main, then posts
# the contents of performance.txt as a pull-request comment.
name: MIGRATION

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  # Runs the cancel-workflow-action with all_but_latest enabled before any
  # test work starts.
  cancel_previous_workflows:
    runs-on: [self-hosted]
    timeout-minutes: 3
    steps:
      # NOTE(review): the action reference was mangled by e-mail obfuscation in
      # the page this was recovered from; confirm the pinned version tag.
      - uses: styfle/cancel-workflow-action@0.12.1
        with:
          all_but_latest: true

  migration_tests:
    needs: cancel_previous_workflows
    runs-on: [self-hosted]
    timeout-minutes: 60
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      # Kill any container still running on the runner before the test starts.
      - name: Kill Running Containers
        run: |
          [[ -n $(docker ps -q) ]] && docker kill $(docker ps -q) || echo "No running containers to kill."
      - name: Build And Test
        run: ./tools/migration_test.sh
      - name: Create comment from file
        # context.issue.number only exists for pull_request events; on a push
        # to main there is nothing to comment on and the API call would fail.
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const filePath = 'performance.txt';
            const commentBody = fs.readFileSync(filePath, 'utf8');
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: commentBody
            });
33 changes: 18 additions & 15 deletions .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Pylint
name: PYLINT

on:
push:
Expand All @@ -9,21 +9,24 @@ on:
- main

jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10"]
cancel_previous_workflows:
runs-on: [self-hosted]
timeout-minutes: 3
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
- uses: styfle/cancel-workflow-action@0.12.1
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pylint==2.12.2
all_but_latest: true

pylint_test:
needs: cancel_previous_workflows
runs-on: [self-hosted]
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
- name: Analysing the code with pylint
run: |
pylint --rcfile=.pylintrc --output-format=parseable --jobs=8 $( find llumnix/ -type f -name '*.py')
nvidia-docker run --rm -t --net host --ipc host \
-v ${PWD}:/workspace \
-w /workspace \
registry.cn-beijing.aliyuncs.com/llumnix/llumnix-dev:20240909_action_678a439 \
bash -c "pip install -e . > /dev/null && make lint"
31 changes: 31 additions & 0 deletions .github/workflows/unit.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Unit test workflow: runs ./tools/unit_test.sh on the self-hosted runner for
# pushes to main and for pull requests targeting main.
name: UNIT

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  # Runs the cancel-workflow-action with all_but_latest enabled before any
  # test work starts.
  cancel_previous_workflows:
    runs-on: [self-hosted]
    timeout-minutes: 3
    steps:
      # NOTE(review): the action reference was mangled by e-mail obfuscation in
      # the page this was recovered from; confirm the pinned version tag.
      - uses: styfle/cancel-workflow-action@0.12.1
        with:
          all_but_latest: true

  unit_tests:
    needs: cancel_previous_workflows
    runs-on: [self-hosted]
    timeout-minutes: 60
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      # Kill any container still running on the runner before the test starts.
      - name: Kill Running Containers
        run: |
          [[ -n $(docker ps -q) ]] && docker kill $(docker ps -q) || echo "No running containers to kill."
      - name: Build And Test
        run: ./tools/unit_test.sh
26 changes: 26 additions & 0 deletions .github/workflows/whl.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Wheel build workflow: builds a universal wheel with setup.py on a hosted
# Ubuntu runner for pushes to main and for pull requests targeting main.
name: WHL_BUILD

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  whl_build:
    timeout-minutes: 10
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # The version is quoted so YAML keeps it as the string "3.10" rather
      # than reading it as the float 3.1.
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Build whl
        run: |
          python3 -m pip install --upgrade setuptools wheel
          python3 setup.py bdist_wheel --universal
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@ install:

.PHONY: lint
lint: check_pylint_installed check_pytest_installed
@pylint --rcfile=.pylintrc -s n ./llumnix
@pylint --rcfile=.pylintrc -s n --jobs=32 ./llumnix

@pylint --rcfile=.pylintrc \
--disable=protected-access,super-init-not-called,unused-argument,redefined-outer-name,invalid-name \
-s n ./tests
-s n --jobs=32 ./tests

.PHONY: test
test: check_pytest_installed
@pytest -x -q --ignore=third_party/ --disable-warnings
@pytest -x -v --ignore=third_party/ --ignore=tests/e2e_test --disable-warnings

#################### pygloo install for gloo migration backend begin ####################

Expand Down
14 changes: 14 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# conftest.py
import subprocess
from time import sleep

def pytest_sessionstart(session):
    """Restart Ray before the test session begins.

    Force-stops any Ray processes, then starts a fresh head node on port
    30050 with usage stats disabled. Each command is followed by a 3 second
    pause. Command output is discarded, and a non-zero exit status raises
    ``subprocess.CalledProcessError`` because ``check=True`` is set.
    """
    quiet_checked = {
        "check": True,
        "stdout": subprocess.DEVNULL,
        "stderr": subprocess.DEVNULL,
    }
    ray_commands = (
        ["ray", "stop", "--force"],
        ["ray", "start", "--head", "--disable-usage-stats", "--port=30050"],
    )
    for ray_command in ray_commands:
        subprocess.run(ray_command, **quiet_checked)
        sleep(3)

def pytest_sessionfinish(session, exitstatus):
    """Stop Ray once the test session has finished.

    Runs ``ray stop`` with its output discarded, then pauses for 3 seconds.
    A non-zero exit status raises ``subprocess.CalledProcessError`` because
    ``check=True`` is set.
    """
    stop_command = ["ray", "stop"]
    devnull = subprocess.DEVNULL
    subprocess.run(stop_command, check=True, stdout=devnull, stderr=devnull)
    sleep(3)
3 changes: 2 additions & 1 deletion llumnix/backends/vllm/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ def step(self) -> None:
instance_info.step_id = next(self.step_counter)
instance_info.timestamp = time.time()
instance_info.latency = self.model_executor.last_inference_latency

seq_groups = self.scheduler.running
if seq_groups:
tot_blocks = []
Expand Down Expand Up @@ -257,8 +258,8 @@ def commit_dst_request(self, backend_request: SequenceGroupLlumnix) -> None:
logger.info("add seq {} to block table".format(seq.seq_id))
pre_alloc_blocks = self.engine.scheduler.pre_alloc_cache_dict.pop(backend_request.request_id)
self.engine.scheduler.block_manager.add_block_table(pre_alloc_blocks, seq.seq_id)
self.add_running_request(backend_request)
backend_request.reset_migration_args()
self.add_running_request(backend_request)

def send_blocks(self, dst_ray_actor: "ray.actor.ActorHandle", src_blocks: List[int], dst_blocks: List[int]) -> None:
ray.get(dst_ray_actor.execute_engine_method.remote("_run_workers",
Expand Down
12 changes: 12 additions & 0 deletions llumnix/backends/vllm/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import time
from typing import Dict, List
import math
import ray
Expand All @@ -26,6 +27,7 @@
from llumnix.backends.vllm.utils import _sample_with_torch
from llumnix.backends.vllm.migration_backend import MigrationBackendBase, get_migration_backend
from llumnix.internal_config import MigrationConfig
from llumnix.utils import convert_bytes

logger = init_logger(__name__)

Expand Down Expand Up @@ -93,6 +95,7 @@ def init_migration(self, instance_id: str, migration_config: MigrationConfig, sr
self.instance_id = instance_id
self.global_world_size = 0
self.global_rank = -1
# self.migration_config = migration_config
self.migration_backend: MigrationBackendBase = get_migration_backend(migration_config=migration_config,
cache_engine=self.cache_engine,
worker_handle_list=src_worker_handle_list,
Expand All @@ -104,10 +107,19 @@ def init_migration(self, instance_id: str, migration_config: MigrationConfig, sr

def migrate_cache(self, src_worker_handle_list, src_blocks: List[int], dst_blocks: List[int]) -> None:
    """Migrate cache blocks from the source worker of the same rank and log the throughput.

    Args:
        src_worker_handle_list: Source worker handles; the entry at
            ``self.rank`` is the one this worker migrates from.
        src_blocks: Block numbers handed to the migration backend as the source blocks.
        dst_blocks: Block numbers handed to the migration backend as the destination blocks.

    If the source actor is dead (``ray.exceptions.RayActorError``) the failure
    is logged and no throughput line is emitted, because nothing was measured.
    """
    src_worker_handle = src_worker_handle_list[self.rank]

    # perf_counter is monotonic, so the measured interval cannot go negative
    # when the wall clock is adjusted during the transfer.
    start_time = time.perf_counter()
    try:
        self.migration_backend.migrate_cache(src_worker_handle, src_blocks, dst_blocks)
    except ray.exceptions.RayActorError:
        logger.info("[migrate_cache] self.rank: {}, src_worker_handle {} is dead".format(self.rank, src_worker_handle))
        # The transfer did not complete; reporting a speed would be misleading.
        return
    elapsed = time.perf_counter() - start_time

    total_kv_cache_size = len(src_blocks) * CacheEngine.get_cache_block_size(
        self.cache_config, self.model_config, self.parallel_config)
    # Guard against a zero-length interval so logging can never raise
    # ZeroDivisionError after a successful migration.
    speed = total_kv_cache_size/1024/1024/1024/elapsed if elapsed > 0 else float("inf")
    logger.info("[migration_cache] blocks_num: {}, total_kv_cache_size: {}, time: {}s, speed: {}GB/s."
                .format(len(src_blocks), convert_bytes(total_kv_cache_size), elapsed, speed))

def do_recv(self, *args, **kwargs):
    """Forward all positional and keyword arguments to the migration backend's ``do_recv`` and return its result."""
    return self.migration_backend.do_recv(*args, **kwargs)
Expand Down
6 changes: 3 additions & 3 deletions llumnix/entrypoints/llumnix_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import time
from typing import List, Tuple
import asyncio
import socket
import ray

from llumnix.llm_engine_manager import LLMEngineManager, MANAGER_ACTOR_NAME
Expand All @@ -38,10 +39,9 @@
MAX_TASK_RETRIES = 300
RETRIES_INTERVALS = 0.1


def get_ip_address():
    """Return the IP address that this machine's hostname resolves to.

    The address is obtained with ``socket.gethostbyname(socket.gethostname())``.
    No external ``hostname`` process is spawned: its output was overwritten by
    the socket lookup before being used, so the call only added a failure mode.

    NOTE(review): depending on the host's name resolution setup this may be a
    loopback address - confirm that is acceptable for callers.
    """
    hostname = socket.gethostname()
    ip_address = socket.gethostbyname(hostname)
    return ip_address

def launch_ray_cluster(ray_cluster_port: int) -> subprocess.CompletedProcess:
Expand Down
1 change: 1 addition & 0 deletions llumnix/llumlet/llumlet.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def migrate_out(self, dst_instance_name: str) -> List[str]:
migrate_out_request.stage_timestamps.append(time.time())
self.backend_engine.remove_migrating_out_request_last_stage(migrate_out_request)
else:
migrate_out_request.reset_migration_args()
ray.get(migrate_in_ray_actor.execute_migration_method.remote("free_dst_pre_alloc_cache", migrate_out_request.request_id))
t1 = time.time()
logger.info("{}->{} migrate done, migrate request {}, status:{}, len:{} blocks, cost:{} ms" \
Expand Down
14 changes: 14 additions & 0 deletions llumnix/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,17 @@

def random_uuid() -> str:
return str(uuid.uuid4().hex)

def convert_bytes(bytes_size):
    """Format a byte count as a string with two decimals and a unit from B to TB.

    The value is divided by 1024 for as long as it is at least 1024 and a
    larger unit is available; anything beyond the TB range stays in TB.

    Raises:
        ValueError: if ``bytes_size`` is negative.
    """
    if bytes_size < 0:
        raise ValueError("Size must be a non-negative integer.")

    units = ('B', 'KB', 'MB', 'GB', 'TB')
    value = bytes_size

    # Try every unit except the largest; the largest is the fallback below.
    for unit in units[:-1]:
        if not value >= 1024:
            return f"{value:.2f} {unit}"
        value /= 1024.0

    return f"{value:.2f} {units[-1]}"
2 changes: 1 addition & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[pytest]
asyncio_default_fixture_loop_scope = function
asyncio_default_fixture_loop_scope = function
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ func_timeout
pyyaml
yacs
numpy < 1.24.0 # for gloo migration backend's compatibility with numpy.float
pyzmq
14 changes: 14 additions & 0 deletions tests/e2e_test/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright (c) 2024, Alibaba Group;
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TODO(KuilongCui): add failover test
Loading

0 comments on commit 3e840db

Please sign in to comment.