forked from vllm-project/vllm
-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main_rocm_original' into fix-llmm1-kernel
- Loading branch information
Showing
554 changed files
with
57,257 additions
and
19,074 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import os | ||
import zipfile | ||
|
||
MAX_SIZE_MB = 200 | ||
|
||
|
||
def print_top_10_largest_files(zip_file):
    """Print the ten largest members of a zip archive.

    Members are ranked by uncompressed size, largest first, and each one is
    reported on its own line with its size expressed in MB.

    Args:
        zip_file: Path or file-like object accepted by ``zipfile.ZipFile``.
    """
    bytes_per_mb = 1024 * 1024
    with zipfile.ZipFile(zip_file, 'r') as archive:
        members = []
        for name in archive.namelist():
            members.append((name, archive.getinfo(name).file_size))
    # Stable descending sort: equally sized members keep their archive order.
    largest_first = sorted(members, key=lambda entry: entry[1], reverse=True)
    for name, size in largest_first[:10]:
        print(f"{name}: {size / bytes_per_mb} MBs uncompressed.")
|
||
|
||
def check_wheel_size(directory):
    """Check every ``.whl`` file under ``directory`` against MAX_SIZE_MB.

    A verdict is printed for each wheel visited. The walk stops at the first
    wheel whose on-disk size exceeds the limit, after printing that wheel's
    ten largest members.

    Args:
        directory: Root of the directory tree to search with ``os.walk``.

    Returns:
        1 as soon as an oversized wheel is found, otherwise 0.
    """
    for root, _, files in os.walk(directory):
        for filename in files:
            if not filename.endswith(".whl"):
                continue
            wheel_path = os.path.join(root, filename)
            wheel_size_mb = os.path.getsize(wheel_path) / (1024 * 1024)
            if wheel_size_mb <= MAX_SIZE_MB:
                print(f"Wheel {wheel_path} is within the allowed size "
                      f"({wheel_size_mb} MB).")
                continue
            print(
                f"Wheel {wheel_path} is too large ({wheel_size_mb} MB) "
                f"compare to the allowed size ({MAX_SIZE_MB} MB).")
            print_top_10_largest_files(wheel_path)
            return 1
    return 0
|
||
|
||
if __name__ == "__main__":
    import sys

    # The first command-line argument is the directory to scan; the result
    # of the check becomes the process exit status.
    exit_status = check_wheel_size(sys.argv[1])
    sys.exit(exit_status)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,38 +1,73 @@ | ||
# This script builds the ROCm docker image and runs the API server inside the container. | ||
# It serves as a sanity check for compilation and basic model usage. | ||
# This script runs test inside the corresponding ROCm docker container. | ||
set -ex | ||
|
||
# Print ROCm version | ||
echo "--- ROCm info" | ||
rocminfo | ||
|
||
# Try building the docker image | ||
docker build -t rocm -f Dockerfile.rocm . | ||
# cleanup older docker images | ||
# Prune dangling Docker images and unused volumes when the filesystem that
# holds Docker's root directory is more than $threshold percent full.
# Exits the script with status 1 if Docker's root directory cannot be
# determined; skips the cleanup (with a warning) if disk usage cannot be read.
cleanup_docker() {
  # Get Docker's root directory
  docker_root=$(docker info -f '{{.DockerRootDir}}')
  if [ -z "$docker_root" ]; then
    echo "Failed to determine Docker root directory."
    exit 1
  fi
  echo "Docker root directory: $docker_root"
  # Check disk usage of the filesystem where Docker's root directory is located.
  # -P selects the POSIX output format: exactly one line per filesystem, so the
  # capacity is always field 5 even when the device name is very long.
  disk_usage=$(df -P "$docker_root" | tail -1 | awk '{print $5}' | sed 's/%//')
  # Guard the numeric comparison below: an empty or non-numeric value would
  # make `[ -gt ]` error out and be misreported as "below threshold".
  case "$disk_usage" in
    '' | *[!0-9]*)
      echo "Could not determine disk usage for $docker_root. Skipping cleanup." >&2
      return 0
      ;;
  esac
  # Define the threshold
  threshold=70
  if [ "$disk_usage" -gt "$threshold" ]; then
    echo "Disk usage is above $threshold%. Cleaning up Docker images and volumes..."
    # Remove dangling images (those that are not tagged and not used by any container)
    docker image prune -f
    # Remove unused volumes
    docker volume prune -f
    echo "Docker images and volumes cleanup completed."
  else
    echo "Disk usage is below $threshold%. No cleanup needed."
  fi
}
|
||
# Setup cleanup | ||
remove_docker_container() { docker rm -f rocm || true; } | ||
trap remove_docker_container EXIT | ||
remove_docker_container | ||
|
||
# Run the image | ||
docker run --device /dev/kfd --device /dev/dri --network host --name rocm rocm python3 -m vllm.entrypoints.api_server & | ||
|
||
# Wait for the server to start | ||
wait_for_server_to_start() { | ||
timeout=300 | ||
counter=0 | ||
|
||
while [ "$(curl -s -o /dev/null -w ''%{http_code}'' localhost:8000/health)" != "200" ]; do | ||
sleep 1 | ||
counter=$((counter + 1)) | ||
if [ $counter -ge $timeout ]; then | ||
echo "Timeout after $timeout seconds" | ||
break | ||
# Call the cleanup docker function | ||
cleanup_docker | ||
|
||
echo "--- Resetting GPUs" | ||
|
||
echo "reset" > /opt/amdgpu/etc/gpu_state | ||
|
||
while true; do | ||
sleep 3 | ||
if grep -q clean /opt/amdgpu/etc/gpu_state; then | ||
echo "GPUs state is \"clean\"" | ||
break | ||
fi | ||
done | ||
done | ||
|
||
echo "--- Building container" | ||
# Name the image after the short commit hash; the container name gets an
# extra 10-character random alphanumeric suffix.
sha=$(git rev-parse --short HEAD)
image_name="rocm_${sha}"
container_name="rocm_${sha}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"
docker build -t ${image_name} -f Dockerfile.rocm --progress plain .
|
||
# Cleanup handler: force-remove the test container; only if that fails,
# force-remove the image instead. Always returns success.
remove_docker_container() {
  if ! docker rm -f ${container_name}; then
    docker image rm -f ${image_name} || true
  fi
}
wait_for_server_to_start | ||
trap remove_docker_container EXIT | ||
|
||
echo "--- Running container" | ||
|
||
# Run the script's arguments through `bash -c` inside a throwaway (--rm)
# container built from the image above. The AMD GPU device nodes are exposed
# and HF_TOKEN is forwarded from the calling environment.
docker run --device /dev/kfd --device /dev/dri --network host --rm \
        -e HF_TOKEN --name ${container_name} ${image_name} \
        /bin/bash -c "${@}"
|
||
# Test a simple prompt | ||
curl -X POST -H "Content-Type: application/json" \ | ||
localhost:8000/generate \ | ||
-d '{"prompt": "San Francisco is a"}' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# This script builds the Neuron docker image and runs the API server inside the container. | ||
# It serves as a sanity check for compilation and basic model usage. | ||
set -e | ||
|
||
# Log Docker in to the ECR registry with a password obtained from the AWS CLI
aws ecr get-login-password --region us-west-2 \
    | docker login --username AWS --password-stdin 763104351884.dkr.ecr.us-west-2.amazonaws.com

# Prune old images and containers to save disk space, at most once a day.
# The time of the last prune is kept in a timestamp file under /tmp.
timestamp_file=/tmp/neuron-docker-build-timestamp
if [ ! -f "$timestamp_file" ]; then
    # First run: only record the time, nothing is pruned.
    echo $(date +%s) > "$timestamp_file"
else
    last_build=$(cat "$timestamp_file")
    current_time=$(date +%s)
    if [ $((current_time - last_build)) -gt 86400 ]; then
        docker system prune -f
        echo $current_time > "$timestamp_file"
    fi
fi

# Build the image under test
docker build -t neuron -f Dockerfile.neuron .
|
||
# Cleanup: force-remove the "neuron" container, ignoring any failure.
remove_docker_container() {
    docker rm -f neuron || true
}
# Run the cleanup when the script exits, and once right now to clear any
# container left over under the same name.
trap remove_docker_container EXIT
remove_docker_container
|
||
# Start the API server inside the container, in the background
docker run \
    --device=/dev/neuron0 --device=/dev/neuron1 \
    --network host \
    --name neuron \
    neuron \
    python3 -m vllm.entrypoints.api_server \
        --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
        --max-num-seqs 8 \
        --max-model-len 128 \
        --block-size 128 \
        --device neuron \
        --tensor-parallel-size 2 &
|
||
# Wait for the server to start.
# Polls localhost:8000/health once per second until it answers HTTP 200.
# Returns 0 once the server is healthy, or 1 if it is still not healthy after
# $timeout polls. Because the script runs under `set -e`, a timeout aborts the
# script instead of carrying on as if the server had started.
wait_for_server_to_start() {
    timeout=300
    counter=0

    while [ "$(curl -s -o /dev/null -w '%{http_code}' localhost:8000/health)" != "200" ]; do
        sleep 1
        counter=$((counter + 1))
        if [ $counter -ge $timeout ]; then
            echo "Timeout after $timeout seconds"
            return 1
        fi
    done
}
wait_for_server_to_start
|
||
# Smoke test: POST a single prompt to the /generate endpoint
curl --request POST \
    --header "Content-Type: application/json" \
    --data '{"prompt": "San Francisco is a"}' \
    localhost:8000/generate
Oops, something went wrong.