Skip to content

Commit c31fa28

Browse files
authored
Merge branch 'main' into dev_multistream_overlap
2 parents cc6bc1b + e2a0c19 commit c31fa28

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+2384
-1413
lines changed

.github/Dockerfile.buildwheel

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#
2+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
# This file is a part of the vllm-ascend project.
16+
#
17+
# Python flavour of the CANN base image. Declared before FROM so that it can be
# interpolated into the image tag; override with `--build-arg PY_VERSION=...`.
ARG PY_VERSION=3.10
FROM quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py${PY_VERSION}

# The build step below uses the bash builtin `source`. Docker's default RUN
# shell is `/bin/sh -c`, so select bash explicitly instead of relying on
# whatever shell the base image happens to configure.
SHELL ["/bin/bash", "-c"]

ARG COMPILE_CUSTOM_KERNELS=1

# Define environments
ENV DEBIAN_FRONTEND=noninteractive
# Persist the build argument in the image environment.
# NOTE(review): presumably consumed by setup.py during the wheel build - confirm.
ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}

# Install the OS-level toolchain and utilities. The apt caches are removed in
# the same layer that created them so they do not end up in the image.
RUN apt-get update -y && \
    apt-get install -y \
        cmake \
        g++ \
        gcc \
        git \
        libnuma-dev \
        net-tools \
        python3-pip \
        vim \
        wget && \
    rm -rf /var/cache/apt/* && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /workspace

# Copy the whole build context (the repository checkout) into the image.
COPY . /workspace/vllm-ascend/

# Install the Python requirements, using the PyTorch CPU wheel index as an
# additional package index, plus twine.
RUN python3 -m pip install --no-cache-dir -r vllm-ascend/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu/ && \
    python3 -m pip install --no-cache-dir twine

# Build the vllm-ascend wheel:
#   1. load the Ascend toolkit and ATB environment scripts,
#   2. append the architecture-specific devlib directory to LD_LIBRARY_PATH
#      (`uname -m` is used because `uname -i` is non-portable and may print
#      "unknown"),
#   3. run `setup.py bdist_wheel`,
#   4. rewrite the `linux_<arch>` platform tag in each wheel file name to a
#      manylinux tag. A wheel whose name needs no rewrite is left untouched
#      instead of failing on `mv` of a file onto itself, and any failed rename
#      aborts the build.
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
    source /usr/local/Ascend/nnal/atb/set_env.sh && \
    export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/$(uname -m)-linux/devlib" && \
    cd vllm-ascend && \
    python3 setup.py bdist_wheel && \
    ls -l dist && \
    for f in dist/*.whl; do \
        [ -e "$f" ] || { echo "no wheel found in dist/" >&2; exit 1; }; \
        renamed="$(echo "$f" | sed -e 's/-linux_x86_64\.whl$/-manylinux1_x86_64.whl/' -e 's/-linux_aarch64\.whl$/-manylinux2014_aarch64.whl/')"; \
        if [ "$f" != "$renamed" ]; then mv "$f" "$renamed" || exit 1; fi; \
    done && \
    ls -l dist

CMD ["/bin/bash"]

.github/actionlint.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ self-hosted-runner:
55
- linux-arm64-npu-2
66
- linux-arm64-npu-4
77
- linux-arm64-npu-static-8
8+
- ubuntu-24.04-arm

.github/workflows/actionlint.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ jobs:
4747

4848
- name: "Run actionlint"
4949
env:
50-
SHELLCHECK_OPTS: --exclude=SC2046,SC2006
50+
SHELLCHECK_OPTS: --exclude=SC2046,SC2006,SC2086
5151
run: |
5252
echo "::add-matcher::.github/workflows/matchers/actionlint.json"
5353
tools/actionlint.sh -color

.github/workflows/release_code.yml

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#
2+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
# This file is a part of the vllm-ascend project.
16+
#
17+
18+
# Builds the source distribution (sdist) of vllm-ascend, archives it as a
# workflow artifact and, for tag refs, uploads it with twine.
name: build / sdist

on:
  pull_request:
    branches:
      - 'main'
      - '*-dev'
    paths:
      - '.github/workflows/release_code.yml'
      - 'vllm_ascend/**'
      - 'setup.py'
      - 'pyproject.toml'
      - 'requirements.txt'
      - 'cmake/**'
      - 'CMakeLists.txt'
      - 'csrc/**'
  push:
    branches:
      - 'main'
      - '*-dev'
    tags:
      - 'v*'
    paths:
      - '.github/workflows/release_code.yml'
      - 'vllm_ascend/**'
      - 'setup.py'
      - 'pyproject.toml'
      - 'requirements.txt'
      - 'cmake/**'
      - 'CMakeLists.txt'
      - 'csrc/**'

jobs:
  build:
    name: release code
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10"]
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      # Log the runner's CPU details for debugging.
      - name: Print
        run: |
          lscpu

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python3 -m pip install twine setuptools_scm

      - name: Generate tar.gz
        run: |
          python3 setup.py sdist
          ls dist

      - name: Archive tar.gz
        uses: actions/upload-artifact@v4
        with:
          name: vllm-ascend-src
          path: dist/*

      # Only runs for tag refs. The credentials are handed to twine through its
      # TWINE_USERNAME / TWINE_PASSWORD environment variables so the token is
      # never expanded into the shell command line; --non-interactive makes
      # twine fail instead of prompting if the token is missing.
      - name: Release
        if: startsWith(github.ref, 'refs/tags/')
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
        run: |
          python3 -m twine upload --non-interactive dist/*

.github/workflows/release_whl.yml

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
#
2+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
# This file is a part of the vllm-ascend project.
16+
#
17+
18+
# Builds the vllm-ascend wheel inside the .github/Dockerfile.buildwheel image
# for every OS / Python version combination in the matrix, archives it as a
# workflow artifact and, for tag refs, uploads it with twine.
name: build / wheel

on:
  pull_request:
    branches:
      - 'main'
      - '*-dev'
    paths:
      - '.github/workflows/release_whl.yml'
      - '.github/Dockerfile.buildwheel'
      - 'vllm_ascend/**'
      - 'setup.py'
      - 'pyproject.toml'
      - 'requirements.txt'
      - 'cmake/**'
      - 'CMakeLists.txt'
      - 'csrc/**'
  push:
    branches:
      - 'main'
      - '*-dev'
    tags:
      - 'v*'
    paths:
      - '.github/workflows/release_whl.yml'
      - '.github/Dockerfile.buildwheel'
      - 'vllm_ascend/**'
      - 'setup.py'
      - 'pyproject.toml'
      - 'requirements.txt'
      - 'cmake/**'
      - 'CMakeLists.txt'
      - 'csrc/**'

jobs:
  build:
    name: build and release wheel
    strategy:
      matrix:
        os: [ubuntu-24.04, ubuntu-24.04-arm]
        python-version: ['3.9', '3.10', '3.11']
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      # Log the runner's CPU details for debugging.
      - name: Print
        run: |
          lscpu

      # Build the image (which produces the wheel under
      # /workspace/vllm-ascend/dist), then copy that dist directory out of the
      # container into the workspace through a bind mount. The mount argument
      # is quoted so a workspace path containing spaces is not word-split.
      - name: Build wheel
        run: |
          ls
          docker build -f ./.github/Dockerfile.buildwheel \
            --build-arg PY_VERSION=${{ matrix.python-version }} \
            -t wheel:v1 .
          docker run --rm \
            -v "$(pwd):/outpwd" \
            wheel:v1 \
            bash -c "cp -r /workspace/vllm-ascend/dist /outpwd"
          ls dist

      - name: Archive wheel
        uses: actions/upload-artifact@v4
        with:
          name: vllm-ascend-${{ matrix.os }}-py${{ matrix.python-version }}-wheel
          path: dist/*

      - name: Set up Python ${{ matrix.python-version }}
        if: startsWith(github.ref, 'refs/tags/')
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
        with:
          python-version: ${{ matrix.python-version }}

      # Only runs for tag refs. The credentials are handed to twine through its
      # TWINE_USERNAME / TWINE_PASSWORD environment variables so the token is
      # never expanded into the shell command line; --non-interactive makes
      # twine fail instead of prompting if the token is missing.
      - name: Release
        if: startsWith(github.ref, 'refs/tags/')
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
        run: |
          python3 -m pip install twine
          python3 -m twine upload --non-interactive --verbose dist/*

.github/workflows/vllm_ascend_test.yaml

Lines changed: 27 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -30,32 +30,27 @@ on:
3030
- '.github/workflows/vllm_ascend_test.yaml'
3131
- '!docs/**'
3232
- 'pytest.ini'
33-
3433
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
3534
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
3635
# It's used to activate ascend-toolkit environment variables.
3736
defaults:
3837
run:
3938
shell: bash -el {0}
4039

41-
concurrency:
42-
group: pr-${{ github.event.pull_request.number }}
43-
cancel-in-progress: true
44-
4540
jobs:
4641
test:
4742
strategy:
4843
max-parallel: 2
4944
matrix:
5045
os: [linux-arm64-npu-1, linux-arm64-npu-4]
51-
vllm_verison: [main, v0.8.5.post1]
46+
vllm_version: [main, v0.8.5.post1]
5247
concurrency:
5348
group: >
54-
${{
55-
matrix.os == 'linux-arm64-npu-4'
56-
&& github.event.pull_request.number
57-
&& format('pr-{0}-limit-npu-4', github.event.pull_request.number)
58-
|| format('job-{0}-{1}-{2}', matrix.os, matrix.vllm_verison, github.event.pull_request.number)
49+
${{
50+
matrix.os == 'linux-arm64-npu-4'
51+
&& github.event.pull_request.number
52+
&& format('pr-{0}-limit-npu-4', github.event.pull_request.number)
53+
|| format('job-{0}-{1}-{2}', matrix.os, matrix.vllm_version, github.event.pull_request.number)
5954
}}
6055
cancel-in-progress: false
6156
name: vLLM Ascend test
@@ -66,6 +61,7 @@ jobs:
6661
env:
6762
HF_ENDPOINT: https://hf-mirror.com
6863
HF_TOKEN: ${{ secrets.HF_TOKEN }}
64+
VLLM_LOGGING_LEVEL: ERROR
6965
steps:
7066
- name: Check npu and CANN info
7167
run: |
@@ -92,7 +88,7 @@ jobs:
9288
uses: actions/checkout@v4
9389
with:
9490
repository: vllm-project/vllm
95-
ref: ${{ matrix.vllm_verison }}
91+
ref: ${{ matrix.vllm_version }}
9692
path: ./vllm-empty
9793

9894
- name: Install vllm-project/vllm from source
@@ -111,59 +107,32 @@ jobs:
111107
VLLM_WORKER_MULTIPROC_METHOD: spawn
112108
run: |
113109
if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
114-
pytest -sv tests/singlecard/test_offline_inference.py
115-
pytest -sv tests/ops
116-
pytest -sv tests/compile
110+
VLLM_USE_MODELSCOPE=True pytest -sv tests/singlecard/test_offline_inference.py
111+
# AscendScheduler doesn't work, fix it later
112+
# pytest -sv tests/singlecard/test_scheduler.py
113+
# guided decoding doesn't work, fix it later
114+
# pytest -sv tests/singlecard/test_guided_decoding.py
115+
pytest -sv tests/singlecard/ --ignore=tests/singlecard/test_offline_inference.py --ignore=tests/singlecard/test_scheduler.py --ignore=tests/singlecard/test_guided_decoding.py
117116
else
118-
pytest -sv -k "QwQ" tests/multicard/test_offline_inference_distributed.py
119-
pytest -sv tests/ops
120-
pytest -sv tests/compile
117+
pytest -sv tests/multicard/test_ilama_lora_tp2.py
118+
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py
121119
fi
122120
123121
- name: Run vllm-project/vllm-ascend test on V0 engine
124122
env:
125123
VLLM_USE_V1: 0
126124
run: |
127125
if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
128-
pytest -sv tests/singlecard/test_offline_inference.py
129-
pytest -sv tests/ops
126+
VLLM_USE_MODELSCOPE=True pytest -sv tests/singlecard/test_offline_inference.py
127+
# AscendScheduler doesn't work, fix it later
128+
# pytest -sv tests/singlecard/test_scheduler.py
129+
# guided decoding doesn't work, fix it later
130+
# pytest -sv tests/singlecard/test_guided_decoding.py
131+
pytest -sv tests/singlecard/ --ignore=tests/singlecard/test_offline_inference.py --ignore=tests/singlecard/test_scheduler.py --ignore=tests/singlecard/test_guided_decoding.py
130132
else
131-
pytest -sv -k "QwQ" tests/multicard/test_offline_inference_distributed.py
132-
pytest -sv -k "DeepSeek" tests/multicard/test_offline_inference_distributed.py
133-
pytest -sv tests/ops
134-
fi
135-
136-
# only run test on spec decode when the related code changed
137-
- name: Check for changes in Speculative Decode
138-
if: github.event_name != 'schedule'
139-
id: filter_spec_decode
140-
uses: dorny/paths-filter@v3
141-
with:
142-
filters: |
143-
speculative_tests_changed:
144-
- ".github/workflows/vllm_ascend_test.yaml"
145-
- "tests/singlecard/spec_decode/**"
146-
- "tests/multicard/spec_decode_e2e/**"
147-
- "vllm_ascend/worker/worker.py"
148-
- "vllm_ascend/worker/model_runner.py"
149-
- "vllm_ascend/worker/multi_step_runner.py"
150-
- "vllm_ascend/worker/multi_step_worker.py"
151-
- "vllm_ascend/worker/draft_model_runner.py"
152-
- "vllm_ascend/patch/worker/patch_common/patch_metrics.py"
153-
- "vllm_ascend/patch/worker/patch_common/patch_spec_decode_worker.py"
154-
- "vllm_ascend/patch/worker/patch_common/patch_multi_step_worker.py"
155-
156-
- name: Run vllm-project/vllm-ascend Speculative Decode test
157-
if: steps.filter_spec_decode.outputs.speculative_tests_changed == 'true' || github.event_name == 'schedule'
158-
run: |
159-
if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
160-
pytest -sv tests/singlecard/spec_decode/e2e/test_mtp_correctness.py # it needs a clean process
161-
pytest -sv tests/singlecard/spec_decode --ignore=tests/singlecard/spec_decode/e2e/test_mtp_correctness.py
133+
pytest -sv tests/multicard/test_ilama_lora_tp2.py
134+
# FIXME: running "VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py" as a whole raises an error, so its tests are invoked individually below.
135+
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
136+
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
137+
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
162138
fi
163-
164-
- name: Run vllm-project/vllm test for V0 Engine
165-
env:
166-
VLLM_USE_V1: 0
167-
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
168-
run: |
169-
pytest -sv

0 commit comments

Comments
 (0)