Skip to content

Commit

Permalink
Merge branch 'main' into qwen2audio_new
Browse files Browse the repository at this point in the history
  • Loading branch information
DarkLight1337 committed Oct 23, 2024
2 parents 663a8fe + e7116c0 commit 2261d8f
Show file tree
Hide file tree
Showing 227 changed files with 8,485 additions and 2,144 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Llama-3.2-1B-Instruct-quantized.w8a8 -b "auto" -l 1000 -f 5 -t 1
model_name: "neuralmagic/Llama-3.2-1B-Instruct-quantized.w8a8"
tasks:
- name: "gsm8k"
metrics:
- name: "exact_match,strict-match"
value: 0.356
- name: "exact_match,flexible-extract"
value: 0.358
limit: 1000
num_fewshot: 5
2 changes: 1 addition & 1 deletion .buildkite/lm-eval-harness/configs/models-small.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Meta-Llama-3-8B-Instruct.yaml
Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml
Meta-Llama-3-8B-Instruct-INT8-compressed-tensors.yaml
Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml
Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml
Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml
Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml
Expand Down
13 changes: 11 additions & 2 deletions .buildkite/test-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -310,13 +310,22 @@ steps:
- pytest -v -s models/test_oot_registration.py # it needs a clean process
- pytest -v -s models/*.py --ignore=models/test_oot_registration.py

- label: Decoder-only Language Models Test # 1h36min
- label: Decoder-only Language Models Test (Standard) # 35min
#mirror_hardwares: [amd]
source_file_dependencies:
- vllm/
- tests/models/decoder_only/language
commands:
- pytest -v -s models/decoder_only/language
- pytest -v -s models/decoder_only/language/test_models.py
- pytest -v -s models/decoder_only/language/test_big_models.py

- label: Decoder-only Language Models Test (Extended) # 1h20min
nightly: true
source_file_dependencies:
- vllm/
- tests/models/decoder_only/language
commands:
- pytest -v -s models/decoder_only/language --ignore=models/decoder_only/language/test_models.py --ignore=models/decoder_only/language/test_big_models.py

- label: Decoder-only Multi-Modal Models Test # 1h31min
#mirror_hardwares: [amd]
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/actionlint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,5 @@ jobs:

- name: "Run actionlint"
run: |
echo "::add-matcher::.github/workflows/matchers/actionlint.json"
tools/actionlint.sh -color
2 changes: 1 addition & 1 deletion .github/workflows/add_label_automerge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Add label
uses: actions/github-script@v7
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
with:
script: |
github.rest.issues.addLabels({
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/clang-format.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ jobs:
matrix:
python-version: ["3.11"]
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand All @@ -38,4 +38,4 @@ jobs:
)
find csrc/ \( -name '*.h' -o -name '*.cpp' -o -name '*.cu' -o -name '*.cuh' \) -print \
| grep -vFf <(printf "%s\n" "${EXCLUDES[@]}") \
| xargs clang-format --dry-run --Werror
| xargs clang-format --dry-run --Werror
16 changes: 16 additions & 0 deletions .github/workflows/matchers/mypy.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"problemMatcher": [
{
"owner": "mypy",
"pattern": [
{
"regexp": "^(.+):(\\d+):\\s(error|warning):\\s(.+)$",
"file": 1,
"line": 2,
"severity": 3,
"message": 4
}
]
}
]
}
17 changes: 17 additions & 0 deletions .github/workflows/matchers/ruff.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"problemMatcher": [
{
"owner": "ruff",
"pattern": [
{
"regexp": "^(.+?):(\\d+):(\\d+): (\\w+): (.+)$",
"file": 1,
"line": 2,
"column": 3,
"code": 4,
"message": 5
}
]
}
]
}
7 changes: 4 additions & 3 deletions .github/workflows/mypy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ jobs:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand All @@ -32,4 +32,5 @@ jobs:
pip install types-setuptools
- name: Mypy
run: |
tools/mypy.sh
echo "::add-matcher::.github/workflows/matchers/mypy.json"
tools/mypy.sh 1
12 changes: 6 additions & 6 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
upload_url: ${{ steps.create_release.outputs.upload_url }}
steps:
- name: Checkout
uses: actions/checkout@v4
uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1

- name: Extract branch info
shell: bash
Expand All @@ -30,7 +30,7 @@ jobs:
- name: Create Release
id: create_release
uses: "actions/github-script@v7"
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
env:
RELEASE_TAG: ${{ env.release_tag }}
with:
Expand All @@ -54,10 +54,10 @@ jobs:

steps:
- name: Checkout
uses: actions/checkout@v4
uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1

- name: Setup ccache
uses: hendrikmuhs/[email protected]
uses: hendrikmuhs/ccache-action@ed74d11c0b343532753ecead8a951bb09bb34bc9 # v1.2.14
with:
create-symlink: true
key: ${{ github.job }}-${{ matrix.python-version }}-${{ matrix.cuda-version }}
Expand All @@ -68,7 +68,7 @@ jobs:
bash -x .github/workflows/scripts/env.sh
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
with:
python-version: ${{ matrix.python-version }}

Expand All @@ -92,7 +92,7 @@ jobs:
echo "asset_name=${asset_name}" >> "$GITHUB_ENV"
- name: Upload Release Asset
uses: actions/upload-release-asset@v1
uses: actions/upload-release-asset@e8f9f06c4b078e705bd2ea027f0926603fc9b4d5 # v1.0.2
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/reminder_comment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Remind to run full CI on PR
uses: actions/github-script@v7
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
with:
script: |
github.rest.issues.createComment({
Expand Down
7 changes: 4 additions & 3 deletions .github/workflows/ruff.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ jobs:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand All @@ -28,7 +28,8 @@ jobs:
pip install -r requirements-lint.txt
- name: Analysing the code with ruff
run: |
ruff check .
echo "::add-matcher::.github/workflows/matchers/ruff.json"
ruff check --output-format github .
- name: Spelling check with codespell
run: |
codespell --toml pyproject.toml
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/yapf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ jobs:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand Down
14 changes: 8 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -169,12 +169,12 @@ endif()

#
# Use FetchContent for C++ dependencies that are compiled as part of vLLM's build process.
# Configure it to place files in vllm/.deps, in order to play nicely with sccache.
# setup.py will override FETCHCONTENT_BASE_DIR to play nicely with sccache.
# Each dependency that produces build artifacts should override its BINARY_DIR to avoid
# conflicts between build types. It should instead be set to ${CMAKE_BINARY_DIR}/<dependency>.
#
include(FetchContent)
get_filename_component(PROJECT_ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE)
file(MAKE_DIRECTORY "${FETCHCONTENT_BASE_DIR}")
set(FETCHCONTENT_BASE_DIR "${PROJECT_ROOT_DIR}/.deps")
file(MAKE_DIRECTORY ${FETCHCONTENT_BASE_DIR}) # Ensure the directory exists
message(STATUS "FetchContent base directory: ${FETCHCONTENT_BASE_DIR}")

#
Expand Down Expand Up @@ -252,7 +252,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
message(STATUS "Building Marlin kernels for archs: ${MARLIN_ARCHS}")
else()
message(STATUS "Not building Marlin kernels as no compatible archs found"
"in CUDA target architectures")
" in CUDA target architectures")
endif()

#
Expand Down Expand Up @@ -432,7 +432,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
message(STATUS "Building Marlin MOE kernels for archs: ${MARLIN_MOE_ARCHS}")
else()
message(STATUS "Not building Marlin MOE kernels as no compatible archs found"
"in CUDA target architectures")
" in CUDA target architectures")
endif()
endif()

Expand Down Expand Up @@ -509,6 +509,8 @@ else()
GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git
GIT_TAG 013f0c4fc47e6574060879d9734c1df8c5c273bd
GIT_PROGRESS TRUE
# Don't share the vllm-flash-attn build between build types
BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn
)
endif()

Expand Down
8 changes: 4 additions & 4 deletions Dockerfile.openvino
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ RUN --mount=type=bind,source=.git,target=.git \
if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi

# install build requirements
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" python3 -m pip install -r /workspace/vllm/requirements-build.txt
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" python3 -m pip install -r /workspace/requirements-build.txt
# build vLLM with OpenVINO backend
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVICE="openvino" python3 -m pip install /workspace/vllm/
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVICE="openvino" python3 -m pip install /workspace

COPY examples/ /workspace/vllm/examples
COPY benchmarks/ /workspace/vllm/benchmarks
COPY examples/ /workspace/examples
COPY benchmarks/ /workspace/benchmarks

CMD ["/bin/bash"]
2 changes: 1 addition & 1 deletion Dockerfile.ppc64le
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ WORKDIR /workspace/

RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks

ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
ENTRYPOINT ["/opt/conda/bin/python3", "-m", "vllm.entrypoints.openai.api_server"]
Loading

0 comments on commit 2261d8f

Please sign in to comment.