Skip to content

[VLM] Set stop token ids from default generation config in VLM pipeline #907

[VLM] Set stop token ids from default generation config in VLM pipeline

[VLM] Set stop token ids from default generation config in VLM pipeline #907

Workflow file for this run

# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
name: GenAI tools
on:
workflow_dispatch:
pull_request:
merge_group:
push:
branches:
- master
- 'releases/**'
permissions: read-all # Required by https://github.com/ossf/scorecard/blob/e23b8ad91fd6a64a0a971ca4fc0a4d1650725615/docs/checks.md#token-permissions
concurrency:
# github.ref is not unique in post-commit
group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-llm-bench-python
cancel-in-progress: true
jobs:
openvino_download:
name: Download OpenVINO
outputs:
status: ${{ steps.openvino_download.outcome }}
ov_artifact_name: ${{ steps.openvino_download.outputs.ov_artifact_name }}
ov_wheel_source: ${{ steps.openvino_download.outputs.ov_wheel_source }}
ov_version: ${{ steps.openvino_download.outputs.ov_version }}
timeout-minutes: 10
defaults:
run:
shell: bash
runs-on: aks-linux-2-cores-8gb
container:
image: 'openvinogithubactions.azurecr.io/openvino_provider:0.1.0'
volumes:
- /mount:/mount
- ${{ github.workspace }}:${{ github.workspace }}
steps:
- uses: openvinotoolkit/openvino/.github/actions/openvino_provider@master
id: openvino_download
with:
platform: ubuntu22
commit_packages_to_provide: wheels
revision: latest_available_commit
llm_bench:
name: 'LLM bench tests'
defaults:
run:
shell: bash
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
python-version: ["3.11"]
needs: [ openvino_download ]
env:
OV_INSTALL_DIR: ${{ github.workspace }}/ov
SRC_DIR: ${{ github.workspace }}
LLM_BENCH_PYPATH: ${{ github.workspace }}/tools/llm_bench
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: recursive
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: ${{ matrix.python-version }}
- name: Lint with flake8
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest black
# stop the build if there are Python syntax errors or undefined names
python -m flake8 ${{ env.LLM_BENCH_PYPATH }} --config=${{ env.LLM_BENCH_PYPATH }}/setup.cfg
- name: Download OpenVINO package
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
name: ${{ needs.openvino_download.outputs.ov_artifact_name }}
path: ${{ env.OV_INSTALL_DIR }}
merge-multiple: true
- name: Install dependencies
run: |
python -m pip install ${{ env.SRC_DIR }}/thirdparty/openvino_tokenizers -v ${{ needs.openvino_download.outputs.ov_wheel_source }}
python -m pip install ${{ env.SRC_DIR }} -v ${{ needs.openvino_download.outputs.ov_wheel_source }}
python -m pip install -r ${{ env.LLM_BENCH_PYPATH }}/requirements.txt ${{ needs.openvino_download.outputs.ov_wheel_source }}
working-directory: ${{ env.OV_INSTALL_DIR }}
- name: Test native pytorch model
run: |
huggingface-cli download katuni4ka/tiny-random-qwen --local-dir ./tiny-random-qwen
python ./tools/llm_bench/benchmark.py -m ./tiny-random-qwen -d cpu -n 1 -f pt -ic 20
rm -rf ./tiny-random-qwen
- name: Test katuni4ka/tiny-random-baichuan2 Optimum Intel
run: |
optimum-cli export openvino --model katuni4ka/tiny-random-baichuan2 --trust-remote-code --weight-format fp16 ./ov_models/tiny-random-baichuan2
python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-baichuan2/ -d cpu -n 1 --optimum -ic 10
rm -rf ./ov_models/
- name: Create prompt file for for image generation
run: |
prompt="side profile centered painted portrait, Gandhi rolling a blunt, Gloomhaven, matte painting concept art, art nouveau, 8K HD Resolution, beautifully background"
json_template='{steps: $steps, width: $width, height: $height, guidance_scale: $guidance_scale, prompt: $prompt}'
jq -n -c --arg steps "30" --arg width "64" --arg height "128" --arg guidance_scale "1.0" --arg prompt "$prompt" "$json_template" > ./image_generation.jsonl
jq -n -c --arg steps "4" --arg width "64" --arg height "32" --arg guidance_scale "7.0" --arg prompt "$prompt" "$json_template" >> ./image_generation.jsonl
- name: Test echarlaix/tiny-random-latent-consistency Optimum Intel
run: |
optimum-cli export openvino --model echarlaix/tiny-random-latent-consistency --trust-remote-code --weight-format fp16 ./ov_models/tiny-random-latent-consistency
python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-latent-consistency/ -pf ./image_generation.jsonl -d cpu -n 1 --optimum --num_steps 4
- name: Test echarlaix/tiny-random-latent-consistency with GenAI
run: python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-latent-consistency/ -pf ./image_generation.jsonl -d cpu -n 1 --num_steps 4
- name: Test echarlaix/tiny-random-latent-consistency with GenAI and LoRA
run: |
huggingface-cli download katuni4ka/tiny-random-latent-consistency-lora --local-dir ./lora
python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-latent-consistency/ -pf ./image_generation.jsonl -d cpu -n 1 --num_steps 4 --lora ./lora/tiny-random-latent-consistency-lora.safetensors --lora_alphas 0.7
rm -rf ./ov_models/ ./lora ./image_generation.jsonl
- name: Test TinyLlama-1.1B-Chat-v1.0 in Speculative Decoding via GenAI
run: |
optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --trust-remote-code --weight-format fp16 ov_models/TinyLlama-1.1B-Chat-v1.0/FP16
optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --trust-remote-code --weight-format int8 ov_models/TinyLlama-1.1B-Chat-v1.0/INT8
python ./tools/llm_bench/benchmark.py -m ./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16/ --draft_model ./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8/ -p "Why is the Sun yellow?" -d cpu --draft_device cpu -n 1 --assistant_confidence_threshold 0.4 -ic 20
python ./tools/llm_bench/benchmark.py -m ./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16/ --draft_model ./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8/ -p "Why is the Sun yellow?" -d cpu --draft_device cpu -n 1 --num_assistant_tokens 5 -ic 20
rm -rf ov_models/TinyLlama-1.1B-Chat-v1.0
- name: Test openai/whisper-tiny via Optimum
run: |
GIT_LFS_SKIP_SMUDGE=1 git clone --depth 1 --branch main --single-branch https://huggingface.co/datasets/facebook/multilingual_librispeech
cd multilingual_librispeech
git lfs pull -I /data/mls_polish/train/audio/3283_1447_000.tar.gz
mkdir data/mls_polish/train/audio/3283_1447_000
tar zxvf data/mls_polish/train/audio/3283_1447_000.tar.gz -C data/mls_polish/train/audio/3283_1447_000/
cd ..
optimum-cli export openvino --trust-remote-code --model openai/whisper-tiny ./ov_models/whisper-tiny
python ./tools/llm_bench/benchmark.py -m ./ov_models/whisper-tiny --media multilingual_librispeech/data/mls_polish/train/audio/3283_1447_000/3283_1447_000000.flac -d cpu -n 1 --optimum
- name: Test openai/whisper-tiny via GenAI
run: |
python ./tools/llm_bench/benchmark.py -m ./ov_models/whisper-tiny --media multilingual_librispeech/data/mls_polish/train/audio/3283_1447_000/3283_1447_000000.flac -d cpu -n 1
rm -rf ./ov_models/whisper-tiny
rm -rf multilingual_librispeech
- name: Test katuni4ka/tiny-random-llava via Optimum
run: |
optimum-cli export openvino --model katuni4ka/tiny-random-llava ./ov_models/tiny-random-llava --task image-text-to-text --trust-remote-code
python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-llava --media https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 --prompt "What is unusual on this image?" -ic 20 --optimum
- name: Test katuni4ka/tiny-random-llava via GenAI
run: |
python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-llava --media https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 --prompt "What is unusual on this image?" -ic 20
rm -rf ./ov_models
wwb:
name: 'WWB tests'
defaults:
run:
shell: bash
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
python-version: ["3.11"]
needs: [ openvino_download ]
env:
OV_INSTALL_DIR: ${{ github.workspace }}/ov
SRC_DIR: ${{ github.workspace }}
WWB_PATH: ${{ github.workspace }}/tools/who_what_benchmark
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: recursive
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: ${{ matrix.python-version }}
- name: Lint with flake8
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest black
# stop the build if there are Python syntax errors or undefined names
python -m flake8 ${{ env.WWB_PATH }} --config=${{ env.WWB_PATH }}/setup.cfg
- name: Download OpenVINO package
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
name: ${{ needs.openvino_download.outputs.ov_artifact_name }}
path: ${{ env.OV_INSTALL_DIR }}
merge-multiple: true
- name: Install dependencies
run: |
python -m pip install ${{ env.SRC_DIR }}/thirdparty/openvino_tokenizers -v ${{ needs.openvino_download.outputs.ov_wheel_source }}
python -m pip install ${{ env.SRC_DIR }} -v ${{ needs.openvino_download.outputs.ov_wheel_source }}
python -m pip install -r ${{ env.WWB_PATH }}/requirements.txt ${{ needs.openvino_download.outputs.ov_wheel_source }}
python -m pip install git+https://github.com/huggingface/optimum-intel.git@main#egg=optimum-intel
working-directory: ${{ env.OV_INSTALL_DIR }}
- name: WWB Tests
run: |
python -m pip install -v ${{ env.WWB_PATH }}
python -m pytest -v ${{ env.WWB_PATH }}/tests
Overall_Status:
name: ci/gha_overall_status_llm_bench
needs: [openvino_download, llm_bench, wwb]
if: ${{ always() }}
runs-on: ubuntu-latest
steps:
- name: Check status of all jobs
if: >-
${{
contains(needs.*.result, 'failure') ||
contains(needs.*.result, 'cancelled')
}}
run: exit 1