# Source file: chatbot-inference-llama-2-7b_70b-chat-hf-hpu.yml (51 lines, 40 loc, 2.9 KB).
# Note: the upstream repository was archived by its owner on Oct 25, 2024 and is now read-only.
# Reusable workflow (triggered only through workflow_call).
# It builds the "hpu" target of the Dockerfile under
# intel_extension_for_transformers/neural_chat/docker, starts a container
# with the habana runtime, and runs workflows/chatbot/inference/generate.py
# against meta-llama/Llama-2-7b-chat-hf twice: once directly, and once through
# gaudi_spawn.py with DeepSpeed (world size 2).
name: Chatbot inference on llama-2-7b-chat-hf with hpu and deepspeed

on:
  workflow_call:

# One active run per pull request (or per ref when there is no pull request);
# a newer run in the same group cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-inf-lla-7b-hpu
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  inference:
    name: inference test
    # Runner label is kept verbatim; it must match the label registered on
    # the runner, so do not "correct" its spelling here.
    runs-on: guadi2-4
    steps:
      # Remove the previous checkout left behind in the runner's work dir.
      - name: Clean Up Working Directory
        run: sudo rm -rf ~/itrex-actions-runner/_work/intel-extension-for-transformers/intel-extension-for-transformers/*

      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

      # Export every KEY=VALUE line of the runner-local .env file to the
      # following steps; the env.* values referenced below (proxy settings,
      # HF_ACCESS_TOKEN) are presumably defined there - verify on the runner.
      - name: Load environment variables
        run: cat ~/itrex-actions-runner/.env >> $GITHUB_ENV

      # REPO falls back to the current repository when the triggering event
      # carries no pull request (otherwise it would expand to "<server>/.git").
      # The prune commands use -f instead of piping `yes` into the prompt.
      - name: Build Docker Image
        run: |
          docker build --no-cache ./ --target hpu \
            --build-arg REPO=${{ github.server_url }}/${{ github.event.pull_request.head.repo.full_name || github.repository }}.git \
            --build-arg REPO_PATH="." \
            --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" \
            --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" \
            -f intel_extension_for_transformers/neural_chat/docker/Dockerfile \
            -t chatbotinfer-hpu:latest \
            && docker container prune -f \
            && docker image prune -f

      # `docker ps -a` also finds exited/created containers that still hold
      # the name; with plain `docker ps` those would make `docker run --name`
      # fail with a name conflict. $cid is intentionally unquoted so several
      # matching IDs are passed as separate arguments.
      - name: Start Docker Container
        run: |
          cid=$(docker ps -aq --filter "name=chatbotinfer-hpu")
          if [[ -n "$cid" ]]; then docker stop $cid && docker rm $cid; fi
          docker run -tid --runtime=habana \
            -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub \
            -e http_proxy="${{ env.HTTP_PROXY_CONTAINER_RUN }}" \
            -e https_proxy="${{ env.HTTPS_PROXY_CONTAINER_RUN }}" \
            --name="chatbotinfer-hpu" \
            --hostname="chatbotinfer-hpu-container" \
            chatbotinfer-hpu:latest

      # The backslash-newlines sit inside the double-quoted bash -c string,
      # where the shell removes them, so the container receives one command.
      - name: Run Inference Test without DeepSpeed
        run: |
          docker exec "chatbotinfer-hpu" bash -c "cd /intel-extension-for-transformers; \
            python workflows/chatbot/inference/generate.py \
            --base_model_path \"meta-llama/Llama-2-7b-chat-hf\" \
            --hf_access_token \"${{ env.HF_ACCESS_TOKEN }}\" \
            --habana --use_hpu_graphs \
            --instructions \"Transform the following sentence into one that shows contrast. The tree is rotten.\" "

      # Same script launched through gaudi_spawn.py with DeepSpeed on two
      # processes; HABANA_VISIBLE_MODULES is set to "0,1" for this run.
      - name: Run Inference Test with DeepSpeed
        run: |
          docker exec "chatbotinfer-hpu" bash -c "cd /intel-extension-for-transformers; \
            export HABANA_VISIBLE_MODULES=\"0,1\"; \
            python workflows/chatbot/utils/gaudi_spawn.py --use_deepspeed --world_size 2 \
            workflows/chatbot/inference/generate.py \
            --base_model_path \"meta-llama/Llama-2-7b-chat-hf\" \
            --hf_access_token \"${{ env.HF_ACCESS_TOKEN }}\" \
            --habana --use_hpu_graphs --use_kv_cache --task chat \
            --instructions \"Transform the following sentence into one that shows contrast. The tree is rotten.\" "

      # always() (rather than success() || failure()) so the container is
      # also torn down when the run is cancelled, which cancel-in-progress
      # above makes a routine event.
      - name: Stop Container
        if: always()
        run: |
          cid=$(docker ps -aq --filter "name=chatbotinfer-hpu")
          if [[ -n "$cid" ]]; then docker stop $cid && docker rm $cid; fi

      # Runs only when every previous step succeeded (default step condition).
      - name: Test Summary
        run: echo "Inference completed successfully"