# Workflow file for the run
# "Refactor turbomind attention by precomputing rotary embed #1111"

name: linux-x64-gpu

# Trigger on pushes and pull requests, but only when at least one changed
# file matches the path filters below. Both events use the same filter list;
# keep the two lists in sync when editing.
on:
  push:
    paths:
      - '.github/workflows/linux-x64-gpu.yml'
      - 'src/**'
      - 'CMakeLists.txt'
      - 'cmake/**'
      - 'examples/**'
      - '3rdparty/**'
      - 'tests/csrc/**'
  pull_request:
    paths:
      - '.github/workflows/linux-x64-gpu.yml'
      - 'src/**'
      - 'CMakeLists.txt'
      - 'cmake/**'
      - 'examples/**'
      - '3rdparty/**'
      - 'tests/csrc/**'
# Runs are grouped per git ref; starting a new run in the same group cancels
# the one that is still in progress.
concurrency:
  group: linux-x64-gpu-${{ github.ref }}
  cancel-in-progress: true

# The workflow token only gets read access to repository contents.
permissions:
  contents: read
jobs:
  # Builds the project inside the lmdeploy builder image, once per CUDA
  # version listed in the matrix.
  build:
    strategy:
      matrix:
        # Quoted so the versions stay strings: an unquoted 12.10 would be
        # parsed as the float 12.1 and select the wrong image tag.
        cudaver: ['11.8', '12.1']
    name: cuda-${{ matrix.cudaver }}
    runs-on: ubuntu-latest
    steps:
      - name: Free disk space
        # NOTE(review): `@main` is a mutable ref; consider pinning to a
        # release tag or commit SHA so the step cannot change underneath us.
        uses: jlumbroso/free-disk-space@main
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Checkout repository
        uses: actions/checkout@v3
      # Runs the build in the builder image whose tag matches the matrix CUDA
      # version. The checked-out workspace is bind-mounted at /work; the
      # script activates the py38 conda environment, configures via
      # generate.sh in a fresh build/ directory, then builds and installs.
      - name: Build
        uses: addnab/docker-run-action@v3
        with:
          image: openmmlab/lmdeploy-builder:cuda${{ matrix.cudaver }}
          options: -v ${{ github.workspace }}:/work
          run: |
            cd /work
            source /opt/conda/bin/activate
            conda activate py38
            mkdir build && cd build
            bash ../generate.sh make
            make -j$(nproc) && make install