Update Binaries #21
Workflow file for this run
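# Builds native llama.cpp binaries: four CPU variants for Linux and Windows,
# plus optional CUDA (cuBLAS) and macOS Metal builds, then gathers everything
# into a single 'deps' artifact. The CUDA and macOS jobs run only when the
# matching workflow_dispatch input is checked; the weekly cron trigger is
# currently commented out.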
name: Update Binaries

on:
  workflow_dispatch:
    inputs:
      cublas:
        type: boolean
        description: Build CUBLAS binaries
      macos:
        type: boolean
        description: Build macOS binaries
  push:
    branches: [cron_job]
  #schedule:
  #  - cron: "22 22 * * 2"

jobs:
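  # The CPU jobs build four variants per OS so the consuming code can pick a
  # binary matching the host's instruction-set support (noavx, avx, avx2 and
  # avx512); avx2 is llama.cpp's default CMake configuration.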
  compile-linux:
    name: Compile (Linux)
    strategy:
      fail-fast: true
      matrix:
        include:
          - build: 'noavx'
            defines: '-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DBUILD_SHARED_LIBS=ON'
          - build: 'avx2'
            defines: '-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF -DBUILD_SHARED_LIBS=ON'
          - build: 'avx'
            defines: '-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
          - build: 'avx512'
            defines: '-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          repository: ggerganov/llama.cpp
      - name: Build
        id: cmake_build
        run: |
          mkdir build
          cd build
          cmake .. ${{ matrix.defines }}
          cmake --build . --config Release -j $(nproc)
      - uses: actions/upload-artifact@v3
        with:
          path: ./build/libllama.so
          name: llama-bin-linux-${{ matrix.build }}-x64.so
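  # The Windows matrix mirrors the Linux one; -DLLAMA_NATIVE=OFF keeps the
  # compiler from auto-enabling host-specific instructions on the runner.
  # windows-latest runs scripts under PowerShell, where
  # ${env:NUMBER_OF_PROCESSORS} is the correct way to read the core count.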
  compile-windows:
    name: Compile (Windows)
    strategy:
      fail-fast: true
      matrix:
        include:
          - build: 'noavx'
            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=OFF -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DBUILD_SHARED_LIBS=ON'
          - build: 'avx2'
            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=OFF -DBUILD_SHARED_LIBS=ON'
          - build: 'avx'
            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=OFF -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
          - build: 'avx512'
            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=OFF -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v3
        with:
          repository: ggerganov/llama.cpp
      - name: Build
        id: cmake_build
        run: |
          mkdir build
          cd build
          cmake .. ${{ matrix.defines }}
          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
      - name: Upload artifacts
        uses: actions/upload-artifact@v3
        with:
          path: .\build\bin\Release\llama.dll
          name: llama-bin-win-${{ matrix.build }}-x64.dll
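  # Runs only when the 'cublas' input is checked: two CUDA toolkit versions
  # on both Linux and Windows, four cuBLAS artifacts in total.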
  compile-cublas:
    if: ${{ github.event.inputs.cublas == 'true' }}
    name: Compile (cublas)
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest]
        cuda: ['12.1.0', '11.7.1']
    runs-on: ${{ matrix.os }}
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          repository: ggerganov/llama.cpp
      - uses: Jimver/cuda-toolkit@v0.2.11 # version pin garbled in the source; v0.2.11 assumed
        if: runner.os == 'Windows'
        id: cuda-toolkit-windows
        with:
          cuda: ${{ matrix.cuda }}
          method: 'network'
          sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
      - uses: Jimver/cuda-toolkit@v0.2.11 # version pin garbled in the source; v0.2.11 assumed
        if: runner.os == 'Linux'
        id: cuda-toolkit-linux
        with:
          cuda: ${{ matrix.cuda }}
          method: 'network'
          linux-local-args: '["--toolkit"]'
      - name: Build
        id: cmake_build
        run: |
          mkdir build
          cd build
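          # cuBLAS backend build; tests, examples and the server are skipped to
          # keep it fast. A bare -j is used below because this script runs under
          # bash on Linux and PowerShell on Windows, so no single core-count
          # expression works on both; the native build tool picks the job count.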
          cmake .. -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF
          cmake --build . --config Release -j
          ls -R
      - name: Upload artifacts (Windows)
        if: ${{ matrix.os == 'windows-latest' }}
        uses: actions/upload-artifact@v3
        with:
          path: .\build\bin\Release\llama.dll
          name: llama-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll
      - name: Upload artifacts (Linux)
        if: ${{ matrix.os == 'ubuntu-latest' }}
        uses: actions/upload-artifact@v3
        with:
          path: ./build/libllama.so
          name: llama-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
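  # Runs only when the 'macos' input is checked. Builds an arm64 (Apple
  # Silicon) dylib; no LLAMA_METAL flag is passed, presumably relying on
  # llama.cpp enabling the Metal backend by default on Apple platforms.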
  compile-macos:
    if: ${{ github.event.inputs.macos == 'true' }}
    name: Compile (macOS)
    strategy:
      fail-fast: true
      matrix:
        include:
          - build: 'metal'
            defines: '-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF -DBUILD_SHARED_LIBS=ON -DLLAMA_NATIVE=OFF -DCMAKE_OSX_ARCHITECTURES=arm64'
    runs-on: macos-latest
    steps:
      - uses: actions/checkout@v3
        with:
          repository: ggerganov/llama.cpp
      - name: Dependencies
        continue-on-error: true
        run: |
          brew update
      - name: Build
        id: cmake_build
        run: |
          mkdir build
          cd build
          cmake .. ${{ matrix.defines }}
          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
      - name: Upload artifacts
        uses: actions/upload-artifact@v3
        with:
          path: ./build/libllama.dylib
          name: llama-bin-macos-${{ matrix.build }}.dylib
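      # The Metal backend loads ggml-metal.metal at runtime, so the shader
      # source is uploaded alongside the dylib.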
      - name: Upload Metal
        uses: actions/upload-artifact@v3
        with:
          path: ./build/bin/ggml-metal.metal
          name: ggml-metal.metal
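  # Downloads all artifacts produced above and rearranges them into the final
  # layout; if: always() lets this job run even when the optional CUDA/macOS
  # jobs were skipped.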
  build-deps:
    runs-on: ubuntu-latest
    name: "Gather Binaries"
    if: ${{ always() }}
    needs: [
      "compile-linux",
      "compile-macos",
      "compile-windows",
      "compile-cublas"
    ]
    steps:
      - uses: actions/download-artifact@v3
        with:
          path: artifacts
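      # Final layout: deps/ holds the noavx binaries with avx/avx2/avx512
      # subfolders; Windows llama.dll is renamed to libllama.dll, presumably so
      # every platform's library can be loaded under one name.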
      - name: Rearrange Files
        run: |
          ls -R
          mkdir deps
          mkdir deps/avx
          mkdir deps/avx2
          mkdir deps/avx512
          cp artifacts/llama-bin-linux-noavx-x64.so/libllama.so deps/libllama.so
          cp artifacts/llama-bin-linux-avx-x64.so/libllama.so deps/avx/libllama.so
          cp artifacts/llama-bin-linux-avx2-x64.so/libllama.so deps/avx2/libllama.so
          cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so
          cp artifacts/llama-bin-win-noavx-x64.dll/llama.dll deps/libllama.dll
          cp artifacts/llama-bin-win-avx-x64.dll/llama.dll deps/avx/libllama.dll
          cp artifacts/llama-bin-win-avx2-x64.dll/llama.dll deps/avx2/libllama.dll
          cp artifacts/llama-bin-win-avx512-x64.dll/llama.dll deps/avx512/libllama.dll
      - name: Rearrange macOS files
        if: ${{ github.event.inputs.macos == 'true' }}
        run: |
          mkdir deps/macos-metal
          cp artifacts/llama-bin-macos-metal.dylib/libllama.dylib deps/macos-metal/libllama.dylib
          cp artifacts/ggml-metal.metal/ggml-metal.metal deps/macos-metal/ggml-metal.metal
      - name: Rearrange CUDA files
        if: ${{ github.event.inputs.cublas == 'true' }}
        run: |
          mkdir cuda_deps
          mkdir cuda_deps/cu11.7.1
          mkdir cuda_deps/cu12.1.0
          cp artifacts/llama-bin-win-cublas-cu11.7.1-x64.dll/llama.dll cuda_deps/cu11.7.1/libllama.dll
          cp artifacts/llama-bin-linux-cublas-cu11.7.1-x64.so/libllama.so cuda_deps/cu11.7.1/libllama.so
          cp artifacts/llama-bin-win-cublas-cu12.1.0-x64.dll/llama.dll cuda_deps/cu12.1.0/libllama.dll
          cp artifacts/llama-bin-linux-cublas-cu12.1.0-x64.so/libllama.so cuda_deps/cu12.1.0/libllama.so
      - name: Upload artifacts
        uses: actions/upload-artifact@v3
        with:
          path: deps/
          name: deps
      - name: Upload artifacts (CUDA12)
        if: ${{ github.event.inputs.cublas == 'true' }}
        uses: actions/upload-artifact@v3
        with:
          path: cuda_deps/cu12.1.0/
          name: cu12.1.0
      - name: Upload artifacts (CUDA11)
        if: ${{ github.event.inputs.cublas == 'true' }}
        uses: actions/upload-artifact@v3
        with:
          path: cuda_deps/cu11.7.1/
          name: cu11.7.1
      - name: Remove Artifacts
        uses: geekyeggo/delete-artifact@v2
        with:
          name: |
            llama-*