# Patch summary (text before the first "diff --git" header is ignored by git apply / patch):
#   * .github/workflows/benchmark.yml: re-enables the previously commented-out
#     linux-benchmark-cuda job by uncommenting it line for line.
#   * LLama.Benchmark/LLamaExecutorBenchmark/Prefill.cs: registers a llama.cpp log callback
#     that prints to the console and forces the native library to load in GlobalSetup.
# Review fixes folded into the added workflow lines (hunk line counts are unchanged):
#   * "apt-get install wget" now passes -y so it cannot stop at a confirmation prompt in CI.
#   * actions/upload-artifact is bumped from the retired v3 to v4.
#     NOTE(review): v4 requires artifact names to be unique within a workflow run; confirm no
#     other job in this workflow uploads an artifact that is also named "Benchmark_Results".
# NOTE(review): leading whitespace of removed/context lines was reconstructed from structure;
#     verify against the target tree before applying.
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 1968a7a4e..d54107f59 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -9,69 +9,69 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  # linux-benchmark-cuda:
-  #   if: contains(github.event.pull_request.labels.*.name, 'benchmark')
-  #   runs-on: [self-hosted, linux, gpu]
+  linux-benchmark-cuda:
+    if: contains(github.event.pull_request.labels.*.name, 'benchmark')
+    runs-on: [self-hosted, linux, gpu]
 
-  #   strategy:
-  #     fail-fast: false
-  #     matrix:
-  #       build: [cuda11]
-  #       include:
-  #         - build: cuda11
-  #           image: nvidia/cuda:11.7.1-devel-ubuntu22.04
-  #           modeldir: /llamasharp_ci/models_benchmark
-  #         # - build: cuda12
-  #         #   image: nvidia/cuda:12.1.1-runtime-ubuntu22.04
+    strategy:
+      fail-fast: false
+      matrix:
+        build: [cuda11]
+        include:
+          - build: cuda11
+            image: nvidia/cuda:11.7.1-devel-ubuntu22.04
+            modeldir: /llamasharp_ci/models_benchmark
+          # - build: cuda12
+          #   image: nvidia/cuda:12.1.1-runtime-ubuntu22.04
 
-  #   container:
-  #     image: ${{ matrix.image }}
-  #     env:
-  #       BENCHMARK_MODEL_DIR: ${{ matrix.modeldir }}
-  #     ports:
-  #       - 80
-  #     volumes:
-  #       - /llamasharp_ci:/llamasharp_ci
-  #     options: --gpus=all --ipc=host --runtime=nvidia
+    container:
+      image: ${{ matrix.image }}
+      env:
+        BENCHMARK_MODEL_DIR: ${{ matrix.modeldir }}
+      ports:
+        - 80
+      volumes:
+        - /llamasharp_ci:/llamasharp_ci
+      options: --gpus=all --ipc=host --runtime=nvidia
 
-  #   steps:
-  #     - uses: actions/checkout@v4
+    steps:
+      - uses: actions/checkout@v4
 
-  #     - name: Install libraries
-  #       run: |
-  #         apt update
-  #         apt install -y curl libicu-dev
-  #         apt-get install wget
-  #         wget https://packages.microsoft.com/config/ubuntu/22.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb
-  #         dpkg -i packages-microsoft-prod.deb
-  #         rm packages-microsoft-prod.deb
-  #         apt-get update && apt-get install -y dotnet-sdk-8.0
+      - name: Install libraries
+        run: |
+          apt update
+          apt install -y curl libicu-dev
+          apt-get install -y wget
+          wget https://packages.microsoft.com/config/ubuntu/22.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb
+          dpkg -i packages-microsoft-prod.deb
+          rm packages-microsoft-prod.deb
+          apt-get update && apt-get install -y dotnet-sdk-8.0
 
-  #     - name: Prepare models
-  #       run: |
-  #         apt-get update
-  #         apt-get install -y python3.10 python3-pip
-  #         python3 --version
-  #         pip install huggingface_hub
-  #         python3 .github/download_models.py --model-dir ${{ matrix.modeldir }} --model-list LLama.Benchmark/Assets/models.txt --endpoint https://hf-mirror.com
+      - name: Prepare models
+        run: |
+          apt-get update
+          apt-get install -y python3.10 python3-pip
+          python3 --version
+          pip install huggingface_hub
+          python3 .github/download_models.py --model-dir ${{ matrix.modeldir }} --model-list LLama.Benchmark/Assets/models.txt --endpoint https://hf-mirror.com
 
-  #     - name: Clear package cache
-  #       run: dotnet clean LLamaSharp.sln && dotnet nuget locals all --clear
-  #     - name: Restore packages
-  #       run: dotnet restore LLamaSharp.sln
-  #     - name: Build
-  #       run: |
-  #         dotnet clean
-  #         dotnet build LLama/LLamaSharp.csproj -c Release --no-restore
-  #         dotnet build LLama.Benchmark/LLama.Benchmark.csproj -c Release --no-restore
-  #     - name: Run benchmark test
-  #       run: dotnet run --project LLama.Benchmark/LLama.Benchmark.csproj -c Release --anyCategories LLama
-  #     - name: Upload artifacts
-  #       if: always()
-  #       uses: actions/upload-artifact@v3
-  #       with:
-  #         name: Benchmark_Results
-  #         path: BenchmarkDotNet.Artifacts/results/*
+      - name: Clear package cache
+        run: dotnet clean LLamaSharp.sln && dotnet nuget locals all --clear
+      - name: Restore packages
+        run: dotnet restore LLamaSharp.sln
+      - name: Build
+        run: |
+          dotnet clean
+          dotnet build LLama/LLamaSharp.csproj -c Release --no-restore
+          dotnet build LLama.Benchmark/LLama.Benchmark.csproj -c Release --no-restore
+      - name: Run benchmark test
+        run: dotnet run --project LLama.Benchmark/LLama.Benchmark.csproj -c Release --anyCategories LLama
+      - name: Upload artifacts
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: Benchmark_Results
+          path: BenchmarkDotNet.Artifacts/results/*
 
   windows-benchmark-cuda:
     if: contains(github.event.pull_request.labels.*.name, 'benchmark')
diff --git a/LLama.Benchmark/LLamaExecutorBenchmark/Prefill.cs b/LLama.Benchmark/LLamaExecutorBenchmark/Prefill.cs
index 7c540d081..d664c9b78 100644
--- a/LLama.Benchmark/LLamaExecutorBenchmark/Prefill.cs
+++ b/LLama.Benchmark/LLamaExecutorBenchmark/Prefill.cs
@@ -6,6 +6,7 @@
 using BenchmarkDotNet.Jobs;
 using LLama.Abstractions;
 using LLama.Common;
+using LLama.Native;
 
 namespace LLama.Benchmark.LLamaExecutorBenchmark
 {
@@ -100,6 +101,17 @@ private void InitializeParamsAndModel()
         [GlobalSetup(Targets = [nameof(Basic)])]
         public void GlobalSetup()
         {
+            var showLLamaCppLogs = true;
+            NativeLibraryConfig
+                .Instance
+                .WithLogCallback((level, message) =>
+                {
+                    if (showLLamaCppLogs)
+                        Console.WriteLine($"[llama {level}]: {message.TrimEnd('\n')}");
+                });
+
+            // Calling this method forces loading to occur now.
+            NativeApi.llama_empty_call();
             InitializeParamsAndModel();
         }
 