feat: add benchmark test for prefill.
AsakusaRinne committed May 7, 2024
1 parent c07c4cc commit 413c23c
Showing 6 changed files with 5,549 additions and 2 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/benchmark.yml
@@ -71,5 +71,5 @@ jobs:
       if: always()
       uses: actions/upload-artifact@v3
       with:
-        path: logs/ # TODO: change it
-        name: logs
+        name: Benchmark_Results
+        path: BenchmarkDotNet.Artifacts/results/*
5,379 changes: 5,379 additions & 0 deletions LLama.Benchmark/Assets/TextCompletionPrompts.txt

(Large diff not rendered: 5,379 added lines of prompt text.)

15 changes: 15 additions & 0 deletions LLama.Benchmark/Common.cs
@@ -0,0 +1,15 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace LLama.Benchmark
{
    public enum ExecutorType
    {
        Interactive,
        Instruct,
        Stateless
    }
}
21 changes: 21 additions & 0 deletions LLama.Benchmark/Constants.cs
@@ -0,0 +1,21 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;

namespace LLama.Benchmark
{
    internal static class Constants
    {
        public static readonly string Generative7BModelPath = "Models/llama-2-7b-chat.Q3_K_S.gguf";
        public static readonly string EmbeddingModelPath = "Models/all-MiniLM-L12-v2.Q8_0.gguf";

        public static readonly string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf";
        public static readonly string LLavaMmpPath = "Models/mmproj-model-f16.gguf";
        public static readonly string LLavaImage = "Models/extreme-ironing-taxi-610x427.jpg";

        public static readonly string TextCompletionPromptsFilePath = "Assets/TextCompletionPrompts.txt";
    }
}
4 changes: 4 additions & 0 deletions LLama.Benchmark/LLama.Benchmark.csproj
@@ -29,6 +29,10 @@
     <None Update="Models\llama-2-7b-chat.Q3_K_S.gguf">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </None>
+
+    <None Update="Assets\TextCompletionPrompts.txt">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
   </ItemGroup>

 </Project>
128 changes: 128 additions & 0 deletions LLama.Benchmark/LLamaExecutorBenchmark/Prefill.cs
@@ -0,0 +1,128 @@
#pragma warning disable CS8618

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Engines;
using BenchmarkDotNet.Jobs;
using LLama.Abstractions;
using LLama.Common;

namespace LLama.Benchmark.LLamaExecutorBenchmark
{
    [BenchmarkCategory("LLamaExecutor")]
    [SimpleJob(RunStrategy.Monitoring, runtimeMoniker: RuntimeMoniker.Net80)]
    [MinWarmupCount(1)]
    [MaxWarmupCount(2)]
    [MinIterationCount(1)]
    [MaxIterationCount(16)]
    public class PrefillBenchmark
    {
        /// <summary>
        /// (prompt length, context length)
        /// </summary>
        public IEnumerable<(int, uint)> PromptAndContextLengths => new (int, uint)[]
        {
            (512, 2048),
            (2024, 2048)
        };

        /// <summary>
        /// (model path, gpu layer count)
        /// </summary>
        // TODO: specify the native library to load here to better test the CPU-only case.
        public IEnumerable<(string, int)> ModelAndGpuLayerCounts => new (string, int)[]
        {
            (Constants.Generative7BModelPath, 0),
            (Constants.Generative7BModelPath, 10),
            (Constants.Generative7BModelPath, 20)
        };

        public IEnumerable<ExecutorType> ExecutorTypes => new ExecutorType[]
        {
            ExecutorType.Interactive,
            ExecutorType.Stateless
        };

        [ParamsSource(nameof(PromptAndContextLengths))]
        public (int, uint) PromptAndContextLength { get; set; }

        [ParamsSource(nameof(ModelAndGpuLayerCounts))]
        public (string, int) ModelAndGpuLayerCount { get; set; }

        [ParamsSource(nameof(ExecutorTypes))]
        public ExecutorType ExecutorType { get; set; }

        /// <summary>
        /// Params used to create a model.
        /// </summary>
        public ModelParams ModelParams { get; set; }

        /// <summary>
        /// Params used in inference.
        /// </summary>
        public InferenceParams InferenceParams { get; set; }

        /// <summary>
        /// Prompt used to run text generation.
        /// </summary>
        public string Prompt { get; set; }

        public ILLamaExecutor Executor { get; set; }

        private void InitializeParamsAndModel()
        {
            ModelParams = new ModelParams(ModelAndGpuLayerCount.Item1)
            {
                ContextSize = PromptAndContextLength.Item2,
                GpuLayerCount = ModelAndGpuLayerCount.Item2
            };
            Prompt = File.ReadAllText(Constants.TextCompletionPromptsFilePath).Substring(0, PromptAndContextLength.Item1);
            InferenceParams = new InferenceParams()
            {
                Temperature = 0.6f,
                MaxTokens = 1 // Only prefill, no generation here.
            };

            LLamaWeights weights = LLamaWeights.LoadFromFile(ModelParams);
            LLamaContext context = weights.CreateContext(ModelParams);
            Executor = ExecutorType switch
            {
                ExecutorType.Interactive => new InteractiveExecutor(context),
                ExecutorType.Instruct => new InstructExecutor(context),
                ExecutorType.Stateless => new StatelessExecutor(weights, ModelParams),
                _ => throw new NotSupportedException()
            };
        }

        [GlobalSetup(Targets = [nameof(Basic)])]
        public void GlobalSetup()
        {
            InitializeParamsAndModel();
        }

        [IterationCleanup(Targets = [nameof(Basic)])]
        public void GlobalCleanup()
        {
            if (ExecutorType != ExecutorType.Stateless) // the stateless executor always disposes its `Context` property
            {
                Executor.Context.NativeHandle.KvCacheClear();
            }
        }

        [Benchmark]
        public async Task<string> Basic()
        {
            StringBuilder sb = new();
            await foreach (var text in Executor.InferAsync(Prompt, InferenceParams))
            {
                sb.Append(text);
            }
            return sb.ToString();
        }
    }
}
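
Note: this commit does not include a benchmark entry point. A minimal runner sketch, assuming a conventional BenchmarkDotNet Program.cs (the file and Program class below are hypothetical, not part of this diff):

using BenchmarkDotNet.Running;
using LLama.Benchmark.LLamaExecutorBenchmark;

namespace LLama.Benchmark
{
    internal static class Program
    {
        private static void Main()
        {
            // Runs PrefillBenchmark across every parameter combination:
            // 2 prompt/context lengths x 3 GPU layer counts x 2 executor types.
            BenchmarkRunner.Run<PrefillBenchmark>();
        }
    }
}

Run with a Release build (e.g. dotnet run -c Release --project LLama.Benchmark); BenchmarkDotNet writes its result files to BenchmarkDotNet.Artifacts/results/ by default, which is the path the updated workflow above uploads as the Benchmark_Results artifact.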
