-
Notifications
You must be signed in to change notification settings - Fork 364
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add benchmark test for prefill.
- Loading branch information
1 parent
c07c4cc
commit 413c23c
Showing
6 changed files
with
5,549 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
|
||
namespace LLama.Benchmark
{
    /// <summary>
    /// Selects which LLamaSharp executor implementation a benchmark run exercises.
    /// </summary>
    public enum ExecutorType
    {
        /// <summary>Corresponds to <c>InteractiveExecutor</c>.</summary>
        Interactive = 0,

        /// <summary>Corresponds to <c>InstructExecutor</c>.</summary>
        Instruct = 1,

        /// <summary>Corresponds to <c>StatelessExecutor</c>.</summary>
        Stateless = 2
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Runtime.InteropServices; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
|
||
namespace LLama.Benchmark
{
    /// <summary>
    /// Central location for the on-disk asset paths used by the benchmark suite.
    /// All paths are relative to the benchmark's working directory.
    /// </summary>
    internal static class Constants
    {
        /// <summary>Quantized 7B chat model used for text-generation benchmarks.</summary>
        public static readonly string Generative7BModelPath = "Models/llama-2-7b-chat.Q3_K_S.gguf";

        /// <summary>Quantized model used for embedding benchmarks.</summary>
        public static readonly string EmbeddingModelPath = "Models/all-MiniLM-L12-v2.Q8_0.gguf";

        /// <summary>Quantized LLaVA multimodal model weights.</summary>
        public static readonly string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf";

        /// <summary>Multimodal projection file that accompanies the LLaVA model.</summary>
        public static readonly string LLavaMmpPath = "Models/mmproj-model-f16.gguf";

        /// <summary>Sample image fed to LLaVA benchmarks.</summary>
        public static readonly string LLavaImage = "Models/extreme-ironing-taxi-610x427.jpg";

        /// <summary>Text file whose contents are sliced into completion prompts.</summary>
        public static readonly string TextCompletionPromptsFilePath = "Assets/TextCompletionPrompts.txt";
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
#pragma warning disable CS8618 | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
using BenchmarkDotNet.Attributes; | ||
using BenchmarkDotNet.Engines; | ||
using BenchmarkDotNet.Jobs; | ||
using LLama.Abstractions; | ||
using LLama.Common; | ||
using Microsoft.VisualBasic; | ||
|
||
namespace LLama.Benchmark.LLamaExecutorBenchmark
{
    /// <summary>
    /// Measures the cost of prompt processing (prefill) for the LLamaSharp executors.
    /// Generation is suppressed by setting <c>MaxTokens = 1</c>, so the measured time
    /// is dominated by evaluating the prompt tokens.
    /// </summary>
    [BenchmarkCategory("LLamaExecutor")]
    [SimpleJob(RunStrategy.Monitoring, runtimeMoniker: RuntimeMoniker.Net80)]
    [MinWarmupCount(1)]
    [MaxWarmupCount(2)]
    [MinIterationCount(1)]
    [MaxIterationCount(16)]
    public class PrefillBenchmark
    {
        /// <summary>
        /// (prompt length, context length)
        /// </summary>
        public IEnumerable<(int, uint)> PromptAndContextLengths => new (int, uint)[]
        {
            (512, 2048),
            (2024, 2048)
        };

        /// <summary>
        /// (model path, gpu layer count)
        /// </summary>
        public IEnumerable<(string, int)> ModelAndGpuLayerCounts => new (string, int)[]
        // TODO: specify the native library to load here to test cpu case better.
        {
            (Constants.Generative7BModelPath, 0),
            (Constants.Generative7BModelPath, 10),
            (Constants.Generative7BModelPath, 20)
        };

        public IEnumerable<ExecutorType> ExecutorTypes => new ExecutorType[]
        {
            ExecutorType.Interactive,
            ExecutorType.Stateless
        };

        [ParamsSource(nameof(PromptAndContextLengths))]
        public (int, uint) PromptAndContextLength { get; set; }

        [ParamsSource(nameof(ModelAndGpuLayerCounts))]
        public (string, int) ModelAndGpuLayerCount { get; set; }

        [ParamsSource(nameof(ExecutorTypes))]
        public ExecutorType ExecutorType { get; set; }

        /// <summary>
        /// Params used to create a model.
        /// </summary>
        public ModelParams ModelParams { get; set; }

        /// <summary>
        /// Params used in inference.
        /// </summary>
        public InferenceParams InferenceParams { get; set; }

        /// <summary>
        /// Prompt used to run text generation.
        /// </summary>
        public string Prompt { get; set; }

        public ILLamaExecutor Executor { get; set; }

        // Kept so the loaded native resources can be released in DisposeModel();
        // previously the weights (and an unused context in the stateless case)
        // leaked between benchmark parameter combinations.
        private LLamaWeights _weights;
        private LLamaContext? _context;

        private void InitializeParamsAndModel()
        {
            ModelParams = new ModelParams(ModelAndGpuLayerCount.Item1)
            {
                ContextSize = PromptAndContextLength.Item2,
                GpuLayerCount = ModelAndGpuLayerCount.Item2
            };
            // Prompt length is measured in characters here, not tokens.
            Prompt = File.ReadAllText(Constants.TextCompletionPromptsFilePath).Substring(0, PromptAndContextLength.Item1);
            InferenceParams = new InferenceParams()
            {
                Temperature = 0.6f,
                MaxTokens = 1 // Only prefill, no generation here.
            };

            _weights = LLamaWeights.LoadFromFile(ModelParams);
            if (ExecutorType == ExecutorType.Stateless)
            {
                // The stateless executor manages its own context internally;
                // creating one here would leak native memory.
                _context = null;
                Executor = new StatelessExecutor(_weights, ModelParams);
            }
            else
            {
                _context = _weights.CreateContext(ModelParams);
                Executor = ExecutorType switch
                {
                    ExecutorType.Interactive => new InteractiveExecutor(_context),
                    ExecutorType.Instruct => new InstructExecutor(_context),
                    _ => throw new NotSupportedException()
                };
            }
        }

        [GlobalSetup(Targets = [nameof(Basic)])]
        public void GlobalSetup()
        {
            InitializeParamsAndModel();
        }

        [IterationCleanup(Targets = [nameof(Basic)])]
        public void GlobalCleanup()
        {
            if(ExecutorType != ExecutorType.Stateless) // stateless executor always dispose its `Context` property
            {
                Executor.Context.NativeHandle.KvCacheClear();
            }
        }

        /// <summary>
        /// Releases the model weights and context after all iterations for a
        /// parameter combination have run, so successive combinations do not
        /// accumulate loaded models.
        /// </summary>
        [GlobalCleanup(Targets = [nameof(Basic)])]
        public void DisposeModel()
        {
            _context?.Dispose();
            _weights?.Dispose();
        }

        [Benchmark]
        public async Task<string> Basic()
        {
            StringBuilder sb = new();
            await foreach(var text in Executor.InferAsync(Prompt, InferenceParams))
            {
                sb.Append(text);
            }
            return sb.ToString();
        }
    }
}