Exposed basic timing information from llama.cpp #736

Merged: 1 commit, May 13, 2024
9 changes: 8 additions & 1 deletion LLama.Examples/Examples/BatchedExecutorFork.cs
@@ -1,4 +1,4 @@
using LLama.Batched;
using LLama.Common;
using LLama.Native;
using LLama.Sampling;
@@ -67,6 +67,13 @@ await AnsiConsole
root.Display(display);
AnsiConsole.Write(display);
});

// Print some stats
var timings = executor.Context.NativeHandle.GetTimings();
AnsiConsole.MarkupLine($"Total Tokens Evaluated: {timings.TokensEvaluated}");
AnsiConsole.MarkupLine($"Total Tokens Sampled: {timings.TokensSampled}");
AnsiConsole.MarkupLine($"Eval Time: {(timings.Eval + timings.PromptEval).TotalMilliseconds}ms");
AnsiConsole.MarkupLine($"Sample Time: {timings.Sampling.TotalMilliseconds}ms");
}

private class Node
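The example above prints raw totals; the same struct supports derived throughput figures. A minimal sketch, assuming the properties introduced in this PR (the Throughput helper is hypothetical, not part of the change):

using LLama.Native;

// Hypothetical helper: derive tokens/second from the timings printed above.
// Note: an unused phase has zero elapsed time, so its ratio comes out as Infinity.
static (double PromptTps, double GenerationTps) Throughput(LLamaTimings timings)
{
    // Prompt processing speed: prompt tokens over prompt-eval wall time.
    var promptTps = timings.PromptTokensEvaluated / timings.PromptEval.TotalSeconds;

    // Generation speed: evaluated tokens over eval wall time.
    var generationTps = timings.TokensEvaluated / timings.Eval.TotalSeconds;

    return (promptTps, generationTps);
}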
104 changes: 104 additions & 0 deletions LLama/Native/LLamaTimings.cs
@@ -0,0 +1,104 @@
using System;
using System.Runtime.InteropServices;

namespace LLama.Native;

/// <summary>
/// LLama performance information
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct LLamaTimings
{
/// <summary>
/// Timestamp when reset was last called
/// </summary>
private double t_start_ms;

/// <summary>
/// Timestamp when these timings were read
/// </summary>
private double t_end_ms;

/// <summary>
/// Loading milliseconds
/// </summary>
private double t_load_ms;

/// <summary>
/// Total sampling milliseconds
/// </summary>
private double t_sample_ms;

/// <summary>
/// Total milliseconds spent prompt processing
/// </summary>
private double t_p_eval_ms;

/// <summary>
/// Total milliseconds in eval/decode calls
/// </summary>
private double t_eval_ms;

/// <summary>
/// Number of tokens sampled
/// </summary>
private int n_sample;

/// <summary>
/// Number of tokens in eval calls for the prompt (with batch size > 1)
/// </summary>
private int n_p_eval;

/// <summary>
/// Number of eval calls
/// </summary>
private int n_eval;


/// <summary>
/// Timestamp when reset was last called
/// </summary>
public readonly TimeSpan ResetTimestamp => TimeSpan.FromMilliseconds(t_start_ms);

/// <summary>
/// Timestamp when these timings were read
/// </summary>
public readonly TimeSpan ReadTimestamp => TimeSpan.FromMilliseconds(t_end_ms);

/// <summary>
/// Time spent loading
/// </summary>
public readonly TimeSpan Loading => TimeSpan.FromMilliseconds(t_load_ms);

/// <summary>
/// Time spent sampling
/// </summary>
public readonly TimeSpan Sampling => TimeSpan.FromMilliseconds(t_sample_ms);

/// <summary>
/// Time spent processing the prompt
/// </summary>
public readonly TimeSpan PromptEval => TimeSpan.FromMilliseconds(t_p_eval_ms);

/// <summary>
/// Total time spent in eval/decode calls
/// </summary>
public readonly TimeSpan Eval => TimeSpan.FromMilliseconds(t_eval_ms);

/// <summary>
/// Total number of tokens sampled
/// </summary>
public readonly int TokensSampled => n_sample;

/// <summary>
/// Number of tokens in eval calls for the prompt (with batch size > 1)
/// </summary>
public readonly int PromptTokensEvaluated => n_p_eval;

/// <summary>
/// Total number of tokens evaluated
/// </summary>
public readonly int TokensEvaluated => n_eval;
}
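Because llama_get_timings returns this struct by value across the P/Invoke boundary, the managed layout must mirror the native llama_timings struct exactly: six doubles followed by three 32-bit integers, in the order shown above. A minimal layout sanity check, as a sketch (the expected size and offset are assumptions based on default sequential packing with 8-byte alignment, not values from this PR):

using System;
using System.Runtime.InteropServices;
using LLama.Native;

// Sketch: check the managed struct against the expected native layout
// (6 doubles = 48 bytes, then 3 ints = 12 bytes, padded to 64 total).
class TimingsLayoutCheck
{
    static void Main()
    {
        Console.WriteLine(Marshal.SizeOf<LLamaTimings>());             // expected: 64
        Console.WriteLine(Marshal.OffsetOf<LLamaTimings>("n_sample")); // expected: 48
    }
}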
36 changes: 35 additions & 1 deletion LLama/Native/SafeLLamaContextHandle.cs
@@ -282,7 +282,21 @@ static SafeLLamaContextHandle()
/// </summary>
/// <param name="ctx"></param>
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
-public static extern void llama_kv_cache_update(SafeLLamaContextHandle ctx);
+private static extern void llama_kv_cache_update(SafeLLamaContextHandle ctx);

/// <summary>
/// Get performance information
/// </summary>
/// <param name="ctx"></param>
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
private static extern LLamaTimings llama_get_timings(SafeLLamaContextHandle ctx);

/// <summary>
/// Reset performance information
/// </summary>
/// <param name="ctx"></param>
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
private static extern void llama_reset_timings(SafeLLamaContextHandle ctx);
#endregion

/// <summary>
@@ -510,6 +524,26 @@ public void SetThreads(uint threads, uint threadsBatch)
{
llama_set_n_threads(this, threads, threadsBatch);
}

#region timing
/// <summary>
/// Get performance information
/// </summary>
/// <returns></returns>
public LLamaTimings GetTimings()
{
return llama_get_timings(this);
}

/// <summary>
/// Reset all performance information for this context
/// </summary>
public void ResetTimings()
{
llama_reset_timings(this);
}
#endregion


#region KV Cache Management
/// <summary>
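A short usage sketch for the two new wrappers (assumptions: `handle` comes from an existing context, e.g. `executor.Context.NativeHandle`, and the inference step itself is elided):

using System;
using LLama.Native;

// Sketch: isolate measurements for a single run by resetting counters first.
void ReportRun(SafeLLamaContextHandle handle)
{
    handle.ResetTimings();

    // ... run prompt processing and token generation here ...

    var t = handle.GetTimings();
    Console.WriteLine($"Prompt: {t.PromptTokensEvaluated} tokens in {t.PromptEval.TotalMilliseconds:F0}ms");
    Console.WriteLine($"Generation: {t.TokensEvaluated} tokens in {t.Eval.TotalMilliseconds:F0}ms");
    Console.WriteLine($"Sampling: {t.TokensSampled} tokens in {t.Sampling.TotalMilliseconds:F0}ms");
}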