Skip to content

Commit

Permalink
Merge pull request #736 from martindevans/llama_get_timings
Browse files Browse the repository at this point in the history
Exposed basic timing information from llama.cpp
  • Loading branch information
martindevans authored May 13, 2024
2 parents e2a770e + e38d96b commit 3582d82
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 2 deletions.
9 changes: 8 additions & 1 deletion LLama.Examples/Examples/BatchedExecutorFork.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using LLama.Batched;
using LLama.Batched;
using LLama.Common;
using LLama.Native;
using LLama.Sampling;
Expand Down Expand Up @@ -67,6 +67,13 @@ await AnsiConsole
root.Display(display);
AnsiConsole.Write(display);
});

// Print some stats
var timings = executor.Context.NativeHandle.GetTimings();
AnsiConsole.MarkupLine($"Total Tokens Evaluated: {timings.TokensEvaluated}");
AnsiConsole.MarkupLine($"Total Tokens Sampled: {timings.TokensSampled}");
AnsiConsole.MarkupLine($"Eval Time: {(timings.Eval + timings.PromptEval).TotalMilliseconds}ms");
AnsiConsole.MarkupLine($"Sample Time: {timings.Sampling.TotalMilliseconds}ms");
}

private class Node
Expand Down
104 changes: 104 additions & 0 deletions LLama/Native/LLamaTimings.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
using System;
using System.Runtime.InteropServices;

namespace LLama.Native;

/// <summary>
/// LLama performance information
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct LLamaTimings
{
/// <summary>
/// Timestamp when reset was last called
/// </summary>
private double t_start_ms;

/// <summary>
/// Timestamp when these timings were read
/// </summary>
private double t_end_ms;

/// <summary>
/// Loading milliseconds
/// </summary>
private double t_load_ms;

/// <summary>
/// Total sampling milliseconds
/// </summary>
private double t_sample_ms;

/// <summary>
/// total milliseconds spent prompt processing
/// </summary>
private double t_p_eval_ms;

/// <summary>
/// Total milliseconds in eval/decode calls
/// </summary>
private double t_eval_ms;

/// <summary>
/// number of tokens sampled
/// </summary>
private int n_sample;

/// <summary>
/// number of tokens in eval calls for the prompt (with batch size > 1)
/// </summary>
private int n_p_eval;

/// <summary>
/// number of eval calls
/// </summary>
private int n_eval;




/// <summary>
/// Timestamp when reset was last called
/// </summary>
public readonly TimeSpan ResetTimestamp => TimeSpan.FromMilliseconds(t_start_ms);

/// <summary>
/// Timestamp when these timings were read
/// </summary>
public readonly TimeSpan ReadTimestamp => TimeSpan.FromMilliseconds(t_start_ms);

/// <summary>
/// Time spent loading
/// </summary>
public readonly TimeSpan Loading => TimeSpan.FromMilliseconds(t_load_ms);

/// <summary>
/// Time spent sampling
/// </summary>
public readonly TimeSpan Sampling => TimeSpan.FromMilliseconds(t_load_ms);

/// <summary>
/// total milliseconds spent prompt processing
/// </summary>
public TimeSpan PromptEval => TimeSpan.FromMilliseconds(t_p_eval_ms);

/// <summary>
/// Total milliseconds in eval/decode calls
/// </summary>
public readonly TimeSpan Eval => TimeSpan.FromMilliseconds(t_eval_ms);

/// <summary>
/// Total number of tokens sampled
/// </summary>
public readonly int TokensSampled => n_sample;

/// <summary>
/// number of tokens in eval calls for the prompt (with batch size > 1)
/// </summary>
public readonly int PrompTokensEvaluated => n_p_eval;

/// <summary>
/// number of eval calls
/// </summary>
public readonly int TokensEvaluated => n_p_eval;
}
36 changes: 35 additions & 1 deletion LLama/Native/SafeLLamaContextHandle.cs
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,21 @@ static SafeLLamaContextHandle()
/// </summary>
/// <param name="ctx"></param>
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
public static extern void llama_kv_cache_update(SafeLLamaContextHandle ctx);
private static extern void llama_kv_cache_update(SafeLLamaContextHandle ctx);

/// <summary>
/// get performance information
/// </summary>
/// <param name="ctx"></param>
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
private static extern LLamaTimings llama_get_timings(SafeLLamaContextHandle ctx);

/// <summary>
/// Reset performance information
/// </summary>
/// <param name="ctx"></param>
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
private static extern void llama_reset_timings(SafeLLamaContextHandle ctx);
#endregion

/// <summary>
Expand Down Expand Up @@ -510,6 +524,26 @@ public void SetThreads(uint threads, uint threadsBatch)
{
llama_set_n_threads(this, threads, threadsBatch);
}

#region timing
/// <summary>
/// Get performance information
/// </summary>
/// <returns></returns>
public LLamaTimings GetTimings()
{
return llama_get_timings(this);
}

/// <summary>
/// Reset all performance information for this context
/// </summary>
public void ResetTimings()
{
llama_reset_timings(this);
}
#endregion


#region KV Cache Management
/// <summary>
Expand Down

0 comments on commit 3582d82

Please sign in to comment.