Merge branch 'pr/268' into RuntimeDetection
SignalRT committed Nov 8, 2023
2 parents 9b2ca9c + b893c6f commit 091b8d5
Showing 18 changed files with 210 additions and 218 deletions.
1 change: 1 addition & 0 deletions LLama.Examples/LLama.Examples.csproj
@@ -30,6 +30,7 @@
   <ItemGroup>
     <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="7.0.0" />
     <PackageReference Include="Microsoft.SemanticKernel" Version="1.0.0-beta4" />
+    <PackageReference Include="Spectre.Console" Version="0.47.0" />
   </ItemGroup>

   <ItemGroup>
3 changes: 2 additions & 1 deletion LLama.Examples/NewVersion/GetEmbeddings.cs
@@ -4,7 +4,7 @@ namespace LLama.Examples.NewVersion
 {
     public class GetEmbeddings
     {
-        public static void Run()
+        public static Task Run()
         {
             Console.Write("Please input your model path: ");
             var modelPath = Console.ReadLine();
@@ -23,6 +23,7 @@ public static void Run()
                 Console.WriteLine(string.Join(", ", embedder.GetEmbeddings(text)));
                 Console.WriteLine();
             }
+            return Task.CompletedTask;
         }
     }
 }
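Both this example and QuantizeModel below are synchronous; returning Task.CompletedTask lets them share the single Func<Task> signature that the reworked menu in TestRunner.cs expects. A minimal sketch of the pattern (all names here are hypothetical, not from the commit):

    using System;
    using System.Collections.Generic;
    using System.Threading.Tasks;

    class MenuSketch
    {
        // Synchronous work still satisfies Func<Task> by returning an
        // already-completed task instead of being marked async.
        static Task SyncExample() { Console.WriteLine("sync"); return Task.CompletedTask; }
        static async Task AsyncExample() => await Task.Delay(10);

        static readonly Dictionary<string, Func<Task>> Examples = new()
        {
            { "Sync example.", SyncExample },
            { "Async example.", AsyncExample },
        };

        static async Task Main()
        {
            foreach (var run in Examples.Values)
                await run();   // one uniform call site for both kinds
        }
    }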
4 changes: 3 additions & 1 deletion LLama.Examples/NewVersion/QuantizeModel.cs
@@ -2,7 +2,7 @@
 {
     public class QuantizeModel
     {
-        public static void Run()
+        public static Task Run()
         {
             Console.Write("Please input your original model path: ");
             var inputPath = Console.ReadLine();
@@ -21,6 +21,8 @@ public static void Run()
             {
                 Console.WriteLine("Quantization failed!");
             }
+
+            return Task.CompletedTask;
         }
     }
 }
135 changes: 40 additions & 95 deletions LLama.Examples/NewVersion/TestRunner.cs
@@ -1,109 +1,54 @@
-namespace LLama.Examples.NewVersion
+using System.Linq.Expressions;
+using Spectre.Console;
+
+namespace LLama.Examples.NewVersion
 {
     public class NewVersionTestRunner
     {
+        static Dictionary<string, Func<Task>> Examples = new Dictionary<string, Func<Task>>
+        {
+            {"Run a chat session without stripping the role names.", () => ChatSessionWithRoleName.Run()},
+            {"Run a chat session with the role names stripped.", () => ChatSessionStripRoleName.Run()},
+            {"Interactive mode chat by using executor.", () => InteractiveModeExecute.Run()},
+            {"Instruct mode chat by using executor.", () => InstructModeExecute.Run()},
+            {"Stateless mode chat by using executor.", () => StatelessModeExecute.Run()},
+            {"Load and save chat session.", () => SaveAndLoadSession.Run()},
+            {"Load and save state of model and executor.", () => LoadAndSaveState.Run()},
+            {"Get embeddings from LLama model.", () => GetEmbeddings.Run()},
+            {"Quantize the model.", () => QuantizeModel.Run()},
+            {"Automatic conversation.", () => TalkToYourself.Run()},
+            {"Constrain response to json format using grammar.", () => GrammarJsonResponse.Run()},
+            {"Semantic Kernel Prompt.", () => SemanticKernelPrompt.Run()},
+            {"Semantic Kernel Chat.", () => SemanticKernelChat.Run()},
+            {"Semantic Kernel Memory.", () => SemanticKernelMemory.Run()},
+            {"Coding Assistant.", () => CodingAssistant.Run()},
+            {"Batch Decoding.", () => BatchedDecoding.Run()},
+            {"SK Kernel Memory.", () => KernelMemory.Run()},
+            {"Exit", () => Task.CompletedTask}
+        };
         public static async Task Run()
         {
-            Console.WriteLine("================LLamaSharp Examples (New Version)==================\n");
-
-            Console.WriteLine("Please input a number to choose an example to run:");
-            Console.WriteLine("0: Run a chat session without stripping the role names.");
-            Console.WriteLine("1: Run a chat session with the role names stripped.");
-            Console.WriteLine("2: Interactive mode chat by using executor.");
-            Console.WriteLine("3: Instruct mode chat by using executor.");
-            Console.WriteLine("4: Stateless mode chat by using executor.");
-            Console.WriteLine("5: Load and save chat session.");
-            Console.WriteLine("6: Load and save state of model and executor.");
-            Console.WriteLine("7: Get embeddings from LLama model.");
-            Console.WriteLine("8: Quantize the model.");
-            Console.WriteLine("9: Automatic conversation.");
-            Console.WriteLine("10: Constrain response to json format using grammar.");
-            Console.WriteLine("11: Semantic Kernel Prompt.");
-            Console.WriteLine("12: Semantic Kernel Chat.");
-            Console.WriteLine("13: Semantic Kernel Memory.");
-            Console.WriteLine("14: Coding Assistant.");
-            Console.WriteLine("15: Batch Decoding.");
-            Console.WriteLine("16: SK Kernel Memory.");
+            AnsiConsole.Write(new Rule("LLamaSharp Examples"));

             while (true)
             {
-                Console.Write("\nYour choice: ");
-                int choice = int.Parse(Console.ReadLine());
+                var choice = AnsiConsole.Prompt(
+                    new SelectionPrompt<string>()
+                        .Title("Please choose[green] an example[/] to run: ")
+                        .AddChoices(Examples.Keys));

-                if (choice == 0)
-                {
-                    await ChatSessionWithRoleName.Run();
-                }
-                else if (choice == 1)
-                {
-                    await ChatSessionStripRoleName.Run();
-                }
-                else if (choice == 2)
-                {
-                    await InteractiveModeExecute.Run();
-                }
-                else if (choice == 3)
-                {
-                    await InstructModeExecute.Run();
-                }
-                else if (choice == 4)
-                {
-                    await StatelessModeExecute.Run();
-                }
-                else if (choice == 5)
-                {
-                    await SaveAndLoadSession.Run();
-                }
-                else if (choice == 6)
-                {
-                    await LoadAndSaveState.Run();
-                }
-                else if (choice == 7)
-                {
-                    GetEmbeddings.Run();
-                }
-                else if (choice == 8)
-                {
-                    QuantizeModel.Run();
-                }
-                else if (choice == 9)
-                {
-                    await TalkToYourself.Run();
-                }
-                else if (choice == 10)
-                {
-                    await GrammarJsonResponse.Run();
-                }
-                else if (choice == 11)
-                {
-                    await SemanticKernelPrompt.Run();
-                }
-                else if (choice == 12)
-                {
-                    await SemanticKernelChat.Run();
-                }
-                else if (choice == 13)
-                {
-                    await SemanticKernelMemory.Run();
-                }
-                else if (choice == 14)
-                {
-                    await CodingAssistant.Run();
-                }
-                else if (choice == 15)
-                {
-                    await BatchedDecoding.Run();
-                }
-                else if (choice == 16)
-                {
-                    await KernelMemory.Run();
-                }
-                else
+                if (Examples.TryGetValue(choice, out var example))
                 {
-                    Console.WriteLine("Cannot parse your choice. Please select again.");
-                    continue;
+                    if (choice == "Exit")
+                    {
+                        break;
+                    }
+                    AnsiConsole.Write(new Rule(choice));
+                    await example();
                 }
-                break;
+
+                AnsiConsole.Clear();
             }
         }
     }
 }
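The Examples dictionary plus Spectre.Console's SelectionPrompt replaces both the hand-numbered menu and the seventeen-branch if/else chain, and it removes the crash the old int.Parse(Console.ReadLine()) caused on non-numeric input (an unhandled FormatException). Adding an example is now a one-line dictionary entry. A self-contained sketch of the same pattern (example bodies hypothetical; relies on .NET 6 implicit usings for System, System.Collections.Generic and System.Threading.Tasks):

    using Spectre.Console;

    var examples = new Dictionary<string, Func<Task>>
    {
        { "Say hello.", () => { Console.WriteLine("hello"); return Task.CompletedTask; } },
        { "Exit", () => Task.CompletedTask },
    };

    while (true)
    {
        // Arrow-key selection over the dictionary keys; nothing to parse,
        // so malformed input is impossible by construction.
        var choice = AnsiConsole.Prompt(
            new SelectionPrompt<string>()
                .Title("Please choose an [green]example[/] to run:")
                .AddChoices(examples.Keys));

        if (choice == "Exit")
            break;

        await examples[choice]();
    }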
4 changes: 2 additions & 2 deletions LLama.Web/Common/ModelOptions.cs
@@ -17,9 +17,9 @@ public class ModelOptions
     public int MaxInstances { get; set; }

     /// <summary>
-    /// Model context size (n_ctx)
+    /// Model context size (n_ctx). Null to use value from model.
     /// </summary>
-    public uint ContextSize { get; set; } = 512;
+    public uint? ContextSize { get; set; }

     /// <summary>
     /// the GPU that is used for scratch and small tensors
4 changes: 2 additions & 2 deletions LLama/Abstractions/IContextParams.cs
@@ -8,9 +8,9 @@ namespace LLama.Abstractions;
 public interface IContextParams
 {
     /// <summary>
-    /// Model context size (n_ctx)
+    /// Model context size (n_ctx). Null to use value from model file.
     /// </summary>
-    uint ContextSize { get; set; }
+    uint? ContextSize { get; set; }

     /// <summary>
     /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
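With ContextSize nullable on ModelOptions, IContextParams and ModelParams alike, leaving it unset now means "read n_ctx from the model file" rather than the old hard-coded default of 512. A hedged consumer-side sketch (the ModelParams constructor shape is assumed, not shown in this commit):

    using LLama.Common;

    // Explicit value: the context is fixed at 2048 tokens.
    var fixedCtx = new ModelParams("model.gguf") { ContextSize = 2048 };

    // Left null: the context length stored in the model itself is used.
    var fromModel = new ModelParams("model.gguf");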
2 changes: 1 addition & 1 deletion LLama/Common/FixedSizeQueue.cs
@@ -43,7 +43,7 @@ public FixedSizeQueue(int size)
     /// <param name="data"></param>
     public FixedSizeQueue(int size, IEnumerable<T> data)
     {
-#if !NETSTANDARD2_0
+#if NET6_0_OR_GREATER
         // Try to check the size without enumerating the entire IEnumerable. This may not be able to get the count,
         // in which case we'll have to check later
         if (data.TryGetNonEnumeratedCount(out var dataCount) && dataCount > size)
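Tightening the guard from !NETSTANDARD2_0 to NET6_0_OR_GREATER is more accurate: Enumerable.TryGetNonEnumeratedCount shipped in .NET 6, so the old condition would also have compiled the call on other non-netstandard2.0 targets (such as netstandard2.1) where the method does not exist. For reference, a small sketch of what the API does:

    using System;
    using System.Collections.Generic;
    using System.Linq;

    IEnumerable<int> eager = new List<int> { 1, 2, 3 };
    IEnumerable<int> lazy = Enumerable.Range(0, 100).Where(x => x % 2 == 0);

    // Succeeds without iterating: List<T> knows its count.
    Console.WriteLine(eager.TryGetNonEnumeratedCount(out var n1)); // True, n1 == 3

    // Declines rather than enumerate: a lazy query's count is unknown up front.
    Console.WriteLine(lazy.TryGetNonEnumeratedCount(out var n2));  // False, n2 == 0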
101 changes: 32 additions & 69 deletions LLama/Common/ModelParams.cs
@@ -12,105 +12,68 @@ namespace LLama.Common
 public record ModelParams
     : ILLamaParams
 {
-    /// <summary>
-    /// Model context size (n_ctx)
-    /// </summary>
-    public uint ContextSize { get; set; } = 512;
-    /// <summary>
-    /// the GPU that is used for scratch and small tensors
-    /// </summary>
+    /// <inheritdoc />
+    public uint? ContextSize { get; set; }
+
+    /// <inheritdoc />
     public int MainGpu { get; set; } = 0;

-    /// <summary>
-    /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
-    /// </summary>
+    /// <inheritdoc />
     public int GpuLayerCount { get; set; } = 20;
-    /// <summary>
-    /// Seed for the random number generator (seed)
-    /// </summary>
+
+    /// <inheritdoc />
     public uint Seed { get; set; } = 0xFFFFFFFF;
-    /// <summary>
-    /// Use f16 instead of f32 for memory kv (memory_f16)
-    /// </summary>
+
+    /// <inheritdoc />
     public bool UseFp16Memory { get; set; } = true;
-    /// <summary>
-    /// Use mmap for faster loads (use_mmap)
-    /// </summary>
+
+    /// <inheritdoc />
     public bool UseMemorymap { get; set; } = true;
-    /// <summary>
-    /// Use mlock to keep model in memory (use_mlock)
-    /// </summary>
+
+    /// <inheritdoc />
     public bool UseMemoryLock { get; set; }
-    /// <summary>
-    /// Compute perplexity over the prompt (perplexity)
-    /// </summary>
+
+    /// <inheritdoc />
     public bool Perplexity { get; set; }
-    /// <summary>
-    /// Model path (model)
-    /// </summary>
+
+    /// <inheritdoc />
     public string ModelPath { get; set; }

-    /// <summary>
-    /// List of LoRAs to apply
-    /// </summary>
+    /// <inheritdoc />
     public AdapterCollection LoraAdapters { get; set; } = new();

-    /// <summary>
-    /// base model path for the lora adapter (lora_base)
-    /// </summary>
+    /// <inheritdoc />
     public string LoraBase { get; set; } = string.Empty;

-    /// <summary>
-    /// Number of threads (null = autodetect) (n_threads)
-    /// </summary>
+    /// <inheritdoc />
     public uint? Threads { get; set; }

-    /// <summary>
-    /// Number of threads to use for batch processing (null = autodetect) (n_threads)
-    /// </summary>
+    /// <inheritdoc />
     public uint? BatchThreads { get; set; }

-    /// <summary>
-    /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
-    /// </summary>
+    /// <inheritdoc />
     public uint BatchSize { get; set; } = 512;

-    /// <summary>
-    /// Whether to use embedding mode. (embedding) Note that if this is set to true,
-    /// The LLamaModel won't produce text response anymore.
-    /// </summary>
+    /// <inheritdoc />
     public bool EmbeddingMode { get; set; }

-    /// <summary>
-    /// how split tensors should be distributed across GPUs.
-    /// </summary>
-    /// <remarks>"[ 3, 2 ]" will assign 60% of the data to GPU 0 and 40% to GPU 1.</remarks>
+    /// <inheritdoc />
     [JsonConverter(typeof(TensorSplitsCollectionConverter))]
     public TensorSplitsCollection TensorSplits { get; set; } = new();

-    /// <summary>
-    /// RoPE base frequency
-    /// </summary>
-    public float? RopeFrequencyBase { get; set; }
+    /// <inheritdoc />
+    public float? RopeFrequencyBase { get; set; }

-    /// <summary>
-    /// RoPE frequency scaling factor
-    /// </summary>
-    public float? RopeFrequencyScale { get; set; }
+    /// <inheritdoc />
+    public float? RopeFrequencyScale { get; set; }

-    /// <summary>
-    /// Use experimental mul_mat_q kernels
-    /// </summary>
-    public bool MulMatQ { get; set; }
+    /// <inheritdoc />
+    public bool MulMatQ { get; set; }

-    /// <summary>
-    /// Load vocab only (no weights)
-    /// </summary>
+    /// <inheritdoc />
     public bool VocabOnly { get; set; }

-    /// <summary>
-    /// The encoding to use to convert text for the model
-    /// </summary>
+    /// <inheritdoc />
     [JsonConverter(typeof(EncodingConverter))]
     public Encoding Encoding { get; set; } = Encoding.UTF8;

2 changes: 2 additions & 0 deletions LLama/Extensions/DictionaryExtensions.cs
@@ -9,6 +9,8 @@ public static TValue GetValueOrDefault<TKey, TValue>(this IReadOnlyDictionary<TK
     {
         return GetValueOrDefaultImpl(dictionary, key, defaultValue);
     }
+#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
+#error Target framework not supported!
 #endif

     internal static TValue GetValueOrDefaultImpl<TKey, TValue>(IReadOnlyDictionary<TKey, TValue> dictionary, TKey key, TValue defaultValue)
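This #elif/#error tail (repeated in EncodingExtensions.cs and IEnumerableExtensions.cs below) makes an unsupported target framework fail the build loudly instead of silently compiling without the polyfill. The shape of the pattern in isolation, with a hypothetical API:

    internal static class PolyfillSketch
    {
    #if NETSTANDARD2_0
        // netstandard2.0 lacks the BCL method, so supply a stand-in here.
        public static int ExampleApi() => 0;
    #elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
    #error Target framework not supported!
    #endif
        // On net6.0+ and netstandard2.1 neither branch is taken: the real
        // BCL method exists and no polyfill is compiled.
    }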
2 changes: 2 additions & 0 deletions LLama/Extensions/EncodingExtensions.cs
@@ -15,6 +15,8 @@ public static int GetCharCount(this Encoding encoding, ReadOnlySpan<byte> bytes)
     {
         return GetCharCountImpl(encoding, bytes);
     }
+#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
+#error Target framework not supported!
 #endif

     internal static int GetCharsImpl(Encoding encoding, ReadOnlySpan<byte> bytes, Span<char> output)
2 changes: 1 addition & 1 deletion LLama/Extensions/IContextParamsExtensions.cs
@@ -21,7 +21,7 @@ public static class IContextParamsExtensions
     public static void ToLlamaContextParams(this IContextParams @params, out LLamaContextParams result)
     {
         result = NativeApi.llama_context_default_params();
-        result.n_ctx = @params.ContextSize;
+        result.n_ctx = @params.ContextSize ?? 0;
         result.n_batch = @params.BatchSize;
         result.seed = @params.Seed;
         result.f16_kv = @params.UseFp16Memory;
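Because the native n_ctx field is not nullable, a null ContextSize is lowered to 0 here; per the doc-comment changes above, 0 is the convention for "use the value from the model file" (assumed llama.cpp behavior). A hedged round-trip sketch using the types in this diff (namespaces and the ModelParams constructor shape assumed):

    using System;
    using LLama.Abstractions;
    using LLama.Common;
    using LLama.Extensions;
    using LLama.Native;

    IContextParams p = new ModelParams("model.gguf") { ContextSize = null };
    p.ToLlamaContextParams(out LLamaContextParams native);

    // 0 at this point; the native layer substitutes the model's own context
    // length when the context is actually created.
    Console.WriteLine(native.n_ctx);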
2 changes: 2 additions & 0 deletions LLama/Extensions/IEnumerableExtensions.cs
@@ -10,6 +10,8 @@ public static IEnumerable<T> TakeLast<T>(this IEnumerable<T> source, int count)
     {
         return TakeLastImpl(source, count);
     }
+#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
+#error Target framework not supported!
 #endif

     internal static IEnumerable<T> TakeLastImpl<T>(IEnumerable<T> source, int count)