Merge branch 'pr/268' into RuntimeDetection
SignalRT committed Nov 8, 2023
2 parents 9b2ca9c + b893c6f commit 091b8d5
Showing 18 changed files with 210 additions and 218 deletions.
1 change: 1 addition & 0 deletions LLama.Examples/LLama.Examples.csproj
@@ -30,6 +30,7 @@
   <ItemGroup>
     <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="7.0.0" />
     <PackageReference Include="Microsoft.SemanticKernel" Version="1.0.0-beta4" />
+    <PackageReference Include="Spectre.Console" Version="0.47.0" />
   </ItemGroup>

   <ItemGroup>
3 changes: 2 additions & 1 deletion LLama.Examples/NewVersion/GetEmbeddings.cs
@@ -4,7 +4,7 @@ namespace LLama.Examples.NewVersion
 {
     public class GetEmbeddings
     {
-        public static void Run()
+        public static Task Run()
         {
             Console.Write("Please input your model path: ");
             var modelPath = Console.ReadLine();
@@ -23,6 +23,7 @@ public static void Run()
                 Console.WriteLine(string.Join(", ", embedder.GetEmbeddings(text)));
                 Console.WriteLine();
             }
+            return Task.CompletedTask;
         }
     }
 }
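Both this example and QuantizeModel below are synchronous; returning Task.CompletedTask lets them share the single Func<Task> signature that the reworked menu in TestRunner.cs expects. A minimal sketch of the pattern (all names here are hypothetical, not from the commit):

    using System;
    using System.Collections.Generic;
    using System.Threading.Tasks;

    class MenuSketch
    {
        // Synchronous work still satisfies Func<Task> by returning an
        // already-completed task instead of being marked async.
        static Task SyncExample() { Console.WriteLine("sync"); return Task.CompletedTask; }
        static async Task AsyncExample() => await Task.Delay(10);

        static readonly Dictionary<string, Func<Task>> Examples = new()
        {
            { "Sync example.", SyncExample },
            { "Async example.", AsyncExample },
        };

        static async Task Main()
        {
            foreach (var run in Examples.Values)
                await run();   // one uniform call site for both kinds
        }
    }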
4 changes: 3 additions & 1 deletion LLama.Examples/NewVersion/QuantizeModel.cs
@@ -2,7 +2,7 @@
 {
     public class QuantizeModel
     {
-        public static void Run()
+        public static Task Run()
         {
             Console.Write("Please input your original model path: ");
             var inputPath = Console.ReadLine();
@@ -21,6 +21,8 @@ public static void Run()
             {
                 Console.WriteLine("Quantization failed!");
             }
+
+            return Task.CompletedTask;
         }
     }
 }
135 changes: 40 additions & 95 deletions LLama.Examples/NewVersion/TestRunner.cs
@@ -1,109 +1,54 @@
-namespace LLama.Examples.NewVersion
+using System.Linq.Expressions;
+using Spectre.Console;
+
+namespace LLama.Examples.NewVersion
 {
     public class NewVersionTestRunner
     {
+        static Dictionary<string, Func<Task>> Examples = new Dictionary<string, Func<Task>>
+        {
+            {"Run a chat session without stripping the role names.", () => ChatSessionWithRoleName.Run()},
+            {"Run a chat session with the role names stripped.", () => ChatSessionStripRoleName.Run()},
+            {"Interactive mode chat by using executor.", () => InteractiveModeExecute.Run()},
+            {"Instruct mode chat by using executor.", () => InstructModeExecute.Run()},
+            {"Stateless mode chat by using executor.", () => StatelessModeExecute.Run()},
+            {"Load and save chat session.", () => SaveAndLoadSession.Run()},
+            {"Load and save state of model and executor.", () => LoadAndSaveState.Run()},
+            {"Get embeddings from LLama model.", () => GetEmbeddings.Run()},
+            {"Quantize the model.", () => QuantizeModel.Run()},
+            {"Automatic conversation.", () => TalkToYourself.Run()},
+            {"Constrain response to json format using grammar.", () => GrammarJsonResponse.Run()},
+            {"Semantic Kernel Prompt.", () => SemanticKernelPrompt.Run()},
+            {"Semantic Kernel Chat.", () => SemanticKernelChat.Run()},
+            {"Semantic Kernel Memory.", () => SemanticKernelMemory.Run()},
+            {"Coding Assistant.", () => CodingAssistant.Run()},
+            {"Batch Decoding.", () => BatchedDecoding.Run()},
+            {"SK Kernel Memory.", () => KernelMemory.Run()},
+            {"Exit", () => Task.CompletedTask}
+        };
         public static async Task Run()
         {
-            Console.WriteLine("================LLamaSharp Examples (New Version)==================\n");
-
-            Console.WriteLine("Please input a number to choose an example to run:");
-            Console.WriteLine("0: Run a chat session without stripping the role names.");
-            Console.WriteLine("1: Run a chat session with the role names stripped.");
-            Console.WriteLine("2: Interactive mode chat by using executor.");
-            Console.WriteLine("3: Instruct mode chat by using executor.");
-            Console.WriteLine("4: Stateless mode chat by using executor.");
-            Console.WriteLine("5: Load and save chat session.");
-            Console.WriteLine("6: Load and save state of model and executor.");
-            Console.WriteLine("7: Get embeddings from LLama model.");
-            Console.WriteLine("8: Quantize the model.");
-            Console.WriteLine("9: Automatic conversation.");
-            Console.WriteLine("10: Constrain response to json format using grammar.");
-            Console.WriteLine("11: Semantic Kernel Prompt.");
-            Console.WriteLine("12: Semantic Kernel Chat.");
-            Console.WriteLine("13: Semantic Kernel Memory.");
-            Console.WriteLine("14: Coding Assistant.");
-            Console.WriteLine("15: Batch Decoding.");
-            Console.WriteLine("16: SK Kernel Memory.");
+            AnsiConsole.Write(new Rule("LLamaSharp Examples"));

             while (true)
             {
-                Console.Write("\nYour choice: ");
-                int choice = int.Parse(Console.ReadLine());
+                var choice = AnsiConsole.Prompt(
+                    new SelectionPrompt<string>()
+                        .Title("Please choose[green] an example[/] to run: ")
+                        .AddChoices(Examples.Keys));

-                if (choice == 0)
-                {
-                    await ChatSessionWithRoleName.Run();
-                }
-                else if (choice == 1)
-                {
-                    await ChatSessionStripRoleName.Run();
-                }
-                else if (choice == 2)
-                {
-                    await InteractiveModeExecute.Run();
-                }
-                else if (choice == 3)
-                {
-                    await InstructModeExecute.Run();
-                }
-                else if (choice == 4)
-                {
-                    await StatelessModeExecute.Run();
-                }
-                else if (choice == 5)
-                {
-                    await SaveAndLoadSession.Run();
-                }
-                else if (choice == 6)
-                {
-                    await LoadAndSaveState.Run();
-                }
-                else if (choice == 7)
-                {
-                    GetEmbeddings.Run();
-                }
-                else if (choice == 8)
-                {
-                    QuantizeModel.Run();
-                }
-                else if (choice == 9)
-                {
-                    await TalkToYourself.Run();
-                }
-                else if (choice == 10)
-                {
-                    await GrammarJsonResponse.Run();
-                }
-                else if (choice == 11)
-                {
-                    await SemanticKernelPrompt.Run();
-                }
-                else if (choice == 12)
-                {
-                    await SemanticKernelChat.Run();
-                }
-                else if (choice == 13)
-                {
-                    await SemanticKernelMemory.Run();
-                }
-                else if (choice == 14)
-                {
-                    await CodingAssistant.Run();
-                }
-                else if (choice == 15)
-                {
-                    await BatchedDecoding.Run();
-                }
-                else if (choice == 16)
-                {
-                    await KernelMemory.Run();
-                }
-                else
+                if (Examples.TryGetValue(choice, out var example))
                 {
-                    Console.WriteLine("Cannot parse your choice. Please select again.");
-                    continue;
+                    if (choice == "Exit")
+                    {
+                        break;
+                    }
+                    AnsiConsole.Write(new Rule(choice));
+                    await example();
                 }
-                break;
+
+                AnsiConsole.Clear();
             }
         }
     }
 }
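The Examples dictionary plus Spectre.Console's SelectionPrompt replaces both the hand-numbered menu and the seventeen-branch if/else chain, and it removes the crash the old int.Parse(Console.ReadLine()) caused on non-numeric input (an unhandled FormatException). Adding an example is now a one-line dictionary entry. A self-contained sketch of the same pattern (example bodies hypothetical; relies on .NET 6 implicit usings for System, System.Collections.Generic and System.Threading.Tasks):

    using Spectre.Console;

    var examples = new Dictionary<string, Func<Task>>
    {
        { "Say hello.", () => { Console.WriteLine("hello"); return Task.CompletedTask; } },
        { "Exit", () => Task.CompletedTask },
    };

    while (true)
    {
        // Arrow-key selection over the dictionary keys; nothing to parse,
        // so malformed input is impossible by construction.
        var choice = AnsiConsole.Prompt(
            new SelectionPrompt<string>()
                .Title("Please choose an [green]example[/] to run:")
                .AddChoices(examples.Keys));

        if (choice == "Exit")
            break;

        await examples[choice]();
    }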
4 changes: 2 additions & 2 deletions LLama.Web/Common/ModelOptions.cs
@@ -17,9 +17,9 @@ public class ModelOptions
     public int MaxInstances { get; set; }

     /// <summary>
-    /// Model context size (n_ctx)
+    /// Model context size (n_ctx). Null to use value from model.
     /// </summary>
-    public uint ContextSize { get; set; } = 512;
+    public uint? ContextSize { get; set; }

     /// <summary>
     /// the GPU that is used for scratch and small tensors
4 changes: 2 additions & 2 deletions LLama/Abstractions/IContextParams.cs
@@ -8,9 +8,9 @@ namespace LLama.Abstractions;
 public interface IContextParams
 {
     /// <summary>
-    /// Model context size (n_ctx)
+    /// Model context size (n_ctx). Null to use value from model file.
     /// </summary>
-    uint ContextSize { get; set; }
+    uint? ContextSize { get; set; }

     /// <summary>
     /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
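With ContextSize nullable on ModelOptions, IContextParams and ModelParams alike, leaving it unset now means "read n_ctx from the model file" rather than the old hard-coded default of 512. A hedged consumer-side sketch (the ModelParams constructor shape is assumed, not shown in this commit):

    using LLama.Common;

    // Explicit value: the context is fixed at 2048 tokens.
    var fixedCtx = new ModelParams("model.gguf") { ContextSize = 2048 };

    // Left null: the context length stored in the model itself is used.
    var fromModel = new ModelParams("model.gguf");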
2 changes: 1 addition & 1 deletion LLama/Common/FixedSizeQueue.cs
@@ -43,7 +43,7 @@ public FixedSizeQueue(int size)
     /// <param name="data"></param>
     public FixedSizeQueue(int size, IEnumerable<T> data)
     {
-#if !NETSTANDARD2_0
+#if NET6_0_OR_GREATER
         // Try to check the size without enumerating the entire IEnumerable. This may not be able to get the count,
         // in which case we'll have to check later
         if (data.TryGetNonEnumeratedCount(out var dataCount) && dataCount > size)
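Tightening the guard from !NETSTANDARD2_0 to NET6_0_OR_GREATER is more accurate: Enumerable.TryGetNonEnumeratedCount shipped in .NET 6, so the old condition would also have compiled the call on other non-netstandard2.0 targets (such as netstandard2.1) where the method does not exist. For reference, a small sketch of what the API does:

    using System;
    using System.Collections.Generic;
    using System.Linq;

    IEnumerable<int> eager = new List<int> { 1, 2, 3 };
    IEnumerable<int> lazy = Enumerable.Range(0, 100).Where(x => x % 2 == 0);

    // Succeeds without iterating: List<T> knows its count.
    Console.WriteLine(eager.TryGetNonEnumeratedCount(out var n1)); // True, n1 == 3

    // Declines rather than enumerate: a lazy query's count is unknown up front.
    Console.WriteLine(lazy.TryGetNonEnumeratedCount(out var n2));  // False, n2 == 0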
101 changes: 32 additions & 69 deletions LLama/Common/ModelParams.cs
@@ -12,105 +12,68 @@ namespace LLama.Common
 public record ModelParams
     : ILLamaParams
 {
-    /// <summary>
-    /// Model context size (n_ctx)
-    /// </summary>
-    public uint ContextSize { get; set; } = 512;
-    /// <summary>
-    /// the GPU that is used for scratch and small tensors
-    /// </summary>
+    /// <inheritdoc />
+    public uint? ContextSize { get; set; }
+
+    /// <inheritdoc />
     public int MainGpu { get; set; } = 0;

-    /// <summary>
-    /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
-    /// </summary>
+    /// <inheritdoc />
     public int GpuLayerCount { get; set; } = 20;
-    /// <summary>
-    /// Seed for the random number generator (seed)
-    /// </summary>
+
+    /// <inheritdoc />
     public uint Seed { get; set; } = 0xFFFFFFFF;
-    /// <summary>
-    /// Use f16 instead of f32 for memory kv (memory_f16)
-    /// </summary>
+
+    /// <inheritdoc />
     public bool UseFp16Memory { get; set; } = true;
-    /// <summary>
-    /// Use mmap for faster loads (use_mmap)
-    /// </summary>
+
+    /// <inheritdoc />
     public bool UseMemorymap { get; set; } = true;
-    /// <summary>
-    /// Use mlock to keep model in memory (use_mlock)
-    /// </summary>
+
+    /// <inheritdoc />
     public bool UseMemoryLock { get; set; }
-    /// <summary>
-    /// Compute perplexity over the prompt (perplexity)
-    /// </summary>
+
+    /// <inheritdoc />
     public bool Perplexity { get; set; }
-    /// <summary>
-    /// Model path (model)
-    /// </summary>
+
+    /// <inheritdoc />
     public string ModelPath { get; set; }

-    /// <summary>
-    /// List of LoRAs to apply
-    /// </summary>
+    /// <inheritdoc />
     public AdapterCollection LoraAdapters { get; set; } = new();

-    /// <summary>
-    /// base model path for the lora adapter (lora_base)
-    /// </summary>
+    /// <inheritdoc />
     public string LoraBase { get; set; } = string.Empty;

-    /// <summary>
-    /// Number of threads (null = autodetect) (n_threads)
-    /// </summary>
+    /// <inheritdoc />
     public uint? Threads { get; set; }

-    /// <summary>
-    /// Number of threads to use for batch processing (null = autodetect) (n_threads)
-    /// </summary>
+    /// <inheritdoc />
     public uint? BatchThreads { get; set; }

-    /// <summary>
-    /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
-    /// </summary>
+    /// <inheritdoc />
     public uint BatchSize { get; set; } = 512;

-    /// <summary>
-    /// Whether to use embedding mode. (embedding) Note that if this is set to true,
-    /// The LLamaModel won't produce text response anymore.
-    /// </summary>
+    /// <inheritdoc />
     public bool EmbeddingMode { get; set; }

-    /// <summary>
-    /// how split tensors should be distributed across GPUs.
-    /// </summary>
-    /// <remarks>"[ 3, 2 ]" will assign 60% of the data to GPU 0 and 40% to GPU 1.</remarks>
+    /// <inheritdoc />
     [JsonConverter(typeof(TensorSplitsCollectionConverter))]
     public TensorSplitsCollection TensorSplits { get; set; } = new();

-    /// <summary>
-    /// RoPE base frequency
-    /// </summary>
-    public float? RopeFrequencyBase { get; set; }
+    /// <inheritdoc />
+    public float? RopeFrequencyBase { get; set; }

-    /// <summary>
-    /// RoPE frequency scaling factor
-    /// </summary>
-    public float? RopeFrequencyScale { get; set; }
+    /// <inheritdoc />
+    public float? RopeFrequencyScale { get; set; }

-    /// <summary>
-    /// Use experimental mul_mat_q kernels
-    /// </summary>
-    public bool MulMatQ { get; set; }
+    /// <inheritdoc />
+    public bool MulMatQ { get; set; }

-    /// <summary>
-    /// Load vocab only (no weights)
-    /// </summary>
+    /// <inheritdoc />
     public bool VocabOnly { get; set; }

-    /// <summary>
-    /// The encoding to use to convert text for the model
-    /// </summary>
+    /// <inheritdoc />
     [JsonConverter(typeof(EncodingConverter))]
     public Encoding Encoding { get; set; } = Encoding.UTF8;

2 changes: 2 additions & 0 deletions LLama/Extensions/DictionaryExtensions.cs
@@ -9,6 +9,8 @@ public static TValue GetValueOrDefault<TKey, TValue>(this IReadOnlyDictionary<TK
     {
         return GetValueOrDefaultImpl(dictionary, key, defaultValue);
     }
+#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
+#error Target framework not supported!
 #endif

     internal static TValue GetValueOrDefaultImpl<TKey, TValue>(IReadOnlyDictionary<TKey, TValue> dictionary, TKey key, TValue defaultValue)
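This #elif/#error tail (repeated in EncodingExtensions.cs and IEnumerableExtensions.cs below) makes an unsupported target framework fail the build loudly instead of silently compiling without the polyfill. The shape of the pattern in isolation, with a hypothetical API:

    internal static class PolyfillSketch
    {
    #if NETSTANDARD2_0
        // netstandard2.0 lacks the BCL method, so supply a stand-in here.
        public static int ExampleApi() => 0;
    #elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
    #error Target framework not supported!
    #endif
        // On net6.0+ and netstandard2.1 neither branch is taken: the real
        // BCL method exists and no polyfill is compiled.
    }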
2 changes: 2 additions & 0 deletions LLama/Extensions/EncodingExtensions.cs
@@ -15,6 +15,8 @@ public static int GetCharCount(this Encoding encoding, ReadOnlySpan<byte> bytes)
     {
         return GetCharCountImpl(encoding, bytes);
     }
+#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
+#error Target framework not supported!
 #endif

     internal static int GetCharsImpl(Encoding encoding, ReadOnlySpan<byte> bytes, Span<char> output)
2 changes: 1 addition & 1 deletion LLama/Extensions/IContextParamsExtensions.cs
@@ -21,7 +21,7 @@ public static class IContextParamsExtensions
     public static void ToLlamaContextParams(this IContextParams @params, out LLamaContextParams result)
     {
         result = NativeApi.llama_context_default_params();
-        result.n_ctx = @params.ContextSize;
+        result.n_ctx = @params.ContextSize ?? 0;
         result.n_batch = @params.BatchSize;
         result.seed = @params.Seed;
         result.f16_kv = @params.UseFp16Memory;
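Because the native n_ctx field is not nullable, a null ContextSize is lowered to 0 here; per the doc-comment changes above, 0 is the convention for "use the value from the model file" (assumed llama.cpp behavior). A hedged round-trip sketch using the types in this diff (namespaces and the ModelParams constructor shape assumed):

    using System;
    using LLama.Abstractions;
    using LLama.Common;
    using LLama.Extensions;
    using LLama.Native;

    IContextParams p = new ModelParams("model.gguf") { ContextSize = null };
    p.ToLlamaContextParams(out LLamaContextParams native);

    // 0 at this point; the native layer substitutes the model's own context
    // length when the context is actually created.
    Console.WriteLine(native.n_ctx);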
2 changes: 2 additions & 0 deletions LLama/Extensions/IEnumerableExtensions.cs
@@ -10,6 +10,8 @@ public static IEnumerable<T> TakeLast<T>(this IEnumerable<T> source, int count)
     {
         return TakeLastImpl(source, count);
     }
+#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
+#error Target framework not supported!
 #endif

     internal static IEnumerable<T> TakeLastImpl<T>(IEnumerable<T> source, int count)