feat: Add LlamafileClient #1

Merged
merged 2 commits on Sep 19, 2024
2 changes: 2 additions & 0 deletions src/Cellm/Models/ClientFactory.cs
@@ -1,5 +1,6 @@
using Cellm.Models.Anthropic;
using Cellm.Models.Google;
using Cellm.Models.Llamafile;
using Cellm.Models.OpenAi;
using Cellm.Services;

@@ -15,6 +16,7 @@ public IClient GetClient(string modelProvider)
"anthropic" => ServiceLocator.Get<AnthropicClient>(),
"google" => ServiceLocator.Get<GoogleClient>(),
"openai" => ServiceLocator.Get<OpenAiClient>(),
"llamafile" => ServiceLocator.Get<LlamafileClient>(),
_ => throw new ArgumentException($"Unsupported client type: {modelProvider}")
};
}
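For context, a hypothetical caller would reach the new provider through the same factory path as the hosted ones. This is only a sketch, not part of the PR: it assumes ServiceLocator.Get can resolve IClientFactory and that prompt is an already-built Cellm Prompt.

// Hypothetical sketch: "llamafile" routes to the LlamafileClient registered in ServiceLocator below.
var client = ServiceLocator.Get<IClientFactory>().GetClient("llamafile");
var answer = await client.Send(prompt, null, null); // Send signature as implemented by LlamafileClient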
47 changes: 47 additions & 0 deletions src/Cellm/Models/Llamafile/AsyncLazy.cs
@@ -0,0 +1,47 @@
using System.Runtime.CompilerServices;

/// <summary>
/// Provides support for asynchronous lazy initialization. This type is fully thread-safe.
/// </summary>
/// <typeparam name="T">The type of object that is being asynchronously initialized.</typeparam>
public sealed class AsyncLazy<T>
{
/// <summary>
/// The underlying lazy task.
/// </summary>
private readonly Lazy<Task<T>> instance;

/// <summary>
/// Initializes a new instance of the <see cref="AsyncLazy{T}"/> class.
/// </summary>
/// <param name="factory">The delegate that is invoked on a background thread to produce the value when it is needed.</param>
public AsyncLazy(Func<T> factory)
{
instance = new Lazy<Task<T>>(() => Task.Run(factory));
}

/// <summary>
/// Initializes a new instance of the <see cref="AsyncLazy{T}"/> class.
/// </summary>
/// <param name="factory">The asynchronous delegate that is invoked on a background thread to produce the value when it is needed.</param>
public AsyncLazy(Func<Task<T>> factory)
{
instance = new Lazy<Task<T>>(() => Task.Run(factory));
}

/// <summary>
/// Asynchronous infrastructure support. This method permits instances of <see cref="AsyncLazy{T}"/> to be awaited.
/// </summary>
public TaskAwaiter<T> GetAwaiter()
{
return instance.Value.GetAwaiter();
}

/// <summary>
/// Starts the asynchronous initialization, if it has not already started.
/// </summary>
public void Start()
{
_ = instance.Value;
}
}
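A quick illustration of the semantics, with illustrative names that are not part of the PR: the factory runs at most once, and every await observes the same task.

// Illustrative sketch; assumes an async context.
var modelPath = new AsyncLazy<string>(async () =>
{
    await Task.Delay(1000); // stand-in for an expensive download
    return @"C:\models\model.gguf";
});

modelPath.Start();            // optional: kick off initialization without awaiting
var first = await modelPath;  // joins the single in-flight task
var second = await modelPath; // completes immediately with the cached result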
90 changes: 90 additions & 0 deletions src/Cellm/Models/Llamafile/LLamafileProcessManager.cs
@@ -0,0 +1,90 @@
using System.Diagnostics;
using System.Runtime.InteropServices;

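/// <summary>
/// Ties spawned Llamafile server processes to a Windows Job Object configured with
/// kill-on-close, so they are terminated together with the host process.
/// </summary>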
public class LLamafileProcessManager
{
[DllImport("kernel32.dll", CharSet = CharSet.Unicode)]
static extern IntPtr CreateJobObject(IntPtr a, string lpName);

[DllImport("kernel32.dll")]
static extern bool AssignProcessToJobObject(IntPtr job, IntPtr process);

[DllImport("kernel32.dll")]
static extern bool SetInformationJobObject(IntPtr hJob, JobObjectInfoType infoType, IntPtr lpJobObjectInfo, uint cbJobObjectInfoLength);

[StructLayout(LayoutKind.Sequential)]
struct JOBOBJECT_BASIC_LIMIT_INFORMATION
{
public Int64 PerProcessUserTimeLimit;
public Int64 PerJobUserTimeLimit;
public UInt32 LimitFlags;
public UIntPtr MinimumWorkingSetSize;
public UIntPtr MaximumWorkingSetSize;
public UInt32 ActiveProcessLimit;
public UIntPtr Affinity;
public UInt32 PriorityClass;
public UInt32 SchedulingClass;
}

[StructLayout(LayoutKind.Sequential)]
struct JOBOBJECT_EXTENDED_LIMIT_INFORMATION
{
public JOBOBJECT_BASIC_LIMIT_INFORMATION BasicLimitInformation;
public IO_COUNTERS IoInfo;
public UIntPtr ProcessMemoryLimit;
public UIntPtr JobMemoryLimit;
public UIntPtr PeakProcessMemoryUsed;
public UIntPtr PeakJobMemoryUsed;
}

[StructLayout(LayoutKind.Sequential)]
struct IO_COUNTERS
{
public UInt64 ReadOperationCount;
public UInt64 WriteOperationCount;
public UInt64 OtherOperationCount;
public UInt64 ReadTransferCount;
public UInt64 WriteTransferCount;
public UInt64 OtherTransferCount;
}

enum JobObjectInfoType
{
AssociateCompletionPortInformation = 7,
BasicLimitInformation = 2,
BasicUIRestrictions = 4,
EndOfJobTimeInformation = 6,
ExtendedLimitInformation = 9,
SecurityLimitInformation = 5,
GroupInformation = 11
}

private IntPtr _jobObject;

public LLamafileProcessManager()
{
_jobObject = CreateJobObject(IntPtr.Zero, string.Empty);

var info = new JOBOBJECT_BASIC_LIMIT_INFORMATION
{
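// 0x2000 = JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE: every process in the job is killed when the last handle to the job closes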
LimitFlags = 0x2000
};

var extendedInfo = new JOBOBJECT_EXTENDED_LIMIT_INFORMATION
{
BasicLimitInformation = info
};

int length = Marshal.SizeOf(typeof(JOBOBJECT_EXTENDED_LIMIT_INFORMATION));
IntPtr extendedInfoPtr = Marshal.AllocHGlobal(length);
Marshal.StructureToPtr(extendedInfo, extendedInfoPtr, false);

SetInformationJobObject(_jobObject, JobObjectInfoType.ExtendedLimitInformation, extendedInfoPtr, (uint)length);
Marshal.FreeHGlobal(extendedInfoPtr);
}

public void AssignProcessToCellm(Process process)
{
AssignProcessToJobObject(_jobObject, process.Handle);
}
}
158 changes: 158 additions & 0 deletions src/Cellm/Models/Llamafile/LlamafileClient.cs
@@ -0,0 +1,158 @@
using System.Diagnostics;
using Cellm.AddIn;
using Cellm.AddIn.Exceptions;
using Cellm.AddIn.Prompts;
using Cellm.Models.OpenAi;
using Microsoft.Extensions.Options;

namespace Cellm.Models.Llamafile;

internal class LlamafileClient : IClient
{
private readonly AsyncLazy<string> _llamafilePath;
private readonly AsyncLazy<string> _llamafileModelPath;
private readonly AsyncLazy<Process> _llamafileProcess;

private readonly CellmConfiguration _cellmConfiguration;
private readonly LlamafileConfiguration _llamafileConfiguration;
private readonly OpenAiConfiguration _openAiConfiguration;

private readonly IClient _openAiClient;
private readonly HttpClient _httpClient;
private readonly LLamafileProcessManager _llamafileProcessManager;

public LlamafileClient(IOptions<CellmConfiguration> cellmConfiguration,
IOptions<LlamafileConfiguration> llamafileConfiguration,
IOptions<OpenAiConfiguration> openAiConfiguration,
IClientFactory clientFactory,
HttpClient httpClient,
LLamafileProcessManager llamafileProcessManager)
{
_cellmConfiguration = cellmConfiguration.Value;
_llamafileConfiguration = llamafileConfiguration.Value;
_openAiConfiguration = openAiConfiguration.Value;
_openAiClient = clientFactory.GetClient("openai");
_httpClient = httpClient;
_llamafileProcessManager = llamafileProcessManager;

_llamafilePath = new AsyncLazy<string>(async () =>
{
return await DownloadFile(_llamafileConfiguration.LlamafileUrl, "Llamafile.exe", httpClient);
});

_llamafileModelPath = new AsyncLazy<string>(async () =>
{
return await DownloadFile(_llamafileConfiguration.Models[_llamafileConfiguration.DefaultModel], $"Llamafile-{_llamafileConfiguration.DefaultModel}", httpClient);
});

_llamafileProcess = new AsyncLazy<Process>(async () =>
{
return await StartProcess();
});
}

public async Task<Prompt> Send(Prompt prompt, string? provider, string? model)
{
await _llamafilePath;
await _llamafileModelPath;
await _llamafileProcess;
return await _openAiClient.Send(prompt, provider, model);
}

private async Task<Process> StartProcess()
{
var processStartInfo = new ProcessStartInfo(await _llamafilePath);
processStartInfo.Arguments += $"-m {await _llamafileModelPath} ";
processStartInfo.Arguments += $"--port {_llamafileConfiguration.Port} ";

if (!_cellmConfiguration.Debug)
{
processStartInfo.Arguments += "--disable-browser ";
}

if (_llamafileConfiguration.Gpu)
{
processStartInfo.Arguments += $"-ngl {_llamafileConfiguration.GpuLayers} ";
}

var process = Process.Start(processStartInfo) ?? throw new CellmException("Failed to start Llamafile server");

try
{
await WaitForLlamafile(process);
_llamafileProcessManager.AssignProcessToCellm(process);
return process;
}
catch
{
process.Kill();
throw;
}
}

private static async Task<string> DownloadFile(Uri uri, string filename, HttpClient httpClient)
{
var filePath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), nameof(Cellm), filename);
Directory.CreateDirectory(Path.GetDirectoryName(filePath) ?? throw new CellmException("Failed to create Llamafile path"));

if (File.Exists(filePath))
{
return filePath;
}

var filePathPart = filePath + ".part";

if (File.Exists(filePathPart))
{
File.Delete(filePathPart);
}

var response = await httpClient.GetAsync(uri, HttpCompletionOption.ResponseHeadersRead);
response.EnsureSuccessStatusCode();

using (var fileStream = File.Create(filePathPart))
using (var httpStream = await response.Content.ReadAsStreamAsync())
{
await httpStream.CopyToAsync(fileStream).ConfigureAwait(false);
}

File.Move(filePathPart, filePath);

return filePath;
}

private async Task WaitForLlamafile(Process process)
{
var startTime = DateTime.UtcNow;

while ((DateTime.UtcNow - startTime).TotalSeconds < 30) // Max 30 seconds timeout
{
if (process.HasExited)
{
throw new CellmException($"Failed to run Llamafile. Exit code: {process.ExitCode}");
}

try
{
// Use a fresh 1-second timeout per attempt; a single token shared across the loop would
// expire after the first second and cancel every later health check.
using var cancellationTokenSource = new CancellationTokenSource(TimeSpan.FromSeconds(1));
var response = await _httpClient.GetAsync($"{_openAiConfiguration.BaseAddress}/health", cancellationTokenSource.Token);
if (response.StatusCode == System.Net.HttpStatusCode.OK)
{
return; // Server is healthy
}
}
catch (TaskCanceledException)
{
}
catch (HttpRequestException)
{
}

await Task.Delay(500); // Wait for 500ms before next attempt
}

throw new CellmException("Timeout waiting for Llamafile server to be ready");
}
}
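Note that the client has no Llamafile-specific API surface of its own: Send awaits the three lazy initializers (binary download, model download, server start) and then delegates to the existing OpenAI-compatible client, which reaches the local server through the OpenAiConfiguration.BaseAddress override shown in the appsettings file below. Downloaded files are cached under the user's ApplicationData folder in a Cellm subdirectory and reused on later runs.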

25 changes: 25 additions & 0 deletions src/Cellm/Models/Llamafile/LlamafileConfiguration.cs
@@ -0,0 +1,25 @@
namespace Cellm.Models.Llamafile;

internal class LlamafileConfiguration
{
public Uri LlamafileUrl { get; init; }

public Dictionary<string, Uri> Models { get; init; }

public string DefaultModel { get; init; }

public ushort Port { get; init; }

public bool Gpu { get; init; }

public int GpuLayers { get; init; }

public LlamafileConfiguration()
{
LlamafileUrl = default!;
Models = default!;
DefaultModel = default!;
Gpu = false;
GpuLayers = 999;
}
}
7 changes: 6 additions & 1 deletion src/Cellm/Services/ServiceLocator.cs
@@ -3,6 +3,7 @@
using Cellm.Models;
using Cellm.Models.Anthropic;
using Cellm.Models.Google;
using Cellm.Models.Llamafile;
using Cellm.Models.OpenAi;
using Cellm.Services.Configuration;
using ExcelDna.Integration;
@@ -42,6 +43,7 @@ private static IServiceCollection ConfigureServices(IServiceCollection services)
.Configure<AnthropicConfiguration>(configuration.GetRequiredSection(nameof(AnthropicConfiguration)))
.Configure<GoogleConfiguration>(configuration.GetRequiredSection(nameof(GoogleConfiguration)))
.Configure<OpenAiConfiguration>(configuration.GetRequiredSection(nameof(OpenAiConfiguration)))
.Configure<LlamafileConfiguration>(configuration.GetRequiredSection(nameof(LlamafileConfiguration)))
.Configure<RateLimiterConfiguration>(configuration.GetRequiredSection(nameof(RateLimiterConfiguration)))
.Configure<CircuitBreakerConfiguration>(configuration.GetRequiredSection(nameof(CircuitBreakerConfiguration)))
.Configure<RetryConfiguration>(configuration.GetRequiredSection(nameof(RetryConfiguration)))
@@ -83,7 +85,8 @@ private static IServiceCollection ConfigureServices(IServiceCollection services)
.AddSingleton<IClientFactory, ClientFactory>()
.AddSingleton<IClient, Client>()
.AddSingleton<ICache, Cache>()
.AddSingleton<ISerde, Serde>();
.AddSingleton<ISerde, Serde>()
.AddSingleton<LLamafileProcessManager>();

// Model Providers
var rateLimiterConfiguration = configuration.GetRequiredSection(nameof(RateLimiterConfiguration)).Get<RateLimiterConfiguration>()
@@ -125,6 +128,8 @@ private static IServiceCollection ConfigureServices(IServiceCollection services)
openAiHttpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {openAiConfiguration.ApiKey}");
}).AddResilienceHandler("OpenAiResiliencePipeline", resiliencePipelineConfigurator.ConfigureResiliencePipeline);

services.AddSingleton<LlamafileClient>();

return services;
}
}
18 changes: 18 additions & 0 deletions src/Cellm/appsettings.Local.Llamafile.GPU.json
@@ -0,0 +1,18 @@
{
"LlamafileConfiguration": {
"LlamafileUrl": "https://github.com/Mozilla-Ocho/llamafile/releases/download/0.8.13/llamafile-0.8.13",
"DefaultModel": "qwen-2.5-3b-instruct-q6-k-l",
"Models": {
"qwen-2.5-3b-instruct-q6-k-l": "https://huggingface.co/bartowski/Qwen2.5-3B-Instruct-GGUF/resolve/main/Qwen2.5-3B-Instruct-Q6_K_L.gguf"
},
"Port": 22195,
"GPU": true,
"GpuLayers": 999
},
"OpenAiConfiguration": {
"BaseAddress": "http://localhost:22195"
},
"CellmConfiguration": {
"DefaultModelProvider": "Llamafile"
}
}
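Only a GPU profile is included in this diff. A CPU-only counterpart would presumably be the same file with the GPU entries changed or dropped (LlamafileConfiguration already defaults Gpu to false); the following is a hypothetical sketch, not a file from this PR:

{
  "LlamafileConfiguration": {
    "LlamafileUrl": "https://github.com/Mozilla-Ocho/llamafile/releases/download/0.8.13/llamafile-0.8.13",
    "DefaultModel": "qwen-2.5-3b-instruct-q6-k-l",
    "Models": {
      "qwen-2.5-3b-instruct-q6-k-l": "https://huggingface.co/bartowski/Qwen2.5-3B-Instruct-GGUF/resolve/main/Qwen2.5-3B-Instruct-Q6_K_L.gguf"
    },
    "Port": 22195,
    "GPU": false
  },
  "OpenAiConfiguration": {
    "BaseAddress": "http://localhost:22195"
  },
  "CellmConfiguration": {
    "DefaultModelProvider": "Llamafile"
  }
}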