Llava api (SciSharp#563)

* Add llava_binaries, update all binaries to make the test * Llava API + LlavaTest Preliminary * First prototype of Load + Unit Test * Temporary run test on branch LlavaAPI * Disable Embed test to review the rest of the test * Restore Embedding test * Use BatchThread to eval image embeddings Test Threads default value to ensure it doesn't produce problems. * Rename test file * Update action versions * Test only one method, no release embeddings * Revert "Test only one method, no release embeddings" This reverts commit 264e176. * Correct API call * Only test llava related functionality * CUDA and CLBlast binaries * Restore build policy * Changes related with code review * Add SafeHandles * Set overwrite to upload-artifact@v4 * Revert to upload-artifact@v3 * revert to upload-artifact@v3
SignalRT · Mar 13, 2024 · 3b2836e · 3b2836e
1 parent ce4de7d
commit 3b2836e
Show file tree

Hide file tree

Showing 14 changed files with 441 additions and 17 deletions.
diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml
@@ -48,12 +48,12 @@ jobs:
           cd build
           cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
           cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
-      - uses: actions/upload-artifact@v4
+      - uses: actions/upload-artifact@v3
         with:
           path: ./build/libllama.so
           name: llama-bin-linux-${{ matrix.build }}-x64.so
       - name: Upload Llava
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v3
         with:
           path: ./build/examples/llava/libllava_shared.so
           name: llava-bin-linux-${{ matrix.build }}-x64.so
@@ -89,13 +89,13 @@ jobs:
           cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
 
       - name: Upload artifacts
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v3
         with:
           path: .\build\bin\Release\llama.dll
           name: llama-bin-win-${{ matrix.build }}-x64.dll
 
       - name: Upload Llava
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v3
         with:
           path: .\build\bin\Release\llava_shared.dll
           name: llava-bin-win-${{ matrix.build }}-x64.dll
@@ -169,20 +169,35 @@ jobs:
           ls -R
       - name: Upload artifacts (Windows)
         if: ${{ matrix.os == 'windows-latest' }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v3
         with:
           path: |
             .\build\bin\Release\llama.dll
             .\build\bin\Release\clblast.dll
           name: llama-bin-win-clblast-x64.dll
+      - name: Upload llava artifacts (Windows)
+        if: ${{ matrix.os == 'windows-latest' }}
+        uses: actions/upload-artifact@v3
+        with:
+          path: |
+            .\build\bin\Release\llava_shared.dll
+          name: llava-bin-win-clblast-x64.dll
       - name: Upload artifacts (linux)
         if: ${{ matrix.os == 'ubuntu-22.04' }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v3
         with:
           path: |
             ./build/libllama.so
             # ./build/libclblast.so
           name: llama-bin-linux-clblast-x64.so
+      - name: Upload llava artifacts (linux)
+        if: ${{ matrix.os == 'ubuntu-22.04' }}
+        uses: actions/upload-artifact@v3
+        with:
+          path: |
+            ./build/examples/llava/libllava_shared.so
+          name: llava-bin-linux-clblast-x64.so
+
 
   compile-cublas:
     name: Compile (cublas)
@@ -228,16 +243,29 @@ jobs:
 
       - name: Upload artifacts (Windows)
         if: ${{ matrix.os == 'windows-latest' }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v3
         with:
           path: .\build\bin\Release\llama.dll
           name: llama-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll
+      - name: Upload llava artifacts (Windows)
+        if: ${{ matrix.os == 'windows-latest' }}
+        uses: actions/upload-artifact@v3
+        with:
+          path: .\build\bin\Release\llava_shared.dll
+          name: llava-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll
       - name: Upload artifacts (Linux)
         if: ${{ matrix.os == 'ubuntu-20.04' }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v3
         with:
           path: ./build/libllama.so
           name: llama-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
+      - name: Upload llava artifacts (Linux)
+        if: ${{ matrix.os == 'ubuntu-20.04' }}
+        uses: actions/upload-artifact@v3
+        with:
+          path: ./build/examples/llava/libllava_shared.so
+          name: llava-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
+
 
   compile-macos:
     name: Compile (MacOS)
@@ -268,18 +296,18 @@ jobs:
           cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
           cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
       - name: Upload artifacts
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v3
         with:
           path: ./build/libllama.dylib
           name: llama-bin-osx-${{ matrix.build }}.dylib
       - name: Upload Llava
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v3
         with:
           path: ./build/examples/llava/libllava_shared.dylib
           name: llava-bin-osx-${{ matrix.build }}.dylib
       - name: Upload Metal
         if: ${{ matrix.build != 'x64' }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v3
         with:
           path: ./build/bin/ggml-metal.metal
           name: ggml-metal.metal
@@ -347,11 +375,12 @@ jobs:
           cp artifacts/llama-bin-linux-clblast-x64.so/libllama.so deps/clblast/
 
       - name: Upload artifacts
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v3
         with:
           path: deps/
           name: deps
 
+
       - name: Remove Artifacts
         uses: geekyeggo/delete-artifact@v2
         with:

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -28,14 +28,14 @@ jobs:
             os: windows-2019
             config: release
     steps:
-    - uses: actions/checkout@v3
-    - uses: actions/setup-dotnet@v3
+    - uses: actions/checkout@v4
+    - uses: actions/setup-dotnet@v4
       with:
         dotnet-version: | 
           7.0.x
           8.0.x
     - name: Cache Packages
-      uses: actions/cache@v3
+      uses: actions/cache@v4
       with:
         key: "unit_test_models"
         path: LLama.Unittest/Models

diff --git a/LLama.Unittest/Constants.cs b/LLama.Unittest/Constants.cs
@@ -3,5 +3,8 @@
     internal static class Constants
     {
         public static string ModelPath = "Models/llama-2-7b-chat.Q3_K_S.gguf";
+        public static string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf"; // LLaVA GGUF model file, relative to the test output directory
+        public static string LLavaMmpPath = "Models/mmproj-model-f16.gguf"; // file handed to LLavaWeights.LoadFromFile by the llava tests
+        public static string LLavaImage = "Models/extreme-ironing-taxi-610x427.jpg"; // sample image the llava tests pass to EmbedImage
     }
 }
diff --git a/LLama.Unittest/LLama.Unittest.csproj b/LLama.Unittest/LLama.Unittest.csproj
@@ -27,8 +27,9 @@
   </ItemGroup>
 
   <Target Name="DownloadContentFiles" BeforeTargets="Build">
-      <DownloadFile SourceUrl="https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.Q3_K_S.gguf" SkipUnchangedFiles="true">
-    </DownloadFile>
+    <DownloadFile SourceUrl="https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.Q3_K_S.gguf" SkipUnchangedFiles="true"></DownloadFile>
+    <DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf" DestinationFolder="Models" DestinationFileName="llava-v1.6-mistral-7b.Q3_K_XS.gguf" SkipUnchangedFiles="true"></DownloadFile>
+    <DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf" DestinationFolder="Models" DestinationFileName="mmproj-model-f16.gguf" SkipUnchangedFiles="true"></DownloadFile>
   </Target>
 
   <ItemGroup>
@@ -44,5 +45,14 @@
     <None Update="Models\llama-2-7b-chat.Q3_K_S.gguf">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </None>
+    <None Update="Models\llava-v1.6-mistral-7b.Q3_K_XS.gguf">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="Models\mmproj-model-f16.gguf">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="Models\extreme-ironing-taxi-610x427.jpg">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
   </ItemGroup>
 </Project>
diff --git a/LLama.Unittest/LLamaEmbedderTests.cs b/LLama.Unittest/LLamaEmbedderTests.cs
@@ -14,6 +14,8 @@ public LLamaEmbedderTests(ITestOutputHelper testOutputHelper)
         _testOutputHelper = testOutputHelper;
         var @params = new ModelParams(Constants.ModelPath)
         {
+            ContextSize = 4096,
+            Threads = 5,
             EmbeddingMode = true,
         };
         using var weights = LLamaWeights.LoadFromFile(@params);
@@ -31,6 +33,7 @@ private static float Dot(float[] a, float[] b)
         return a.Zip(b, (x, y) => x * y).Sum();
     }
 
+
     [Fact]
     public async Task EmbedCompare()
     {

diff --git a/LLama.Unittest/LLavaWeightsTests.cs b/LLama.Unittest/LLavaWeightsTests.cs
@@ -0,0 +1,53 @@
+using LLama.Common;
+using LLama.Native;
+
+namespace LLama.Unittest
+{
+    /// <summary>
+    /// Tests that an image can be embedded into a llama context through
+    /// <see cref="LLavaWeights"/>, both from a file name and from raw bytes.
+    /// </summary>
+    public sealed class LLavaWeightTests
+        : IDisposable
+    {
+        private readonly LLamaWeights _llamaWeights;
+        private readonly LLavaWeights _lLavaWeights;
+        private readonly LLamaContext _context;
+
+        public LLavaWeightTests()
+        {
+            // NOTE(review): this loads Constants.ModelPath, not Constants.LLavaModelPath - confirm that is intended.
+            var @params = new ModelParams(Constants.ModelPath)
+            {
+                // Llava models require a big context
+                ContextSize = 4096
+            };
+            _llamaWeights = LLamaWeights.LoadFromFile(@params);
+            _lLavaWeights = LLavaWeights.LoadFromFile(Constants.LLavaMmpPath);
+            _context = _llamaWeights.CreateContext(@params);
+        }
+
+        public void Dispose()
+        {
+            // Release in reverse order of creation: the context first, then the weights.
+            _context.Dispose();
+            _lLavaWeights.Dispose();
+            _llamaWeights.Dispose();
+        }
+
+        [Fact]
+        public void EmbedImageAsFileName()
+        {
+            int n_past = 0;
+            Assert.True(_lLavaWeights.EmbedImage(_context, Constants.LLavaImage, ref n_past));
+        }
+
+        [Fact]
+        public void EmbedImageAsBinary()
+        {
+            int n_past = 0;
+            byte[] image = System.IO.File.ReadAllBytes(Constants.LLavaImage);
+            Assert.True(_lLavaWeights.EmbedImage(_context, image, ref n_past));
+        }
+    }
+}
diff --git a/LLama.Unittest/Models/extreme-ironing-taxi-610x427.jpg b/LLama.Unittest/Models/extreme-ironing-taxi-610x427.jpg
diff --git a/LLama/LLamaSharp.Runtime.targets b/LLama/LLamaSharp.Runtime.targets
@@ -67,5 +67,51 @@
         <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
         <Link>runtimes/osx-x64/native/libllama.dylib</Link>
       </None>
+
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/llava_shared.dll">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/win-x64/native/noavx/llava_shared.dll</Link>
+      </None>
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx/llava_shared.dll">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/win-x64/native/avx/llava_shared.dll</Link>
+      </None>
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx2/llava_shared.dll">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/win-x64/native/avx2/llava_shared.dll</Link>
+      </None>
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx512/llava_shared.dll">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/win-x64/native/avx512/llava_shared.dll</Link>
+      </None>
+
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/libllava_shared.so">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/linux-x64/native/noavx/libllava_shared.so</Link>
+      </None>
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx/libllava_shared.so">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/linux-x64/native/avx/libllava_shared.so</Link>
+      </None>
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx2/libllava_shared.so">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/linux-x64/native/avx2/libllava_shared.so</Link>
+      </None>
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx512/libllava_shared.so">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/linux-x64/native/avx512/libllava_shared.so</Link>
+      </None>
+
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/osx-arm64/libllava_shared.dylib">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/osx-arm64/native/libllava_shared.dylib</Link>
+      </None>
+
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/osx-x64/libllava_shared.dylib">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/osx-x64/native/libllava_shared.dylib</Link>
+      </None>      
+
+
     </ItemGroup>
 </Project>
diff --git a/LLama/LLavaWeights.cs b/LLama/LLavaWeights.cs
@@ -0,0 +1,51 @@
+
+using System;
+using LLama.Native;
+
+namespace LLama;
+
+/// <summary>
+/// Managed owner of a native llava model handle.
+/// </summary>
+public sealed class LLavaWeights : IDisposable
+{
+    /// <summary>Native handle that every member of this class forwards to.</summary>
+    public SafeLlavaModelHandle NativeHandle { get; }
+
+    internal LLavaWeights(SafeLlavaModelHandle weights) => NativeHandle = weights;
+
+    /// <summary>
+    /// Load llava weights through the native handle loader.
+    /// </summary>
+    /// <param name="mmProject">Path forwarded to the native loader</param>
+    /// <returns>A new instance wrapping the loaded handle</returns>
+    public static LLavaWeights LoadFromFile(string mmProject)
+    {
+        // NOTE(review): the literal 1 is presumably a verbosity level - confirm.
+        var handle = SafeLlavaModelHandle.LoadFromFile(mmProject, 1);
+        return new LLavaWeights(handle);
+    }
+
+    /// <summary>
+    /// Embed the image from file into llama context
+    /// </summary>
+    /// <param name="ctxLlama">Llama context forwarded to the native handle</param>
+    /// <param name="Image">Image file name</param>
+    /// <param name="n_past">Counter passed by reference to the native handle</param>
+    /// <returns>The value returned by the native handle</returns>
+    public bool EmbedImage(LLamaContext ctxLlama, string Image, ref int n_past)
+        => NativeHandle.EmbedImage(ctxLlama, Image, ref n_past);
+
+    /// <summary>
+    /// Embed the image from binary into llama context.
+    /// </summary>
+    /// <param name="ctxLlama">Llama context forwarded to the native handle</param>
+    /// <param name="Image">Image content as a byte array</param>
+    /// <param name="n_past">Counter passed by reference to the native handle</param>
+    /// <returns>The value returned by the native handle</returns>
+    public bool EmbedImage(LLamaContext ctxLlama, byte[] Image, ref int n_past)
+        => NativeHandle.EmbedImage(ctxLlama, Image, ref n_past);
+
+    /// <summary>Dispose the native handle.</summary>
+    public void Dispose() => NativeHandle.Dispose();
+}
diff --git a/LLama/Native/LLavaImageEmbed.cs b/LLama/Native/LLavaImageEmbed.cs
@@ -0,0 +1,13 @@
+using System.Runtime.InteropServices;
+
+namespace LLama.Native;
+
+/// <summary>
+/// LLaVa image embeddings, declared with sequential layout.
+/// </summary>
+[StructLayout(LayoutKind.Sequential)]
+public unsafe struct LLavaImageEmbed
+{
+    public float* embed;      // pointer to the embedding values
+    public int n_image_pos;   // NOTE(review): presumably the number of image positions - confirm
+}