remove support for extras (flash attention, IQ quants)
amakropoulos committed Dec 2, 2024
1 parent f488839 commit 7a2c438
Showing 6 changed files with 4 additions and 49 deletions.
7 changes: 0 additions & 7 deletions Editor/LLMEditor.cs
@@ -111,7 +111,6 @@ public override void AddModelSettings(SerializedObject llmScriptSO)
if (llmScriptSO.FindProperty("advancedOptions").boolValue)
{
attributeClasses.Add(typeof(ModelAdvancedAttribute));
if (LLMUnitySetup.FullLlamaLib) attributeClasses.Add(typeof(ModelExtrasAttribute));
}
ShowPropertiesOfClass("", llmScriptSO, attributeClasses, false);
Space();
@@ -445,18 +444,12 @@ private void CopyToClipboard(string text)
te.Copy();
}

public void AddExtrasToggle()
{
if (ToggleButton("Use extras", LLMUnitySetup.FullLlamaLib)) LLMUnitySetup.SetFullLlamaLib(!LLMUnitySetup.FullLlamaLib);
}

public override void AddOptionsToggles(SerializedObject llmScriptSO)
{
AddDebugModeToggle();

EditorGUILayout.BeginHorizontal();
AddAdvancedOptionsToggle(llmScriptSO);
AddExtrasToggle();
EditorGUILayout.EndHorizontal();
Space();
}
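After this change the options row only hosts the debug-mode and advanced-options toggles; reassembled from the unchanged lines above, the resulting method reads:

```csharp
public override void AddOptionsToggles(SerializedObject llmScriptSO)
{
    AddDebugModeToggle();

    EditorGUILayout.BeginHorizontal();
    AddAdvancedOptionsToggle(llmScriptSO);   // the "Use extras" toggle is no longer added here
    EditorGUILayout.EndHorizontal();
    Space();
}
```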
5 changes: 1 addition & 4 deletions README.md
@@ -499,8 +499,7 @@ Save the scene, run and enjoy!
### LLM Settings

- `Show/Hide Advanced Options` Toggle to show/hide advanced options from below
- `Log Level` select how verbose the log messages are
- `Use extras` select to install and allow the use of extra features (flash attention and IQ quants)
- `Log Level` select how verbose the log messages are

#### 💻 Setup Settings

@@ -551,7 +550,6 @@ If the user's GPU is not supported, the LLM will fall back to the CPU
- `Chat Template` the chat template being used for the LLM
- `Lora` the path of the LoRAs being used (relative to the Assets/StreamingAssets folder)
- `Lora Weights` the weights of the LoRAs being used
- `Flash Attention` click to use flash attention in the model (if `Use extras` is enabled)

</details>

@@ -566,7 +564,6 @@ If the user's GPU is not supported, the LLM will fall back to the CPU

- `Show/Hide Advanced Options` Toggle to show/hide advanced options from below
- `Log Level` select how verbose the log messages are
- `Use extras` select to install and allow the use of extra features (flash attention and IQ quants)

#### 💻 Setup Settings
<div>
3 changes: 0 additions & 3 deletions Runtime/LLM.cs
@@ -61,8 +61,6 @@ public class LLM : MonoBehaviour
[ModelAdvanced] public string lora = "";
/// <summary> the weights of the LORA models being used.</summary>
[ModelAdvanced] public string loraWeights = "";
/// <summary> enable use of flash attention </summary>
[ModelExtras] public bool flashAttention = false;

/// <summary> API key to use for the server (optional) </summary>
public string APIKey;
@@ -435,7 +433,6 @@ protected virtual string GetLlamaccpArguments()
if (numThreadsToUse > 0) arguments += $" -t {numThreadsToUse}";
arguments += loraArgument;
arguments += $" -ngl {numGPULayers}";
if (LLMUnitySetup.FullLlamaLib && flashAttention) arguments += $" --flash-attn";
if (remote)
{
arguments += $" --port {port} --host 0.0.0.0";
2 changes: 0 additions & 2 deletions Runtime/LLMBuilder.cs
@@ -161,8 +161,6 @@ public static void HideLibraryPlatforms(string platform)
foreach (string platformPrefix in platforms)
{
bool move = sourceName.StartsWith(platformPrefix);
move = move || (sourceName.Contains("cuda") && !sourceName.Contains("full") && LLMUnitySetup.FullLlamaLib);
move = move || (sourceName.Contains("cuda") && sourceName.Contains("full") && !LLMUnitySetup.FullLlamaLib);
if (move)
{
string target = Path.Combine(BuildTempDir, sourceName);
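With the cuda/full special-casing removed, the platform prefix alone decides whether a library folder is set aside during a build; a minimal sketch of the remaining filter (the actual move of the folder happens below the lines shown in this hunk):

```csharp
// Simplified filter after this commit: a library folder is staged into the build temp
// directory only when its name starts with one of the platform prefixes being hidden.
foreach (string platformPrefix in platforms)
{
    bool move = sourceName.StartsWith(platformPrefix);
    if (move)
    {
        string target = Path.Combine(BuildTempDir, sourceName);
        // ... the folder is then moved to `target` (call not shown in this hunk)
    }
}
```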
15 changes: 3 additions & 12 deletions Runtime/LLMLib.cs
@@ -462,18 +462,9 @@ public static List<string> PossibleArchitectures(bool gpu = false)
{
if (gpu)
{
if (LLMUnitySetup.FullLlamaLib)
{
architectures.Add("cuda-cu12.2.0-full");
architectures.Add("cuda-cu11.7.1-full");
architectures.Add("hip-full");
}
else
{
architectures.Add("cuda-cu12.2.0");
architectures.Add("cuda-cu11.7.1");
architectures.Add("hip");
}
architectures.Add("cuda-cu12.2.0");
architectures.Add("cuda-cu11.7.1");
architectures.Add("hip");
architectures.Add("vulkan");
}
if (has_avx512) architectures.Add("avx512");
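The GPU branch now always advertises the same variants, so the returned list is a flat priority order (CUDA 12, CUDA 11, HIP, Vulkan, then the CPU fallbacks); a sketch of how such a list is typically consumed — the TryLoadLibrary helper here is hypothetical, not part of the shown code:

```csharp
// Hypothetical consumer: try each architecture in priority order until one loads.
IntPtr LoadBestLibrary(bool gpu)
{
    foreach (string arch in PossibleArchitectures(gpu))
    {
        IntPtr handle = TryLoadLibrary(arch);   // hypothetical helper; returns IntPtr.Zero on failure
        if (handle != IntPtr.Zero) return handle;
    }
    return IntPtr.Zero;                         // no native variant could be loaded
}
```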
21 changes: 0 additions & 21 deletions Runtime/LLMUnitySetup.cs
@@ -59,7 +59,6 @@ public class LocalRemoteAttribute : PropertyAttribute {}
public class RemoteAttribute : PropertyAttribute {}
public class LocalAttribute : PropertyAttribute {}
public class ModelAttribute : PropertyAttribute {}
public class ModelExtrasAttribute : PropertyAttribute {}
public class ChatAttribute : PropertyAttribute {}
public class LLMUnityAttribute : PropertyAttribute {}

@@ -112,8 +111,6 @@ public class LLMUnitySetup
public static string libraryPath = GetAssetPath(libraryName);
/// <summary> LlamaLib url </summary>
public static string LlamaLibURL = $"{LlamaLibReleaseURL}/{libraryName}.zip";
/// <summary> LlamaLib extension url </summary>
public static string LlamaLibExtensionURL = $"{LlamaLibReleaseURL}/{libraryName}-full.zip";
/// <summary> LLMnity store path </summary>
public static string LLMUnityStore = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), "LLMUnity");
/// <summary> Model download path </summary>
@@ -153,8 +150,6 @@ public class LLMUnitySetup
/// \cond HIDE
[LLMUnity] public static DebugModeType DebugMode = DebugModeType.All;
static string DebugModeKey = "DebugMode";
public static bool FullLlamaLib = false;
static string FullLlamaLibKey = "FullLlamaLib";
static List<Callback<string>> errorCallbacks = new List<Callback<string>>();
static readonly object lockObject = new object();
static Dictionary<string, Task> androidExtractTasks = new Dictionary<string, Task>();
@@ -189,7 +184,6 @@ public static void LogError(string message)
static void LoadPlayerPrefs()
{
DebugMode = (DebugModeType)PlayerPrefs.GetInt(DebugModeKey, (int)DebugModeType.All);
FullLlamaLib = PlayerPrefs.GetInt(FullLlamaLibKey, 0) == 1;
}

public static void SetDebugMode(DebugModeType newDebugMode)
@@ -200,18 +194,6 @@ public static void SetDebugMode(DebugModeType newDebugMode)
PlayerPrefs.Save();
}

#if UNITY_EDITOR
public static void SetFullLlamaLib(bool value)
{
if (FullLlamaLib == value) return;
FullLlamaLib = value;
PlayerPrefs.SetInt(FullLlamaLibKey, value ? 1 : 0);
PlayerPrefs.Save();
_ = DownloadLibrary();
}

#endif

public static string GetLibraryName(string version)
{
return $"undreamai-{version}-llamacpp";
@@ -452,9 +434,6 @@ static async Task DownloadLibrary()

// setup LlamaLib in StreamingAssets
await DownloadAndExtractInsideDirectory(LlamaLibURL, libraryPath, setupDir);

// setup LlamaLib extras in StreamingAssets
if (FullLlamaLib) await DownloadAndExtractInsideDirectory(LlamaLibExtensionURL, libraryPath, setupDir);
}
catch (Exception e)
{
