Skip to content

Commit

Permalink
try a hack for M1
Browse files (browse the repository at this point in the history)
  • Loading branch information
mmoskal committed Feb 8, 2024
1 parent 7de3e0b commit 963dffa
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 8 deletions.
9 changes: 5 additions & 4 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,18 @@
{
"type": "lldb",
"request": "launch",
"name": "rllm-cpp orca",
"name": "rllm-cpp phi",
"cwd": "rllm-cpp",
"preLaunchTask": "rllm-cpp: build",
"program": "${workspaceFolder}/target/debug/rllm-cpp",
"env": {
"RUST_LOG": "info,rllm=debug,aicirt=info"
"RUST_LOG": "info,tokenizers=error,rllm=trace,aicirt=info,llama_cpp_low=trace"
},
"args": [
"--verbose",
"--aicirt=${workspaceFolder}/target/release/aicirt",
"--model=https://huggingface.co/TheBloke/Orca-2-13B-GGUF/blob/main/orca-2-13b.Q8_0.gguf",
"--tokenizer=orca"
"--model=https://huggingface.co/TheBloke/phi-2-GGUF/blob/main/phi-2.Q8_0.gguf",
"--gpu-layers=100"
]
}
]
Expand Down
17 changes: 17 additions & 0 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"version": "2.0.0",
"tasks": [
{
"type": "cargo",
"command": "build",
"options": {
"cwd": "${workspaceFolder}/rllm-cpp"
},
"problemMatcher": [
"$rustc"
],
"group": "build",
"label": "rllm-cpp: build"
}
]
}
14 changes: 10 additions & 4 deletions rllm-cpp/src/llamacpp/loader.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::sync::Arc;

use rllm::{config::ModelMeta, LoaderArgs, Repo, RllmEngine};
use anyhow::{bail, Result};
use rllm::{config::ModelMeta, LoaderArgs, Repo, RllmEngine};

use llama_cpp_low as cpp;

Expand Down Expand Up @@ -44,16 +44,22 @@ fn do_load(args: &LoaderArgs, model_args: &mut CppLoaderArgs) -> Result<cpp::Mod

let mut mparams = cpp::ModelParams::default();
// TODO: make this configurable
mparams.set_split_mode(cpp::SplitMode::None);
mparams.set_split_mode(cpp::SplitMode::Layer);
mparams.n_gpu_layers = model_args.n_gpu_layers.unwrap_or(0) as i32;
log::info!("{} layer(s) offloaded to GPU", mparams.n_gpu_layers);
// don't GPU offload on Intel macs - it just fails there
#[cfg(all(target_os = "macos", target_arch = "x86_64"))]
#[cfg(target_os = "macos")]
{
// don't GPU offload on Intel macs - it just fails there
#[cfg(target_arch = "x86_64")]
if mparams.n_gpu_layers > 0 {
log::warn!("disabling GPU (Intel macOS)");
mparams.n_gpu_layers = 0;
}
#[cfg(target_arch = "aarch64")]
{
log::info!("disabling mmap");
mparams.use_mmap = false;
}
}

let m = cpp::Model::from_file(file.to_str().unwrap(), mparams)?;
Expand Down

0 comments on commit 963dffa

Please sign in to comment.