diff --git a/.vscode/launch.json b/.vscode/launch.json index 2fe42f98..e0402de4 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -7,17 +7,18 @@ { "type": "lldb", "request": "launch", - "name": "rllm-cpp orca", + "name": "rllm-cpp phi", "cwd": "rllm-cpp", + "preLaunchTask": "rllm-cpp: build", "program": "${workspaceFolder}/target/debug/rllm-cpp", "env": { - "RUST_LOG": "info,rllm=debug,aicirt=info" + "RUST_LOG": "info,tokenizers=error,rllm=trace,aicirt=info,llama_cpp_low=trace" }, "args": [ "--verbose", "--aicirt=${workspaceFolder}/target/release/aicirt", - "--model=https://huggingface.co/TheBloke/Orca-2-13B-GGUF/blob/main/orca-2-13b.Q8_0.gguf", - "--tokenizer=orca" + "--model=https://huggingface.co/TheBloke/phi-2-GGUF/blob/main/phi-2.Q8_0.gguf", + "--gpu-layers=100" ] } ] diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 00000000..94332b78 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,17 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "type": "cargo", + "command": "build", + "options": { + "cwd": "${workspaceFolder}/rllm-cpp" + }, + "problemMatcher": [ + "$rustc" + ], + "group": "build", + "label": "rllm-cpp: build" + } + ] +} \ No newline at end of file diff --git a/rllm-cpp/src/llamacpp/loader.rs b/rllm-cpp/src/llamacpp/loader.rs index 9d6d9c86..7b367154 100644 --- a/rllm-cpp/src/llamacpp/loader.rs +++ b/rllm-cpp/src/llamacpp/loader.rs @@ -1,7 +1,7 @@ use std::sync::Arc; -use rllm::{config::ModelMeta, LoaderArgs, Repo, RllmEngine}; use anyhow::{bail, Result}; +use rllm::{config::ModelMeta, LoaderArgs, Repo, RllmEngine}; use llama_cpp_low as cpp; @@ -44,16 +44,22 @@ fn do_load(args: &LoaderArgs, model_args: &mut CppLoaderArgs) -> Result 0 { log::warn!("disabling GPU (Intel macOS)"); mparams.n_gpu_layers = 0; } + #[cfg(target_arch = "aarch64")] + { + log::info!("disabling mmap"); + mparams.use_mmap = false; + } } let m = cpp::Model::from_file(file.to_str().unwrap(), mparams)?;