
Commit 14ae535

capture llama.cpp logs

mmoskal committed Jan 23, 2024
1 parent 24bd47e
Showing 6 changed files with 38 additions and 1 deletion.
1 change: 1 addition & 0 deletions Cargo.lock

Generated file; diff not rendered.

1 change: 0 additions & 1 deletion aici_abi/src/toktree.rs
@@ -122,7 +122,6 @@ impl TokTrie {
         let mut trie = TrieHash::new(0xff);
         let mut token_offsets = Vec::new();
         let mut token_data = Vec::new();
-        println!("info: {:?} wl={}", info, words.len());
         assert!(info.vocab_size == words.len() as u32);
         for (idx, word) in words.iter().enumerate() {
             if word.len() > 0 {
5 changes: 5 additions & 0 deletions aicirt/src/main.rs
@@ -1058,6 +1058,11 @@ fn save_tokenizer(cli: &Cli) {
     let tokenizer = find_tokenizer(&cli.tokenizer).unwrap();
     let tokens = tokenizer.token_bytes();

+    log::info!(
+        "TokTrie building: {:?} wl={}",
+        tokenizer.tokrx_info(),
+        tokens.len()
+    );
     let trie = TokTrie::from(&tokenizer.tokrx_info(), &tokens);
     trie.check_against(&tokens);

1 change: 1 addition & 0 deletions llama-cpp-low/Cargo.toml
@@ -6,6 +6,7 @@ edition = "2021"
 [dependencies]
 anyhow = "1.0.79"
 link-cplusplus = "1.0.9"
+log = "0.4.20"
 num_cpus = "1.16.0"

 [build-dependencies]
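
Note: the log crate added above is only a logging facade; records emitted through its macros are discarded unless the final binary installs a logger backend. A minimal sketch of that wiring, assuming the env_logger backend (an assumption for illustration, not something this commit adds):

// Sketch only: any `log`-compatible backend works; env_logger is assumed here.
fn main() {
    // With e.g. RUST_LOG=llama_cpp_low=debug, llama.cpp's INFO output
    // (demoted to Debug by the callback in the next file) becomes visible.
    env_logger::init();
    log::debug!("emitted through the `log` facade");
}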
26 changes: 26 additions & 0 deletions llama-cpp-low/src/lib.rs
@@ -50,6 +50,31 @@ pub struct Sequence {
 pub type ModelParams = llama_model_params;
 pub type ContextParams = llama_context_params;

+extern "C" fn llama_log(
+    level: ggml_log_level,
+    text: *const ::std::os::raw::c_char,
+    _user_data: *mut ::std::os::raw::c_void,
+) {
+    let ll = if level == ggml_log_level_GGML_LOG_LEVEL_DEBUG {
+        log::Level::Trace
+    } else if level == ggml_log_level_GGML_LOG_LEVEL_INFO {
+        // llama.cpp's idea of INFO is a bit too verbose
+        log::Level::Debug
+    } else if level == ggml_log_level_GGML_LOG_LEVEL_WARN {
+        log::Level::Warn
+    } else if level <= ggml_log_level_GGML_LOG_LEVEL_ERROR {
+        log::Level::Error
+    } else {
+        log::Level::Trace
+    };
+    let str = unsafe { std::ffi::CStr::from_ptr(text).to_string_lossy() };
+    let str = str.trim_end();
+    if str.len() == 1 {
+        return;
+    }
+    log::log!(ll, "{}", str);
+}
+
 impl Default for ModelParams {
     fn default() -> Self {
         unsafe { llama_model_default_params() }
@@ -143,6 +168,7 @@ pub struct ModelInfo {
 impl Model {
     pub fn from_file(file: &str, mparams: ModelParams) -> Result<Self> {
         unsafe {
+            llama_log_set(Some(llama_log), std::ptr::null_mut());
             let numa = false;
             llama_backend_init(numa); // TODO: only call this once?
             let c = CString::new(file).unwrap();
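
The callback above maps ggml's log levels onto the log crate's: DEBUG becomes Trace, INFO is demoted to Debug (llama.cpp's INFO is quite chatty), WARN maps to Warn, anything at or below ERROR maps to Error, and everything else falls back to Trace. One-character lines are skipped, presumably to drop the bare progress dots llama.cpp prints while loading a model. Since from_file registers the callback via llama_log_set before llama_backend_init, a caller only needs a logger installed first. A hedged usage sketch (the model path is a placeholder, env_logger is an assumed backend, and Model/ModelParams are assumed to be public as the diff suggests):

use llama_cpp_low::{Model, ModelParams};

fn main() -> anyhow::Result<()> {
    env_logger::init(); // assumed backend; see the note under Cargo.toml above
    // from_file() installs llama_log via llama_log_set(), so everything
    // llama.cpp prints from here on is routed through `log`.
    let _model = Model::from_file("path/to/model.gguf", ModelParams::default())?;
    Ok(())
}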
5 changes: 5 additions & 0 deletions rllm/src/engine.rs
@@ -256,6 +256,11 @@ impl RllmEngine {
         let hf_bytes = byte_tokenizer.get_hf_bytes();
         let tokenizer = Tokenizer::from_bytes(&hf_bytes).expect("can't load hf tokenizer");
         let tokens = byte_tokenizer.token_bytes();
+        log::info!(
+            "TokTrie building: {:?} wl={}",
+            byte_tokenizer.tokrx_info(),
+            tokens.len()
+        );
         let trie = TokTrie::from(&byte_tokenizer.tokrx_info(), &tokens);
         trie.check_against(&tokens);
         Ok((tokenizer, trie))
