diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml index 9c8db2b0..a0207b0d 100644 --- a/.github/workflows/cicd.yaml +++ b/.github/workflows/cicd.yaml @@ -36,10 +36,12 @@ jobs: build_and_test: strategy: + fail-fast: false matrix: os: [ubuntu-latest, macos-13, windows-latest] rust-version: [stable] runs-on: ${{ matrix.os }} + timeout-minutes: 30 steps: - name: Checkout repository uses: actions/checkout@v3 diff --git a/Cargo.lock b/Cargo.lock index a117b184..dfe6f9eb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,9 +25,15 @@ checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217" [[package]] name = "ahash" -version = "0.4.8" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0453232ace82dee0dd0b4c87a59bd90f7b53b314f3e0f61fe2ee7c8a16482289" +checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] [[package]] name = "aho-corasick" @@ -38,6 +44,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -662,6 +674,12 @@ version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" version = "1.5.0" @@ -748,6 +766,16 @@ dependencies = [ "libloading", ] +[[package]] +name = "combine" +version = "4.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4" +dependencies = [ + "bytes", + "memchr", +] + [[package]] name = "core-foundation" version = "0.9.3" @@ -803,7 +831,7 @@ dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", - "memoffset", + "memoffset 0.9.0", "scopeguard", ] @@ -1008,6 +1036,18 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "enum-as-inner" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.39", +] + [[package]] name = "env_logger" version = "0.10.1" @@ -1336,15 +1376,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "hashbrown" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" -dependencies = [ - "ahash 0.4.8", -] - [[package]] name = "hashbrown" version = "0.12.3" @@ -1356,6 +1387,10 @@ name = "hashbrown" version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156" +dependencies = [ + "ahash 0.8.6", + "allocator-api2", +] [[package]] name = "heck" @@ -1386,16 +1421,18 @@ dependencies = [ [[package]] name = "hnsw_rs" -version = "0.1.19" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0308727459701f2fa18286e50662c37044e130e955bfd42b6ff30260116b2a5" +checksum = "baf40f00346c339c8181f485ef409e49412649cde8e318cc6804849841ad85f1" dependencies = [ + "anyhow", "bincode", "cpu-time", "env_logger", - "hashbrown 0.9.1", + "hashbrown 0.14.2", "lazy_static", "log", + "mmap-rs", "num-traits", "num_cpus", "parking_lot", @@ -1972,6 +2009,15 @@ version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +[[package]] +name = "mach2" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0d1830bcd151a6fc4aea1369af235b36c1528fe976b8ff678683c9995eade8" +dependencies = [ + "libc", +] + [[package]] name = "markdown" version = "1.0.0-alpha.14" @@ -2002,6 +2048,15 @@ dependencies = [ "libc", ] +[[package]] +name = "memoffset" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +dependencies = [ + "autocfg", +] + [[package]] name = "memoffset" version = "0.9.0" @@ -2072,6 +2127,23 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "mmap-rs" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e1af4ac2b44e6faa5d82a400349ccf8444d68559eca4c6f976befc4eee963da" +dependencies = [ + "bitflags 1.3.2", + "combine", + "libc", + "mach2", + "nix", + "sysctl", + "thiserror", + "widestring", + "windows", +] + [[package]] name = "mockall" version = "0.11.4" @@ -2123,6 +2195,19 @@ dependencies = [ "tempfile", ] +[[package]] +name = "nix" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", + "memoffset 0.7.1", + "pin-utils", +] + [[package]] name = "nom" version = "7.1.3" @@ -3270,6 +3355,20 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +[[package]] +name = "sysctl" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea" +dependencies = [ + "bitflags 2.4.1", + "byteorder", + "enum-as-inner", + "libc", + "thiserror", + 
"walkdir", +] + [[package]] name = "system-configuration" version = "0.5.1" @@ -4003,6 +4102,12 @@ dependencies = [ "rustix", ] +[[package]] +name = "widestring" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "653f141f39ec16bba3c5abe400a0c60da7468261cc2cbf36805022876bc721a8" + [[package]] name = "winapi" version = "0.3.9" @@ -4034,6 +4139,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-core" version = "0.51.1" @@ -4125,6 +4239,26 @@ version = "0.13.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" +[[package]] +name = "zerocopy" +version = "0.7.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e97e415490559a91254a2979b4829267a57d2fcd741a98eee8b722fb57289aa0" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd7e48ccf166952882ca8bd778a43502c64f33bf94c12ebe2a7f08e5a0f6689f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.39", +] + [[package]] name = "zeroize" version = "1.6.0" diff --git a/crates/llm-chain-hnsw/Cargo.toml b/crates/llm-chain-hnsw/Cargo.toml index 703160f7..5f5f9874 100644 --- a/crates/llm-chain-hnsw/Cargo.toml +++ b/crates/llm-chain-hnsw/Cargo.toml @@ -14,7 +14,7 @@ repository = "https://github.com/sobelio/llm-chain/" [dependencies] async-trait.workspace = true -hnsw_rs = "0.1.19" +hnsw_rs = "0.2" llm-chain = { path = "../llm-chain", version = "0.13.0", 
default-features = false } serde.workspace = true serde_json.workspace = true diff --git a/crates/llm-chain-hnsw/examples/dump_load.rs b/crates/llm-chain-hnsw/examples/dump_load.rs index 58204272..4922de1d 100644 --- a/crates/llm-chain-hnsw/examples/dump_load.rs +++ b/crates/llm-chain-hnsw/examples/dump_load.rs @@ -1,3 +1,5 @@ +use hnsw_rs::{hnswio::*, prelude::*}; +use std::path::PathBuf; use std::sync::Arc; use llm_chain::{ @@ -16,7 +18,7 @@ async fn main() { let hnsw_index_fn = "hnsw_index".to_string(); let mut embeddings = llm_chain_openai::embeddings::Embeddings::default(); let document_store = Arc::new(Mutex::new(InMemoryDocumentStore::::new())); - let mut hnsw_vs = HnswVectorStore::new( + let hnsw_vs = HnswVectorStore::new( HnswArgs::default(), Arc::new(embeddings), document_store.clone(), @@ -56,12 +58,13 @@ async fn main() { // Load println!("Loading hnsw index from file"); embeddings = llm_chain_openai::embeddings::Embeddings::default(); - hnsw_vs = HnswVectorStore::load_from_file( - hnsw_index_fn, - Arc::new(embeddings), - document_store.clone(), - ) - .unwrap(); + + let mut hnswio = HnswIo::new(PathBuf::from("."), hnsw_index_fn); + let hnsw_loaded = hnswio.load_hnsw::().unwrap(); + let hnsw_vs = + HnswVectorStore::load_from_file(hnsw_loaded, Arc::new(embeddings), document_store.clone()) + .unwrap(); + println!("Loaded!"); let response = hnsw_vs diff --git a/crates/llm-chain-hnsw/src/lib.rs b/crates/llm-chain-hnsw/src/lib.rs index 44661011..87f9d54d 100644 --- a/crates/llm-chain-hnsw/src/lib.rs +++ b/crates/llm-chain-hnsw/src/lib.rs @@ -1,10 +1,7 @@ -use std::{ - collections::HashMap, fs::OpenOptions, io::BufReader, marker::PhantomData, path::PathBuf, - sync::Arc, -}; +use std::{collections::HashMap, marker::PhantomData, sync::Arc}; use async_trait::async_trait; -use hnsw_rs::{hnsw::Hnsw, hnswio::*, prelude::*}; +use hnsw_rs::{hnsw::Hnsw, prelude::*}; use llm_chain::{ document_stores::document_store::*, schema::Document, @@ -32,19 +29,19 @@ impl Default 
for HnswArgs { } } -pub struct HnswVectorStore +pub struct HnswVectorStore<'a, E, D, M> where E: Embeddings, D: DocumentStore + Send + Sync, M: Serialize + DeserializeOwned + Send + Sync, { - hnsw: Arc>, + hnsw: Arc>, document_store: Arc>, embeddings: Arc, _marker: PhantomData, } -impl HnswVectorStore +impl<'a, E, D, M> HnswVectorStore<'a, E, D, M> where E: Embeddings, D: DocumentStore + Send + Sync, @@ -69,47 +66,20 @@ where pub fn dump_to_file( &self, filename: String, - ) -> Result> { + ) -> Result> { self.hnsw .file_dump(&filename) - .map_err(HnswVectorStoreError::FileDumpError) + .map_err(|e| HnswVectorStoreError::FileDumpError(e.to_string())) } pub fn load_from_file( - filename: String, + hnsw: Hnsw<'a, f32, DistCosine>, embeddings: Arc, document_store: Arc>, - ) -> Result> { - let graph_fn = format!("{}.hnsw.graph", &filename); - let graph_path = PathBuf::from(graph_fn); - let graph_file_res = OpenOptions::new().read(true).open(&graph_path); - if graph_file_res.is_err() { - return Err(HnswVectorStoreError::FileLoadError(format!( - "could not open file {:?}", - graph_path.as_os_str() - ))); - } - let graph_file = graph_file_res.unwrap(); - let data_fn = format!("{}.hnsw.data", &filename); - let data_path = PathBuf::from(data_fn); - let data_file_res = OpenOptions::new().read(true).open(&data_path); - if data_file_res.is_err() { - return Err(HnswVectorStoreError::FileLoadError(format!( - "could not open file {:?}", - data_path.as_os_str() - ))); - } - let data_file = data_file_res.unwrap(); - - let mut graph_in = BufReader::new(graph_file); - let mut data_in = BufReader::new(data_file); - - let hnsw_description = load_description(&mut graph_in).unwrap(); - let hnsw_loaded: Hnsw = - load_hnsw(&mut graph_in, &hnsw_description, &mut data_in).unwrap(); - + ) -> Result> +where { Ok(HnswVectorStore { - hnsw: Arc::new(hnsw_loaded), + hnsw: Arc::new(hnsw), document_store, embeddings, _marker: Default::default(), @@ -143,7 +113,7 @@ where } #[async_trait] -impl 
VectorStore for HnswVectorStore +impl<'a, E, D, M> VectorStore for HnswVectorStore<'a, E, D, M> where E: Embeddings + Send + Sync, D: DocumentStore + Send + Sync, diff --git a/crates/llm-chain-llama-sys/README.md b/crates/llm-chain-llama-sys/README.md index 66f56b64..fd5c31d4 100644 --- a/crates/llm-chain-llama-sys/README.md +++ b/crates/llm-chain-llama-sys/README.md @@ -12,3 +12,10 @@ use llama_sys::\*; ``` Note that llama-sys provides a lower-level interface than llm-chain-llama, and may be more difficult to use. However, if you need fine-grained control over llama.cpp, llama-sys is the way to go. + +## Updating llama.cpp submodule +To update the llama.cpp submodule, run the following command: + +```console +$ git submodule update --remote --merge llama.cpp +``` diff --git a/crates/llm-chain-llama-sys/build.rs b/crates/llm-chain-llama-sys/build.rs index e85f682d..270200eb 100644 --- a/crates/llm-chain-llama-sys/build.rs +++ b/crates/llm-chain-llama-sys/build.rs @@ -49,6 +49,9 @@ fn main() { let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); b.write_to_file(out_path.join("bindings.rs")) .expect("Couldn't write bindings!"); + let out_path = PathBuf::from("src"); + b.write_to_file(out_path.join("bindings.rs")) + .expect("Couldn't write binding to src directorys!"); } Err(e) => { println!("cargo:warning=Unable to generate bindings: {}", e); @@ -85,7 +88,9 @@ fn main() { .arg("-DLLAMA_ALL_WARNINGS=OFF") .arg("-DLLAMA_ALL_WARNINGS_3RD_PARTY=OFF") .arg("-DLLAMA_BUILD_TESTS=OFF") - .arg("-DLLAMA_BUILD_EXAMPLES=OFF"); + .arg("-DLLAMA_BUILD_EXAMPLES=OFF") + .arg("-DLLAMA_NO_METAL=ON") + .arg("-DLLAMA_METAL=OFF"); // .arg("-DLLAMA_STATIC=ON") if cuda_enabled { // If CUDA feature is enabled, build with cuBlAS to enable GPU acceleration diff --git a/crates/llm-chain-llama-sys/llama.cpp b/crates/llm-chain-llama-sys/llama.cpp index 173d0e64..e4b76bbe 160000 --- a/crates/llm-chain-llama-sys/llama.cpp +++ b/crates/llm-chain-llama-sys/llama.cpp @@ -1 +1 @@ -Subproject 
commit 173d0e6419e8f8f3c1f4f13201b777f4c60629f3 +Subproject commit e4b76bbe316ee50fb17d9ac29e654c0edf830eba diff --git a/crates/llm-chain-llama-sys/src/bindings.rs b/crates/llm-chain-llama-sys/src/bindings.rs index eae6618f..2ce37af8 100644 --- a/crates/llm-chain-llama-sys/src/bindings.rs +++ b/crates/llm-chain-llama-sys/src/bindings.rs @@ -52,7 +52,7 @@ pub const __STDC_IEC_60559_COMPLEX__: u32 = 201404; pub const __STDC_ISO_10646__: u32 = 201706; pub const __GNU_LIBRARY__: u32 = 6; pub const __GLIBC__: u32 = 2; -pub const __GLIBC_MINOR__: u32 = 37; +pub const __GLIBC_MINOR__: u32 = 36; pub const _SYS_CDEFS_H: u32 = 1; pub const __glibc_c99_flexarr_available: u32 = 1; pub const __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI: u32 = 0; @@ -152,30 +152,76 @@ pub const GGML_FILE_VERSION: u32 = 1; pub const GGML_QNT_VERSION: u32 = 2; pub const GGML_QNT_VERSION_FACTOR: u32 = 1000; pub const GGML_MAX_DIMS: u32 = 4; -pub const GGML_MAX_NODES: u32 = 4096; -pub const GGML_MAX_PARAMS: u32 = 256; +pub const GGML_MAX_PARAMS: u32 = 1024; pub const GGML_MAX_CONTEXTS: u32 = 64; pub const GGML_MAX_SRC: u32 = 6; -pub const GGML_MAX_NAME: u32 = 48; -pub const GGML_MAX_OP_PARAMS: u32 = 32; +pub const GGML_MAX_NAME: u32 = 64; +pub const GGML_MAX_OP_PARAMS: u32 = 64; pub const GGML_DEFAULT_N_THREADS: u32 = 4; +pub const GGML_DEFAULT_GRAPH_SIZE: u32 = 2048; +pub const GGML_MEM_ALIGN: u32 = 16; pub const GGML_EXIT_SUCCESS: u32 = 0; pub const GGML_EXIT_ABORTED: u32 = 1; -pub const GGML_GRAPH_HASHTABLE_SIZE: u32 = 8273; -pub const GGML_CUDA_MAX_DEVICES: u32 = 16; -pub const LLAMA_MAX_DEVICES: u32 = 16; -pub const LLAMA_FILE_MAGIC_GGJT: u32 = 1734830708; -pub const LLAMA_FILE_MAGIC_GGLA: u32 = 1734831201; -pub const LLAMA_FILE_MAGIC_GGMF: u32 = 1734831462; -pub const LLAMA_FILE_MAGIC_GGML: u32 = 1734831468; +pub const GGUF_MAGIC: &[u8; 5] = b"GGUF\0"; +pub const GGUF_VERSION: u32 = 3; +pub const GGUF_DEFAULT_ALIGNMENT: u32 = 32; +pub const GGML_N_TASKS_MAX: i32 = -1; +pub const LLAMA_MAX_DEVICES: u32 
= 1; +pub const _STDIO_H: u32 = 1; +pub const __GNUC_VA_LIST: u32 = 1; +pub const _____fpos_t_defined: u32 = 1; +pub const ____mbstate_t_defined: u32 = 1; +pub const _____fpos64_t_defined: u32 = 1; +pub const ____FILE_defined: u32 = 1; +pub const __FILE_defined: u32 = 1; +pub const __struct_FILE_defined: u32 = 1; +pub const _IO_EOF_SEEN: u32 = 16; +pub const _IO_ERR_SEEN: u32 = 32; +pub const _IO_USER_LOCK: u32 = 32768; +pub const __cookie_io_functions_t_defined: u32 = 1; +pub const _IOFBF: u32 = 0; +pub const _IOLBF: u32 = 1; +pub const _IONBF: u32 = 2; +pub const BUFSIZ: u32 = 8192; +pub const EOF: i32 = -1; +pub const SEEK_SET: u32 = 0; +pub const SEEK_CUR: u32 = 1; +pub const SEEK_END: u32 = 2; +pub const SEEK_DATA: u32 = 3; +pub const SEEK_HOLE: u32 = 4; +pub const P_tmpdir: &[u8; 5] = b"/tmp\0"; +pub const _BITS_STDIO_LIM_H: u32 = 1; +pub const L_tmpnam: u32 = 20; +pub const TMP_MAX: u32 = 238328; +pub const FILENAME_MAX: u32 = 4096; +pub const L_ctermid: u32 = 9; +pub const L_cuserid: u32 = 9; +pub const FOPEN_MAX: u32 = 16; +pub const _PRINTF_NAN_LEN_MAX: u32 = 4; +pub const RENAME_NOREPLACE: u32 = 1; +pub const RENAME_EXCHANGE: u32 = 2; +pub const RENAME_WHITEOUT: u32 = 4; +pub const __HAVE_FLOAT128: u32 = 0; +pub const __HAVE_DISTINCT_FLOAT128: u32 = 0; +pub const __HAVE_FLOAT64X: u32 = 1; +pub const __HAVE_FLOAT64X_LONG_DOUBLE: u32 = 1; +pub const __HAVE_FLOAT16: u32 = 0; +pub const __HAVE_FLOAT32: u32 = 1; +pub const __HAVE_FLOAT64: u32 = 1; +pub const __HAVE_FLOAT32X: u32 = 1; +pub const __HAVE_FLOAT128X: u32 = 0; +pub const __HAVE_DISTINCT_FLOAT16: u32 = 0; +pub const __HAVE_DISTINCT_FLOAT32: u32 = 0; +pub const __HAVE_DISTINCT_FLOAT64: u32 = 0; +pub const __HAVE_DISTINCT_FLOAT32X: u32 = 0; +pub const __HAVE_DISTINCT_FLOAT64X: u32 = 0; +pub const __HAVE_DISTINCT_FLOAT128X: u32 = 0; +pub const __HAVE_FLOATN_NOT_TYPEDEF: u32 = 0; +pub const LLAMA_DEFAULT_SEED: u32 = 4294967295; +pub const LLAMA_MAX_RNG_STATE: u32 = 65536; pub const 
LLAMA_FILE_MAGIC_GGSN: u32 = 1734833006; -pub const LLAMA_FILE_VERSION: u32 = 3; -pub const LLAMA_FILE_MAGIC: u32 = 1734830708; -pub const LLAMA_FILE_MAGIC_UNVERSIONED: u32 = 1734831468; pub const LLAMA_SESSION_MAGIC: u32 = 1734833006; -pub const LLAMA_SESSION_VERSION: u32 = 1; -pub const LLAMA_DEFAULT_SEED: u32 = 4294967295; -pub const LLAMA_DEFAULT_RMS_EPS: f64 = 0.000005; +pub const LLAMA_SESSION_VERSION: u32 = 2; pub type __u_char = ::std::os::raw::c_uchar; pub type __u_short = ::std::os::raw::c_ushort; pub type __u_int = ::std::os::raw::c_uint; @@ -366,10 +412,10 @@ pub const ggml_type_GGML_TYPE_I16: ggml_type = 17; pub const ggml_type_GGML_TYPE_I32: ggml_type = 18; pub const ggml_type_GGML_TYPE_COUNT: ggml_type = 19; pub type ggml_type = ::std::os::raw::c_uint; -pub const ggml_backend_GGML_BACKEND_CPU: ggml_backend = 0; -pub const ggml_backend_GGML_BACKEND_GPU: ggml_backend = 10; -pub const ggml_backend_GGML_BACKEND_GPU_SPLIT: ggml_backend = 20; -pub type ggml_backend = ::std::os::raw::c_uint; +pub const ggml_backend_type_GGML_BACKEND_CPU: ggml_backend_type = 0; +pub const ggml_backend_type_GGML_BACKEND_GPU: ggml_backend_type = 10; +pub const ggml_backend_type_GGML_BACKEND_GPU_SPLIT: ggml_backend_type = 20; +pub type ggml_backend_type = ::std::os::raw::c_uint; pub const ggml_ftype_GGML_FTYPE_UNKNOWN: ggml_ftype = -1; pub const ggml_ftype_GGML_FTYPE_ALL_F32: ggml_ftype = 0; pub const ggml_ftype_GGML_FTYPE_MOSTLY_F16: ggml_ftype = 1; @@ -402,49 +448,58 @@ pub const ggml_op_GGML_OP_MEAN: ggml_op = 13; pub const ggml_op_GGML_OP_ARGMAX: ggml_op = 14; pub const ggml_op_GGML_OP_REPEAT: ggml_op = 15; pub const ggml_op_GGML_OP_REPEAT_BACK: ggml_op = 16; -pub const ggml_op_GGML_OP_SILU_BACK: ggml_op = 17; -pub const ggml_op_GGML_OP_NORM: ggml_op = 18; -pub const ggml_op_GGML_OP_RMS_NORM: ggml_op = 19; -pub const ggml_op_GGML_OP_RMS_NORM_BACK: ggml_op = 20; -pub const ggml_op_GGML_OP_MUL_MAT: ggml_op = 21; -pub const ggml_op_GGML_OP_OUT_PROD: ggml_op = 22; -pub const 
ggml_op_GGML_OP_SCALE: ggml_op = 23; -pub const ggml_op_GGML_OP_SET: ggml_op = 24; -pub const ggml_op_GGML_OP_CPY: ggml_op = 25; -pub const ggml_op_GGML_OP_CONT: ggml_op = 26; -pub const ggml_op_GGML_OP_RESHAPE: ggml_op = 27; -pub const ggml_op_GGML_OP_VIEW: ggml_op = 28; -pub const ggml_op_GGML_OP_PERMUTE: ggml_op = 29; -pub const ggml_op_GGML_OP_TRANSPOSE: ggml_op = 30; -pub const ggml_op_GGML_OP_GET_ROWS: ggml_op = 31; -pub const ggml_op_GGML_OP_GET_ROWS_BACK: ggml_op = 32; -pub const ggml_op_GGML_OP_DIAG: ggml_op = 33; -pub const ggml_op_GGML_OP_DIAG_MASK_INF: ggml_op = 34; -pub const ggml_op_GGML_OP_DIAG_MASK_ZERO: ggml_op = 35; -pub const ggml_op_GGML_OP_SOFT_MAX: ggml_op = 36; -pub const ggml_op_GGML_OP_SOFT_MAX_BACK: ggml_op = 37; -pub const ggml_op_GGML_OP_ROPE: ggml_op = 38; -pub const ggml_op_GGML_OP_ROPE_BACK: ggml_op = 39; -pub const ggml_op_GGML_OP_ALIBI: ggml_op = 40; -pub const ggml_op_GGML_OP_CLAMP: ggml_op = 41; -pub const ggml_op_GGML_OP_CONV_1D: ggml_op = 42; -pub const ggml_op_GGML_OP_CONV_2D: ggml_op = 43; -pub const ggml_op_GGML_OP_POOL_1D: ggml_op = 44; -pub const ggml_op_GGML_OP_POOL_2D: ggml_op = 45; -pub const ggml_op_GGML_OP_FLASH_ATTN: ggml_op = 46; -pub const ggml_op_GGML_OP_FLASH_FF: ggml_op = 47; -pub const ggml_op_GGML_OP_FLASH_ATTN_BACK: ggml_op = 48; -pub const ggml_op_GGML_OP_WIN_PART: ggml_op = 49; -pub const ggml_op_GGML_OP_WIN_UNPART: ggml_op = 50; -pub const ggml_op_GGML_OP_UNARY: ggml_op = 51; -pub const ggml_op_GGML_OP_MAP_UNARY: ggml_op = 52; -pub const ggml_op_GGML_OP_MAP_BINARY: ggml_op = 53; -pub const ggml_op_GGML_OP_MAP_CUSTOM1: ggml_op = 54; -pub const ggml_op_GGML_OP_MAP_CUSTOM2: ggml_op = 55; -pub const ggml_op_GGML_OP_MAP_CUSTOM3: ggml_op = 56; -pub const ggml_op_GGML_OP_CROSS_ENTROPY_LOSS: ggml_op = 57; -pub const ggml_op_GGML_OP_CROSS_ENTROPY_LOSS_BACK: ggml_op = 58; -pub const ggml_op_GGML_OP_COUNT: ggml_op = 59; +pub const ggml_op_GGML_OP_CONCAT: ggml_op = 17; +pub const ggml_op_GGML_OP_SILU_BACK: ggml_op = 
18; +pub const ggml_op_GGML_OP_NORM: ggml_op = 19; +pub const ggml_op_GGML_OP_RMS_NORM: ggml_op = 20; +pub const ggml_op_GGML_OP_RMS_NORM_BACK: ggml_op = 21; +pub const ggml_op_GGML_OP_GROUP_NORM: ggml_op = 22; +pub const ggml_op_GGML_OP_MUL_MAT: ggml_op = 23; +pub const ggml_op_GGML_OP_OUT_PROD: ggml_op = 24; +pub const ggml_op_GGML_OP_SCALE: ggml_op = 25; +pub const ggml_op_GGML_OP_SET: ggml_op = 26; +pub const ggml_op_GGML_OP_CPY: ggml_op = 27; +pub const ggml_op_GGML_OP_CONT: ggml_op = 28; +pub const ggml_op_GGML_OP_RESHAPE: ggml_op = 29; +pub const ggml_op_GGML_OP_VIEW: ggml_op = 30; +pub const ggml_op_GGML_OP_PERMUTE: ggml_op = 31; +pub const ggml_op_GGML_OP_TRANSPOSE: ggml_op = 32; +pub const ggml_op_GGML_OP_GET_ROWS: ggml_op = 33; +pub const ggml_op_GGML_OP_GET_ROWS_BACK: ggml_op = 34; +pub const ggml_op_GGML_OP_DIAG: ggml_op = 35; +pub const ggml_op_GGML_OP_DIAG_MASK_INF: ggml_op = 36; +pub const ggml_op_GGML_OP_DIAG_MASK_ZERO: ggml_op = 37; +pub const ggml_op_GGML_OP_SOFT_MAX: ggml_op = 38; +pub const ggml_op_GGML_OP_SOFT_MAX_BACK: ggml_op = 39; +pub const ggml_op_GGML_OP_ROPE: ggml_op = 40; +pub const ggml_op_GGML_OP_ROPE_BACK: ggml_op = 41; +pub const ggml_op_GGML_OP_ALIBI: ggml_op = 42; +pub const ggml_op_GGML_OP_CLAMP: ggml_op = 43; +pub const ggml_op_GGML_OP_CONV_TRANSPOSE_1D: ggml_op = 44; +pub const ggml_op_GGML_OP_IM2COL: ggml_op = 45; +pub const ggml_op_GGML_OP_CONV_TRANSPOSE_2D: ggml_op = 46; +pub const ggml_op_GGML_OP_POOL_1D: ggml_op = 47; +pub const ggml_op_GGML_OP_POOL_2D: ggml_op = 48; +pub const ggml_op_GGML_OP_UPSCALE: ggml_op = 49; +pub const ggml_op_GGML_OP_FLASH_ATTN: ggml_op = 50; +pub const ggml_op_GGML_OP_FLASH_FF: ggml_op = 51; +pub const ggml_op_GGML_OP_FLASH_ATTN_BACK: ggml_op = 52; +pub const ggml_op_GGML_OP_WIN_PART: ggml_op = 53; +pub const ggml_op_GGML_OP_WIN_UNPART: ggml_op = 54; +pub const ggml_op_GGML_OP_GET_REL_POS: ggml_op = 55; +pub const ggml_op_GGML_OP_ADD_REL_POS: ggml_op = 56; +pub const ggml_op_GGML_OP_UNARY: 
ggml_op = 57; +pub const ggml_op_GGML_OP_MAP_UNARY: ggml_op = 58; +pub const ggml_op_GGML_OP_MAP_BINARY: ggml_op = 59; +pub const ggml_op_GGML_OP_MAP_CUSTOM1_F32: ggml_op = 60; +pub const ggml_op_GGML_OP_MAP_CUSTOM2_F32: ggml_op = 61; +pub const ggml_op_GGML_OP_MAP_CUSTOM3_F32: ggml_op = 62; +pub const ggml_op_GGML_OP_MAP_CUSTOM1: ggml_op = 63; +pub const ggml_op_GGML_OP_MAP_CUSTOM2: ggml_op = 64; +pub const ggml_op_GGML_OP_MAP_CUSTOM3: ggml_op = 65; +pub const ggml_op_GGML_OP_CROSS_ENTROPY_LOSS: ggml_op = 66; +pub const ggml_op_GGML_OP_CROSS_ENTROPY_LOSS_BACK: ggml_op = 67; +pub const ggml_op_GGML_OP_COUNT: ggml_op = 68; pub type ggml_op = ::std::os::raw::c_uint; pub const ggml_unary_op_GGML_UNARY_OP_ABS: ggml_unary_op = 0; pub const ggml_unary_op_GGML_UNARY_OP_SGN: ggml_unary_op = 1; @@ -456,11 +511,16 @@ pub const ggml_unary_op_GGML_UNARY_OP_RELU: ggml_unary_op = 6; pub const ggml_unary_op_GGML_UNARY_OP_GELU: ggml_unary_op = 7; pub const ggml_unary_op_GGML_UNARY_OP_GELU_QUICK: ggml_unary_op = 8; pub const ggml_unary_op_GGML_UNARY_OP_SILU: ggml_unary_op = 9; +pub const ggml_unary_op_GGML_UNARY_OP_LEAKY: ggml_unary_op = 10; pub type ggml_unary_op = ::std::os::raw::c_uint; pub const ggml_object_type_GGML_OBJECT_TENSOR: ggml_object_type = 0; pub const ggml_object_type_GGML_OBJECT_GRAPH: ggml_object_type = 1; pub const ggml_object_type_GGML_OBJECT_WORK_BUFFER: ggml_object_type = 2; pub type ggml_object_type = ::std::os::raw::c_uint; +pub const ggml_log_level_GGML_LOG_LEVEL_ERROR: ggml_log_level = 2; +pub const ggml_log_level_GGML_LOG_LEVEL_WARN: ggml_log_level = 3; +pub const ggml_log_level_GGML_LOG_LEVEL_INFO: ggml_log_level = 4; +pub type ggml_log_level = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct ggml_object { @@ -540,22 +600,25 @@ pub const GGML_OBJECT_SIZE: usize = 32; #[derive(Debug, Copy, Clone)] pub struct ggml_tensor { pub type_: ggml_type, - pub backend: ggml_backend, + pub backend: ggml_backend_type, + pub buffer: *mut 
ggml_backend_buffer, pub n_dims: ::std::os::raw::c_int, pub ne: [i64; 4usize], pub nb: [usize; 4usize], pub op: ggml_op, - pub op_params: [i32; 8usize], + pub op_params: [i32; 16usize], pub is_param: bool, pub grad: *mut ggml_tensor, pub src: [*mut ggml_tensor; 6usize], pub perf_runs: ::std::os::raw::c_int, pub perf_cycles: i64, pub perf_time_us: i64, + pub view_src: *mut ggml_tensor, + pub view_offs: usize, pub data: *mut ::std::os::raw::c_void, - pub name: [::std::os::raw::c_char; 48usize], + pub name: [::std::os::raw::c_char; 64usize], pub extra: *mut ::std::os::raw::c_void, - pub padding: [::std::os::raw::c_char; 4usize], + pub padding: [::std::os::raw::c_char; 12usize], } #[test] fn bindgen_test_layout_ggml_tensor() { @@ -563,7 +626,7 @@ fn bindgen_test_layout_ggml_tensor() { let ptr = UNINIT.as_ptr(); assert_eq!( ::std::mem::size_of::(), - 272usize, + 352usize, concat!("Size of: ", stringify!(ggml_tensor)) ); assert_eq!( @@ -592,8 +655,18 @@ fn bindgen_test_layout_ggml_tensor() { ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).n_dims) as usize - ptr as usize }, + unsafe { ::std::ptr::addr_of!((*ptr).buffer) as usize - ptr as usize }, 8usize, + concat!( + "Offset of field: ", + stringify!(ggml_tensor), + "::", + stringify!(buffer) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).n_dims) as usize - ptr as usize }, + 16usize, concat!( "Offset of field: ", stringify!(ggml_tensor), @@ -603,7 +676,7 @@ fn bindgen_test_layout_ggml_tensor() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).ne) as usize - ptr as usize }, - 16usize, + 24usize, concat!( "Offset of field: ", stringify!(ggml_tensor), @@ -613,7 +686,7 @@ fn bindgen_test_layout_ggml_tensor() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).nb) as usize - ptr as usize }, - 48usize, + 56usize, concat!( "Offset of field: ", stringify!(ggml_tensor), @@ -623,7 +696,7 @@ fn bindgen_test_layout_ggml_tensor() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).op) as usize - ptr as 
usize }, - 80usize, + 88usize, concat!( "Offset of field: ", stringify!(ggml_tensor), @@ -633,7 +706,7 @@ fn bindgen_test_layout_ggml_tensor() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).op_params) as usize - ptr as usize }, - 84usize, + 92usize, concat!( "Offset of field: ", stringify!(ggml_tensor), @@ -643,7 +716,7 @@ fn bindgen_test_layout_ggml_tensor() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).is_param) as usize - ptr as usize }, - 116usize, + 156usize, concat!( "Offset of field: ", stringify!(ggml_tensor), @@ -653,7 +726,7 @@ fn bindgen_test_layout_ggml_tensor() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).grad) as usize - ptr as usize }, - 120usize, + 160usize, concat!( "Offset of field: ", stringify!(ggml_tensor), @@ -663,7 +736,7 @@ fn bindgen_test_layout_ggml_tensor() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).src) as usize - ptr as usize }, - 128usize, + 168usize, concat!( "Offset of field: ", stringify!(ggml_tensor), @@ -673,7 +746,7 @@ fn bindgen_test_layout_ggml_tensor() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).perf_runs) as usize - ptr as usize }, - 176usize, + 216usize, concat!( "Offset of field: ", stringify!(ggml_tensor), @@ -683,7 +756,7 @@ fn bindgen_test_layout_ggml_tensor() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).perf_cycles) as usize - ptr as usize }, - 184usize, + 224usize, concat!( "Offset of field: ", stringify!(ggml_tensor), @@ -693,7 +766,7 @@ fn bindgen_test_layout_ggml_tensor() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).perf_time_us) as usize - ptr as usize }, - 192usize, + 232usize, concat!( "Offset of field: ", stringify!(ggml_tensor), @@ -701,9 +774,29 @@ fn bindgen_test_layout_ggml_tensor() { stringify!(perf_time_us) ) ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).view_src) as usize - ptr as usize }, + 240usize, + concat!( + "Offset of field: ", + stringify!(ggml_tensor), + "::", + stringify!(view_src) + ) + ); + assert_eq!( + unsafe { 
::std::ptr::addr_of!((*ptr).view_offs) as usize - ptr as usize }, + 248usize, + concat!( + "Offset of field: ", + stringify!(ggml_tensor), + "::", + stringify!(view_offs) + ) + ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).data) as usize - ptr as usize }, - 200usize, + 256usize, concat!( "Offset of field: ", stringify!(ggml_tensor), @@ -713,7 +806,7 @@ fn bindgen_test_layout_ggml_tensor() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).name) as usize - ptr as usize }, - 208usize, + 264usize, concat!( "Offset of field: ", stringify!(ggml_tensor), @@ -723,7 +816,7 @@ fn bindgen_test_layout_ggml_tensor() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).extra) as usize - ptr as usize }, - 256usize, + 328usize, concat!( "Offset of field: ", stringify!(ggml_tensor), @@ -733,7 +826,7 @@ fn bindgen_test_layout_ggml_tensor() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).padding) as usize - ptr as usize }, - 264usize, + 336usize, concat!( "Offset of field: ", stringify!(ggml_tensor), @@ -742,14 +835,13 @@ fn bindgen_test_layout_ggml_tensor() { ) ); } -pub const GGML_TENSOR_SIZE: usize = 272; +pub const GGML_TENSOR_SIZE: usize = 352; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct ggml_cplan { pub work_size: usize, pub work_data: *mut u8, pub n_threads: ::std::os::raw::c_int, - pub n_tasks: [::std::os::raw::c_int; 4096usize], pub abort_callback: ::std::option::Option bool>, pub abort_callback_data: *mut ::std::os::raw::c_void, @@ -760,7 +852,7 @@ fn bindgen_test_layout_ggml_cplan() { let ptr = UNINIT.as_ptr(); assert_eq!( ::std::mem::size_of::(), - 16424usize, + 40usize, concat!("Size of: ", stringify!(ggml_cplan)) ); assert_eq!( @@ -799,45 +891,82 @@ fn bindgen_test_layout_ggml_cplan() { ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).n_tasks) as usize - ptr as usize }, - 20usize, + unsafe { ::std::ptr::addr_of!((*ptr).abort_callback) as usize - ptr as usize }, + 24usize, concat!( "Offset of field: ", stringify!(ggml_cplan), "::", - 
stringify!(n_tasks) + stringify!(abort_callback) ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).abort_callback) as usize - ptr as usize }, - 16408usize, + unsafe { ::std::ptr::addr_of!((*ptr).abort_callback_data) as usize - ptr as usize }, + 32usize, concat!( "Offset of field: ", stringify!(ggml_cplan), "::", - stringify!(abort_callback) + stringify!(abort_callback_data) ) ); +} +pub const ggml_cgraph_eval_order_GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT: ggml_cgraph_eval_order = 0; +pub const ggml_cgraph_eval_order_GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT: ggml_cgraph_eval_order = 1; +pub const ggml_cgraph_eval_order_GGML_CGRAPH_EVAL_ORDER_COUNT: ggml_cgraph_eval_order = 2; +pub type ggml_cgraph_eval_order = ::std::os::raw::c_uint; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct ggml_hash_set { + pub size: usize, + pub keys: *mut *mut ggml_tensor, +} +#[test] +fn bindgen_test_layout_ggml_hash_set() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).abort_callback_data) as usize - ptr as usize }, - 16416usize, + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(ggml_hash_set)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(ggml_hash_set)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).size) as usize - ptr as usize }, + 0usize, concat!( "Offset of field: ", - stringify!(ggml_cplan), + stringify!(ggml_hash_set), "::", - stringify!(abort_callback_data) + stringify!(size) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).keys) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(ggml_hash_set), + "::", + stringify!(keys) ) ); } #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct ggml_cgraph { + pub size: ::std::os::raw::c_int, pub n_nodes: ::std::os::raw::c_int, pub n_leafs: ::std::os::raw::c_int, - pub nodes: [*mut ggml_tensor; 
4096usize], - pub grads: [*mut ggml_tensor; 4096usize], - pub leafs: [*mut ggml_tensor; 4096usize], - pub visited_hash_table: [*mut ::std::os::raw::c_void; 8273usize], + pub nodes: *mut *mut ggml_tensor, + pub grads: *mut *mut ggml_tensor, + pub leafs: *mut *mut ggml_tensor, + pub visited_hash_table: ggml_hash_set, + pub order: ggml_cgraph_eval_order, pub perf_runs: ::std::os::raw::c_int, pub perf_cycles: i64, pub perf_time_us: i64, @@ -848,7 +977,7 @@ fn bindgen_test_layout_ggml_cgraph() { let ptr = UNINIT.as_ptr(); assert_eq!( ::std::mem::size_of::(), - 164520usize, + 80usize, concat!("Size of: ", stringify!(ggml_cgraph)) ); assert_eq!( @@ -857,8 +986,18 @@ fn bindgen_test_layout_ggml_cgraph() { concat!("Alignment of ", stringify!(ggml_cgraph)) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).n_nodes) as usize - ptr as usize }, + unsafe { ::std::ptr::addr_of!((*ptr).size) as usize - ptr as usize }, 0usize, + concat!( + "Offset of field: ", + stringify!(ggml_cgraph), + "::", + stringify!(size) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).n_nodes) as usize - ptr as usize }, + 4usize, concat!( "Offset of field: ", stringify!(ggml_cgraph), @@ -868,7 +1007,7 @@ fn bindgen_test_layout_ggml_cgraph() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).n_leafs) as usize - ptr as usize }, - 4usize, + 8usize, concat!( "Offset of field: ", stringify!(ggml_cgraph), @@ -878,7 +1017,7 @@ fn bindgen_test_layout_ggml_cgraph() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).nodes) as usize - ptr as usize }, - 8usize, + 16usize, concat!( "Offset of field: ", stringify!(ggml_cgraph), @@ -888,7 +1027,7 @@ fn bindgen_test_layout_ggml_cgraph() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).grads) as usize - ptr as usize }, - 32776usize, + 24usize, concat!( "Offset of field: ", stringify!(ggml_cgraph), @@ -898,7 +1037,7 @@ fn bindgen_test_layout_ggml_cgraph() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).leafs) as usize - ptr as usize }, - 
65544usize, + 32usize, concat!( "Offset of field: ", stringify!(ggml_cgraph), @@ -908,7 +1047,7 @@ fn bindgen_test_layout_ggml_cgraph() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).visited_hash_table) as usize - ptr as usize }, - 98312usize, + 40usize, concat!( "Offset of field: ", stringify!(ggml_cgraph), @@ -916,9 +1055,19 @@ fn bindgen_test_layout_ggml_cgraph() { stringify!(visited_hash_table) ) ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).order) as usize - ptr as usize }, + 56usize, + concat!( + "Offset of field: ", + stringify!(ggml_cgraph), + "::", + stringify!(order) + ) + ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).perf_runs) as usize - ptr as usize }, - 164496usize, + 60usize, concat!( "Offset of field: ", stringify!(ggml_cgraph), @@ -928,7 +1077,7 @@ fn bindgen_test_layout_ggml_cgraph() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).perf_cycles) as usize - ptr as usize }, - 164504usize, + 64usize, concat!( "Offset of field: ", stringify!(ggml_cgraph), @@ -938,7 +1087,7 @@ fn bindgen_test_layout_ggml_cgraph() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).perf_time_us) as usize - ptr as usize }, - 164512usize, + 72usize, concat!( "Offset of field: ", stringify!(ggml_cgraph), @@ -947,7 +1096,6 @@ fn bindgen_test_layout_ggml_cgraph() { ) ); } -pub const GGML_GRAPH_SIZE: usize = 164520; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct ggml_scratch { @@ -1145,6 +1293,9 @@ extern "C" { extern "C" { pub fn ggml_cycles_per_ms() -> i64; } +extern "C" { + pub fn ggml_print_backtrace(); +} extern "C" { pub fn ggml_numa_init(); } @@ -1166,6 +1317,9 @@ extern "C" { extern "C" { pub fn ggml_nbytes(tensor: *const ggml_tensor) -> usize; } +extern "C" { + pub fn ggml_nbytes_pad(tensor: *const ggml_tensor) -> usize; +} extern "C" { pub fn ggml_nbytes_split( tensor: *const ggml_tensor, @@ -1208,6 +1362,9 @@ extern "C" { extern "C" { pub fn ggml_is_permuted(tensor: *const ggml_tensor) -> bool; } +extern "C" { + pub fn 
ggml_are_same_shape(t0: *const ggml_tensor, t1: *const ggml_tensor) -> bool; +} extern "C" { pub fn ggml_tensor_overhead() -> usize; } @@ -1290,7 +1447,16 @@ extern "C" { pub fn ggml_dup_tensor(ctx: *mut ggml_context, src: *const ggml_tensor) -> *mut ggml_tensor; } extern "C" { - pub fn ggml_view_tensor(ctx: *mut ggml_context, src: *const ggml_tensor) -> *mut ggml_tensor; + pub fn ggml_view_tensor(ctx: *mut ggml_context, src: *mut ggml_tensor) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_get_first_tensor(ctx: *mut ggml_context) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_get_next_tensor( + ctx: *mut ggml_context, + tensor: *mut ggml_tensor, + ) -> *mut ggml_tensor; } extern "C" { pub fn ggml_get_tensor( @@ -1307,18 +1473,66 @@ extern "C" { extern "C" { pub fn ggml_set_f32(tensor: *mut ggml_tensor, value: f32) -> *mut ggml_tensor; } +extern "C" { + pub fn ggml_unravel_index( + tensor: *const ggml_tensor, + i: i64, + i0: *mut i64, + i1: *mut i64, + i2: *mut i64, + i3: *mut i64, + ); +} extern "C" { pub fn ggml_get_i32_1d(tensor: *const ggml_tensor, i: ::std::os::raw::c_int) -> i32; } extern "C" { pub fn ggml_set_i32_1d(tensor: *const ggml_tensor, i: ::std::os::raw::c_int, value: i32); } +extern "C" { + pub fn ggml_get_i32_nd( + tensor: *const ggml_tensor, + i0: ::std::os::raw::c_int, + i1: ::std::os::raw::c_int, + i2: ::std::os::raw::c_int, + i3: ::std::os::raw::c_int, + ) -> i32; +} +extern "C" { + pub fn ggml_set_i32_nd( + tensor: *const ggml_tensor, + i0: ::std::os::raw::c_int, + i1: ::std::os::raw::c_int, + i2: ::std::os::raw::c_int, + i3: ::std::os::raw::c_int, + value: i32, + ); +} extern "C" { pub fn ggml_get_f32_1d(tensor: *const ggml_tensor, i: ::std::os::raw::c_int) -> f32; } extern "C" { pub fn ggml_set_f32_1d(tensor: *const ggml_tensor, i: ::std::os::raw::c_int, value: f32); } +extern "C" { + pub fn ggml_get_f32_nd( + tensor: *const ggml_tensor, + i0: ::std::os::raw::c_int, + i1: ::std::os::raw::c_int, + i2: ::std::os::raw::c_int, + i3: 
::std::os::raw::c_int, + ) -> f32; +} +extern "C" { + pub fn ggml_set_f32_nd( + tensor: *const ggml_tensor, + i0: ::std::os::raw::c_int, + i1: ::std::os::raw::c_int, + i2: ::std::os::raw::c_int, + i3: ::std::os::raw::c_int, + value: f32, + ); +} extern "C" { pub fn ggml_get_data(tensor: *const ggml_tensor) -> *mut ::std::os::raw::c_void; } @@ -1364,6 +1578,14 @@ extern "C" { b: *mut ggml_tensor, ) -> *mut ggml_tensor; } +extern "C" { + pub fn ggml_add_cast( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + b: *mut ggml_tensor, + type_: ggml_type, + ) -> *mut ggml_tensor; +} extern "C" { pub fn ggml_add1( ctx: *mut ggml_context, @@ -1486,6 +1708,13 @@ extern "C" { b: *mut ggml_tensor, ) -> *mut ggml_tensor; } +extern "C" { + pub fn ggml_concat( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + b: *mut ggml_tensor, + ) -> *mut ggml_tensor; +} extern "C" { pub fn ggml_abs(ctx: *mut ggml_context, a: *mut ggml_tensor) -> *mut ggml_tensor; } @@ -1525,6 +1754,9 @@ extern "C" { extern "C" { pub fn ggml_relu(ctx: *mut ggml_context, a: *mut ggml_tensor) -> *mut ggml_tensor; } +extern "C" { + pub fn ggml_leaky(ctx: *mut ggml_context, a: *mut ggml_tensor) -> *mut ggml_tensor; +} extern "C" { pub fn ggml_relu_inplace(ctx: *mut ggml_context, a: *mut ggml_tensor) -> *mut ggml_tensor; } @@ -1555,10 +1787,14 @@ extern "C" { ) -> *mut ggml_tensor; } extern "C" { - pub fn ggml_norm(ctx: *mut ggml_context, a: *mut ggml_tensor) -> *mut ggml_tensor; + pub fn ggml_norm(ctx: *mut ggml_context, a: *mut ggml_tensor, eps: f32) -> *mut ggml_tensor; } extern "C" { - pub fn ggml_norm_inplace(ctx: *mut ggml_context, a: *mut ggml_tensor) -> *mut ggml_tensor; + pub fn ggml_norm_inplace( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + eps: f32, + ) -> *mut ggml_tensor; } extern "C" { pub fn ggml_rms_norm(ctx: *mut ggml_context, a: *mut ggml_tensor, eps: f32) @@ -1571,11 +1807,26 @@ extern "C" { eps: f32, ) -> *mut ggml_tensor; } +extern "C" { + pub fn ggml_group_norm( + ctx: *mut ggml_context, 
+ a: *mut ggml_tensor, + n_groups: ::std::os::raw::c_int, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_group_norm_inplace( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + n_groups: ::std::os::raw::c_int, + ) -> *mut ggml_tensor; +} extern "C" { pub fn ggml_rms_norm_back( ctx: *mut ggml_context, a: *mut ggml_tensor, b: *mut ggml_tensor, + eps: f32, ) -> *mut ggml_tensor; } extern "C" { @@ -1682,6 +1933,36 @@ extern "C" { extern "C" { pub fn ggml_cont_inplace(ctx: *mut ggml_context, a: *mut ggml_tensor) -> *mut ggml_tensor; } +extern "C" { + pub fn ggml_cont_1d(ctx: *mut ggml_context, a: *mut ggml_tensor, ne0: i64) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_cont_2d( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + ne0: i64, + ne1: i64, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_cont_3d( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + ne0: i64, + ne1: i64, + ne2: i64, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_cont_4d( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + ne0: i64, + ne1: i64, + ne2: i64, + ne3: i64, + ) -> *mut ggml_tensor; +} extern "C" { pub fn ggml_reshape( ctx: *mut ggml_context, @@ -1832,6 +2113,14 @@ extern "C" { extern "C" { pub fn ggml_soft_max_inplace(ctx: *mut ggml_context, a: *mut ggml_tensor) -> *mut ggml_tensor; } +extern "C" { + pub fn ggml_soft_max_ext( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + mask: *mut ggml_tensor, + scale: f32, + ) -> *mut ggml_tensor; +} extern "C" { pub fn ggml_soft_max_back( ctx: *mut ggml_context, @@ -1850,7 +2139,7 @@ extern "C" { pub fn ggml_rope( ctx: *mut ggml_context, a: *mut ggml_tensor, - n_past: ::std::os::raw::c_int, + b: *mut ggml_tensor, n_dims: ::std::os::raw::c_int, mode: ::std::os::raw::c_int, n_ctx: ::std::os::raw::c_int, @@ -1860,7 +2149,7 @@ extern "C" { pub fn ggml_rope_inplace( ctx: *mut ggml_context, a: *mut ggml_tensor, - n_past: ::std::os::raw::c_int, + b: *mut ggml_tensor, n_dims: ::std::os::raw::c_int, mode: 
::std::os::raw::c_int, n_ctx: ::std::os::raw::c_int, @@ -1870,34 +2159,73 @@ extern "C" { pub fn ggml_rope_custom( ctx: *mut ggml_context, a: *mut ggml_tensor, - n_past: ::std::os::raw::c_int, + b: *mut ggml_tensor, n_dims: ::std::os::raw::c_int, mode: ::std::os::raw::c_int, n_ctx: ::std::os::raw::c_int, + n_orig_ctx: ::std::os::raw::c_int, freq_base: f32, freq_scale: f32, + ext_factor: f32, + attn_factor: f32, + beta_fast: f32, + beta_slow: f32, ) -> *mut ggml_tensor; } extern "C" { pub fn ggml_rope_custom_inplace( ctx: *mut ggml_context, a: *mut ggml_tensor, - n_past: ::std::os::raw::c_int, + b: *mut ggml_tensor, n_dims: ::std::os::raw::c_int, mode: ::std::os::raw::c_int, n_ctx: ::std::os::raw::c_int, + n_orig_ctx: ::std::os::raw::c_int, freq_base: f32, freq_scale: f32, + ext_factor: f32, + attn_factor: f32, + beta_fast: f32, + beta_slow: f32, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_rope_yarn_corr_dims( + n_dims: ::std::os::raw::c_int, + n_orig_ctx: ::std::os::raw::c_int, + freq_base: f32, + beta_fast: f32, + beta_slow: f32, + dims: *mut f32, + ); +} +extern "C" { + pub fn ggml_rope_xpos_inplace( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + b: *mut ggml_tensor, + n_dims: ::std::os::raw::c_int, + base: f32, + down: bool, ) -> *mut ggml_tensor; } extern "C" { pub fn ggml_rope_back( ctx: *mut ggml_context, a: *mut ggml_tensor, - n_past: ::std::os::raw::c_int, + b: *mut ggml_tensor, n_dims: ::std::os::raw::c_int, mode: ::std::os::raw::c_int, n_ctx: ::std::os::raw::c_int, + n_orig_ctx: ::std::os::raw::c_int, + freq_base: f32, + freq_scale: f32, + ext_factor: f32, + attn_factor: f32, + beta_fast: f32, + beta_slow: f32, + xpos_base: f32, + xpos_down: bool, ) -> *mut ggml_tensor; } extern "C" { @@ -1918,26 +2246,27 @@ extern "C" { ) -> *mut ggml_tensor; } extern "C" { - pub fn ggml_conv_1d( + pub fn ggml_im2col( ctx: *mut ggml_context, a: *mut ggml_tensor, b: *mut ggml_tensor, s0: ::std::os::raw::c_int, + s1: ::std::os::raw::c_int, p0: 
::std::os::raw::c_int, + p1: ::std::os::raw::c_int, d0: ::std::os::raw::c_int, + d1: ::std::os::raw::c_int, + is_2D: bool, ) -> *mut ggml_tensor; } extern "C" { - pub fn ggml_conv_2d( + pub fn ggml_conv_1d( ctx: *mut ggml_context, a: *mut ggml_tensor, b: *mut ggml_tensor, s0: ::std::os::raw::c_int, - s1: ::std::os::raw::c_int, p0: ::std::os::raw::c_int, - p1: ::std::os::raw::c_int, d0: ::std::os::raw::c_int, - d1: ::std::os::raw::c_int, ) -> *mut ggml_tensor; } extern "C" { @@ -1949,31 +2278,83 @@ extern "C" { d: ::std::os::raw::c_int, ) -> *mut ggml_tensor; } -pub const ggml_op_pool_GGML_OP_POOL_MAX: ggml_op_pool = 0; -pub const ggml_op_pool_GGML_OP_POOL_AVG: ggml_op_pool = 1; -pub const ggml_op_pool_GGML_OP_POOL_COUNT: ggml_op_pool = 2; -pub type ggml_op_pool = ::std::os::raw::c_uint; extern "C" { - pub fn ggml_pool_1d( + pub fn ggml_conv_transpose_1d( ctx: *mut ggml_context, a: *mut ggml_tensor, - op: ggml_op_pool, - k0: ::std::os::raw::c_int, + b: *mut ggml_tensor, s0: ::std::os::raw::c_int, p0: ::std::os::raw::c_int, + d0: ::std::os::raw::c_int, ) -> *mut ggml_tensor; } extern "C" { - pub fn ggml_pool_2d( + pub fn ggml_conv_2d( ctx: *mut ggml_context, a: *mut ggml_tensor, - op: ggml_op_pool, - k0: ::std::os::raw::c_int, - k1: ::std::os::raw::c_int, + b: *mut ggml_tensor, s0: ::std::os::raw::c_int, s1: ::std::os::raw::c_int, p0: ::std::os::raw::c_int, p1: ::std::os::raw::c_int, + d0: ::std::os::raw::c_int, + d1: ::std::os::raw::c_int, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_conv_2d_sk_p0( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + b: *mut ggml_tensor, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_conv_2d_s1_ph( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + b: *mut ggml_tensor, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_conv_transpose_2d_p0( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + b: *mut ggml_tensor, + stride: ::std::os::raw::c_int, + ) -> *mut ggml_tensor; +} +pub const 
ggml_op_pool_GGML_OP_POOL_MAX: ggml_op_pool = 0; +pub const ggml_op_pool_GGML_OP_POOL_AVG: ggml_op_pool = 1; +pub const ggml_op_pool_GGML_OP_POOL_COUNT: ggml_op_pool = 2; +pub type ggml_op_pool = ::std::os::raw::c_uint; +extern "C" { + pub fn ggml_pool_1d( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + op: ggml_op_pool, + k0: ::std::os::raw::c_int, + s0: ::std::os::raw::c_int, + p0: ::std::os::raw::c_int, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_pool_2d( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + op: ggml_op_pool, + k0: ::std::os::raw::c_int, + k1: ::std::os::raw::c_int, + s0: ::std::os::raw::c_int, + s1: ::std::os::raw::c_int, + p0: f32, + p1: f32, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_upscale( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + scale_factor: ::std::os::raw::c_int, ) -> *mut ggml_tensor; } extern "C" { @@ -2021,6 +2402,44 @@ extern "C" { w: ::std::os::raw::c_int, ) -> *mut ggml_tensor; } +extern "C" { + pub fn ggml_unary( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + op: ggml_unary_op, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_unary_inplace( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + op: ggml_unary_op, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_get_rel_pos( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + qh: ::std::os::raw::c_int, + kh: ::std::os::raw::c_int, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_add_rel_pos( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + pw: *mut ggml_tensor, + ph: *mut ggml_tensor, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_add_rel_pos_inplace( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + pw: *mut ggml_tensor, + ph: *mut ggml_tensor, + ) -> *mut ggml_tensor; +} pub type ggml_unary_op_f32_t = ::std::option::Option< unsafe extern "C" fn(arg1: ::std::os::raw::c_int, arg2: *mut f32, arg3: *const f32), >; @@ -2049,20 +2468,6 @@ pub type ggml_custom3_op_f32_t = ::std::option::Option< arg4: *const ggml_tensor, 
), >; -extern "C" { - pub fn ggml_unary( - ctx: *mut ggml_context, - a: *mut ggml_tensor, - op: ggml_unary_op, - ) -> *mut ggml_tensor; -} -extern "C" { - pub fn ggml_unary_inplace( - ctx: *mut ggml_context, - a: *mut ggml_tensor, - op: ggml_unary_op, - ) -> *mut ggml_tensor; -} extern "C" { pub fn ggml_map_unary_f32( ctx: *mut ggml_context, @@ -2141,6 +2546,96 @@ extern "C" { fun: ggml_custom3_op_f32_t, ) -> *mut ggml_tensor; } +pub type ggml_custom1_op_t = ::std::option::Option< + unsafe extern "C" fn( + dst: *mut ggml_tensor, + a: *const ggml_tensor, + ith: ::std::os::raw::c_int, + nth: ::std::os::raw::c_int, + userdata: *mut ::std::os::raw::c_void, + ), +>; +pub type ggml_custom2_op_t = ::std::option::Option< + unsafe extern "C" fn( + dst: *mut ggml_tensor, + a: *const ggml_tensor, + b: *const ggml_tensor, + ith: ::std::os::raw::c_int, + nth: ::std::os::raw::c_int, + userdata: *mut ::std::os::raw::c_void, + ), +>; +pub type ggml_custom3_op_t = ::std::option::Option< + unsafe extern "C" fn( + dst: *mut ggml_tensor, + a: *const ggml_tensor, + b: *const ggml_tensor, + c: *const ggml_tensor, + ith: ::std::os::raw::c_int, + nth: ::std::os::raw::c_int, + userdata: *mut ::std::os::raw::c_void, + ), +>; +extern "C" { + pub fn ggml_map_custom1( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + fun: ggml_custom1_op_t, + n_tasks: ::std::os::raw::c_int, + userdata: *mut ::std::os::raw::c_void, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_map_custom1_inplace( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + fun: ggml_custom1_op_t, + n_tasks: ::std::os::raw::c_int, + userdata: *mut ::std::os::raw::c_void, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_map_custom2( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + b: *mut ggml_tensor, + fun: ggml_custom2_op_t, + n_tasks: ::std::os::raw::c_int, + userdata: *mut ::std::os::raw::c_void, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_map_custom2_inplace( + ctx: *mut ggml_context, + a: *mut 
ggml_tensor, + b: *mut ggml_tensor, + fun: ggml_custom2_op_t, + n_tasks: ::std::os::raw::c_int, + userdata: *mut ::std::os::raw::c_void, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_map_custom3( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + b: *mut ggml_tensor, + c: *mut ggml_tensor, + fun: ggml_custom3_op_t, + n_tasks: ::std::os::raw::c_int, + userdata: *mut ::std::os::raw::c_void, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn ggml_map_custom3_inplace( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + b: *mut ggml_tensor, + c: *mut ggml_tensor, + fun: ggml_custom3_op_t, + n_tasks: ::std::os::raw::c_int, + userdata: *mut ::std::os::raw::c_void, + ) -> *mut ggml_tensor; +} extern "C" { pub fn ggml_cross_entropy_loss( ctx: *mut ggml_context, @@ -2163,27 +2658,49 @@ extern "C" { pub fn ggml_build_forward_expand(cgraph: *mut ggml_cgraph, tensor: *mut ggml_tensor); } extern "C" { - pub fn ggml_build_forward(tensor: *mut ggml_tensor) -> ggml_cgraph; -} -extern "C" { - pub fn ggml_build_backward( + pub fn ggml_build_backward_expand( ctx: *mut ggml_context, gf: *mut ggml_cgraph, + gb: *mut ggml_cgraph, keep: bool, - ) -> ggml_cgraph; + ); } extern "C" { pub fn ggml_new_graph(ctx: *mut ggml_context) -> *mut ggml_cgraph; } extern "C" { - pub fn ggml_build_forward_ctx( + pub fn ggml_new_graph_custom( ctx: *mut ggml_context, - tensor: *mut ggml_tensor, + size: usize, + grads: bool, ) -> *mut ggml_cgraph; } +extern "C" { + pub fn ggml_graph_dup(ctx: *mut ggml_context, cgraph: *mut ggml_cgraph) -> *mut ggml_cgraph; +} +extern "C" { + pub fn ggml_graph_view( + ctx: *mut ggml_context, + cgraph: *mut ggml_cgraph, + i0: ::std::os::raw::c_int, + i1: ::std::os::raw::c_int, + ) -> *mut ggml_cgraph; +} +extern "C" { + pub fn ggml_graph_cpy(src: *mut ggml_cgraph, dst: *mut ggml_cgraph); +} +extern "C" { + pub fn ggml_graph_reset(cgraph: *mut ggml_cgraph); +} +extern "C" { + pub fn ggml_graph_clear(cgraph: *mut ggml_cgraph); +} extern "C" { pub fn 
ggml_graph_overhead() -> usize; } +extern "C" { + pub fn ggml_graph_overhead_custom(size: usize, grads: bool) -> usize; +} extern "C" { pub fn ggml_graph_plan( cgraph: *mut ggml_cgraph, @@ -2196,9 +2713,6 @@ extern "C" { cplan: *mut ggml_cplan, ) -> ::std::os::raw::c_int; } -extern "C" { - pub fn ggml_graph_reset(cgraph: *mut ggml_cgraph); -} extern "C" { pub fn ggml_graph_compute_with_ctx( ctx: *mut ggml_context, @@ -2220,7 +2734,7 @@ extern "C" { fname: *const ::std::os::raw::c_char, ctx_data: *mut *mut ggml_context, ctx_eval: *mut *mut ggml_context, - ) -> ggml_cgraph; + ) -> *mut ggml_cgraph; } extern "C" { pub fn ggml_graph_print(cgraph: *const ggml_cgraph); @@ -2232,6 +2746,16 @@ extern "C" { filename: *const ::std::os::raw::c_char, ); } +extern "C" { + pub fn ggml_build_backward_gradient_checkpointing( + ctx: *mut ggml_context, + gf: *mut ggml_cgraph, + gb: *mut ggml_cgraph, + gb_tmp: *mut ggml_cgraph, + checkpoints: *mut *mut ggml_tensor, + n_checkpoints: ::std::os::raw::c_int, + ); +} pub const ggml_opt_type_GGML_OPT_ADAM: ggml_opt_type = 0; pub const ggml_opt_type_GGML_OPT_LBFGS: ggml_opt_type = 1; pub type ggml_opt_type = ::std::os::raw::c_uint; @@ -2245,22 +2769,40 @@ pub const ggml_opt_result_GGML_OPT_DID_NOT_CONVERGE: ggml_opt_result = 1; pub const ggml_opt_result_GGML_OPT_NO_CONTEXT: ggml_opt_result = 2; pub const ggml_opt_result_GGML_OPT_INVALID_WOLFE: ggml_opt_result = 3; pub const ggml_opt_result_GGML_OPT_FAIL: ggml_opt_result = 4; +pub const ggml_opt_result_GGML_OPT_CANCEL: ggml_opt_result = 5; pub const ggml_opt_result_GGML_LINESEARCH_FAIL: ggml_opt_result = -128; pub const ggml_opt_result_GGML_LINESEARCH_MINIMUM_STEP: ggml_opt_result = -127; pub const ggml_opt_result_GGML_LINESEARCH_MAXIMUM_STEP: ggml_opt_result = -126; pub const ggml_opt_result_GGML_LINESEARCH_MAXIMUM_ITERATIONS: ggml_opt_result = -125; pub const ggml_opt_result_GGML_LINESEARCH_INVALID_PARAMETERS: ggml_opt_result = -124; pub type ggml_opt_result = ::std::os::raw::c_int; +pub 
type ggml_opt_callback = ::std::option::Option< + unsafe extern "C" fn( + data: *mut ::std::os::raw::c_void, + accum_step: ::std::os::raw::c_int, + sched: *mut f32, + cancel: *mut bool, + ), +>; +pub type ggml_log_callback = ::std::option::Option< + unsafe extern "C" fn( + level: ggml_log_level, + text: *const ::std::os::raw::c_char, + user_data: *mut ::std::os::raw::c_void, + ), +>; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct ggml_opt_params { pub type_: ggml_opt_type, + pub graph_size: usize, pub n_threads: ::std::os::raw::c_int, pub past: ::std::os::raw::c_int, pub delta: f32, pub max_no_improvement: ::std::os::raw::c_int, pub print_forward_graph: bool, pub print_backward_graph: bool, + pub n_gradient_accumulation: ::std::os::raw::c_int, pub adam: ggml_opt_params__bindgen_ty_1, pub lbfgs: ggml_opt_params__bindgen_ty_2, } @@ -2270,12 +2812,14 @@ pub struct ggml_opt_params__bindgen_ty_1 { pub n_iter: ::std::os::raw::c_int, pub sched: f32, pub decay: f32, + pub decay_min_ndim: ::std::os::raw::c_int, pub alpha: f32, pub beta1: f32, pub beta2: f32, pub eps: f32, pub eps_f: f32, pub eps_g: f32, + pub gclip: f32, } #[test] fn bindgen_test_layout_ggml_opt_params__bindgen_ty_1() { @@ -2284,7 +2828,7 @@ fn bindgen_test_layout_ggml_opt_params__bindgen_ty_1() { let ptr = UNINIT.as_ptr(); assert_eq!( ::std::mem::size_of::(), - 36usize, + 44usize, concat!("Size of: ", stringify!(ggml_opt_params__bindgen_ty_1)) ); assert_eq!( @@ -2323,8 +2867,18 @@ fn bindgen_test_layout_ggml_opt_params__bindgen_ty_1() { ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).alpha) as usize - ptr as usize }, + unsafe { ::std::ptr::addr_of!((*ptr).decay_min_ndim) as usize - ptr as usize }, 12usize, + concat!( + "Offset of field: ", + stringify!(ggml_opt_params__bindgen_ty_1), + "::", + stringify!(decay_min_ndim) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).alpha) as usize - ptr as usize }, + 16usize, concat!( "Offset of field: ", 
stringify!(ggml_opt_params__bindgen_ty_1), @@ -2334,7 +2888,7 @@ fn bindgen_test_layout_ggml_opt_params__bindgen_ty_1() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).beta1) as usize - ptr as usize }, - 16usize, + 20usize, concat!( "Offset of field: ", stringify!(ggml_opt_params__bindgen_ty_1), @@ -2344,7 +2898,7 @@ fn bindgen_test_layout_ggml_opt_params__bindgen_ty_1() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).beta2) as usize - ptr as usize }, - 20usize, + 24usize, concat!( "Offset of field: ", stringify!(ggml_opt_params__bindgen_ty_1), @@ -2354,7 +2908,7 @@ fn bindgen_test_layout_ggml_opt_params__bindgen_ty_1() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).eps) as usize - ptr as usize }, - 24usize, + 28usize, concat!( "Offset of field: ", stringify!(ggml_opt_params__bindgen_ty_1), @@ -2364,7 +2918,7 @@ fn bindgen_test_layout_ggml_opt_params__bindgen_ty_1() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).eps_f) as usize - ptr as usize }, - 28usize, + 32usize, concat!( "Offset of field: ", stringify!(ggml_opt_params__bindgen_ty_1), @@ -2374,7 +2928,7 @@ fn bindgen_test_layout_ggml_opt_params__bindgen_ty_1() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).eps_g) as usize - ptr as usize }, - 32usize, + 36usize, concat!( "Offset of field: ", stringify!(ggml_opt_params__bindgen_ty_1), @@ -2382,6 +2936,16 @@ fn bindgen_test_layout_ggml_opt_params__bindgen_ty_1() { stringify!(eps_g) ) ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).gclip) as usize - ptr as usize }, + 40usize, + concat!( + "Offset of field: ", + stringify!(ggml_opt_params__bindgen_ty_1), + "::", + stringify!(gclip) + ) + ); } #[repr(C)] #[derive(Debug, Copy, Clone)] @@ -2508,12 +3072,12 @@ fn bindgen_test_layout_ggml_opt_params() { let ptr = UNINIT.as_ptr(); assert_eq!( ::std::mem::size_of::(), - 96usize, + 120usize, concat!("Size of: ", stringify!(ggml_opt_params)) ); assert_eq!( ::std::mem::align_of::(), - 4usize, + 8usize, concat!("Alignment of ", 
stringify!(ggml_opt_params)) ); assert_eq!( @@ -2526,9 +3090,19 @@ fn bindgen_test_layout_ggml_opt_params() { stringify!(type_) ) ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).graph_size) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(ggml_opt_params), + "::", + stringify!(graph_size) + ) + ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).n_threads) as usize - ptr as usize }, - 4usize, + 16usize, concat!( "Offset of field: ", stringify!(ggml_opt_params), @@ -2538,7 +3112,7 @@ fn bindgen_test_layout_ggml_opt_params() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).past) as usize - ptr as usize }, - 8usize, + 20usize, concat!( "Offset of field: ", stringify!(ggml_opt_params), @@ -2548,7 +3122,7 @@ fn bindgen_test_layout_ggml_opt_params() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).delta) as usize - ptr as usize }, - 12usize, + 24usize, concat!( "Offset of field: ", stringify!(ggml_opt_params), @@ -2558,7 +3132,7 @@ fn bindgen_test_layout_ggml_opt_params() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).max_no_improvement) as usize - ptr as usize }, - 16usize, + 28usize, concat!( "Offset of field: ", stringify!(ggml_opt_params), @@ -2568,7 +3142,7 @@ fn bindgen_test_layout_ggml_opt_params() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).print_forward_graph) as usize - ptr as usize }, - 20usize, + 32usize, concat!( "Offset of field: ", stringify!(ggml_opt_params), @@ -2578,7 +3152,7 @@ fn bindgen_test_layout_ggml_opt_params() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).print_backward_graph) as usize - ptr as usize }, - 21usize, + 33usize, concat!( "Offset of field: ", stringify!(ggml_opt_params), @@ -2586,9 +3160,19 @@ fn bindgen_test_layout_ggml_opt_params() { stringify!(print_backward_graph) ) ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).n_gradient_accumulation) as usize - ptr as usize }, + 36usize, + concat!( + "Offset of field: ", + stringify!(ggml_opt_params), + 
"::", + stringify!(n_gradient_accumulation) + ) + ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).adam) as usize - ptr as usize }, - 24usize, + 40usize, concat!( "Offset of field: ", stringify!(ggml_opt_params), @@ -2598,7 +3182,7 @@ fn bindgen_test_layout_ggml_opt_params() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).lbfgs) as usize - ptr as usize }, - 60usize, + 84usize, concat!( "Offset of field: ", stringify!(ggml_opt_params), @@ -2615,19 +3199,17 @@ pub struct ggml_opt_context { pub iter: ::std::os::raw::c_int, pub nx: i64, pub just_initialized: bool, + pub loss_before: f32, + pub loss_after: f32, pub adam: ggml_opt_context__bindgen_ty_1, pub lbfgs: ggml_opt_context__bindgen_ty_2, } #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct ggml_opt_context__bindgen_ty_1 { - pub x: *mut ggml_tensor, - pub g1: *mut ggml_tensor, - pub g2: *mut ggml_tensor, + pub g: *mut ggml_tensor, pub m: *mut ggml_tensor, pub v: *mut ggml_tensor, - pub mh: *mut ggml_tensor, - pub vh: *mut ggml_tensor, pub pf: *mut ggml_tensor, pub fx_best: f32, pub fx_prev: f32, @@ -2640,7 +3222,7 @@ fn bindgen_test_layout_ggml_opt_context__bindgen_ty_1() { let ptr = UNINIT.as_ptr(); assert_eq!( ::std::mem::size_of::(), - 80usize, + 48usize, concat!("Size of: ", stringify!(ggml_opt_context__bindgen_ty_1)) ); assert_eq!( @@ -2649,78 +3231,38 @@ fn bindgen_test_layout_ggml_opt_context__bindgen_ty_1() { concat!("Alignment of ", stringify!(ggml_opt_context__bindgen_ty_1)) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).x) as usize - ptr as usize }, + unsafe { ::std::ptr::addr_of!((*ptr).g) as usize - ptr as usize }, 0usize, concat!( "Offset of field: ", stringify!(ggml_opt_context__bindgen_ty_1), "::", - stringify!(x) + stringify!(g) ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).g1) as usize - ptr as usize }, + unsafe { ::std::ptr::addr_of!((*ptr).m) as usize - ptr as usize }, 8usize, concat!( "Offset of field: ", stringify!(ggml_opt_context__bindgen_ty_1), "::", - 
stringify!(g1) + stringify!(m) ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).g2) as usize - ptr as usize }, + unsafe { ::std::ptr::addr_of!((*ptr).v) as usize - ptr as usize }, 16usize, concat!( "Offset of field: ", stringify!(ggml_opt_context__bindgen_ty_1), "::", - stringify!(g2) + stringify!(v) ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).m) as usize - ptr as usize }, + unsafe { ::std::ptr::addr_of!((*ptr).pf) as usize - ptr as usize }, 24usize, - concat!( - "Offset of field: ", - stringify!(ggml_opt_context__bindgen_ty_1), - "::", - stringify!(m) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).v) as usize - ptr as usize }, - 32usize, - concat!( - "Offset of field: ", - stringify!(ggml_opt_context__bindgen_ty_1), - "::", - stringify!(v) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).mh) as usize - ptr as usize }, - 40usize, - concat!( - "Offset of field: ", - stringify!(ggml_opt_context__bindgen_ty_1), - "::", - stringify!(mh) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).vh) as usize - ptr as usize }, - 48usize, - concat!( - "Offset of field: ", - stringify!(ggml_opt_context__bindgen_ty_1), - "::", - stringify!(vh) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).pf) as usize - ptr as usize }, - 56usize, concat!( "Offset of field: ", stringify!(ggml_opt_context__bindgen_ty_1), @@ -2730,7 +3272,7 @@ fn bindgen_test_layout_ggml_opt_context__bindgen_ty_1() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).fx_best) as usize - ptr as usize }, - 64usize, + 32usize, concat!( "Offset of field: ", stringify!(ggml_opt_context__bindgen_ty_1), @@ -2740,7 +3282,7 @@ fn bindgen_test_layout_ggml_opt_context__bindgen_ty_1() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).fx_prev) as usize - ptr as usize }, - 68usize, + 36usize, concat!( "Offset of field: ", stringify!(ggml_opt_context__bindgen_ty_1), @@ -2750,7 +3292,7 @@ fn bindgen_test_layout_ggml_opt_context__bindgen_ty_1() { ); 
assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).n_no_improvement) as usize - ptr as usize }, - 72usize, + 40usize, concat!( "Offset of field: ", stringify!(ggml_opt_context__bindgen_ty_1), @@ -2991,7 +3533,7 @@ fn bindgen_test_layout_ggml_opt_context() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).iter) as usize - ptr as usize }, - 104usize, + 128usize, concat!( "Offset of field: ", stringify!(ggml_opt_context), @@ -3001,7 +3543,7 @@ fn bindgen_test_layout_ggml_opt_context() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).nx) as usize - ptr as usize }, - 112usize, + 136usize, concat!( "Offset of field: ", stringify!(ggml_opt_context), @@ -3011,7 +3553,7 @@ fn bindgen_test_layout_ggml_opt_context() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).just_initialized) as usize - ptr as usize }, - 120usize, + 144usize, concat!( "Offset of field: ", stringify!(ggml_opt_context), @@ -3019,9 +3561,29 @@ fn bindgen_test_layout_ggml_opt_context() { stringify!(just_initialized) ) ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).loss_before) as usize - ptr as usize }, + 148usize, + concat!( + "Offset of field: ", + stringify!(ggml_opt_context), + "::", + stringify!(loss_before) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).loss_after) as usize - ptr as usize }, + 152usize, + concat!( + "Offset of field: ", + stringify!(ggml_opt_context), + "::", + stringify!(loss_after) + ) + ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).adam) as usize - ptr as usize }, - 128usize, + 160usize, concat!( "Offset of field: ", stringify!(ggml_opt_context), @@ -3072,6 +3634,8 @@ extern "C" { f: *mut ggml_tensor, gf: *mut ggml_cgraph, gb: *mut ggml_cgraph, + callback: ggml_opt_callback, + callback_data: *mut ::std::os::raw::c_void, ) -> ggml_opt_result; } extern "C" { @@ -3120,267 +3684,1781 @@ extern "C" { ) -> usize; } extern "C" { - pub fn ggml_quantize_chunk( - type_: ggml_type, + pub fn ggml_quantize_q2_K( src: *const f32, dst: *mut 
::std::os::raw::c_void, - start: ::std::os::raw::c_int, n: ::std::os::raw::c_int, + k: ::std::os::raw::c_int, hist: *mut i64, ) -> usize; } extern "C" { - pub fn ggml_cpu_has_avx() -> ::std::os::raw::c_int; -} -extern "C" { - pub fn ggml_cpu_has_avx2() -> ::std::os::raw::c_int; -} -extern "C" { - pub fn ggml_cpu_has_avx512() -> ::std::os::raw::c_int; -} -extern "C" { - pub fn ggml_cpu_has_avx512_vbmi() -> ::std::os::raw::c_int; -} -extern "C" { - pub fn ggml_cpu_has_avx512_vnni() -> ::std::os::raw::c_int; -} -extern "C" { - pub fn ggml_cpu_has_fma() -> ::std::os::raw::c_int; -} -extern "C" { - pub fn ggml_cpu_has_neon() -> ::std::os::raw::c_int; -} -extern "C" { - pub fn ggml_cpu_has_arm_fma() -> ::std::os::raw::c_int; -} -extern "C" { - pub fn ggml_cpu_has_f16c() -> ::std::os::raw::c_int; -} -extern "C" { - pub fn ggml_cpu_has_fp16_va() -> ::std::os::raw::c_int; -} -extern "C" { - pub fn ggml_cpu_has_wasm_simd() -> ::std::os::raw::c_int; -} -extern "C" { - pub fn ggml_cpu_has_blas() -> ::std::os::raw::c_int; + pub fn ggml_quantize_q3_K( + src: *const f32, + dst: *mut ::std::os::raw::c_void, + n: ::std::os::raw::c_int, + k: ::std::os::raw::c_int, + hist: *mut i64, + ) -> usize; } extern "C" { - pub fn ggml_cpu_has_cublas() -> ::std::os::raw::c_int; + pub fn ggml_quantize_q4_K( + src: *const f32, + dst: *mut ::std::os::raw::c_void, + n: ::std::os::raw::c_int, + k: ::std::os::raw::c_int, + hist: *mut i64, + ) -> usize; } extern "C" { - pub fn ggml_cpu_has_clblast() -> ::std::os::raw::c_int; + pub fn ggml_quantize_q5_K( + src: *const f32, + dst: *mut ::std::os::raw::c_void, + n: ::std::os::raw::c_int, + k: ::std::os::raw::c_int, + hist: *mut i64, + ) -> usize; } extern "C" { - pub fn ggml_cpu_has_gpublas() -> ::std::os::raw::c_int; + pub fn ggml_quantize_q6_K( + src: *const f32, + dst: *mut ::std::os::raw::c_void, + n: ::std::os::raw::c_int, + k: ::std::os::raw::c_int, + hist: *mut i64, + ) -> usize; } extern "C" { - pub fn ggml_cpu_has_sse3() -> 
::std::os::raw::c_int; + pub fn ggml_quantize_chunk( + type_: ggml_type, + src: *const f32, + dst: *mut ::std::os::raw::c_void, + start: ::std::os::raw::c_int, + n: ::std::os::raw::c_int, + hist: *mut i64, + ) -> usize; } -extern "C" { - pub fn ggml_cpu_has_vsx() -> ::std::os::raw::c_int; +pub const gguf_type_GGUF_TYPE_UINT8: gguf_type = 0; +pub const gguf_type_GGUF_TYPE_INT8: gguf_type = 1; +pub const gguf_type_GGUF_TYPE_UINT16: gguf_type = 2; +pub const gguf_type_GGUF_TYPE_INT16: gguf_type = 3; +pub const gguf_type_GGUF_TYPE_UINT32: gguf_type = 4; +pub const gguf_type_GGUF_TYPE_INT32: gguf_type = 5; +pub const gguf_type_GGUF_TYPE_FLOAT32: gguf_type = 6; +pub const gguf_type_GGUF_TYPE_BOOL: gguf_type = 7; +pub const gguf_type_GGUF_TYPE_STRING: gguf_type = 8; +pub const gguf_type_GGUF_TYPE_ARRAY: gguf_type = 9; +pub const gguf_type_GGUF_TYPE_UINT64: gguf_type = 10; +pub const gguf_type_GGUF_TYPE_INT64: gguf_type = 11; +pub const gguf_type_GGUF_TYPE_FLOAT64: gguf_type = 12; +pub const gguf_type_GGUF_TYPE_COUNT: gguf_type = 13; +pub type gguf_type = ::std::os::raw::c_uint; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct gguf_context { + _unused: [u8; 0], } -pub type ggml_to_float_t = ::std::option::Option< - unsafe extern "C" fn(x: *const ::std::os::raw::c_void, y: *mut f32, k: ::std::os::raw::c_int), ->; -pub type ggml_from_float_t = ::std::option::Option< - unsafe extern "C" fn(x: *const f32, y: *mut ::std::os::raw::c_void, k: ::std::os::raw::c_int), ->; -pub type ggml_vec_dot_t = ::std::option::Option< - unsafe extern "C" fn( - n: ::std::os::raw::c_int, - s: *mut f32, - x: *const ::std::os::raw::c_void, - y: *const ::std::os::raw::c_void, - ), ->; #[repr(C)] #[derive(Debug, Copy, Clone)] -pub struct ggml_type_traits_t { - pub to_float: ggml_to_float_t, - pub from_float: ggml_from_float_t, - pub from_float_reference: ggml_from_float_t, - pub vec_dot: ggml_vec_dot_t, - pub vec_dot_type: ggml_type, +pub struct gguf_init_params { + pub no_alloc: bool, + pub 
ctx: *mut *mut ggml_context, } #[test] -fn bindgen_test_layout_ggml_type_traits_t() { - const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); +fn bindgen_test_layout_gguf_init_params() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); let ptr = UNINIT.as_ptr(); assert_eq!( - ::std::mem::size_of::(), - 40usize, - concat!("Size of: ", stringify!(ggml_type_traits_t)) + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(gguf_init_params)) ); assert_eq!( - ::std::mem::align_of::(), + ::std::mem::align_of::(), 8usize, - concat!("Alignment of ", stringify!(ggml_type_traits_t)) + concat!("Alignment of ", stringify!(gguf_init_params)) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).to_float) as usize - ptr as usize }, + unsafe { ::std::ptr::addr_of!((*ptr).no_alloc) as usize - ptr as usize }, 0usize, concat!( "Offset of field: ", - stringify!(ggml_type_traits_t), + stringify!(gguf_init_params), "::", - stringify!(to_float) + stringify!(no_alloc) ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).from_float) as usize - ptr as usize }, + unsafe { ::std::ptr::addr_of!((*ptr).ctx) as usize - ptr as usize }, 8usize, concat!( "Offset of field: ", - stringify!(ggml_type_traits_t), - "::", - stringify!(from_float) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).from_float_reference) as usize - ptr as usize }, - 16usize, - concat!( - "Offset of field: ", - stringify!(ggml_type_traits_t), - "::", - stringify!(from_float_reference) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).vec_dot) as usize - ptr as usize }, - 24usize, - concat!( - "Offset of field: ", - stringify!(ggml_type_traits_t), - "::", - stringify!(vec_dot) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).vec_dot_type) as usize - ptr as usize }, - 32usize, - concat!( - "Offset of field: ", - stringify!(ggml_type_traits_t), + stringify!(gguf_init_params), "::", - stringify!(vec_dot_type) + 
stringify!(ctx) ) ); } extern "C" { - pub fn ggml_internal_get_type_traits(i: ggml_type) -> ggml_type_traits_t; + pub fn gguf_init_empty() -> *mut gguf_context; } extern "C" { - pub fn ggml_init_cublas(); + pub fn gguf_init_from_file( + fname: *const ::std::os::raw::c_char, + params: gguf_init_params, + ) -> *mut gguf_context; } extern "C" { - pub fn ggml_cuda_set_tensor_split(tensor_split: *const f32); + pub fn gguf_free(ctx: *mut gguf_context); } extern "C" { - pub fn ggml_cuda_mul(src0: *const ggml_tensor, src1: *const ggml_tensor, dst: *mut ggml_tensor); + pub fn gguf_type_name(type_: gguf_type) -> *const ::std::os::raw::c_char; } extern "C" { - pub fn ggml_cuda_can_mul_mat( - src0: *const ggml_tensor, - src1: *const ggml_tensor, - dst: *mut ggml_tensor, - ) -> bool; + pub fn gguf_get_version(ctx: *const gguf_context) -> ::std::os::raw::c_int; } extern "C" { - pub fn ggml_cuda_mul_mat_get_wsize( - src0: *const ggml_tensor, - src1: *const ggml_tensor, - dst: *mut ggml_tensor, - ) -> usize; + pub fn gguf_get_alignment(ctx: *const gguf_context) -> usize; } extern "C" { - pub fn ggml_cuda_mul_mat( - src0: *const ggml_tensor, - src1: *const ggml_tensor, - dst: *mut ggml_tensor, - wdata: *mut ::std::os::raw::c_void, - wsize: usize, - ); + pub fn gguf_get_data_offset(ctx: *const gguf_context) -> usize; } extern "C" { - pub fn ggml_cuda_host_malloc(size: usize) -> *mut ::std::os::raw::c_void; + pub fn gguf_get_data(ctx: *const gguf_context) -> *mut ::std::os::raw::c_void; } extern "C" { - pub fn ggml_cuda_host_free(ptr: *mut ::std::os::raw::c_void); + pub fn gguf_get_n_kv(ctx: *const gguf_context) -> ::std::os::raw::c_int; } extern "C" { - pub fn ggml_cuda_transform_tensor(data: *mut ::std::os::raw::c_void, tensor: *mut ggml_tensor); + pub fn gguf_find_key( + ctx: *const gguf_context, + key: *const ::std::os::raw::c_char, + ) -> ::std::os::raw::c_int; } extern "C" { - pub fn ggml_cuda_free_data(tensor: *mut ggml_tensor); + pub fn gguf_get_key( + ctx: *const 
gguf_context, + key_id: ::std::os::raw::c_int, + ) -> *const ::std::os::raw::c_char; } extern "C" { - pub fn ggml_cuda_assign_buffers(tensor: *mut ggml_tensor); + pub fn gguf_get_kv_type(ctx: *const gguf_context, key_id: ::std::os::raw::c_int) -> gguf_type; } extern "C" { - pub fn ggml_cuda_assign_buffers_no_scratch(tensor: *mut ggml_tensor); + pub fn gguf_get_arr_type(ctx: *const gguf_context, key_id: ::std::os::raw::c_int) -> gguf_type; } extern "C" { - pub fn ggml_cuda_assign_buffers_force_inplace(tensor: *mut ggml_tensor); + pub fn gguf_get_val_u8(ctx: *const gguf_context, key_id: ::std::os::raw::c_int) -> u8; } extern "C" { - pub fn ggml_cuda_set_main_device(main_device: ::std::os::raw::c_int); + pub fn gguf_get_val_i8(ctx: *const gguf_context, key_id: ::std::os::raw::c_int) -> i8; } extern "C" { - pub fn ggml_cuda_set_mul_mat_q(mul_mat_q: bool); + pub fn gguf_get_val_u16(ctx: *const gguf_context, key_id: ::std::os::raw::c_int) -> u16; } extern "C" { - pub fn ggml_cuda_set_scratch_size(scratch_size: usize); + pub fn gguf_get_val_i16(ctx: *const gguf_context, key_id: ::std::os::raw::c_int) -> i16; } extern "C" { - pub fn ggml_cuda_free_scratch(); + pub fn gguf_get_val_u32(ctx: *const gguf_context, key_id: ::std::os::raw::c_int) -> u32; } extern "C" { - pub fn ggml_cuda_compute_forward( - params: *mut ggml_compute_params, - tensor: *mut ggml_tensor, - ) -> bool; + pub fn gguf_get_val_i32(ctx: *const gguf_context, key_id: ::std::os::raw::c_int) -> i32; } -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct llama_model { - _unused: [u8; 0], +extern "C" { + pub fn gguf_get_val_f32(ctx: *const gguf_context, key_id: ::std::os::raw::c_int) -> f32; } -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct llama_context { - _unused: [u8; 0], +extern "C" { + pub fn gguf_get_val_u64(ctx: *const gguf_context, key_id: ::std::os::raw::c_int) -> u64; } -pub type llama_token = ::std::os::raw::c_int; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct llama_token_data { - 
pub id: llama_token, - pub logit: f32, - pub p: f32, +extern "C" { + pub fn gguf_get_val_i64(ctx: *const gguf_context, key_id: ::std::os::raw::c_int) -> i64; } -#[test] -fn bindgen_test_layout_llama_token_data() { - const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); - let ptr = UNINIT.as_ptr(); - assert_eq!( - ::std::mem::size_of::(), - 12usize, - concat!("Size of: ", stringify!(llama_token_data)) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!("Alignment of ", stringify!(llama_token_data)) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).id) as usize - ptr as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(llama_token_data), - "::", +extern "C" { + pub fn gguf_get_val_f64(ctx: *const gguf_context, key_id: ::std::os::raw::c_int) -> f64; +} +extern "C" { + pub fn gguf_get_val_bool(ctx: *const gguf_context, key_id: ::std::os::raw::c_int) -> bool; +} +extern "C" { + pub fn gguf_get_val_str( + ctx: *const gguf_context, + key_id: ::std::os::raw::c_int, + ) -> *const ::std::os::raw::c_char; +} +extern "C" { + pub fn gguf_get_val_data( + ctx: *const gguf_context, + key_id: ::std::os::raw::c_int, + ) -> *const ::std::os::raw::c_void; +} +extern "C" { + pub fn gguf_get_arr_n( + ctx: *const gguf_context, + key_id: ::std::os::raw::c_int, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn gguf_get_arr_data( + ctx: *const gguf_context, + key_id: ::std::os::raw::c_int, + ) -> *const ::std::os::raw::c_void; +} +extern "C" { + pub fn gguf_get_arr_str( + ctx: *const gguf_context, + key_id: ::std::os::raw::c_int, + i: ::std::os::raw::c_int, + ) -> *const ::std::os::raw::c_char; +} +extern "C" { + pub fn gguf_get_n_tensors(ctx: *const gguf_context) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn gguf_find_tensor( + ctx: *const gguf_context, + name: *const ::std::os::raw::c_char, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn gguf_get_tensor_offset(ctx: *const gguf_context, i: 
::std::os::raw::c_int) -> usize; +} +extern "C" { + pub fn gguf_get_tensor_name( + ctx: *const gguf_context, + i: ::std::os::raw::c_int, + ) -> *mut ::std::os::raw::c_char; +} +extern "C" { + pub fn gguf_set_val_u8(ctx: *mut gguf_context, key: *const ::std::os::raw::c_char, val: u8); +} +extern "C" { + pub fn gguf_set_val_i8(ctx: *mut gguf_context, key: *const ::std::os::raw::c_char, val: i8); +} +extern "C" { + pub fn gguf_set_val_u16(ctx: *mut gguf_context, key: *const ::std::os::raw::c_char, val: u16); +} +extern "C" { + pub fn gguf_set_val_i16(ctx: *mut gguf_context, key: *const ::std::os::raw::c_char, val: i16); +} +extern "C" { + pub fn gguf_set_val_u32(ctx: *mut gguf_context, key: *const ::std::os::raw::c_char, val: u32); +} +extern "C" { + pub fn gguf_set_val_i32(ctx: *mut gguf_context, key: *const ::std::os::raw::c_char, val: i32); +} +extern "C" { + pub fn gguf_set_val_f32(ctx: *mut gguf_context, key: *const ::std::os::raw::c_char, val: f32); +} +extern "C" { + pub fn gguf_set_val_u64(ctx: *mut gguf_context, key: *const ::std::os::raw::c_char, val: u64); +} +extern "C" { + pub fn gguf_set_val_i64(ctx: *mut gguf_context, key: *const ::std::os::raw::c_char, val: i64); +} +extern "C" { + pub fn gguf_set_val_f64(ctx: *mut gguf_context, key: *const ::std::os::raw::c_char, val: f64); +} +extern "C" { + pub fn gguf_set_val_bool(ctx: *mut gguf_context, key: *const ::std::os::raw::c_char, val: bool); +} +extern "C" { + pub fn gguf_set_val_str( + ctx: *mut gguf_context, + key: *const ::std::os::raw::c_char, + val: *const ::std::os::raw::c_char, + ); +} +extern "C" { + pub fn gguf_set_arr_data( + ctx: *mut gguf_context, + key: *const ::std::os::raw::c_char, + type_: gguf_type, + data: *const ::std::os::raw::c_void, + n: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn gguf_set_arr_str( + ctx: *mut gguf_context, + key: *const ::std::os::raw::c_char, + data: *mut *const ::std::os::raw::c_char, + n: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn 
gguf_set_kv(ctx: *mut gguf_context, src: *mut gguf_context); +} +extern "C" { + pub fn gguf_add_tensor(ctx: *mut gguf_context, tensor: *const ggml_tensor); +} +extern "C" { + pub fn gguf_set_tensor_type( + ctx: *mut gguf_context, + name: *const ::std::os::raw::c_char, + type_: ggml_type, + ); +} +extern "C" { + pub fn gguf_set_tensor_data( + ctx: *mut gguf_context, + name: *const ::std::os::raw::c_char, + data: *const ::std::os::raw::c_void, + size: usize, + ); +} +extern "C" { + pub fn gguf_write_to_file( + ctx: *const gguf_context, + fname: *const ::std::os::raw::c_char, + only_meta: bool, + ); +} +extern "C" { + pub fn gguf_get_meta_size(ctx: *const gguf_context) -> usize; +} +extern "C" { + pub fn gguf_get_meta_data(ctx: *const gguf_context, data: *mut ::std::os::raw::c_void); +} +extern "C" { + pub fn ggml_cpu_has_avx() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ggml_cpu_has_avx2() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ggml_cpu_has_avx512() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ggml_cpu_has_avx512_vbmi() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ggml_cpu_has_avx512_vnni() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ggml_cpu_has_fma() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ggml_cpu_has_neon() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ggml_cpu_has_arm_fma() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ggml_cpu_has_metal() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ggml_cpu_has_f16c() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ggml_cpu_has_fp16_va() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ggml_cpu_has_wasm_simd() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ggml_cpu_has_blas() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ggml_cpu_has_cublas() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ggml_cpu_has_clblast() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ggml_cpu_has_gpublas() -> ::std::os::raw::c_int; +} +extern "C" { + 
pub fn ggml_cpu_has_sse3() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ggml_cpu_has_ssse3() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ggml_cpu_has_vsx() -> ::std::os::raw::c_int; +} +pub type ggml_to_float_t = ::std::option::Option< + unsafe extern "C" fn(x: *const ::std::os::raw::c_void, y: *mut f32, k: ::std::os::raw::c_int), +>; +pub type ggml_from_float_t = ::std::option::Option< + unsafe extern "C" fn(x: *const f32, y: *mut ::std::os::raw::c_void, k: ::std::os::raw::c_int), +>; +pub type ggml_vec_dot_t = ::std::option::Option< + unsafe extern "C" fn( + n: ::std::os::raw::c_int, + s: *mut f32, + x: *const ::std::os::raw::c_void, + y: *const ::std::os::raw::c_void, + ), +>; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct ggml_type_traits_t { + pub type_name: *const ::std::os::raw::c_char, + pub blck_size: ::std::os::raw::c_int, + pub type_size: usize, + pub is_quantized: bool, + pub to_float: ggml_to_float_t, + pub from_float: ggml_from_float_t, + pub from_float_reference: ggml_from_float_t, + pub vec_dot: ggml_vec_dot_t, + pub vec_dot_type: ggml_type, +} +#[test] +fn bindgen_test_layout_ggml_type_traits_t() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 72usize, + concat!("Size of: ", stringify!(ggml_type_traits_t)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(ggml_type_traits_t)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).type_name) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(ggml_type_traits_t), + "::", + stringify!(type_name) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).blck_size) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(ggml_type_traits_t), + "::", + stringify!(blck_size) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).type_size) as usize - ptr as usize 
}, + 16usize, + concat!( + "Offset of field: ", + stringify!(ggml_type_traits_t), + "::", + stringify!(type_size) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).is_quantized) as usize - ptr as usize }, + 24usize, + concat!( + "Offset of field: ", + stringify!(ggml_type_traits_t), + "::", + stringify!(is_quantized) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).to_float) as usize - ptr as usize }, + 32usize, + concat!( + "Offset of field: ", + stringify!(ggml_type_traits_t), + "::", + stringify!(to_float) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).from_float) as usize - ptr as usize }, + 40usize, + concat!( + "Offset of field: ", + stringify!(ggml_type_traits_t), + "::", + stringify!(from_float) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).from_float_reference) as usize - ptr as usize }, + 48usize, + concat!( + "Offset of field: ", + stringify!(ggml_type_traits_t), + "::", + stringify!(from_float_reference) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).vec_dot) as usize - ptr as usize }, + 56usize, + concat!( + "Offset of field: ", + stringify!(ggml_type_traits_t), + "::", + stringify!(vec_dot) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).vec_dot_type) as usize - ptr as usize }, + 64usize, + concat!( + "Offset of field: ", + stringify!(ggml_type_traits_t), + "::", + stringify!(vec_dot_type) + ) + ); +} +extern "C" { + pub fn ggml_internal_get_type_traits(type_: ggml_type) -> ggml_type_traits_t; +} +pub type va_list = __builtin_va_list; +pub type __gnuc_va_list = __builtin_va_list; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct __mbstate_t { + pub __count: ::std::os::raw::c_int, + pub __value: __mbstate_t__bindgen_ty_1, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union __mbstate_t__bindgen_ty_1 { + pub __wch: ::std::os::raw::c_uint, + pub __wchb: [::std::os::raw::c_char; 4usize], +} +#[test] +fn bindgen_test_layout___mbstate_t__bindgen_ty_1() { + const UNINIT: 
::std::mem::MaybeUninit<__mbstate_t__bindgen_ty_1> = + ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<__mbstate_t__bindgen_ty_1>(), + 4usize, + concat!("Size of: ", stringify!(__mbstate_t__bindgen_ty_1)) + ); + assert_eq!( + ::std::mem::align_of::<__mbstate_t__bindgen_ty_1>(), + 4usize, + concat!("Alignment of ", stringify!(__mbstate_t__bindgen_ty_1)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).__wch) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(__mbstate_t__bindgen_ty_1), + "::", + stringify!(__wch) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).__wchb) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(__mbstate_t__bindgen_ty_1), + "::", + stringify!(__wchb) + ) + ); +} +#[test] +fn bindgen_test_layout___mbstate_t() { + const UNINIT: ::std::mem::MaybeUninit<__mbstate_t> = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<__mbstate_t>(), + 8usize, + concat!("Size of: ", stringify!(__mbstate_t)) + ); + assert_eq!( + ::std::mem::align_of::<__mbstate_t>(), + 4usize, + concat!("Alignment of ", stringify!(__mbstate_t)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).__count) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(__mbstate_t), + "::", + stringify!(__count) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).__value) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(__mbstate_t), + "::", + stringify!(__value) + ) + ); +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct _G_fpos_t { + pub __pos: __off_t, + pub __state: __mbstate_t, +} +#[test] +fn bindgen_test_layout__G_fpos_t() { + const UNINIT: ::std::mem::MaybeUninit<_G_fpos_t> = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<_G_fpos_t>(), + 16usize, + 
concat!("Size of: ", stringify!(_G_fpos_t)) + ); + assert_eq!( + ::std::mem::align_of::<_G_fpos_t>(), + 8usize, + concat!("Alignment of ", stringify!(_G_fpos_t)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).__pos) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(_G_fpos_t), + "::", + stringify!(__pos) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).__state) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(_G_fpos_t), + "::", + stringify!(__state) + ) + ); +} +pub type __fpos_t = _G_fpos_t; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct _G_fpos64_t { + pub __pos: __off64_t, + pub __state: __mbstate_t, +} +#[test] +fn bindgen_test_layout__G_fpos64_t() { + const UNINIT: ::std::mem::MaybeUninit<_G_fpos64_t> = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<_G_fpos64_t>(), + 16usize, + concat!("Size of: ", stringify!(_G_fpos64_t)) + ); + assert_eq!( + ::std::mem::align_of::<_G_fpos64_t>(), + 8usize, + concat!("Alignment of ", stringify!(_G_fpos64_t)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).__pos) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(_G_fpos64_t), + "::", + stringify!(__pos) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).__state) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(_G_fpos64_t), + "::", + stringify!(__state) + ) + ); +} +pub type __fpos64_t = _G_fpos64_t; +pub type __FILE = _IO_FILE; +pub type FILE = _IO_FILE; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct _IO_marker { + _unused: [u8; 0], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct _IO_codecvt { + _unused: [u8; 0], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct _IO_wide_data { + _unused: [u8; 0], +} +pub type _IO_lock_t = ::std::os::raw::c_void; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct _IO_FILE { + 
pub _flags: ::std::os::raw::c_int, + pub _IO_read_ptr: *mut ::std::os::raw::c_char, + pub _IO_read_end: *mut ::std::os::raw::c_char, + pub _IO_read_base: *mut ::std::os::raw::c_char, + pub _IO_write_base: *mut ::std::os::raw::c_char, + pub _IO_write_ptr: *mut ::std::os::raw::c_char, + pub _IO_write_end: *mut ::std::os::raw::c_char, + pub _IO_buf_base: *mut ::std::os::raw::c_char, + pub _IO_buf_end: *mut ::std::os::raw::c_char, + pub _IO_save_base: *mut ::std::os::raw::c_char, + pub _IO_backup_base: *mut ::std::os::raw::c_char, + pub _IO_save_end: *mut ::std::os::raw::c_char, + pub _markers: *mut _IO_marker, + pub _chain: *mut _IO_FILE, + pub _fileno: ::std::os::raw::c_int, + pub _flags2: ::std::os::raw::c_int, + pub _old_offset: __off_t, + pub _cur_column: ::std::os::raw::c_ushort, + pub _vtable_offset: ::std::os::raw::c_schar, + pub _shortbuf: [::std::os::raw::c_char; 1usize], + pub _lock: *mut _IO_lock_t, + pub _offset: __off64_t, + pub _codecvt: *mut _IO_codecvt, + pub _wide_data: *mut _IO_wide_data, + pub _freeres_list: *mut _IO_FILE, + pub _freeres_buf: *mut ::std::os::raw::c_void, + pub __pad5: usize, + pub _mode: ::std::os::raw::c_int, + pub _unused2: [::std::os::raw::c_char; 20usize], +} +#[test] +fn bindgen_test_layout__IO_FILE() { + const UNINIT: ::std::mem::MaybeUninit<_IO_FILE> = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<_IO_FILE>(), + 216usize, + concat!("Size of: ", stringify!(_IO_FILE)) + ); + assert_eq!( + ::std::mem::align_of::<_IO_FILE>(), + 8usize, + concat!("Alignment of ", stringify!(_IO_FILE)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._flags) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_flags) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._IO_read_ptr) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_IO_read_ptr) + 
) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._IO_read_end) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_IO_read_end) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._IO_read_base) as usize - ptr as usize }, + 24usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_IO_read_base) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._IO_write_base) as usize - ptr as usize }, + 32usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_IO_write_base) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._IO_write_ptr) as usize - ptr as usize }, + 40usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_IO_write_ptr) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._IO_write_end) as usize - ptr as usize }, + 48usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_IO_write_end) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._IO_buf_base) as usize - ptr as usize }, + 56usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_IO_buf_base) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._IO_buf_end) as usize - ptr as usize }, + 64usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_IO_buf_end) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._IO_save_base) as usize - ptr as usize }, + 72usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_IO_save_base) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._IO_backup_base) as usize - ptr as usize }, + 80usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_IO_backup_base) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._IO_save_end) as usize - ptr as usize }, + 88usize, + 
concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_IO_save_end) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._markers) as usize - ptr as usize }, + 96usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_markers) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._chain) as usize - ptr as usize }, + 104usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_chain) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._fileno) as usize - ptr as usize }, + 112usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_fileno) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._flags2) as usize - ptr as usize }, + 116usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_flags2) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._old_offset) as usize - ptr as usize }, + 120usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_old_offset) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._cur_column) as usize - ptr as usize }, + 128usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_cur_column) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._vtable_offset) as usize - ptr as usize }, + 130usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_vtable_offset) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._shortbuf) as usize - ptr as usize }, + 131usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_shortbuf) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._lock) as usize - ptr as usize }, + 136usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_lock) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._offset) as usize - ptr as usize }, 
+ 144usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_offset) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._codecvt) as usize - ptr as usize }, + 152usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_codecvt) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._wide_data) as usize - ptr as usize }, + 160usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_wide_data) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._freeres_list) as usize - ptr as usize }, + 168usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_freeres_list) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._freeres_buf) as usize - ptr as usize }, + 176usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_freeres_buf) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).__pad5) as usize - ptr as usize }, + 184usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(__pad5) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._mode) as usize - ptr as usize }, + 192usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_mode) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr)._unused2) as usize - ptr as usize }, + 196usize, + concat!( + "Offset of field: ", + stringify!(_IO_FILE), + "::", + stringify!(_unused2) + ) + ); +} +pub type cookie_read_function_t = ::std::option::Option< + unsafe extern "C" fn( + __cookie: *mut ::std::os::raw::c_void, + __buf: *mut ::std::os::raw::c_char, + __nbytes: usize, + ) -> __ssize_t, +>; +pub type cookie_write_function_t = ::std::option::Option< + unsafe extern "C" fn( + __cookie: *mut ::std::os::raw::c_void, + __buf: *const ::std::os::raw::c_char, + __nbytes: usize, + ) -> __ssize_t, +>; +pub type cookie_seek_function_t = ::std::option::Option< + unsafe 
extern "C" fn( + __cookie: *mut ::std::os::raw::c_void, + __pos: *mut __off64_t, + __w: ::std::os::raw::c_int, + ) -> ::std::os::raw::c_int, +>; +pub type cookie_close_function_t = ::std::option::Option< + unsafe extern "C" fn(__cookie: *mut ::std::os::raw::c_void) -> ::std::os::raw::c_int, +>; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct _IO_cookie_io_functions_t { + pub read: cookie_read_function_t, + pub write: cookie_write_function_t, + pub seek: cookie_seek_function_t, + pub close: cookie_close_function_t, +} +#[test] +fn bindgen_test_layout__IO_cookie_io_functions_t() { + const UNINIT: ::std::mem::MaybeUninit<_IO_cookie_io_functions_t> = + ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<_IO_cookie_io_functions_t>(), + 32usize, + concat!("Size of: ", stringify!(_IO_cookie_io_functions_t)) + ); + assert_eq!( + ::std::mem::align_of::<_IO_cookie_io_functions_t>(), + 8usize, + concat!("Alignment of ", stringify!(_IO_cookie_io_functions_t)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).read) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(_IO_cookie_io_functions_t), + "::", + stringify!(read) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).write) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(_IO_cookie_io_functions_t), + "::", + stringify!(write) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).seek) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(_IO_cookie_io_functions_t), + "::", + stringify!(seek) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).close) as usize - ptr as usize }, + 24usize, + concat!( + "Offset of field: ", + stringify!(_IO_cookie_io_functions_t), + "::", + stringify!(close) + ) + ); +} +pub type cookie_io_functions_t = _IO_cookie_io_functions_t; +pub type off_t = __off_t; +pub type off64_t = __off64_t; +pub type fpos_t = 
__fpos_t; +pub type fpos64_t = __fpos64_t; +extern "C" { + pub static mut stdin: *mut FILE; +} +extern "C" { + pub static mut stdout: *mut FILE; +} +extern "C" { + pub static mut stderr: *mut FILE; +} +extern "C" { + pub fn remove(__filename: *const ::std::os::raw::c_char) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn rename( + __old: *const ::std::os::raw::c_char, + __new: *const ::std::os::raw::c_char, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn renameat( + __oldfd: ::std::os::raw::c_int, + __old: *const ::std::os::raw::c_char, + __newfd: ::std::os::raw::c_int, + __new: *const ::std::os::raw::c_char, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn renameat2( + __oldfd: ::std::os::raw::c_int, + __old: *const ::std::os::raw::c_char, + __newfd: ::std::os::raw::c_int, + __new: *const ::std::os::raw::c_char, + __flags: ::std::os::raw::c_uint, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn fclose(__stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn tmpfile() -> *mut FILE; +} +extern "C" { + pub fn tmpfile64() -> *mut FILE; +} +extern "C" { + pub fn tmpnam(arg1: *mut ::std::os::raw::c_char) -> *mut ::std::os::raw::c_char; +} +extern "C" { + pub fn tmpnam_r(__s: *mut ::std::os::raw::c_char) -> *mut ::std::os::raw::c_char; +} +extern "C" { + pub fn tempnam( + __dir: *const ::std::os::raw::c_char, + __pfx: *const ::std::os::raw::c_char, + ) -> *mut ::std::os::raw::c_char; +} +extern "C" { + pub fn fflush(__stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn fflush_unlocked(__stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn fcloseall() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn fopen( + __filename: *const ::std::os::raw::c_char, + __modes: *const ::std::os::raw::c_char, + ) -> *mut FILE; +} +extern "C" { + pub fn freopen( + __filename: *const ::std::os::raw::c_char, + __modes: *const ::std::os::raw::c_char, + __stream: *mut FILE, + ) -> *mut FILE; +} +extern "C" { + pub 
fn fopen64( + __filename: *const ::std::os::raw::c_char, + __modes: *const ::std::os::raw::c_char, + ) -> *mut FILE; +} +extern "C" { + pub fn freopen64( + __filename: *const ::std::os::raw::c_char, + __modes: *const ::std::os::raw::c_char, + __stream: *mut FILE, + ) -> *mut FILE; +} +extern "C" { + pub fn fdopen(__fd: ::std::os::raw::c_int, __modes: *const ::std::os::raw::c_char) + -> *mut FILE; +} +extern "C" { + pub fn fopencookie( + __magic_cookie: *mut ::std::os::raw::c_void, + __modes: *const ::std::os::raw::c_char, + __io_funcs: cookie_io_functions_t, + ) -> *mut FILE; +} +extern "C" { + pub fn fmemopen( + __s: *mut ::std::os::raw::c_void, + __len: usize, + __modes: *const ::std::os::raw::c_char, + ) -> *mut FILE; +} +extern "C" { + pub fn open_memstream( + __bufloc: *mut *mut ::std::os::raw::c_char, + __sizeloc: *mut usize, + ) -> *mut FILE; +} +extern "C" { + pub fn setbuf(__stream: *mut FILE, __buf: *mut ::std::os::raw::c_char); +} +extern "C" { + pub fn setvbuf( + __stream: *mut FILE, + __buf: *mut ::std::os::raw::c_char, + __modes: ::std::os::raw::c_int, + __n: usize, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn setbuffer(__stream: *mut FILE, __buf: *mut ::std::os::raw::c_char, __size: usize); +} +extern "C" { + pub fn setlinebuf(__stream: *mut FILE); +} +extern "C" { + pub fn fprintf( + __stream: *mut FILE, + __format: *const ::std::os::raw::c_char, + ... + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn printf(__format: *const ::std::os::raw::c_char, ...) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn sprintf( + __s: *mut ::std::os::raw::c_char, + __format: *const ::std::os::raw::c_char, + ... 
+ ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn vfprintf( + __s: *mut FILE, + __format: *const ::std::os::raw::c_char, + __arg: *mut __va_list_tag, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn vprintf( + __format: *const ::std::os::raw::c_char, + __arg: *mut __va_list_tag, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn vsprintf( + __s: *mut ::std::os::raw::c_char, + __format: *const ::std::os::raw::c_char, + __arg: *mut __va_list_tag, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn snprintf( + __s: *mut ::std::os::raw::c_char, + __maxlen: usize, + __format: *const ::std::os::raw::c_char, + ... + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn vsnprintf( + __s: *mut ::std::os::raw::c_char, + __maxlen: usize, + __format: *const ::std::os::raw::c_char, + __arg: *mut __va_list_tag, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn vasprintf( + __ptr: *mut *mut ::std::os::raw::c_char, + __f: *const ::std::os::raw::c_char, + __arg: *mut __va_list_tag, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn __asprintf( + __ptr: *mut *mut ::std::os::raw::c_char, + __fmt: *const ::std::os::raw::c_char, + ... + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn asprintf( + __ptr: *mut *mut ::std::os::raw::c_char, + __fmt: *const ::std::os::raw::c_char, + ... + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn vdprintf( + __fd: ::std::os::raw::c_int, + __fmt: *const ::std::os::raw::c_char, + __arg: *mut __va_list_tag, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn dprintf( + __fd: ::std::os::raw::c_int, + __fmt: *const ::std::os::raw::c_char, + ... + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn fscanf( + __stream: *mut FILE, + __format: *const ::std::os::raw::c_char, + ... + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn scanf(__format: *const ::std::os::raw::c_char, ...) 
-> ::std::os::raw::c_int; +} +extern "C" { + pub fn sscanf( + __s: *const ::std::os::raw::c_char, + __format: *const ::std::os::raw::c_char, + ... + ) -> ::std::os::raw::c_int; +} +pub type _Float32 = f32; +pub type _Float64 = f64; +pub type _Float32x = f64; +pub type _Float64x = u128; +extern "C" { + #[link_name = "\u{1}__isoc99_fscanf"] + pub fn fscanf1( + __stream: *mut FILE, + __format: *const ::std::os::raw::c_char, + ... + ) -> ::std::os::raw::c_int; +} +extern "C" { + #[link_name = "\u{1}__isoc99_scanf"] + pub fn scanf1(__format: *const ::std::os::raw::c_char, ...) -> ::std::os::raw::c_int; +} +extern "C" { + #[link_name = "\u{1}__isoc99_sscanf"] + pub fn sscanf1( + __s: *const ::std::os::raw::c_char, + __format: *const ::std::os::raw::c_char, + ... + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn vfscanf( + __s: *mut FILE, + __format: *const ::std::os::raw::c_char, + __arg: *mut __va_list_tag, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn vscanf( + __format: *const ::std::os::raw::c_char, + __arg: *mut __va_list_tag, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn vsscanf( + __s: *const ::std::os::raw::c_char, + __format: *const ::std::os::raw::c_char, + __arg: *mut __va_list_tag, + ) -> ::std::os::raw::c_int; +} +extern "C" { + #[link_name = "\u{1}__isoc99_vfscanf"] + pub fn vfscanf1( + __s: *mut FILE, + __format: *const ::std::os::raw::c_char, + __arg: *mut __va_list_tag, + ) -> ::std::os::raw::c_int; +} +extern "C" { + #[link_name = "\u{1}__isoc99_vscanf"] + pub fn vscanf1( + __format: *const ::std::os::raw::c_char, + __arg: *mut __va_list_tag, + ) -> ::std::os::raw::c_int; +} +extern "C" { + #[link_name = "\u{1}__isoc99_vsscanf"] + pub fn vsscanf1( + __s: *const ::std::os::raw::c_char, + __format: *const ::std::os::raw::c_char, + __arg: *mut __va_list_tag, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn fgetc(__stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn getc(__stream: *mut FILE) -> 
::std::os::raw::c_int; +} +extern "C" { + pub fn getchar() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn getc_unlocked(__stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn getchar_unlocked() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn fgetc_unlocked(__stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn fputc(__c: ::std::os::raw::c_int, __stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn putc(__c: ::std::os::raw::c_int, __stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn putchar(__c: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn fputc_unlocked(__c: ::std::os::raw::c_int, __stream: *mut FILE) + -> ::std::os::raw::c_int; +} +extern "C" { + pub fn putc_unlocked(__c: ::std::os::raw::c_int, __stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn putchar_unlocked(__c: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn getw(__stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn putw(__w: ::std::os::raw::c_int, __stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn fgets( + __s: *mut ::std::os::raw::c_char, + __n: ::std::os::raw::c_int, + __stream: *mut FILE, + ) -> *mut ::std::os::raw::c_char; +} +extern "C" { + pub fn fgets_unlocked( + __s: *mut ::std::os::raw::c_char, + __n: ::std::os::raw::c_int, + __stream: *mut FILE, + ) -> *mut ::std::os::raw::c_char; +} +extern "C" { + pub fn __getdelim( + __lineptr: *mut *mut ::std::os::raw::c_char, + __n: *mut usize, + __delimiter: ::std::os::raw::c_int, + __stream: *mut FILE, + ) -> __ssize_t; +} +extern "C" { + pub fn getdelim( + __lineptr: *mut *mut ::std::os::raw::c_char, + __n: *mut usize, + __delimiter: ::std::os::raw::c_int, + __stream: *mut FILE, + ) -> __ssize_t; +} +extern "C" { + pub fn getline( + __lineptr: *mut *mut ::std::os::raw::c_char, + __n: *mut usize, + __stream: *mut FILE, + ) -> __ssize_t; +} +extern "C" { + pub 
fn fputs(__s: *const ::std::os::raw::c_char, __stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn puts(__s: *const ::std::os::raw::c_char) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ungetc(__c: ::std::os::raw::c_int, __stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn fread( + __ptr: *mut ::std::os::raw::c_void, + __size: usize, + __n: usize, + __stream: *mut FILE, + ) -> usize; +} +extern "C" { + pub fn fwrite( + __ptr: *const ::std::os::raw::c_void, + __size: usize, + __n: usize, + __s: *mut FILE, + ) -> usize; +} +extern "C" { + pub fn fputs_unlocked( + __s: *const ::std::os::raw::c_char, + __stream: *mut FILE, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn fread_unlocked( + __ptr: *mut ::std::os::raw::c_void, + __size: usize, + __n: usize, + __stream: *mut FILE, + ) -> usize; +} +extern "C" { + pub fn fwrite_unlocked( + __ptr: *const ::std::os::raw::c_void, + __size: usize, + __n: usize, + __stream: *mut FILE, + ) -> usize; +} +extern "C" { + pub fn fseek( + __stream: *mut FILE, + __off: ::std::os::raw::c_long, + __whence: ::std::os::raw::c_int, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ftell(__stream: *mut FILE) -> ::std::os::raw::c_long; +} +extern "C" { + pub fn rewind(__stream: *mut FILE); +} +extern "C" { + pub fn fseeko( + __stream: *mut FILE, + __off: __off_t, + __whence: ::std::os::raw::c_int, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ftello(__stream: *mut FILE) -> __off_t; +} +extern "C" { + pub fn fgetpos(__stream: *mut FILE, __pos: *mut fpos_t) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn fsetpos(__stream: *mut FILE, __pos: *const fpos_t) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn fseeko64( + __stream: *mut FILE, + __off: __off64_t, + __whence: ::std::os::raw::c_int, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ftello64(__stream: *mut FILE) -> __off64_t; +} +extern "C" { + pub fn fgetpos64(__stream: *mut FILE, __pos: *mut fpos64_t) -> 
::std::os::raw::c_int; +} +extern "C" { + pub fn fsetpos64(__stream: *mut FILE, __pos: *const fpos64_t) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn clearerr(__stream: *mut FILE); +} +extern "C" { + pub fn feof(__stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ferror(__stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn clearerr_unlocked(__stream: *mut FILE); +} +extern "C" { + pub fn feof_unlocked(__stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn ferror_unlocked(__stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn perror(__s: *const ::std::os::raw::c_char); +} +extern "C" { + pub fn fileno(__stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn fileno_unlocked(__stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn pclose(__stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn popen( + __command: *const ::std::os::raw::c_char, + __modes: *const ::std::os::raw::c_char, + ) -> *mut FILE; +} +extern "C" { + pub fn ctermid(__s: *mut ::std::os::raw::c_char) -> *mut ::std::os::raw::c_char; +} +extern "C" { + pub fn cuserid(__s: *mut ::std::os::raw::c_char) -> *mut ::std::os::raw::c_char; +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct obstack { + _unused: [u8; 0], +} +extern "C" { + pub fn obstack_printf( + __obstack: *mut obstack, + __format: *const ::std::os::raw::c_char, + ... 
+ ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn obstack_vprintf( + __obstack: *mut obstack, + __format: *const ::std::os::raw::c_char, + __args: *mut __va_list_tag, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn flockfile(__stream: *mut FILE); +} +extern "C" { + pub fn ftrylockfile(__stream: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn funlockfile(__stream: *mut FILE); +} +extern "C" { + pub fn __uflow(arg1: *mut FILE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn __overflow(arg1: *mut FILE, arg2: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct llama_model { + _unused: [u8; 0], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct llama_context { + _unused: [u8; 0], +} +pub type llama_pos = i32; +pub type llama_token = i32; +pub type llama_seq_id = i32; +pub const llama_vocab_type_LLAMA_VOCAB_TYPE_SPM: llama_vocab_type = 0; +pub const llama_vocab_type_LLAMA_VOCAB_TYPE_BPE: llama_vocab_type = 1; +pub type llama_vocab_type = ::std::os::raw::c_uint; +pub const llama_token_type_LLAMA_TOKEN_TYPE_UNDEFINED: llama_token_type = 0; +pub const llama_token_type_LLAMA_TOKEN_TYPE_NORMAL: llama_token_type = 1; +pub const llama_token_type_LLAMA_TOKEN_TYPE_UNKNOWN: llama_token_type = 2; +pub const llama_token_type_LLAMA_TOKEN_TYPE_CONTROL: llama_token_type = 3; +pub const llama_token_type_LLAMA_TOKEN_TYPE_USER_DEFINED: llama_token_type = 4; +pub const llama_token_type_LLAMA_TOKEN_TYPE_UNUSED: llama_token_type = 5; +pub const llama_token_type_LLAMA_TOKEN_TYPE_BYTE: llama_token_type = 6; +pub type llama_token_type = ::std::os::raw::c_uint; +pub const llama_ftype_LLAMA_FTYPE_ALL_F32: llama_ftype = 0; +pub const llama_ftype_LLAMA_FTYPE_MOSTLY_F16: llama_ftype = 1; +pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_0: llama_ftype = 2; +pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_1: llama_ftype = 3; +pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16: llama_ftype = 4; +pub const 
llama_ftype_LLAMA_FTYPE_MOSTLY_Q8_0: llama_ftype = 7; +pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q5_0: llama_ftype = 8; +pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q5_1: llama_ftype = 9; +pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q2_K: llama_ftype = 10; +pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q3_K_S: llama_ftype = 11; +pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q3_K_M: llama_ftype = 12; +pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q3_K_L: llama_ftype = 13; +pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_K_S: llama_ftype = 14; +pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_K_M: llama_ftype = 15; +pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q5_K_S: llama_ftype = 16; +pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q5_K_M: llama_ftype = 17; +pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q6_K: llama_ftype = 18; +pub const llama_ftype_LLAMA_FTYPE_GUESSED: llama_ftype = 1024; +pub type llama_ftype = ::std::os::raw::c_uint; +pub const llama_rope_scaling_type_LLAMA_ROPE_SCALING_UNSPECIFIED: llama_rope_scaling_type = -1; +pub const llama_rope_scaling_type_LLAMA_ROPE_SCALING_NONE: llama_rope_scaling_type = 0; +pub const llama_rope_scaling_type_LLAMA_ROPE_SCALING_LINEAR: llama_rope_scaling_type = 1; +pub const llama_rope_scaling_type_LLAMA_ROPE_SCALING_YARN: llama_rope_scaling_type = 2; +pub const llama_rope_scaling_type_LLAMA_ROPE_SCALING_MAX_VALUE: llama_rope_scaling_type = 2; +pub type llama_rope_scaling_type = ::std::os::raw::c_int; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct llama_token_data { + pub id: llama_token, + pub logit: f32, + pub p: f32, +} +#[test] +fn bindgen_test_layout_llama_token_data() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 12usize, + concat!("Size of: ", stringify!(llama_token_data)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(llama_token_data)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).id) as 
usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(llama_token_data), + "::", stringify!(id) ) ); @@ -3462,26 +5540,259 @@ pub type llama_progress_callback = ::std::option::Option; #[repr(C)] #[derive(Debug, Copy, Clone)] -pub struct llama_context_params { - pub seed: u32, - pub n_ctx: i32, - pub n_batch: i32, - pub n_gqa: i32, - pub rms_norm_eps: f32, +pub struct llama_batch { + pub n_tokens: i32, + pub token: *mut llama_token, + pub embd: *mut f32, + pub pos: *mut llama_pos, + pub n_seq_id: *mut i32, + pub seq_id: *mut *mut llama_seq_id, + pub logits: *mut i8, + pub all_pos_0: llama_pos, + pub all_pos_1: llama_pos, + pub all_seq_id: llama_seq_id, +} +#[test] +fn bindgen_test_layout_llama_batch() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 72usize, + concat!("Size of: ", stringify!(llama_batch)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(llama_batch)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).n_tokens) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(llama_batch), + "::", + stringify!(n_tokens) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).token) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(llama_batch), + "::", + stringify!(token) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).embd) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(llama_batch), + "::", + stringify!(embd) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).pos) as usize - ptr as usize }, + 24usize, + concat!( + "Offset of field: ", + stringify!(llama_batch), + "::", + stringify!(pos) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).n_seq_id) as usize - ptr as usize }, + 32usize, + concat!( + "Offset of field: ", + 
stringify!(llama_batch), + "::", + stringify!(n_seq_id) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).seq_id) as usize - ptr as usize }, + 40usize, + concat!( + "Offset of field: ", + stringify!(llama_batch), + "::", + stringify!(seq_id) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).logits) as usize - ptr as usize }, + 48usize, + concat!( + "Offset of field: ", + stringify!(llama_batch), + "::", + stringify!(logits) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).all_pos_0) as usize - ptr as usize }, + 56usize, + concat!( + "Offset of field: ", + stringify!(llama_batch), + "::", + stringify!(all_pos_0) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).all_pos_1) as usize - ptr as usize }, + 60usize, + concat!( + "Offset of field: ", + stringify!(llama_batch), + "::", + stringify!(all_pos_1) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).all_seq_id) as usize - ptr as usize }, + 64usize, + concat!( + "Offset of field: ", + stringify!(llama_batch), + "::", + stringify!(all_seq_id) + ) + ); +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct llama_model_params { pub n_gpu_layers: i32, pub main_gpu: i32, pub tensor_split: *const f32, - pub rope_freq_base: f32, - pub rope_freq_scale: f32, pub progress_callback: llama_progress_callback, pub progress_callback_user_data: *mut ::std::os::raw::c_void, - pub low_vram: bool, - pub mul_mat_q: bool, - pub f16_kv: bool, - pub logits_all: bool, pub vocab_only: bool, pub use_mmap: bool, pub use_mlock: bool, +} +#[test] +fn bindgen_test_layout_llama_model_params() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 40usize, + concat!("Size of: ", stringify!(llama_model_params)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(llama_model_params)) + ); + assert_eq!( + unsafe { 
::std::ptr::addr_of!((*ptr).n_gpu_layers) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(llama_model_params), + "::", + stringify!(n_gpu_layers) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).main_gpu) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(llama_model_params), + "::", + stringify!(main_gpu) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).tensor_split) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(llama_model_params), + "::", + stringify!(tensor_split) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).progress_callback) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(llama_model_params), + "::", + stringify!(progress_callback) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).progress_callback_user_data) as usize - ptr as usize }, + 24usize, + concat!( + "Offset of field: ", + stringify!(llama_model_params), + "::", + stringify!(progress_callback_user_data) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).vocab_only) as usize - ptr as usize }, + 32usize, + concat!( + "Offset of field: ", + stringify!(llama_model_params), + "::", + stringify!(vocab_only) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).use_mmap) as usize - ptr as usize }, + 33usize, + concat!( + "Offset of field: ", + stringify!(llama_model_params), + "::", + stringify!(use_mmap) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).use_mlock) as usize - ptr as usize }, + 34usize, + concat!( + "Offset of field: ", + stringify!(llama_model_params), + "::", + stringify!(use_mlock) + ) + ); +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct llama_context_params { + pub seed: u32, + pub n_ctx: u32, + pub n_batch: u32, + pub n_threads: u32, + pub n_threads_batch: u32, + pub rope_scaling_type: i8, + pub rope_freq_base: f32, + pub rope_freq_scale: f32, + pub 
yarn_ext_factor: f32, + pub yarn_attn_factor: f32, + pub yarn_beta_fast: f32, + pub yarn_beta_slow: f32, + pub yarn_orig_ctx: u32, + pub mul_mat_q: bool, + pub f16_kv: bool, + pub logits_all: bool, pub embedding: bool, } #[test] @@ -3490,12 +5801,12 @@ fn bindgen_test_layout_llama_context_params() { let ptr = UNINIT.as_ptr(); assert_eq!( ::std::mem::size_of::(), - 72usize, + 56usize, concat!("Size of: ", stringify!(llama_context_params)) ); assert_eq!( ::std::mem::align_of::(), - 8usize, + 4usize, concat!("Alignment of ", stringify!(llama_context_params)) ); assert_eq!( @@ -3529,58 +5840,38 @@ fn bindgen_test_layout_llama_context_params() { ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).n_gqa) as usize - ptr as usize }, + unsafe { ::std::ptr::addr_of!((*ptr).n_threads) as usize - ptr as usize }, 12usize, concat!( "Offset of field: ", stringify!(llama_context_params), "::", - stringify!(n_gqa) + stringify!(n_threads) ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).rms_norm_eps) as usize - ptr as usize }, + unsafe { ::std::ptr::addr_of!((*ptr).n_threads_batch) as usize - ptr as usize }, 16usize, concat!( "Offset of field: ", stringify!(llama_context_params), "::", - stringify!(rms_norm_eps) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).n_gpu_layers) as usize - ptr as usize }, - 20usize, - concat!( - "Offset of field: ", - stringify!(llama_context_params), - "::", - stringify!(n_gpu_layers) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).main_gpu) as usize - ptr as usize }, - 24usize, - concat!( - "Offset of field: ", - stringify!(llama_context_params), - "::", - stringify!(main_gpu) + stringify!(n_threads_batch) ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).tensor_split) as usize - ptr as usize }, - 32usize, + unsafe { ::std::ptr::addr_of!((*ptr).rope_scaling_type) as usize - ptr as usize }, + 20usize, concat!( "Offset of field: ", stringify!(llama_context_params), "::", - stringify!(tensor_split) + 
stringify!(rope_scaling_type) ) ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).rope_freq_base) as usize - ptr as usize }, - 40usize, + 24usize, concat!( "Offset of field: ", stringify!(llama_context_params), @@ -3590,7 +5881,7 @@ fn bindgen_test_layout_llama_context_params() { ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).rope_freq_scale) as usize - ptr as usize }, - 44usize, + 28usize, concat!( "Offset of field: ", stringify!(llama_context_params), @@ -3599,98 +5890,88 @@ fn bindgen_test_layout_llama_context_params() { ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).progress_callback) as usize - ptr as usize }, - 48usize, - concat!( - "Offset of field: ", - stringify!(llama_context_params), - "::", - stringify!(progress_callback) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).progress_callback_user_data) as usize - ptr as usize }, - 56usize, + unsafe { ::std::ptr::addr_of!((*ptr).yarn_ext_factor) as usize - ptr as usize }, + 32usize, concat!( "Offset of field: ", stringify!(llama_context_params), "::", - stringify!(progress_callback_user_data) + stringify!(yarn_ext_factor) ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).low_vram) as usize - ptr as usize }, - 64usize, + unsafe { ::std::ptr::addr_of!((*ptr).yarn_attn_factor) as usize - ptr as usize }, + 36usize, concat!( "Offset of field: ", stringify!(llama_context_params), "::", - stringify!(low_vram) + stringify!(yarn_attn_factor) ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).mul_mat_q) as usize - ptr as usize }, - 65usize, + unsafe { ::std::ptr::addr_of!((*ptr).yarn_beta_fast) as usize - ptr as usize }, + 40usize, concat!( "Offset of field: ", stringify!(llama_context_params), "::", - stringify!(mul_mat_q) + stringify!(yarn_beta_fast) ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).f16_kv) as usize - ptr as usize }, - 66usize, + unsafe { ::std::ptr::addr_of!((*ptr).yarn_beta_slow) as usize - ptr as usize }, + 44usize, concat!( "Offset of field: ", 
stringify!(llama_context_params), "::", - stringify!(f16_kv) + stringify!(yarn_beta_slow) ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).logits_all) as usize - ptr as usize }, - 67usize, + unsafe { ::std::ptr::addr_of!((*ptr).yarn_orig_ctx) as usize - ptr as usize }, + 48usize, concat!( "Offset of field: ", stringify!(llama_context_params), "::", - stringify!(logits_all) + stringify!(yarn_orig_ctx) ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).vocab_only) as usize - ptr as usize }, - 68usize, + unsafe { ::std::ptr::addr_of!((*ptr).mul_mat_q) as usize - ptr as usize }, + 52usize, concat!( "Offset of field: ", stringify!(llama_context_params), "::", - stringify!(vocab_only) + stringify!(mul_mat_q) ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).use_mmap) as usize - ptr as usize }, - 69usize, + unsafe { ::std::ptr::addr_of!((*ptr).f16_kv) as usize - ptr as usize }, + 53usize, concat!( "Offset of field: ", stringify!(llama_context_params), "::", - stringify!(use_mmap) + stringify!(f16_kv) ) ); assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).use_mlock) as usize - ptr as usize }, - 70usize, + unsafe { ::std::ptr::addr_of!((*ptr).logits_all) as usize - ptr as usize }, + 54usize, concat!( "Offset of field: ", stringify!(llama_context_params), "::", - stringify!(use_mlock) + stringify!(logits_all) ) ); assert_eq!( unsafe { ::std::ptr::addr_of!((*ptr).embedding) as usize - ptr as usize }, - 71usize, + 55usize, concat!( "Offset of field: ", stringify!(llama_context_params), @@ -3699,24 +5980,6 @@ fn bindgen_test_layout_llama_context_params() { ) ); } -pub const llama_ftype_LLAMA_FTYPE_ALL_F32: llama_ftype = 0; -pub const llama_ftype_LLAMA_FTYPE_MOSTLY_F16: llama_ftype = 1; -pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_0: llama_ftype = 2; -pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_1: llama_ftype = 3; -pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16: llama_ftype = 4; -pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q8_0: llama_ftype = 7; -pub 
const llama_ftype_LLAMA_FTYPE_MOSTLY_Q5_0: llama_ftype = 8; -pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q5_1: llama_ftype = 9; -pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q2_K: llama_ftype = 10; -pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q3_K_S: llama_ftype = 11; -pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q3_K_M: llama_ftype = 12; -pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q3_K_L: llama_ftype = 13; -pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_K_S: llama_ftype = 14; -pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_K_M: llama_ftype = 15; -pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q5_K_S: llama_ftype = 16; -pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q5_K_M: llama_ftype = 17; -pub const llama_ftype_LLAMA_FTYPE_MOSTLY_Q6_K: llama_ftype = 18; -pub type llama_ftype = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct llama_model_quantize_params { @@ -3724,6 +5987,8 @@ pub struct llama_model_quantize_params { pub ftype: llama_ftype, pub allow_requantize: bool, pub quantize_output_tensor: bool, + pub only_copy: bool, + pub pure_: bool, } #[test] fn bindgen_test_layout_llama_model_quantize_params() { @@ -3780,6 +6045,26 @@ fn bindgen_test_layout_llama_model_quantize_params() { stringify!(quantize_output_tensor) ) ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).only_copy) as usize - ptr as usize }, + 10usize, + concat!( + "Offset of field: ", + stringify!(llama_model_quantize_params), + "::", + stringify!(only_copy) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).pure_) as usize - ptr as usize }, + 11usize, + concat!( + "Offset of field: ", + stringify!(llama_model_quantize_params), + "::", + stringify!(pure_) + ) + ); } #[repr(C)] #[derive(Debug, Copy, Clone)] @@ -3955,7 +6240,7 @@ fn bindgen_test_layout_llama_timings() { ); } extern "C" { - pub fn llama_max_devices() -> ::std::os::raw::c_int; + pub fn llama_model_default_params() -> llama_model_params; } extern "C" { pub fn llama_context_default_params() -> llama_context_params; @@ -3963,6 
+6248,36 @@ extern "C" { extern "C" { pub fn llama_model_quantize_default_params() -> llama_model_quantize_params; } +extern "C" { + pub fn llama_backend_init(numa: bool); +} +extern "C" { + pub fn llama_backend_free(); +} +extern "C" { + pub fn llama_load_model_from_file( + path_model: *const ::std::os::raw::c_char, + params: llama_model_params, + ) -> *mut llama_model; +} +extern "C" { + pub fn llama_free_model(model: *mut llama_model); +} +extern "C" { + pub fn llama_new_context_with_model( + model: *mut llama_model, + params: llama_context_params, + ) -> *mut llama_context; +} +extern "C" { + pub fn llama_free(ctx: *mut llama_context); +} +extern "C" { + pub fn llama_time_us() -> i64; +} +extern "C" { + pub fn llama_max_devices() -> ::std::os::raw::c_int; +} extern "C" { pub fn llama_mmap_supported() -> bool; } @@ -3970,66 +6285,284 @@ extern "C" { pub fn llama_mlock_supported() -> bool; } extern "C" { - pub fn llama_backend_init(numa: bool); + pub fn llama_get_model(ctx: *const llama_context) -> *const llama_model; } extern "C" { - pub fn llama_backend_free(); + pub fn llama_n_ctx(ctx: *const llama_context) -> ::std::os::raw::c_int; } extern "C" { - pub fn llama_time_us() -> i64; + pub fn llama_vocab_type(model: *const llama_model) -> llama_vocab_type; +} +extern "C" { + pub fn llama_n_vocab(model: *const llama_model) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn llama_n_ctx_train(model: *const llama_model) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn llama_n_embd(model: *const llama_model) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn llama_rope_freq_scale_train(model: *const llama_model) -> f32; +} +extern "C" { + pub fn llama_model_meta_val_str( + model: *const llama_model, + key: *const ::std::os::raw::c_char, + buf: *mut ::std::os::raw::c_char, + buf_size: usize, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn llama_model_meta_count(model: *const llama_model) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn 
llama_model_meta_key_by_index( + model: *const llama_model, + i: ::std::os::raw::c_int, + buf: *mut ::std::os::raw::c_char, + buf_size: usize, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn llama_model_meta_val_str_by_index( + model: *const llama_model, + i: ::std::os::raw::c_int, + buf: *mut ::std::os::raw::c_char, + buf_size: usize, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn llama_model_desc( + model: *const llama_model, + buf: *mut ::std::os::raw::c_char, + buf_size: usize, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn llama_model_size(model: *const llama_model) -> u64; +} +extern "C" { + pub fn llama_model_n_params(model: *const llama_model) -> u64; +} +extern "C" { + pub fn llama_get_model_tensor( + model: *mut llama_model, + name: *const ::std::os::raw::c_char, + ) -> *mut ggml_tensor; +} +extern "C" { + pub fn llama_model_quantize( + fname_inp: *const ::std::os::raw::c_char, + fname_out: *const ::std::os::raw::c_char, + params: *const llama_model_quantize_params, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn llama_apply_lora_from_file( + ctx: *mut llama_context, + path_lora: *const ::std::os::raw::c_char, + scale: f32, + path_base_model: *const ::std::os::raw::c_char, + n_threads: ::std::os::raw::c_int, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn llama_model_apply_lora_from_file( + model: *const llama_model, + path_lora: *const ::std::os::raw::c_char, + scale: f32, + path_base_model: *const ::std::os::raw::c_char, + n_threads: ::std::os::raw::c_int, + ) -> ::std::os::raw::c_int; +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct llama_kv_cache_view_cell { + pub pos: llama_pos, +} +#[test] +fn bindgen_test_layout_llama_kv_cache_view_cell() { + const UNINIT: ::std::mem::MaybeUninit = + ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 4usize, + concat!("Size of: ", stringify!(llama_kv_cache_view_cell)) + ); + assert_eq!( + 
::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(llama_kv_cache_view_cell)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).pos) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(llama_kv_cache_view_cell), + "::", + stringify!(pos) + ) + ); +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct llama_kv_cache_view { + pub n_cells: i32, + pub n_max_seq: i32, + pub token_count: i32, + pub used_cells: i32, + pub max_contiguous: i32, + pub max_contiguous_idx: i32, + pub cells: *mut llama_kv_cache_view_cell, + pub cells_sequences: *mut llama_seq_id, +} +#[test] +fn bindgen_test_layout_llama_kv_cache_view() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 40usize, + concat!("Size of: ", stringify!(llama_kv_cache_view)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(llama_kv_cache_view)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).n_cells) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(llama_kv_cache_view), + "::", + stringify!(n_cells) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).n_max_seq) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(llama_kv_cache_view), + "::", + stringify!(n_max_seq) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).token_count) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(llama_kv_cache_view), + "::", + stringify!(token_count) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).used_cells) as usize - ptr as usize }, + 12usize, + concat!( + "Offset of field: ", + stringify!(llama_kv_cache_view), + "::", + stringify!(used_cells) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).max_contiguous) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of 
field: ", + stringify!(llama_kv_cache_view), + "::", + stringify!(max_contiguous) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).max_contiguous_idx) as usize - ptr as usize }, + 20usize, + concat!( + "Offset of field: ", + stringify!(llama_kv_cache_view), + "::", + stringify!(max_contiguous_idx) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).cells) as usize - ptr as usize }, + 24usize, + concat!( + "Offset of field: ", + stringify!(llama_kv_cache_view), + "::", + stringify!(cells) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).cells_sequences) as usize - ptr as usize }, + 32usize, + concat!( + "Offset of field: ", + stringify!(llama_kv_cache_view), + "::", + stringify!(cells_sequences) + ) + ); } extern "C" { - pub fn llama_load_model_from_file( - path_model: *const ::std::os::raw::c_char, - params: llama_context_params, - ) -> *mut llama_model; + pub fn llama_kv_cache_view_init( + ctx: *const llama_context, + n_max_seq: i32, + ) -> llama_kv_cache_view; } extern "C" { - pub fn llama_free_model(model: *mut llama_model); + pub fn llama_kv_cache_view_free(view: *mut llama_kv_cache_view); } extern "C" { - pub fn llama_new_context_with_model( - model: *mut llama_model, - params: llama_context_params, - ) -> *mut llama_context; + pub fn llama_kv_cache_view_update(ctx: *const llama_context, view: *mut llama_kv_cache_view); } extern "C" { - pub fn llama_init_from_file( - path_model: *const ::std::os::raw::c_char, - params: llama_context_params, - ) -> *mut llama_context; + pub fn llama_get_kv_cache_token_count(ctx: *const llama_context) -> ::std::os::raw::c_int; } extern "C" { - pub fn llama_free(ctx: *mut llama_context); + pub fn llama_get_kv_cache_used_cells(ctx: *const llama_context) -> ::std::os::raw::c_int; } extern "C" { - pub fn llama_model_quantize( - fname_inp: *const ::std::os::raw::c_char, - fname_out: *const ::std::os::raw::c_char, - params: *const llama_model_quantize_params, - ) -> ::std::os::raw::c_int; + pub fn 
llama_kv_cache_clear(ctx: *mut llama_context); } extern "C" { - pub fn llama_apply_lora_from_file( + pub fn llama_kv_cache_seq_rm( ctx: *mut llama_context, - path_lora: *const ::std::os::raw::c_char, - path_base_model: *const ::std::os::raw::c_char, - n_threads: ::std::os::raw::c_int, - ) -> ::std::os::raw::c_int; + seq_id: llama_seq_id, + p0: llama_pos, + p1: llama_pos, + ); } extern "C" { - pub fn llama_model_apply_lora_from_file( - model: *const llama_model, - path_lora: *const ::std::os::raw::c_char, - path_base_model: *const ::std::os::raw::c_char, - n_threads: ::std::os::raw::c_int, - ) -> ::std::os::raw::c_int; + pub fn llama_kv_cache_seq_cp( + ctx: *mut llama_context, + seq_id_src: llama_seq_id, + seq_id_dst: llama_seq_id, + p0: llama_pos, + p1: llama_pos, + ); } extern "C" { - pub fn llama_get_kv_cache_token_count(ctx: *const llama_context) -> ::std::os::raw::c_int; + pub fn llama_kv_cache_seq_keep(ctx: *mut llama_context, seq_id: llama_seq_id); } extern "C" { - pub fn llama_set_rng_seed(ctx: *mut llama_context, seed: u32); + pub fn llama_kv_cache_seq_shift( + ctx: *mut llama_context, + seq_id: llama_seq_id, + p0: llama_pos, + p1: llama_pos, + delta: llama_pos, + ); } extern "C" { pub fn llama_get_state_size(ctx: *const llama_context) -> usize; @@ -4060,105 +6593,106 @@ extern "C" { extern "C" { pub fn llama_eval( ctx: *mut llama_context, - tokens: *const llama_token, - n_tokens: ::std::os::raw::c_int, + tokens: *mut llama_token, + n_tokens: i32, n_past: ::std::os::raw::c_int, - n_threads: ::std::os::raw::c_int, ) -> ::std::os::raw::c_int; } extern "C" { pub fn llama_eval_embd( ctx: *mut llama_context, - embd: *const f32, - n_tokens: ::std::os::raw::c_int, + embd: *mut f32, + n_tokens: i32, n_past: ::std::os::raw::c_int, - n_threads: ::std::os::raw::c_int, ) -> ::std::os::raw::c_int; } extern "C" { - pub fn llama_eval_export( - ctx: *mut llama_context, - fname: *const ::std::os::raw::c_char, - ) -> ::std::os::raw::c_int; + pub fn llama_batch_get_one( + 
tokens: *mut llama_token, + n_tokens: i32, + pos_0: llama_pos, + seq_id: llama_seq_id, + ) -> llama_batch; } extern "C" { - pub fn llama_tokenize( - ctx: *mut llama_context, - text: *const ::std::os::raw::c_char, - tokens: *mut llama_token, - n_max_tokens: ::std::os::raw::c_int, - add_bos: bool, - ) -> ::std::os::raw::c_int; + pub fn llama_batch_init(n_tokens: i32, embd: i32, n_seq_max: i32) -> llama_batch; } extern "C" { - pub fn llama_tokenize_with_model( - model: *const llama_model, - text: *const ::std::os::raw::c_char, - tokens: *mut llama_token, - n_max_tokens: ::std::os::raw::c_int, - add_bos: bool, - ) -> ::std::os::raw::c_int; + pub fn llama_batch_free(batch: llama_batch); } extern "C" { - pub fn llama_n_vocab(ctx: *const llama_context) -> ::std::os::raw::c_int; + pub fn llama_decode(ctx: *mut llama_context, batch: llama_batch) -> ::std::os::raw::c_int; } extern "C" { - pub fn llama_n_ctx(ctx: *const llama_context) -> ::std::os::raw::c_int; + pub fn llama_set_n_threads(ctx: *mut llama_context, n_threads: u32, n_threads_batch: u32); } extern "C" { - pub fn llama_n_embd(ctx: *const llama_context) -> ::std::os::raw::c_int; + pub fn llama_get_logits(ctx: *mut llama_context) -> *mut f32; } extern "C" { - pub fn llama_n_vocab_from_model(model: *const llama_model) -> ::std::os::raw::c_int; + pub fn llama_get_logits_ith(ctx: *mut llama_context, i: i32) -> *mut f32; } extern "C" { - pub fn llama_n_ctx_from_model(model: *const llama_model) -> ::std::os::raw::c_int; + pub fn llama_get_embeddings(ctx: *mut llama_context) -> *mut f32; } extern "C" { - pub fn llama_n_embd_from_model(model: *const llama_model) -> ::std::os::raw::c_int; + pub fn llama_token_get_text( + model: *const llama_model, + token: llama_token, + ) -> *const ::std::os::raw::c_char; } extern "C" { - pub fn llama_get_vocab( - ctx: *const llama_context, - strings: *mut *const ::std::os::raw::c_char, - scores: *mut f32, - capacity: ::std::os::raw::c_int, - ) -> ::std::os::raw::c_int; + pub fn 
llama_token_get_score(model: *const llama_model, token: llama_token) -> f32; } extern "C" { - pub fn llama_get_vocab_from_model( - model: *const llama_model, - strings: *mut *const ::std::os::raw::c_char, - scores: *mut f32, - capacity: ::std::os::raw::c_int, - ) -> ::std::os::raw::c_int; + pub fn llama_token_get_type(model: *const llama_model, token: llama_token) -> llama_token_type; } extern "C" { - pub fn llama_get_logits(ctx: *mut llama_context) -> *mut f32; + pub fn llama_token_bos(model: *const llama_model) -> llama_token; } extern "C" { - pub fn llama_get_embeddings(ctx: *mut llama_context) -> *mut f32; + pub fn llama_token_eos(model: *const llama_model) -> llama_token; } extern "C" { - pub fn llama_token_to_str( - ctx: *const llama_context, - token: llama_token, - ) -> *const ::std::os::raw::c_char; + pub fn llama_token_nl(model: *const llama_model) -> llama_token; } extern "C" { - pub fn llama_token_to_str_with_model( - model: *const llama_model, - token: llama_token, - ) -> *const ::std::os::raw::c_char; + pub fn llama_add_bos_token(model: *const llama_model) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn llama_add_eos_token(model: *const llama_model) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn llama_token_prefix(model: *const llama_model) -> llama_token; +} +extern "C" { + pub fn llama_token_middle(model: *const llama_model) -> llama_token; } extern "C" { - pub fn llama_token_bos() -> llama_token; + pub fn llama_token_suffix(model: *const llama_model) -> llama_token; } extern "C" { - pub fn llama_token_eos() -> llama_token; + pub fn llama_token_eot(model: *const llama_model) -> llama_token; +} +extern "C" { + #[doc = " @details Convert the provided text into tokens.\n @param tokens The tokens pointer must be large enough to hold the resulting tokens.\n @return Returns the number of tokens on success, no more than n_max_tokens\n @return Returns a negative number on failure - the number of tokens that would have been returned\n @param special 
Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.\n Does not insert a leading space."] + pub fn llama_tokenize( + model: *const llama_model, + text: *const ::std::os::raw::c_char, + text_len: ::std::os::raw::c_int, + tokens: *mut llama_token, + n_max_tokens: ::std::os::raw::c_int, + add_bos: bool, + special: bool, + ) -> ::std::os::raw::c_int; } extern "C" { - pub fn llama_token_nl() -> llama_token; + pub fn llama_token_to_piece( + model: *const llama_model, + token: llama_token, + buf: *mut ::std::os::raw::c_char, + length: ::std::os::raw::c_int, + ) -> ::std::os::raw::c_int; } extern "C" { pub fn llama_grammar_init( @@ -4171,24 +6705,21 @@ extern "C" { pub fn llama_grammar_free(grammar: *mut llama_grammar); } extern "C" { - #[doc = " @details Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix."] - pub fn llama_sample_repetition_penalty( - ctx: *mut llama_context, - candidates: *mut llama_token_data_array, - last_tokens: *const llama_token, - last_tokens_size: usize, - penalty: f32, - ); + pub fn llama_grammar_copy(grammar: *const llama_grammar) -> *mut llama_grammar; } extern "C" { - #[doc = " @details Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details."] - pub fn llama_sample_frequency_and_presence_penalties( + pub fn llama_set_rng_seed(ctx: *mut llama_context, seed: u32); +} +extern "C" { + #[doc = " @details Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix.\n @details Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details."] + pub fn llama_sample_repetition_penalties( ctx: *mut llama_context, candidates: *mut llama_token_data_array, last_tokens: *const llama_token, - last_tokens_size: usize, - alpha_frequency: f32, - alpha_presence: f32, + 
penalty_last_n: usize, + penalty_repeat: f32, + penalty_freq: f32, + penalty_present: f32, ); } extern "C" { @@ -4222,6 +6753,15 @@ extern "C" { min_keep: usize, ); } +extern "C" { + #[doc = " @details Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841"] + pub fn llama_sample_min_p( + ctx: *mut llama_context, + candidates: *mut llama_token_data_array, + p: f32, + min_keep: usize, + ); +} extern "C" { #[doc = " @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/."] pub fn llama_sample_tail_free( @@ -4240,6 +6780,13 @@ extern "C" { min_keep: usize, ); } +extern "C" { + pub fn llama_sample_temp( + ctx: *mut llama_context, + candidates: *mut llama_token_data_array, + temp: f32, + ); +} extern "C" { pub fn llama_sample_temperature( ctx: *mut llama_context, @@ -4277,7 +6824,7 @@ extern "C" { ) -> llama_token; } extern "C" { - #[doc = " @details Selects the token with the highest probability."] + #[doc = " @details Selects the token with the highest probability.\n Does not compute the token probabilities. 
Use llama_sample_softmax() instead."] pub fn llama_sample_token_greedy( ctx: *mut llama_context, candidates: *mut llama_token_data_array, @@ -4298,6 +6845,146 @@ extern "C" { token: llama_token, ); } +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct llama_beam_view { + pub tokens: *const llama_token, + pub n_tokens: usize, + pub p: f32, + pub eob: bool, +} +#[test] +fn bindgen_test_layout_llama_beam_view() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 24usize, + concat!("Size of: ", stringify!(llama_beam_view)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(llama_beam_view)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).tokens) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(llama_beam_view), + "::", + stringify!(tokens) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).n_tokens) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(llama_beam_view), + "::", + stringify!(n_tokens) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).p) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(llama_beam_view), + "::", + stringify!(p) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).eob) as usize - ptr as usize }, + 20usize, + concat!( + "Offset of field: ", + stringify!(llama_beam_view), + "::", + stringify!(eob) + ) + ); +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct llama_beams_state { + pub beam_views: *mut llama_beam_view, + pub n_beams: usize, + pub common_prefix_length: usize, + pub last_call: bool, +} +#[test] +fn bindgen_test_layout_llama_beams_state() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 32usize, + concat!("Size of: ", 
stringify!(llama_beams_state)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(llama_beams_state)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).beam_views) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(llama_beams_state), + "::", + stringify!(beam_views) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).n_beams) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(llama_beams_state), + "::", + stringify!(n_beams) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).common_prefix_length) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(llama_beams_state), + "::", + stringify!(common_prefix_length) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).last_call) as usize - ptr as usize }, + 24usize, + concat!( + "Offset of field: ", + stringify!(llama_beams_state), + "::", + stringify!(last_call) + ) + ); +} +pub type llama_beam_search_callback_fn_t = ::std::option::Option< + unsafe extern "C" fn(callback_data: *mut ::std::os::raw::c_void, arg1: llama_beams_state), +>; +extern "C" { + #[doc = " @details Deterministically returns entire sentence constructed by a beam search.\n @param ctx Pointer to the llama_context.\n @param callback Invoked for each iteration of the beam_search loop, passing in beams_state.\n @param callback_data A pointer that is simply passed back to callback.\n @param n_beams Number of beams to use.\n @param n_past Number of tokens already evaluated.\n @param n_predict Maximum number of tokens to predict. 
EOS may occur earlier."] + pub fn llama_beam_search( + ctx: *mut llama_context, + callback: llama_beam_search_callback_fn_t, + callback_data: *mut ::std::os::raw::c_void, + n_beams: usize, + n_past: ::std::os::raw::c_int, + n_predict: ::std::os::raw::c_int, + ); +} extern "C" { pub fn llama_get_timings(ctx: *mut llama_context) -> llama_timings; } @@ -4310,3 +6997,78 @@ extern "C" { extern "C" { pub fn llama_print_system_info() -> *const ::std::os::raw::c_char; } +extern "C" { + pub fn llama_log_set(log_callback: ggml_log_callback, user_data: *mut ::std::os::raw::c_void); +} +extern "C" { + pub fn llama_dump_timing_info_yaml(stream: *mut FILE, ctx: *const llama_context); +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct ggml_backend_buffer { + pub _address: u8, +} +pub type __builtin_va_list = [__va_list_tag; 1usize]; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct __va_list_tag { + pub gp_offset: ::std::os::raw::c_uint, + pub fp_offset: ::std::os::raw::c_uint, + pub overflow_arg_area: *mut ::std::os::raw::c_void, + pub reg_save_area: *mut ::std::os::raw::c_void, +} +#[test] +fn bindgen_test_layout___va_list_tag() { + const UNINIT: ::std::mem::MaybeUninit<__va_list_tag> = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<__va_list_tag>(), + 24usize, + concat!("Size of: ", stringify!(__va_list_tag)) + ); + assert_eq!( + ::std::mem::align_of::<__va_list_tag>(), + 8usize, + concat!("Alignment of ", stringify!(__va_list_tag)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).gp_offset) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(__va_list_tag), + "::", + stringify!(gp_offset) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).fp_offset) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(__va_list_tag), + "::", + stringify!(fp_offset) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).overflow_arg_area) 
as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(__va_list_tag), + "::", + stringify!(overflow_arg_area) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).reg_save_area) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(__va_list_tag), + "::", + stringify!(reg_save_area) + ) + ); +} diff --git a/crates/llm-chain-llama/examples/alpaca.rs b/crates/llm-chain-llama/examples/alpaca.rs index 6e581976..3b92d0f6 100644 --- a/crates/llm-chain-llama/examples/alpaca.rs +++ b/crates/llm-chain-llama/examples/alpaca.rs @@ -1,7 +1,7 @@ use llm_chain::executor; use llm_chain::{parameters, prompt}; -#[tokio::main(flavor = "current_thread")] +#[tokio::main] async fn main() -> Result<(), Box> { let exec = executor!(llama)?; let res = prompt!("Write a hypothetical weather report for {season} in {location}.") diff --git a/crates/llm-chain-llama/examples/few_shot.rs b/crates/llm-chain-llama/examples/few_shot.rs index e3f3e389..dcc09d22 100644 --- a/crates/llm-chain-llama/examples/few_shot.rs +++ b/crates/llm-chain-llama/examples/few_shot.rs @@ -1,4 +1,3 @@ -use llm_chain::options; use llm_chain::prompt::Conversation; use llm_chain::{chains::conversation::Chain, executor, parameters, prompt, step::Step}; /// This example demonstrates how to use the llm-chain for few-shot prompting @@ -10,12 +9,7 @@ use llm_chain::{chains::conversation::Chain, executor, parameters, prompt, step: /// Make sure to have the env var 'LLM_CHAIN_MODEL' set #[tokio::main(flavor = "multi_thread", worker_threads = 1)] async fn main() -> Result<(), Box> { - let opts = options!( - NThreads: 4_usize, - StopSequence: vec!["\n".to_string()] - ); - - let exec_1 = executor!(llama, opts.clone())?; + let exec_1 = executor!(llama)?; let user_prompt = "Take the last letters of the words in '{{ full_name }}' and concatenate them"; @@ -47,7 +41,7 @@ async fn main() -> Result<(), Box> { // Define the step let step = 
Step::for_prompt_template(prompt!(user: user_prompt)); // Execute the chain. - let exec_2 = executor!(llama, opts)?; + let exec_2 = executor!(llama)?; let res = chain .send_message(step, ¶meters!().with("full_name", "Elon Musk"), &exec_2) .await?; diff --git a/crates/llm-chain-llama/examples/map_reduce_llama.rs b/crates/llm-chain-llama/examples/map_reduce_llama.rs index 884304da..87102cff 100644 --- a/crates/llm-chain-llama/examples/map_reduce_llama.rs +++ b/crates/llm-chain-llama/examples/map_reduce_llama.rs @@ -1,14 +1,35 @@ use llm_chain::chains::map_reduce::Chain; use llm_chain::executor; +use llm_chain::options; use llm_chain::{prompt, step::Step, Parameters}; -#[tokio::main(flavor = "current_thread")] +#[tokio::main] async fn main() -> Result<(), Box> { - let exec = executor!(llama)?; + let opts = options!( + ModelType: "llama", + MaxContextSize: 4096_usize, + NThreads: 4_usize, + MaxTokens: 2048_usize, + MaxBatchSize: 4096_usize, + TopK: 40_i32, + TopP: 0.95, + TfsZ: 1.0, + TypicalP: 1.0, + Temperature: 0.8, + RepeatPenalty: 1.1, + RepeatPenaltyLastN: 64_usize, + FrequencyPenalty: 0.0, + PresencePenalty: 0.0, + Mirostat: 0_i32, + MirostatTau: 5.0, + MirostatEta: 0.1, + PenalizeNl: true, + StopSequence: vec!["\n\n".to_string()] + ); + let exec = executor!(llama, opts.clone())?; let map_prompt = Step::for_prompt_template(prompt!("== ARTICLE ==\n{{text}}== SUMMARY ==\n")); let reduce_prompt = Step::for_prompt_template(prompt!("== ARTICLE ==\n{{text}}== FINAL SUMMARY ==\n")); - let chain = Chain::new(map_prompt, reduce_prompt); let article = include_str!("article_to_summarize.md"); let docs = vec![Parameters::new_with_text(article)]; diff --git a/crates/llm-chain-llama/examples/simple_llama.rs b/crates/llm-chain-llama/examples/simple_llama.rs index 079c0acb..28cdd6cf 100644 --- a/crates/llm-chain-llama/examples/simple_llama.rs +++ b/crates/llm-chain-llama/examples/simple_llama.rs @@ -30,7 +30,7 @@ async fn main() -> Result<(), Box> { ModelType: "llama", 
MaxContextSize: 512_usize, NThreads: 4_usize, - MaxTokens: 0_usize, + MaxTokens: 512_usize, TopK: 40_i32, TopP: 0.95, TfsZ: 1.0, diff --git a/crates/llm-chain-llama/examples/stream.rs b/crates/llm-chain-llama/examples/stream.rs index 0274e730..906f16b3 100644 --- a/crates/llm-chain-llama/examples/stream.rs +++ b/crates/llm-chain-llama/examples/stream.rs @@ -4,10 +4,9 @@ use llm_chain::{executor, parameters, prompt}; /// This example demonstrates how to use the llm-chain-llama crate to generate streaming text using a /// LLaMA model. /// -/// Usage: cargo run --example simple path/to/llama-or-alpaca-model +/// Usage: cargo run --example stream /// -/// For example, if the model is located at "/models/llama" -/// cargo run --example simple /models/llama +/// Make sure to have the env var 'LLM_CHAIN_MODEL' set. #[tokio::main(flavor = "current_thread")] async fn main() -> Result<(), Box> { let exec = executor!(llama)?; diff --git a/crates/llm-chain-llama/src/batch.rs b/crates/llm-chain-llama/src/batch.rs new file mode 100644 index 00000000..17af1c73 --- /dev/null +++ b/crates/llm-chain-llama/src/batch.rs @@ -0,0 +1,118 @@ +use llm_chain_llama_sys::{llama_batch, llama_batch_free, llama_seq_id}; +use std::ptr::null_mut; + +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct LlamaBatch { + n_tokens: i32, + token: Vec, + embd: Vec, + pos: Vec, + n_seq_id: Vec, + seq_id: Vec>, + logits: Vec, + all_pos_0: i32, + all_pos_1: i32, + all_seq_id: i32, +} + +impl LlamaBatch { + pub fn new_with_tokens(tokens: Vec, max_seq: i32) -> Self { + let pos = (0..tokens.len()).map(|p| p as i32).collect::>(); + let embd = vec![]; + let n_seq_id = vec![max_seq; tokens.len()]; + let seq_id = vec![vec![0; tokens.len()]; tokens.len()]; + let logits = vec![false; tokens.len()]; + let all_pos_0 = 0; + let all_pos_1 = 0; + let all_seq_id = 0; + + Self { + n_tokens: tokens.len() as i32, + token: tokens, + embd, + pos, + n_seq_id, + seq_id, + logits, + all_pos_0, + all_pos_1, + all_seq_id, + } + 
} + + pub fn new_with_token(token: i32, pos: i32) -> Self { + Self { + n_tokens: 1, + token: vec![token], + embd: vec![], + pos: vec![pos], + n_seq_id: vec![1], + seq_id: vec![vec![0]], + logits: vec![true], + all_pos_0: 0, + all_pos_1: 0, + all_seq_id: 0, + } + } + + pub fn token_count(&self) -> usize { + self.n_tokens as usize + } + + pub fn enable_logits(&mut self, pos: usize) { + self.logits[pos] = true; + } +} + +impl Drop for LlamaBatch { + fn drop(&mut self) { + unsafe { + llama_batch_free(self.into()); + } + } +} + +fn convert_llama_batch(batch: &LlamaBatch) -> llama_batch { + let n_tokens = batch.n_tokens; + let token_ptr = Box::leak(batch.token.clone().into_boxed_slice()).as_mut_ptr(); + let embd_ptr = if batch.embd.is_empty() { + null_mut() + } else { + Box::leak(batch.embd.clone().into_boxed_slice()).as_mut_ptr() + }; + let pos_ptr = Box::leak(batch.pos.clone().into_boxed_slice()).as_mut_ptr(); + let n_seq_id_ptr = Box::leak(batch.n_seq_id.clone().into_boxed_slice()).as_mut_ptr(); + let raw_pointers = batch + .seq_id + .clone() + .into_iter() + .map(|inner_vec| Box::leak(inner_vec.into_boxed_slice()).as_mut_ptr()) + .collect::>(); + let seq_id_ptr = Box::leak(raw_pointers.into_boxed_slice()).as_mut_ptr(); + let logits_ptr = Box::leak(batch.logits.clone().into_boxed_slice()).as_mut_ptr(); + llama_batch { + n_tokens, + token: token_ptr, + embd: embd_ptr, + pos: pos_ptr, + n_seq_id: n_seq_id_ptr, + seq_id: seq_id_ptr, + logits: logits_ptr as *mut i8, + all_pos_0: batch.all_pos_0, + all_pos_1: batch.all_pos_1, + all_seq_id: batch.all_seq_id, + } +} + +impl From<&LlamaBatch> for llama_batch { + fn from(batch: &LlamaBatch) -> Self { + convert_llama_batch(batch) + } +} + +impl From<&mut LlamaBatch> for llama_batch { + fn from(batch: &mut LlamaBatch) -> Self { + convert_llama_batch(batch) + } +} diff --git a/crates/llm-chain-llama/src/context.rs b/crates/llm-chain-llama/src/context.rs index cbae028f..a84894ce 100644 --- a/crates/llm-chain-llama/src/context.rs 
+++ b/crates/llm-chain-llama/src/context.rs @@ -1,20 +1,22 @@ -use std::{ - ffi::{CStr, CString}, - ptr::null_mut, -}; +use std::ffi::{CStr, CString}; +use crate::batch; +use crate::model::ModelParams; use crate::options::LlamaInvocation; use anyhow::Result; use llm_chain_llama_sys::{ - llama_context, llama_context_default_params, llama_context_params, llama_eval, llama_free, - llama_get_logits, llama_init_from_file, llama_n_vocab, - llama_sample_frequency_and_presence_penalties, llama_sample_repetition_penalty, + llama_context, llama_context_default_params, llama_context_params, llama_decode, llama_eval, + llama_free, llama_get_logits, llama_get_logits_ith, llama_load_model_from_file, llama_model, + llama_n_vocab, llama_new_context_with_model, llama_sample_repetition_penalties, llama_sample_tail_free, llama_sample_temperature, llama_sample_token, llama_sample_token_greedy, llama_sample_token_mirostat, llama_sample_token_mirostat_v2, llama_sample_top_k, llama_sample_top_p, llama_sample_typical, llama_token_data, - llama_token_data_array, llama_token_nl, llama_token_to_str, + llama_token_data_array, llama_token_eos, llama_token_get_text, llama_token_nl, + llama_token_to_piece, }; +pub use batch::LlamaBatch; + #[derive(Debug, thiserror::Error)] #[error("LLAMA.cpp returned error-code {0}")] pub struct LLAMACPPErrorCode(i32); @@ -22,13 +24,22 @@ pub struct LLAMACPPErrorCode(i32); // Represents the configuration parameters for a LLamaContext. 
#[derive(Debug, Clone)] pub struct ContextParams { - pub n_parts: i32, - pub n_ctx: i32, - pub seed: i32, + pub seed: u32, + pub n_ctx: u32, + pub n_batch: u32, + pub n_threads: u32, + pub n_threads_batch: u32, + pub rope_scaling_type: i8, + pub rope_freq_base: f32, + pub rope_freq_scale: f32, + pub yarn_ext_factor: f32, + pub yarn_attn_factor: f32, + pub yarn_beta_fast: f32, + pub yarn_beta_slow: f32, + pub yarn_orig_ctx: u32, + pub mul_mat_q: bool, pub f16_kv: bool, - pub vocab_only: bool, - pub use_mlock: bool, - pub use_mmap: bool, + pub logits_all: bool, pub embedding: bool, } @@ -57,17 +68,23 @@ impl Default for ContextParams { impl From for llama_context_params { fn from(params: ContextParams) -> Self { llama_context_params { - n_parts: params.n_parts, - n_ctx: params.n_ctx, seed: params.seed, + n_ctx: params.n_ctx, + n_batch: params.n_batch, + n_threads: params.n_threads, + n_threads_batch: params.n_threads_batch, + rope_scaling_type: params.rope_scaling_type, + rope_freq_base: params.rope_freq_base, + rope_freq_scale: params.rope_freq_scale, + yarn_ext_factor: params.yarn_ext_factor, + yarn_attn_factor: params.yarn_attn_factor, + yarn_beta_fast: params.yarn_beta_fast, + yarn_beta_slow: params.yarn_beta_slow, + yarn_orig_ctx: params.yarn_orig_ctx, + mul_mat_q: params.mul_mat_q, f16_kv: params.f16_kv, logits_all: false, - vocab_only: params.vocab_only, - use_mlock: params.use_mlock, - use_mmap: params.use_mmap, embedding: params.embedding, - progress_callback: None, - progress_callback_user_data: null_mut(), } } } @@ -75,13 +92,22 @@ impl From for llama_context_params { impl From for ContextParams { fn from(params: llama_context_params) -> Self { ContextParams { - n_ctx: params.n_ctx, - n_parts: params.n_parts, seed: params.seed, + n_ctx: params.n_ctx, + n_batch: params.n_batch, + n_threads: params.n_threads, + n_threads_batch: params.n_threads_batch, + rope_scaling_type: params.rope_scaling_type, + rope_freq_base: params.rope_freq_base, + rope_freq_scale: 
params.rope_freq_scale, + yarn_ext_factor: params.yarn_ext_factor, + yarn_attn_factor: params.yarn_attn_factor, + yarn_beta_fast: params.yarn_beta_fast, + yarn_beta_slow: params.yarn_beta_slow, + yarn_orig_ctx: params.yarn_orig_ctx, + mul_mat_q: params.mul_mat_q, f16_kv: params.f16_kv, - vocab_only: params.vocab_only, - use_mlock: params.use_mlock, - use_mmap: params.use_mmap, + logits_all: params.logits_all, embedding: params.embedding, } } @@ -90,21 +116,31 @@ impl From for ContextParams { // Represents the LLamaContext which wraps FFI calls to the llama.cpp library. pub(crate) struct LLamaContext { ctx: *mut llama_context, + pub model: *mut llama_model, } +#[allow(dead_code)] impl LLamaContext { // Creates a new LLamaContext from the specified file and configuration parameters. pub fn from_file_and_params( path: &str, - params: Option<&ContextParams>, + model_params: Option<&ModelParams>, + context_params: Option<&ContextParams>, ) -> Result> { let path = CString::new(path).expect("could not convert to CString"); - let params = ContextParams::or_default(params); - let ctx = unsafe { llama_init_from_file(path.into_raw() as *const i8, params) }; + let model_params = ModelParams::or_default(model_params); + let model = + unsafe { llama_load_model_from_file(path.into_raw() as *const i8, model_params) }; + if model.is_null() { + return Err("Initializing llama model returned nullptr".into()); + } + + let context_params = ContextParams::or_default(context_params); + let ctx = unsafe { llama_new_context_with_model(model, context_params) }; if ctx.is_null() { return Err("Initializing llama context returned nullptr".into()); } - Ok(Self { ctx }) + Ok(Self { ctx, model }) } // Token logits obtained from the last call to llama_eval() @@ -117,7 +153,12 @@ impl LLamaContext { unsafe { std::slice::from_raw_parts_mut(llama_get_logits(self.ctx), len) }.to_vec() } pub fn llama_n_vocab(&self) -> i32 { - unsafe { llama_n_vocab(self.ctx) } + unsafe { llama_n_vocab(self.model) } + } 
+ + pub fn llama_get_logits_ith(&self, index: usize) -> Vec { + let float_ptr = unsafe { llama_get_logits_ith(self.ctx, index as i32) }; + Vec::from(unsafe { std::slice::from_raw_parts(float_ptr, self.llama_n_vocab() as usize) }) } // Executes the LLama sampling process with the specified configuration. @@ -127,6 +168,7 @@ impl LLamaContext { last_n_tokens_data: &[i32], last_n_tokens_size: i32, input: &LlamaInvocation, + batch_n_tokens: i32, ) -> i32 { let top_k = if input.top_k <= 0 { self.llama_n_vocab() @@ -140,7 +182,7 @@ impl LLamaContext { }; let n_vocab = self.llama_n_vocab() as usize; // only get the last row, as the sample only requires this. - let mut logits = self.llama_get_logits_as_slice(1, n_vocab); + let mut logits = self.llama_get_logits_ith((batch_n_tokens - 1) as usize); // let id : llama_token = 0; input @@ -160,11 +202,11 @@ impl LLamaContext { size: candidates.len(), sorted: false, }; - let nl_logit = logits[unsafe { llama_token_nl() } as usize]; + let nl_logit = logits[unsafe { llama_token_nl(self.model) } as usize]; let last_n_repeat = i32::min(i32::min(last_n_tokens_size, repeat_last_n), n_ctx) as usize; unsafe { - llama_sample_repetition_penalty( + llama_sample_repetition_penalties( self.ctx, &mut candidates_p, last_n_tokens_data @@ -172,22 +214,12 @@ impl LLamaContext { .add((last_n_tokens_size - last_n_repeat as i32) as usize), last_n_repeat, input.repeat_penalty, - ) - }; - unsafe { - llama_sample_frequency_and_presence_penalties( - self.ctx, - &mut candidates_p, - last_n_tokens_data - .as_ptr() - .add((last_n_tokens_size - last_n_repeat as i32) as usize), - last_n_repeat, input.frequency_penalty, input.presence_penalty, ) }; if !input.penalize_nl { - logits[unsafe { llama_token_nl() as usize }] = nl_logit; + logits[unsafe { llama_token_nl(self.model) as usize }] = nl_logit; } if input.temp <= 0.0 { @@ -231,26 +263,74 @@ impl LLamaContext { } pub fn llama_token_to_bytes(&self, token: &i32) -> Vec { - let c_ptr = unsafe { 
llama_token_to_str(self.ctx, *token) }; + let c_ptr = unsafe { llama_token_get_text(self.model, *token) }; unsafe { CStr::from_ptr(c_ptr) }.to_bytes().to_vec() } // Evaluates the given tokens with the specified configuration. pub fn llama_eval( &self, - tokens: &[i32], + tokens: &mut [i32], n_tokens: i32, n_past: i32, - input: &LlamaInvocation, + _input: &LlamaInvocation, ) -> Result<(), LLAMACPPErrorCode> { - let res = - unsafe { llama_eval(self.ctx, tokens.as_ptr(), n_tokens, n_past, input.n_threads) }; + let res = unsafe { llama_eval(self.ctx, tokens.as_mut_ptr(), n_tokens, n_past) }; if res == 0 { Ok(()) } else { Err(LLAMACPPErrorCode(res)) } } + + // Evaluates the provided batch. + pub fn llama_decode(&self, batch: &LlamaBatch) -> Result<(), LLAMACPPErrorCode> { + let res = unsafe { llama_decode(self.ctx, batch.into()) }; + if res == 0 { + Ok(()) + } else { + Err(LLAMACPPErrorCode(res)) + } + } + + pub fn llama_token_eos(&self) -> i32 { + unsafe { llama_token_eos(self.model) } + } + + pub fn llama_token_nl(&self) -> i32 { + unsafe { llama_token_nl(self.model) } + } + + pub fn llama_token_to_piece( + &self, + token_id: i32, + ) -> Result { + let mut result = vec![0 as i8; 8]; + let n_tokens = unsafe { + llama_token_to_piece( + self.model, + token_id, + result.as_mut_ptr(), + result.len() as i32, + ) + }; + if n_tokens < 0 { + result.resize(-n_tokens as usize, 0 as i8); + let check = unsafe { + llama_token_to_piece( + self.model, + token_id, + result.as_mut_ptr(), + result.len() as i32, + ) + }; + assert_eq!(check, -n_tokens); + } else { + result.resize(n_tokens as usize, 0 as i8); + } + let result_bytes: Vec = result.into_iter().map(|b| b as u8).collect(); + String::from_utf8(result_bytes) + } } // Provides thread-safe behavior for LLamaContext. 
diff --git a/crates/llm-chain-llama/src/executor.rs b/crates/llm-chain-llama/src/executor.rs index e5118792..174580e6 100644 --- a/crates/llm-chain-llama/src/executor.rs +++ b/crates/llm-chain-llama/src/executor.rs @@ -1,9 +1,9 @@ use std::marker::PhantomData; use std::sync::Arc; -use crate::context::{ContextParams, LLamaContext}; +use crate::context::{ContextParams, LLamaContext, LlamaBatch}; use crate::options::{get_executor_initial_opts, LlamaInvocation, DEFAULT_OPTIONS}; -use crate::tokenizer::{embedding_to_output, llama_token_eos, tokenize, tokens_to_string}; +use crate::tokenizer::{embedding_to_output, tokenize}; use async_trait::async_trait; @@ -61,6 +61,7 @@ impl Executor { tokio::task::spawn_blocking(move || { let context_size = context_size; let context = context.blocking_lock(); + let tokenized_stop_prompt = tokenize( &context, input @@ -69,6 +70,7 @@ impl Executor { .map(|x| x.as_str()) .unwrap_or("\n\n"), false, + true, ); if tokenized_stop_prompt.len() > context_size { @@ -77,68 +79,78 @@ impl Executor { } let prompt_text = input.prompt.to_text(); - let tokenized_input = tokenize(&context, prompt_text.as_str(), true); + + let tokenized_input = tokenize(&context, prompt_text.as_str(), true, false); if tokenized_input.len() > context_size { must_send!(sender, StreamSegment::Err(ExecutorError::ContextTooSmall)); return; } - // Embd contains the prompt and the completion. The longer the prompt, the shorter the completion. + // embd contains the prompt and the completion. The longer the + // prompt, the shorter the completion. + // It will initially contain a copy of the tokenized prompt and then + // may be extended with the tokenized answer prefix. After each + // sampling the sampled token will also be added to this vector. + // This is done so that the sampling function has access to all the + // tokens which it may need for repetition penalties, etc. let mut embd = tokenized_input.clone(); - // Evaluate the prompt in full. 
+ let mut batch = LlamaBatch::new_with_tokens(tokenized_input.clone(), 1); + let last_idx = (batch.token_count() - 1) as usize; + batch.enable_logits(last_idx); + bail!( context - .llama_eval( - tokenized_input.as_slice(), - tokenized_input.len() as i32, - 0, - &input, - ) + .llama_decode(&batch) .map_err(|e| ExecutorError::InnerError(e.into())), sender ); + let mut n_cur = batch.token_count(); + let mut n_used = (batch.token_count() - 1) as usize; let mut n_remaining = context_size - tokenized_input.len(); - let mut n_used = tokenized_input.len() - 1; if let Some(prefix) = answer_prefix { - let tokenized_answer_prefix = tokenize(&context, prefix.as_str(), false); + let tokenized_answer_prefix = tokenize(&context, prefix.as_str(), true, true); if tokenized_answer_prefix.len() > context_size { must_send!(sender, StreamSegment::Err(ExecutorError::ContextTooSmall)); return; } - + let batch = LlamaBatch::new_with_tokens(tokenized_answer_prefix.clone(), 1); // Evaluate the answer prefix (the role -- should be Assistant: ) bail!( context - .llama_eval( - tokenized_answer_prefix.as_slice(), - tokenized_answer_prefix.len() as i32, - n_used as i32, - &input, - ) + .llama_decode(&batch) .map_err(|e| ExecutorError::InnerError(e.into())), sender ); n_remaining -= tokenized_answer_prefix.len(); - n_used += tokenized_answer_prefix.len(); embd.extend(tokenized_answer_prefix); + n_cur += batch.token_count(); + n_used += (batch.token_count() - 1) as usize; } embd.resize(context_size, 0); - let token_eos = llama_token_eos(); + let token_eos = context.llama_token_eos(); + let mut stop_sequence_i = 0; + let mut n_batch = batch.token_count(); + let mut n_samples = 0; + let ignore_initial_nls = input.prompt.to_text().ends_with('?'); + let nl_token = context.llama_token_nl(); + // Generate remaining tokens. 
- let mut leftover_bytes: Vec = vec![]; while n_remaining > 0 { let tok = context.llama_sample( context_size as i32, embd.as_slice(), n_used as i32, &input, + n_batch as i32, ); + n_samples += 1; n_used += 1; n_remaining -= 1; embd[n_used] = tok; + if tok == token_eos { break; } @@ -147,47 +159,43 @@ impl Executor { { break; } + + // If the input prompt is in the form of a question then next + // predicted tok will be a new line to finish off the question + // itself, followed by another new line before the actual + // answer. This is what the following is checking for. + if n_samples <= 2 && ignore_initial_nls && tok == nl_token { + continue; + } + if tok == tokenized_stop_prompt[stop_sequence_i] { stop_sequence_i += 1; if stop_sequence_i >= tokenized_stop_prompt.len() { break; } } else { - let str_output = - tokens_to_string(&context, &embd[n_used - stop_sequence_i..n_used]); - // XXX: make into chat if chat - must_send!(sender, StreamSegment::Content(str_output)); + let piece = bail!( + context + .llama_token_to_piece(tok) + .map_err(|e| ExecutorError::InnerError(e.into())), + sender + ); + must_send!(sender, StreamSegment::Content(piece)); stop_sequence_i = 0; - } - bail!( - context - .llama_eval(&embd[n_used..], 1, n_used as i32, &input) - .map_err(|e| ExecutorError::InnerError(e.into())), - sender - ); - if n_used >= tokenized_input.len() && stop_sequence_i == 0 { - let bytes_output: Vec = - [leftover_bytes, context.llama_token_to_bytes(&embd[n_used])].concat(); + let batch = LlamaBatch::new_with_token(tok, n_cur as i32); - let (str_output, leftover) = decode_up_to_valid_utf8(&bytes_output); - leftover_bytes = leftover; - // XXX: make into chat if chat - if sender.send(StreamSegment::Content(str_output)).is_err() { - panic!("Failed to send"); - } + n_batch = batch.token_count(); + n_cur += 1; + + bail!( + context + .llama_decode(&batch) + .map_err(|e| ExecutorError::InnerError(e.into())), + sender + ); } } - if sender - .send(StreamSegment::Content( - 
std::char::REPLACEMENT_CHARACTER - .to_string() - .repeat(leftover_bytes.len()), - )) - .is_err() - { - panic!("Failed to send"); - } }); //JoinHandle is dropped? not sure how this works output @@ -206,10 +214,11 @@ impl ExecutorTrait for Executor { .with_options(&opts_from_env) .with_options(&options); - let (model_path, context_params) = get_executor_initial_opts(&cas)?; + let (model_path, model_params, context_params) = get_executor_initial_opts(&cas)?; Ok(Self { context: Arc::new(Mutex::new(LLamaContext::from_file_and_params( &model_path, + Some(&model_params), Some(&context_params), )?)), options, @@ -233,18 +242,18 @@ impl ExecutorTrait for Executor { let mut tokens_used = tokenizer .tokenize_str(&input) .map_err(|_e| PromptTokensError::UnableToCompute)? - .len() as i32; + .len(); // includes answer_prefix let answer_prefix = self.answer_prefix(prompt); if let Some(prefix) = answer_prefix { let answer_used = tokenizer .tokenize_str(&prefix) .map_err(|_e| PromptTokensError::UnableToCompute)? 
- .len() as i32; + .len(); tokens_used += answer_used } let max_tokens = self.max_tokens_allowed(options); - Ok(TokenCount::new(max_tokens, tokens_used)) + Ok(TokenCount::new(max_tokens, tokens_used as i32)) } fn answer_prefix(&self, prompt: &Prompt) -> Option { @@ -263,7 +272,7 @@ impl ExecutorTrait for Executor { } fn max_tokens_allowed(&self, _step: &Options) -> i32 { - self.context_params.n_ctx + self.context_params.n_ctx as i32 } fn get_tokenizer(&self, _step: &Options) -> Result { @@ -289,7 +298,7 @@ impl Tokenizer for LLamaTokenizer<'_> { fn tokenize_str(&self, doc: &str) -> Result { let tokenized = tokio::task::block_in_place(|| { let context = self.context.blocking_lock(); - tokenize(&context, doc, true) + tokenize(&context, doc, true, false) }); Ok(tokenized.into()) } @@ -303,34 +312,3 @@ impl Tokenizer for LLamaTokenizer<'_> { Ok(output.to_string()) } } - -fn decode_up_to_valid_utf8(bytes: &[u8]) -> (String, Vec) { - let (str_output, leftover): (String, Vec) = match std::str::from_utf8(bytes) { - Ok(s) => (s.to_owned(), Vec::new()), - Err(unicode_err) => { - let index = unicode_err.valid_up_to(); - let good = &bytes[0..index]; - match unicode_err.error_len() { - None => { - let leftover = bytes[index..].to_vec(); - let out = std::str::from_utf8(good).unwrap().to_owned(); - (out, leftover) - } - Some(len) => { - //let bad = &bytes[index..index+len]; - //eprintln!("bad utf8: {:?}", bad); - let rest = &bytes[index + len..]; - let beggining = std::str::from_utf8(good).unwrap().to_owned(); - let (after, leftover) = decode_up_to_valid_utf8(rest); - - let mut out = beggining; - out.push_str(&std::char::REPLACEMENT_CHARACTER.to_string().repeat(len)); - out.push_str(&after); - - (out, leftover) - } - } - } - }; - (str_output, leftover) -} diff --git a/crates/llm-chain-llama/src/lib.rs b/crates/llm-chain-llama/src/lib.rs index 48b39b19..16222a9d 100644 --- a/crates/llm-chain-llama/src/lib.rs +++ b/crates/llm-chain-llama/src/lib.rs @@ -21,13 +21,16 @@ //! //! 
Happy coding, and enjoy the amazing world of LLMs with llm-chain-llama! 🥳🚀 +mod batch; mod context; mod executor; +mod model; mod options; mod tokenizer; pub use context::ContextParams; pub use executor::Executor; +pub use model::ModelParams; #[deprecated(note = "Use llm_chain::step::Step instead", since = "0.7.0")] pub use llm_chain::step::Step; diff --git a/crates/llm-chain-llama/src/model.rs b/crates/llm-chain-llama/src/model.rs new file mode 100644 index 00000000..9a6aba8f --- /dev/null +++ b/crates/llm-chain-llama/src/model.rs @@ -0,0 +1,69 @@ +use llm_chain_llama_sys::{llama_model_default_params, llama_model_params, LLAMA_MAX_DEVICES}; +use std::ptr::null_mut; + +// Represents the configuration parameters for a LLama model. +#[derive(Debug, Clone)] +pub struct ModelParams { + pub n_gpu_layers: i32, + pub main_gpu: i32, + pub tensor_split: Vec, + pub vocab_only: bool, + pub use_mmap: bool, + pub use_mlock: bool, +} + +impl ModelParams { + pub fn new() -> ModelParams { + unsafe { llama_model_default_params() }.into() + } + // Returns the default parameters or the user-specified parameters. 
+ pub(crate) fn or_default(params: Option<&ModelParams>) -> llama_model_params { + match params { + Some(params) => params.clone().into(), + None => unsafe { llama_model_default_params() }, + } + } +} + +impl Default for ModelParams { + fn default() -> Self { + Self::new() + } +} + +impl From for llama_model_params { + fn from(params: ModelParams) -> Self { + llama_model_params { + n_gpu_layers: params.n_gpu_layers, + main_gpu: params.main_gpu, + tensor_split: params.tensor_split.as_ptr() as *const f32, + vocab_only: params.vocab_only, + use_mmap: params.use_mmap, + use_mlock: params.use_mlock, + progress_callback: None, + progress_callback_user_data: null_mut(), + } + } +} + +impl From for ModelParams { + fn from(params: llama_model_params) -> Self { + let tensor_split = unsafe { + if params.tensor_split.is_null() { + Vec::new() + } else { + let slice = + std::slice::from_raw_parts(params.tensor_split, LLAMA_MAX_DEVICES as usize); + slice.to_vec() + } + }; + ModelParams { + n_gpu_layers: params.n_gpu_layers, + main_gpu: params.main_gpu, + tensor_split, + vocab_only: params.vocab_only, + use_mmap: params.use_mmap, + use_mlock: params.use_mlock, + } + } +} diff --git a/crates/llm-chain-llama/src/options.rs b/crates/llm-chain-llama/src/options.rs index 37b62318..e6bd8750 100644 --- a/crates/llm-chain-llama/src/options.rs +++ b/crates/llm-chain-llama/src/options.rs @@ -9,10 +9,10 @@ use llm_chain::{ use std::collections::HashMap; use crate::context::ContextParams; +use crate::model::ModelParams; /// Represents a concrete call to the LLM model, with all the parameters specified, and no implicit behavior. 
pub struct LlamaInvocation { - pub(crate) n_threads: i32, pub(crate) n_tok_predict: usize, pub(crate) logit_bias: HashMap, pub(crate) top_k: i32, @@ -49,7 +49,6 @@ impl LlamaInvocation { opt: OptionsCascade, prompt: &Prompt, ) -> Result { - let n_threads = opt_extract!(opt, n_threads, NThreads)?; let n_tok_predict = opt_extract!(opt, n_tok_predict, MaxTokens)?; let top_k = opt_extract!(opt, top_k, TopK)?; let top_p = opt_extract!(opt, top_p, TopP)?; @@ -70,7 +69,6 @@ impl LlamaInvocation { let logit_bias = HashMap::::new(); // token_bias.as_i32_f32_hashmap()?; Ok(LlamaInvocation { - n_threads: *n_threads as i32, n_tok_predict: *n_tok_predict, logit_bias, top_k: *top_k, @@ -97,6 +95,7 @@ lazy_static! { // ModelType: "llama", // not used NThreads: 1_usize, MaxTokens: 0_usize, + MaxBatchSize: 512_usize, MaxContextSize: 2048_usize, TopK: 40_i32, TopP: 0.95, @@ -111,18 +110,58 @@ lazy_static! { MirostatTau: 5.0, MirostatEta: 0.1, PenalizeNl: true, - StopSequence: vec!["\n\n".to_string()] + StopSequence: vec!["\n\n".to_string()], + NGpuLayers: 0_i32, + MainGpu: 0_i32, + TensorSplit: Vec::new(), + VocabOnly: false, + UseMmap: true, + UseMlock: false ); } pub(crate) fn get_executor_initial_opts( opt: &OptionsCascade, -) -> Result<(String, ContextParams), ExecutorCreationError> { +) -> Result<(String, ModelParams, ContextParams), ExecutorCreationError> { let model = opt_extract!(opt, model, Model)?; - let max_context_size = opt_extract!(opt, max_context_size, MaxContextSize)?; + + let mut mp = ModelParams::new(); + if let Some(Opt::NGpuLayers(value)) = opt.get(OptDiscriminants::NGpuLayers) { + mp.n_gpu_layers = *value; + } + if let Some(Opt::MainGpu(value)) = opt.get(OptDiscriminants::MainGpu) { + mp.main_gpu = *value; + } + if let Some(Opt::TensorSplit(values)) = opt.get(OptDiscriminants::TensorSplit) { + mp.tensor_split = values.clone(); + } + // Currently, the setting of vocab_only is not allowed as it will cause + // a crash when using the llama executor which needs to 
have weights loaded + // in order to work. + mp.vocab_only = false; + + if let Some(Opt::UseMmap(value)) = opt.get(OptDiscriminants::UseMmap) { + mp.use_mmap = *value; + } + if let Some(Opt::UseMlock(value)) = opt.get(OptDiscriminants::UseMlock) { + mp.use_mlock = *value; + } let mut cp = ContextParams::new(); - cp.n_ctx = *max_context_size as i32; + if let Some(Opt::NThreads(value)) = opt.get(OptDiscriminants::NThreads) { + cp.n_threads = *value as u32; + } + + let max_context_size = opt_extract!(opt, max_context_size, MaxContextSize)?; + cp.n_ctx = *max_context_size as u32; + + let n_batch = opt_extract!(opt, nbatch, MaxBatchSize)?; + cp.n_batch = *n_batch as u32; + if max_context_size < n_batch { + return Err(ExecutorCreationError::InvalidValue( + "MaxBatchSize must be less than or equal to MaxContextSize".to_string(), + )); + } - Ok((model.to_path(), cp)) + Ok((model.to_path(), mp, cp)) } diff --git a/crates/llm-chain-llama/src/tokenizer.rs b/crates/llm-chain-llama/src/tokenizer.rs index 6e12ce97..373ade31 100644 --- a/crates/llm-chain-llama/src/tokenizer.rs +++ b/crates/llm-chain-llama/src/tokenizer.rs @@ -3,9 +3,7 @@ use llm_chain::prompt::Data; use std::ffi::{CStr, CString}; use std::os::raw::c_char; -use llm_chain_llama_sys::{ - llama_token, llama_token_eos as inner_eos, llama_token_to_str, llama_tokenize, -}; +use llm_chain_llama_sys::{llama_token, llama_token_get_text, llama_tokenize}; use crate::context::LLamaContext; @@ -25,17 +23,13 @@ fn to_cstring(s: &str) -> CString { /// /// A Rust String representation of the given llama_token. 
fn to_output(context: &LLamaContext, token: i32) -> String { - let c_ptr = unsafe { llama_token_to_str(**context, token) }; + let c_ptr = unsafe { llama_token_get_text(context.model, token) }; let native_string = unsafe { CStr::from_ptr(c_ptr) } .to_string_lossy() .into_owned(); native_string } -pub fn llama_token_eos() -> i32 { - unsafe { inner_eos() } -} - /// Helper function to tokenize text using the provided LLamaContext and add_bos option. /// /// # Arguments @@ -47,21 +41,42 @@ pub fn llama_token_eos() -> i32 { /// # Returns /// /// A Vec of llama_tokens representing the tokenized input. -pub(crate) fn tokenize(context: &LLamaContext, text: &str, add_bos: bool) -> Vec { +pub(crate) fn tokenize( + context: &LLamaContext, + text: &str, + add_bos: bool, + special: bool, +) -> Vec { let mut res = Vec::with_capacity(text.len() + add_bos as usize); let c_text = to_cstring(text); - - let n = unsafe { + let n_tokens = unsafe { llama_tokenize( - **context, + context.model, c_text.as_ptr() as *const c_char, + c_text.to_bytes().len() as i32, res.as_mut_ptr(), res.capacity() as i32, add_bos, + special, ) }; - assert!(n >= 0); - unsafe { res.set_len(n as usize) }; + if n_tokens < 0 { + res.resize(-n_tokens as usize, 0); + let new_n_tokens = unsafe { + llama_tokenize( + context.model, + c_text.as_ptr() as *const c_char, + c_text.to_bytes().len() as i32, + res.as_mut_ptr(), + res.capacity() as i32, + add_bos, + special, + ) + }; + assert!(new_n_tokens == -n_tokens); + } else { + unsafe { res.set_len(n_tokens as usize) }; + } res } diff --git a/crates/llm-chain/src/options.rs b/crates/llm-chain/src/options.rs index ee180638..bdef2cb4 100644 --- a/crates/llm-chain/src/options.rs +++ b/crates/llm-chain/src/options.rs @@ -341,6 +341,9 @@ pub enum Opt { MaxTokens(usize), /// The maximum context size of the model. MaxContextSize(usize), + /// The maximum batch size of the model. + /// This is used by llama models. 
+ MaxBatchSize(usize), /// The sequences that, when encountered, will cause the model to stop generating further tokens. /// OpenAI models allow up to four stop sequences. StopSequence(Vec), @@ -394,6 +397,19 @@ pub enum Opt { User(String), /// The type of the model. ModelType(String), + + // The number of layers to be stored in GPU VRAM for llm-chain-llama. + NGpuLayers(i32), + // The GPU that should be used for scratch and small tensors for llm-chain-llama. + MainGpu(i32), + // How the layers should be split across the available GPUs for llm-chain-llama. + TensorSplit(Vec), + // Only load the vocabulary for llm-chain-llama, no weights will be loaded. + VocabOnly(bool), + // Use memory mapped files for llm-chain-llama where possible. + UseMmap(bool), + // Force the system to keep the model in memory for llm-chain-llama. + UseMlock(bool), } // Helper function to extract environment variables diff --git a/crates/llm-chain/src/traits.rs b/crates/llm-chain/src/traits.rs index b98b5c36..65bc4284 100644 --- a/crates/llm-chain/src/traits.rs +++ b/crates/llm-chain/src/traits.rs @@ -27,6 +27,8 @@ pub enum ExecutorCreationError { InnerError(#[from] Box), #[error("Field must be set: {0}")] FieldRequiredError(String), + #[error("Invalid value. {0}")] + InvalidValue(String), } #[derive(thiserror::Error, Debug)]