
Commit 4ca066c
Merge branch 'develop'
philpax committed Nov 12, 2023
1 parent 99a9fb4
Showing 41 changed files with 1,974 additions and 596 deletions.
102 changes: 46 additions & 56 deletions Cargo.lock

Some generated files are not rendered by default.

13 changes: 9 additions & 4 deletions Cargo.toml
@@ -6,7 +6,7 @@ members = [
     "crates/llm",
     "crates/llm-base",
     "crates/models/*",
-    "binaries/*"
+    "binaries/*",
 ]
 resolver = "2"
 default-members = ["binaries/llm-cli", "crates/llm"]
@@ -27,12 +27,12 @@ anyhow = "1.0"
 rustyline = { version = "11.0.0", features = ["derive"] }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = { version = "1.0" }
-spinoff = { version = "0.7.0", default-features = false, features = ["dots2"] }
+spinoff = { version = "0.8.0", default-features = false, features = ["dots2"] }
 clap = { version = "4.1.8", features = ["derive"] }
 memmap2 = "0.5.10"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 tracing = { version = "0.1", features = ["log"] }
-llm-samplers = "=0.0.6"
+llm-samplers = "=0.0.7"
 
 # Config for 'cargo dist'
 [workspace.metadata.dist]
@@ -45,7 +45,12 @@ ci = ["github"]
 # The installers to generate for each app
 installers = ["shell", "powershell"]
 # Target platforms to build apps for (Rust target-triple syntax)
-targets = ["x86_64-unknown-linux-gnu", "x86_64-apple-darwin", "x86_64-pc-windows-msvc", "aarch64-apple-darwin"]
+targets = [
+    "x86_64-unknown-linux-gnu",
+    "x86_64-apple-darwin",
+    "x86_64-pc-windows-msvc",
+    "aarch64-apple-darwin",
+]
 
 # The profile that 'cargo dist' will build with
 [profile.dist]
1 change: 1 addition & 0 deletions README.md
@@ -287,6 +287,7 @@ Absolutely! Please see the [contributing guide](./doc/CONTRIBUTING.md).
   inference API on your local machine using `llm`.
 - [secondbrain](https://github.com/juliooa/secondbrain): Desktop app to download and run LLMs locally in your computer using `llm`.
 - [floneum](https://floneum.com/): A graph editor for local AI workflows.
+- [poly](https://github.com/pixelspark/poly): A versatile LLM serving back-end with tasks, streaming completion, memory retrieval, and more.
 
 #### Libraries
 
5 changes: 5 additions & 0 deletions binaries/generate-ggml-bindings/src/main.rs
@@ -27,6 +27,8 @@ fn generate_main(ggml_path: &Path, src_path: &Path) {
         .allowlist_file(r".*ggml.h")
         .header(ggml_path.join("k_quants.h").to_string_lossy())
         .allowlist_file(r".*k_quants.h")
+        .header(ggml_path.join("ggml-alloc.h").to_string_lossy())
+        .allowlist_file(r".*ggml-alloc.h")
         // Suppress some warnings
         .raw_line("#![allow(non_upper_case_globals)]")
         .raw_line("#![allow(non_camel_case_types)]")
@@ -88,6 +90,9 @@ fn generate_metal(ggml_path: &Path, src_path: &Path) {
     generate_extra("metal", ggml_path, src_path, |b| {
         b.header(ggml_path.join("ggml-metal.h").to_string_lossy())
             .allowlist_file(r".*ggml-metal\.h")
+            .raw_line("use super::ggml_tensor;")
+            .raw_line("use super::ggml_log_callback;")
+            .raw_line("use super::ggml_cgraph;")
     });
 }
 
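A note on the three `raw_line` calls added above: bindgen's `raw_line` prepends verbatim Rust to the generated bindings file, and the `use super::…` lines suggest the metal bindings land in their own module while their signatures mention types generated from `ggml.h` in the parent module. Below is a hypothetical sketch of that layout; apart from `ggml_tensor` and the `use super::…` line, every name here is invented for illustration.

```rust
// Hypothetical module layout, for illustration only.
#[allow(non_camel_case_types)]
pub struct ggml_tensor {
    _unused: [u8; 0], // opaque stand-in for the real generated struct
}

pub mod metal {
    use super::ggml_tensor; // what .raw_line("use super::ggml_tensor;") injects

    extern "C" {
        // invented signature, purely to show the cross-module type reference
        pub fn ggml_metal_touch_tensor(t: *mut ggml_tensor);
    }
}
```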
15 changes: 12 additions & 3 deletions binaries/llm-cli/src/cli_args.rs
@@ -290,6 +290,15 @@ pub struct Generate {
     /// top_p - The probability for the top tokens are added until the result is greater or equal to P and at least min_keep tokens have been seen.
     /// p(0.95): The cumulative probability after which no more tokens are kept for sampling.
     /// min_keep(1): Minimum tokens to keep. Setting this to 0 is not recommended.
+    ///
+    /// top_a (default: disabled) - This sampler prunes tokens that don't meet a threshold based on the most probable token. The formula is `a1 * pow(max_prob, a2)`. See https://github.com/BlinkDL/RWKV-LM#the-top-a-sampling-method for more information.
+    /// a1(0.0): Threshold scale. A reasonable value is 0.2. Setting either a1 or a2 to 0 disables the sampler.
+    /// a2(0.0): Threshold power. A reasonable value is 2.
+    /// min_keep(1): Minimum tokens to keep. Setting this to 0 is not recommended.
+    ///
+    /// min_p (default: disabled) - This sampler prunes tokens that don't meet a certain percentage of the most probable token. For example if `p` is `0.05` then after `min_keep` is satisfied, other tokens must be at least 5% of the most probable token. See https://github.com/ggerganov/llama.cpp/issues/3483 for more information.
+    /// p(0.0): Probability threshold. 0.05 to 0.2 are good starting values to try. Setting this to 0 disables the sampler.
+    /// min_keep(1): Minimum tokens to keep. Setting this to 0 is not recommended.
     #[arg(long = "sampler", short = 's', verbatim_doc_comment)]
     pub sampler_options: Vec<String>,
 
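As an aside on the two samplers documented above: both pruning rules are a few lines of arithmetic. The sketch below is illustrative only, assuming a `(token id, probability)` vector; it is not the llm-samplers implementation that this commit actually pins.

```rust
// Illustrative sketch of the min_p and top_a pruning rules described in the
// doc comment above -- not the llm-samplers code.
fn prune_min_p(probs: &mut Vec<(u32, f32)>, p: f32, min_keep: usize) {
    // Sort descending so the most probable token comes first.
    probs.sort_by(|a, b| b.1.total_cmp(&a.1));
    if p <= 0.0 || probs.is_empty() {
        return; // p == 0 disables the sampler.
    }
    // Survivors must reach a fixed fraction of the top token's probability.
    let threshold = probs[0].1 * p;
    let mut kept = 0;
    probs.retain(|&(_, prob)| {
        let keep = kept < min_keep || prob >= threshold;
        if keep {
            kept += 1;
        }
        keep
    });
}

fn prune_top_a(probs: &mut Vec<(u32, f32)>, a1: f32, a2: f32, min_keep: usize) {
    probs.sort_by(|a, b| b.1.total_cmp(&a.1));
    if a1 == 0.0 || a2 == 0.0 || probs.is_empty() {
        return; // a1 == 0 or a2 == 0 disables the sampler.
    }
    // The doc comment's formula: a1 * pow(max_prob, a2).
    let threshold = a1 * probs[0].1.powf(a2);
    let mut kept = 0;
    probs.retain(|&(_, prob)| {
        let keep = kept < min_keep || prob >= threshold;
        if keep {
            kept += 1;
        }
        keep
    });
}

fn main() {
    // Made-up distribution for the demo.
    let mut probs = vec![(0, 0.50), (1, 0.30), (2, 0.15), (3, 0.04), (4, 0.01)];
    // min_p cutoff: 0.05 * 0.50 = 0.025, so only the 0.01 tail token is pruned.
    prune_min_p(&mut probs, 0.05, 1);
    println!("{probs:?}");

    let mut probs2 = vec![(0, 0.50), (1, 0.30), (2, 0.15), (3, 0.04), (4, 0.01)];
    // top_a cutoff: 0.2 * 0.50^2 = 0.05, pruning the 0.04 and 0.01 tail tokens.
    prune_top_a(&mut probs2, 0.2, 2.0, 1);
    println!("{probs2:?}");
}
```

With `p = 0.05`, a surviving token needs at least 5% of the top token's probability, matching the example in the doc comment.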
@@ -533,7 +542,7 @@ impl ModelLoad {
             let tokenizer_source = match self.model_and_tokenizer.to_source() {
                 Ok(vs) => vs,
                 Err(err) => {
-                    if let Some(sp) = sp.take() {
+                    if let Some(mut sp) = sp.take() {
                         sp.fail(&format!("Failed to load tokenizer: {}", err));
                     }
                     return Err(err);
@@ -586,7 +595,7 @@
                     file_size,
                     tensor_count,
                 } => {
-                    if let Some(sp) = sp.take() {
+                    if let Some(mut sp) = sp.take() {
                         sp.success(&format!(
                             "Loaded {tensor_count} tensors ({}) after {}ms",
                             bytesize::to_string(file_size, false),
@@ … @@
         if model.is_err() {
             // If we've failed at loading the model, we probably haven't stopped the spinner yet.
             // Cancel it now if needed.
-            if let Some(sp) = sp {
+            if let Some(mut sp) = sp {
                 sp.fail("Failed to load model")
             }
         }
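A note on the `mut` additions above: they track the `spinoff` bump from 0.7.0 to 0.8.0 in Cargo.toml. Judging by this diff, `Spinner::success` and `Spinner::fail` take `&mut self` in 0.8 (0.7 consumed the spinner), so any binding that stops a spinner must now be mutable. A minimal sketch of the resulting pattern; the file name and messages are placeholders, while the constructor and method calls mirror the diff itself:

```rust
// Sketch of the spinoff 0.8 usage pattern this diff adopts; `model.bin` and
// the messages are placeholders, not from the commit.
fn load_with_spinner() -> std::io::Result<u64> {
    let mut sp = spinoff::Spinner::new(spinoff::spinners::Dots2, "Loading...".to_string(), None);
    match std::fs::metadata("model.bin") {
        Ok(meta) => {
            // `success` needs `&mut self` in spinoff 0.8, hence `let mut sp`.
            sp.success(&format!("Loaded {} bytes", meta.len()));
            Ok(meta.len())
        }
        Err(err) => {
            sp.fail(&format!("Failed to load: {err}"));
            Err(err)
        }
    }
}
```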
5 changes: 2 additions & 3 deletions binaries/llm-cli/src/interactive.rs
@@ -141,7 +141,7 @@ fn feed_prompt_with_spinner(
         prompt.insert(0, '\n');
     }
 
-    let sp = spinoff::Spinner::new(spinoff::spinners::Dots2, "".to_string(), None);
+    let mut sp = spinoff::Spinner::new(spinoff::spinners::Dots2, "".to_string(), None);
     let result = session.feed_prompt(
         model,
         &prompt,
@@ -165,8 +165,7 @@ fn session_ends_with_newline(session: &llm::InferenceSession) -> bool {
     session
         .decoded_tokens()
         .last()
-        .map(|t| *t == b'\n')
-        .unwrap_or(true)
+        .map_or(true, |t| *t == b'\n')
 }
 
 fn readline_loop(mut body: impl FnMut(String) -> eyre::Result<()>) -> eyre::Result<()> {
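The `map_or` change above is a behavior-preserving refactor: `opt.map(f).unwrap_or(default)` is equivalent to `opt.map_or(default, f)`, and the `true` default means an empty session counts as already ending with a newline. A quick check of the equivalence (the byte values are arbitrary):

```rust
// Quick check that the refactor preserves behavior: an empty session (`None`)
// counts as ending with a newline; a non-newline last token does not.
fn main() {
    assert_eq!(None::<&u8>.map_or(true, |t| *t == b'\n'), true);
    assert_eq!(Some(&b'a').map_or(true, |t| *t == b'\n'), false);
    assert_eq!(Some(&b'\n').map_or(true, |t| *t == b'\n'), true);
}
```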
[Diffs for the remaining changed files did not load.]
