diff --git a/crates/llm-chain-llama/src/options.rs b/crates/llm-chain-llama/src/options.rs
index 03510aaa..69db239f 100644
--- a/crates/llm-chain-llama/src/options.rs
+++ b/crates/llm-chain-llama/src/options.rs
@@ -113,7 +113,13 @@ lazy_static! {
         MirostatTau: 5.0,
         MirostatEta: 0.1,
         PenalizeNl: true,
-        StopSequence: vec!["\n\n".to_string()]
+        StopSequence: vec!["\n\n".to_string()],
+        NGpuLayers: 0_i32,
+        MainGpu: 0_i32,
+        TensorSplit: Vec::new(),
+        VocabOnly: false,
+        UseMmap: true,
+        UseMlock: false
     );
 }
 
@@ -122,8 +128,28 @@ pub(crate) fn get_executor_initial_opts(
 ) -> Result<(String, ModelParams, ContextParams), ExecutorCreationError> {
     let model = opt_extract!(opt, model, Model)?;
 
-    let mp = ModelParams::new();
-    // TODO(danbev) - add setting of model params
+    let mut mp = ModelParams::new();
+    if let Some(Opt::NGpuLayers(value)) = opt.get(OptDiscriminants::NGpuLayers) {
+        mp.n_gpu_layers = *value;
+    }
+    if let Some(Opt::MainGpu(value)) = opt.get(OptDiscriminants::MainGpu) {
+        mp.main_gpu = *value;
+    }
+    if let Some(Opt::TensorSplit(values)) = opt.get(OptDiscriminants::TensorSplit) {
+        mp.tensor_split = values.clone();
+    }
+    // Currently, setting vocab_only is not allowed, as it would cause a
+    // crash when using the llama executor, which needs to have weights
+    // loaded in order to work.
+    mp.vocab_only = false;
+
+    if let Some(Opt::UseMmap(value)) = opt.get(OptDiscriminants::UseMmap) {
+        mp.use_mmap = *value;
+    }
+    if let Some(Opt::UseMlock(value)) = opt.get(OptDiscriminants::UseMlock) {
+        mp.use_mlock = *value;
+    }
+
     let mut cp = ContextParams::new();
     let max_context_size = opt_extract!(opt, max_context_size, MaxContextSize)?;
     cp.n_ctx = *max_context_size as u32;
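
The options wired up above are read once, when the executor is created. Below is a minimal sketch of how a caller might set them, assuming llm-chain's options! macro, ModelRef::from_path, and the Executor trait's new_with_options constructor; the model path is hypothetical and exact names may differ from the final API.

// Sketch: constructing the llama executor with the new model params.
// Assumes the options! macro and the Executor trait's new_with_options
// constructor from llm-chain; the model path below is hypothetical.
use llm_chain::options::ModelRef;
use llm_chain::traits::{Executor as ExecutorTrait, ExecutorCreationError};
use llm_chain_llama::Executor;

fn build_executor() -> Result<Executor, ExecutorCreationError> {
    let opts = llm_chain::options!(
        Model: ModelRef::from_path("./models/llama-2-7b.bin"), // hypothetical path
        NGpuLayers: 32_i32, // offload 32 layers to the GPU
        MainGpu: 0_i32,     // GPU used for scratch buffers and small tensors
        UseMmap: true,      // map the model file instead of reading it into memory
        UseMlock: false     // do not pin the model's memory pages
    );
    // Options not supplied here fall back to DEFAULT_OPTIONS, and
    // vocab_only is forced to false as described in the comment above.
    Executor::new_with_options(opts)
}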