Skip to content

Commit

Permalink
Add default
Browse files Browse the repository at this point in the history
  • Loading branch information
tgaddair committed Nov 19, 2024
1 parent 7ca481b commit 0d3778d
Showing 1 changed file with 7 additions and 9 deletions.
16 changes: 7 additions & 9 deletions launcher/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -351,8 +351,8 @@ struct Args {
speculative_tokens: Option<usize>,

/// The maximum batch size past which speculative decoding is disabled.
#[clap(long, env)]
speculation_max_batch_size: Option<usize>,
#[clap(default_value = "32", long, env)]
speculation_max_batch_size: usize,

/// The list of adapter ids to preload during initialization (to avoid cold start times).
#[clap(long, env)]
Expand Down Expand Up @@ -642,7 +642,7 @@ fn shard_manager(
quantize: Option<Quantization>,
compile: bool,
speculative_tokens: Option<usize>,
speculation_max_batch_size: Option<usize>,
speculation_max_batch_size: usize,
preloaded_adapter_ids: Vec<String>,
preloaded_adapter_source: Option<String>,
predibase_api_token: Option<String>,
Expand Down Expand Up @@ -808,12 +808,10 @@ fn shard_manager(
}

// Speculative decoding max batch size
if let Some(speculation_max_batch_size) = speculation_max_batch_size {
envs.push((
"LORAX_SPECULATION_MAX_BATCH_SIZE".into(),
speculation_max_batch_size.to_string().into(),
));
}
envs.push((
"LORAX_SPECULATION_MAX_BATCH_SIZE".into(),
speculation_max_batch_size.to_string().into(),
));

// Backend
if backend == Backend::FlashInfer {
Expand Down

0 comments on commit 0d3778d

Please sign in to comment.