diff --git a/router/src/queue.rs b/router/src/queue.rs index 2a51d2779..78eaaff06 100644 --- a/router/src/queue.rs +++ b/router/src/queue.rs @@ -9,7 +9,7 @@ use std::{ use tokio::{sync::Notify, time::Instant}; use tracing::info_span; -use crate::{adapter::Adapter, batch::Entry, infer::InferStreamResponse}; +use crate::{adapter::Adapter, batch::Entry}; #[derive(Debug, PartialEq)] pub(crate) enum AdapterStatus { @@ -214,7 +214,6 @@ impl AdapterQueuesState { // ensure that append completes before sending batcher message let queue = self.queue_map.get_mut(&adapter).unwrap(); - let id = self.next_id; queue.append(self.next_id, entry); self.next_id += 1; diff --git a/server/lorax_server/utils/layers.py b/server/lorax_server/utils/layers.py index 8b43d89b7..d25be2128 100644 --- a/server/lorax_server/utils/layers.py +++ b/server/lorax_server/utils/layers.py @@ -78,7 +78,7 @@ def forward_layer_type( # Triton Punica kernels if ( - adapter_data.punica_wrapper.enabled + adapter_data.punica_wrapper is not None and adapter_data.punica_wrapper.enabled and input.shape[0] <= adapter_data.punica_wrapper.max_batch_size and can_vectorize ):