From 41c518ff8b81ebe2c70667d30e7289762c62144b Mon Sep 17 00:00:00 2001
From: Oleg Mosalov
Date: Tue, 17 Dec 2024 09:58:53 +0100
Subject: [PATCH] Renamed add_input to add_inputs in punica_cpu.py.

Signed-off-by: Oleg Mosalov
---
 vllm/lora/punica_wrapper/punica_cpu.py | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/vllm/lora/punica_wrapper/punica_cpu.py b/vllm/lora/punica_wrapper/punica_cpu.py
index d6f4fa36561d5..4235e7bf4485b 100644
--- a/vllm/lora/punica_wrapper/punica_cpu.py
+++ b/vllm/lora/punica_wrapper/punica_cpu.py
@@ -54,7 +54,7 @@ def _expand_prefill(
         y: torch.Tensor,
         x: torch.Tensor,
         w_t_all: torch.Tensor,
-        add_input: bool,
+        add_inputs: bool,
     ):
         #No LoRA request, so return directly
         if self.no_lora:
@@ -64,7 +64,7 @@ def _expand_prefill(
             w_t_all,
             y,
             *self.prefill_metadata,
-            add_input,
+            add_inputs,
         )
 
     def _expand_decode(
@@ -72,9 +72,9 @@ def _expand_decode(
         y: torch.Tensor,
         x: torch.Tensor,
         w_t_all: torch.Tensor,
-        add_input: bool,
+        add_inputs: bool,
     ):
-        bgmv_expand(x, w_t_all, y, self.token_lora_indices, add_input)
+        bgmv_expand(x, w_t_all, y, self.token_lora_indices, add_inputs)
 
     def _expand_slice_prefill(
         self,
@@ -83,7 +83,7 @@ def _expand_slice_prefill(
         w_t_all: torch.Tensor,
         y_offset: int,
         y_slice_size: int,
-        add_input: bool,
+        add_inputs: bool,
     ):
         #No LoRA request, so return directly
         if self.no_lora:
@@ -95,7 +95,7 @@ def _expand_slice_prefill(
             *self.prefill_metadata,
             y_offset,
             y_slice_size,
-            add_input,
+            add_inputs,
         )
 
     def _expand_slice_decode(
@@ -105,10 +105,10 @@ def _expand_slice_decode(
         w_t_all: torch.Tensor,
         y_offset: int,
         y_slice_size: int,
-        add_input: bool,
+        add_inputs: bool,
     ):
         bgmv_expand_slice(x, w_t_all, y, self.token_lora_indices, y_offset,
-                          y_slice_size, add_input)
+                          y_slice_size, add_inputs)
 
     def _apply_expand(
         self,
@@ -117,7 +117,7 @@ def _apply_expand(
         w_t_all: torch.Tensor,
         y_offset: int,
         y_slice_size: int,
-        add_input: bool = True,
+        add_inputs: bool = True,
     ):
         """
         Perform the ` y[:,y_offset:y_offset+y_slice_size]+=x@w_t_all`
@@ -128,7 +128,7 @@ def _apply_expand(
         expand_slice_fun: Callable = (self._expand_slice_prefill
                                       if self.is_prefill else
                                       self._expand_slice_decode)
-        expand_slice_fun(y, x, w_t_all, y_offset, y_slice_size, add_input)
+        expand_slice_fun(y, x, w_t_all, y_offset, y_slice_size, add_inputs)
 
     def _apply_shrink(self, y: torch.Tensor, x: torch.Tensor,
                       w_t_all: torch.Tensor, scale: float):
@@ -181,7 +181,7 @@ def add_expand(self,
                    lora_bias_stacked: Optional[Tuple[torch.Tensor, ...]],
                    output_slices: Tuple[int, ...],
                    offset_start: int = 0,
-                   add_input=True,
+                   add_inputs=True,
                    **kwargs) -> None:
         """
         Performs GEMM and bias addition for multiple slices of lora_b.
@@ -200,7 +200,7 @@ def add_expand(self,
             lora_bias_stacked (Optional[Tuple[torch.Tensor, ...]]):
                 bias's weight
             output_slices (Tuple[int, ...]): Every slice's size
-            add_input (bool): Defaults to True.
+            add_inputs (bool): Defaults to True.
         """
         y_org = y
         y = y.view(-1, y.shape[-1])
@@ -215,7 +215,7 @@ def add_expand(self,
                 lora_b_stacked[slice_idx],
                 offset_left,
                 output_slices[slice_idx],
-                add_input=add_input,
+                add_inputs=add_inputs,
             )
             offset_left += output_slices[slice_idx]
         y = y.view_as(y_org)
@@ -224,7 +224,7 @@ def add_lora_embedding(self,
                            y: torch.Tensor,
                            x: torch.Tensor,
                            lora_b_stacked: torch.Tensor,
-                           add_input: bool = True,
+                           add_inputs: bool = True,
                            **kwargs) -> None:
         """
         Applies lora specifically for VocabParallelEmbeddingWithLoRA.
@@ -236,13 +236,13 @@ def add_lora_embedding(self,
             y (torch.Tensor): Output tensor.
             x (torch.Tensor): Input tensor.
             lora_b_stacked (torch.Tensor): lora_b's weights.
-            add_input (bool): Default to True.
+            add_inputs (bool): Default to True.
         """
 
         # Embedding layer only need expand op
         expand_fun: Callable = (self._expand_prefill
                                 if self.is_prefill else self._expand_decode)
-        expand_fun(y, x, lora_b_stacked, add_input)
+        expand_fun(y, x, lora_b_stacked, add_inputs)
 
     def add_lora_linear(self,
                         y: torch.Tensor,
@@ -298,7 +298,7 @@ def add_lora_linear(self,
                         lora_b_stacked,
                         None,
                         output_slices,
-                        add_input=True,
+                        add_inputs=True,
                         **kwargs)
 
     def add_lora_logits(self,
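
Note (not part of the patch): the rename is purely mechanical, but for reviewers unfamiliar with the flag, the sketch below illustrates the semantics that `add_inputs` controls in the expand ops touched here: whether the LoRA expand result is accumulated into the output tensor or written over the target slice. This is a minimal standalone illustration, not vLLM code; the helper name `expand_slice_ref` and the plain `x @ w` matmul are stand-ins for the bgmv/sgmv kernels.

import torch

def expand_slice_ref(y: torch.Tensor, x: torch.Tensor, w: torch.Tensor,
                     y_offset: int, y_slice_size: int,
                     add_inputs: bool = True) -> None:
    # Reference behavior for y[:, y_offset:y_offset+y_slice_size] (+)= x @ w.
    out = x @ w
    if add_inputs:
        # Accumulate on top of whatever is already in the output slice.
        y[:, y_offset:y_offset + y_slice_size] += out
    else:
        # Overwrite the output slice instead of accumulating.
        y[:, y_offset:y_offset + y_slice_size] = out

# After this patch, callers spell the keyword add_inputs, e.g.:
y = torch.ones(4, 16)
x = torch.randn(4, 8)
w = torch.randn(8, 6)
expand_slice_ref(y, x, w, y_offset=2, y_slice_size=6, add_inputs=True)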