From 41c518ff8b81ebe2c70667d30e7289762c62144b Mon Sep 17 00:00:00 2001
From: Oleg Mosalov
Date: Tue, 17 Dec 2024 09:58:53 +0100
Subject: [PATCH] Renamed add_input to add_inputs in punica_cpu.py.

Signed-off-by: Oleg Mosalov
---
 vllm/lora/punica_wrapper/punica_cpu.py | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/vllm/lora/punica_wrapper/punica_cpu.py b/vllm/lora/punica_wrapper/punica_cpu.py
index d6f4fa36561d5..4235e7bf4485b 100644
--- a/vllm/lora/punica_wrapper/punica_cpu.py
+++ b/vllm/lora/punica_wrapper/punica_cpu.py
@@ -54,7 +54,7 @@ def _expand_prefill(
         y: torch.Tensor,
         x: torch.Tensor,
         w_t_all: torch.Tensor,
-        add_input: bool,
+        add_inputs: bool,
     ):
         #No LoRA request, so return directly
         if self.no_lora:
@@ -64,7 +64,7 @@ def _expand_prefill(
             w_t_all,
             y,
             *self.prefill_metadata,
-            add_input,
+            add_inputs,
         )
 
     def _expand_decode(
@@ -72,9 +72,9 @@ def _expand_decode(
         y: torch.Tensor,
         x: torch.Tensor,
         w_t_all: torch.Tensor,
-        add_input: bool,
+        add_inputs: bool,
     ):
-        bgmv_expand(x, w_t_all, y, self.token_lora_indices, add_input)
+        bgmv_expand(x, w_t_all, y, self.token_lora_indices, add_inputs)
 
     def _expand_slice_prefill(
         self,
@@ -83,7 +83,7 @@ def _expand_slice_prefill(
         w_t_all: torch.Tensor,
         y_offset: int,
         y_slice_size: int,
-        add_input: bool,
+        add_inputs: bool,
     ):
         #No LoRA request, so return directly
         if self.no_lora:
@@ -95,7 +95,7 @@ def _expand_slice_prefill(
             *self.prefill_metadata,
             y_offset,
             y_slice_size,
-            add_input,
+            add_inputs,
         )
 
     def _expand_slice_decode(
@@ -105,10 +105,10 @@ def _expand_slice_decode(
         w_t_all: torch.Tensor,
         y_offset: int,
         y_slice_size: int,
-        add_input: bool,
+        add_inputs: bool,
     ):
         bgmv_expand_slice(x, w_t_all, y, self.token_lora_indices, y_offset,
-                          y_slice_size, add_input)
+                          y_slice_size, add_inputs)
 
     def _apply_expand(
         self,
@@ -117,7 +117,7 @@ def _apply_expand(
         w_t_all: torch.Tensor,
         y_offset: int,
         y_slice_size: int,
-        add_input: bool = True,
+        add_inputs: bool = True,
     ):
         """
         Perform the ` y[:,y_offset:y_offset+y_slice_size]+=x@w_t_all`
@@ -128,7 +128,7 @@ def _apply_expand(
         expand_slice_fun: Callable = (self._expand_slice_prefill
                                       if self.is_prefill else
                                       self._expand_slice_decode)
-        expand_slice_fun(y, x, w_t_all, y_offset, y_slice_size, add_input)
+        expand_slice_fun(y, x, w_t_all, y_offset, y_slice_size, add_inputs)
 
     def _apply_shrink(self, y: torch.Tensor, x: torch.Tensor,
                       w_t_all: torch.Tensor, scale: float):
@@ -181,7 +181,7 @@ def add_expand(self,
                    lora_bias_stacked: Optional[Tuple[torch.Tensor, ...]],
                    output_slices: Tuple[int, ...],
                    offset_start: int = 0,
-                   add_input=True,
+                   add_inputs=True,
                    **kwargs) -> None:
         """
         Performs GEMM and bias addition for multiple slices of lora_b.
@@ -200,7 +200,7 @@ def add_expand(self,
             lora_bias_stacked (Optional[Tuple[torch.Tensor, ...]]):
                 bias's weight
             output_slices (Tuple[int, ...]): Every slice's size
-            add_input (bool): Defaults to True.
+            add_inputs (bool): Defaults to True.
         """
         y_org = y
         y = y.view(-1, y.shape[-1])
@@ -215,7 +215,7 @@ def add_expand(self,
                 lora_b_stacked[slice_idx],
                 offset_left,
                 output_slices[slice_idx],
-                add_input=add_input,
+                add_inputs=add_inputs,
             )
             offset_left += output_slices[slice_idx]
         y = y.view_as(y_org)
@@ -224,7 +224,7 @@ def add_lora_embedding(self,
                            y: torch.Tensor,
                            x: torch.Tensor,
                            lora_b_stacked: torch.Tensor,
-                           add_input: bool = True,
+                           add_inputs: bool = True,
                            **kwargs) -> None:
         """
         Applies lora specifically for VocabParallelEmbeddingWithLoRA.
@@ -236,13 +236,13 @@ def add_lora_embedding(self,
             y (torch.Tensor): Output tensor.
             x (torch.Tensor): Input tensor.
             lora_b_stacked (torch.Tensor): lora_b's weights.
-            add_input (bool): Default to True.
+            add_inputs (bool): Default to True.
         """
 
         # Embedding layer only need expand op
         expand_fun: Callable = (self._expand_prefill
                                 if self.is_prefill else self._expand_decode)
-        expand_fun(y, x, lora_b_stacked, add_input)
+        expand_fun(y, x, lora_b_stacked, add_inputs)
 
     def add_lora_linear(self,
                         y: torch.Tensor,
@@ -298,7 +298,7 @@ def add_lora_linear(self,
                         lora_b_stacked,
                         None,
                         output_slices,
-                        add_input=True,
+                        add_inputs=True,
                         **kwargs)
 
     def add_lora_logits(self,
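
Note (not part of the patch): the rename is purely mechanical, but for reviewers unfamiliar with the flag, the sketch below illustrates the semantics that `add_inputs` controls in the expand ops touched here: whether the LoRA expand result is accumulated into the output tensor or written over the target slice. This is a minimal standalone illustration, not vLLM code; the helper name `expand_slice_ref` and the plain `x @ w` matmul are stand-ins for the bgmv/sgmv kernels.

import torch

def expand_slice_ref(y: torch.Tensor, x: torch.Tensor, w: torch.Tensor,
                     y_offset: int, y_slice_size: int,
                     add_inputs: bool = True) -> None:
    # Reference behavior for y[:, y_offset:y_offset+y_slice_size] (+)= x @ w.
    out = x @ w
    if add_inputs:
        # Accumulate on top of whatever is already in the output slice.
        y[:, y_offset:y_offset + y_slice_size] += out
    else:
        # Overwrite the output slice instead of accumulating.
        y[:, y_offset:y_offset + y_slice_size] = out

# After this patch, callers spell the keyword add_inputs, e.g.:
y = torch.ones(4, 16)
x = torch.randn(4, 8)
w = torch.randn(8, 6)
expand_slice_ref(y, x, w, y_offset=2, y_slice_size=6, add_inputs=True)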