Skip to content

Commit

Permalink
remove unused args and create utility function
Browse files Browse the repository at this point in the history
  • Loading branch information
BabyChouSr committed Nov 11, 2023
1 parent 8173ee3 commit 86039cf
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 49 deletions.
70 changes: 29 additions & 41 deletions fastchat/conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,35 +219,39 @@ def get_prompt(self) -> str:
else:
raise ValueError(f"Invalid style: {self.sep_style}")

def extract_base64encoded_image_from_message(self, message):
    """Return the base64-encoded PNG for *message*, a (text, PIL.Image) tuple.

    Before encoding, the image is downscaled (aspect ratio preserved) so its
    longest edge is at most 800px and its shortest edge at most 400px; images
    already within bounds are encoded unchanged.
    """
    import base64
    from io import BytesIO

    # BUG FIX: the original did `msg, image = msg`, unpacking the undefined
    # local `msg` instead of the `message` parameter — an UnboundLocalError
    # on every call. The text part of the tuple is unused here.
    _, image = message

    max_hw, min_hw = max(image.size), min(image.size)
    aspect_ratio = max_hw / min_hw
    max_len, min_len = 800, 400
    shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw))
    longest_edge = int(shortest_edge * aspect_ratio)
    W, H = image.size
    if longest_edge != max(image.size):
        # Resize only when the image exceeds the bounds; keep the original
        # orientation (portrait stays portrait, landscape stays landscape).
        if H > W:
            H, W = longest_edge, shortest_edge
        else:
            H, W = shortest_edge, longest_edge
        image = image.resize((W, H))

    buffered = BytesIO()
    image.save(buffered, format="PNG")
    img_b64_str = base64.b64encode(buffered.getvalue()).decode()

    return img_b64_str

def get_images(self):
    """Return base64-encoded PNG strings for all images in this conversation.

    Scans messages starting at ``self.offset``; even-indexed turns (the
    user's) may carry an image as a ``(text, PIL.Image)`` tuple, which is
    converted via ``extract_base64encoded_image_from_message``.
    """
    # DEAD CODE FIX: the span previously contained the pre-refactor inline
    # resize/encode logic interleaved with this implementation (leftover
    # diff residue); only the delegating version below is kept.
    images = []
    for i, (role, msg) in enumerate(self.messages[self.offset :]):
        if i % 2 == 0:
            if type(msg) is tuple:
                images.append(self.extract_base64encoded_image_from_message(msg))
    return images

def set_system_message(self, system_message: str):
Expand All @@ -272,24 +276,8 @@ def to_gradio_chatbot(self):
for i, (role, msg) in enumerate(self.messages[self.offset :]):
if i % 2 == 0:
if type(msg) is tuple:
import base64
from io import BytesIO

msg, image = msg
max_hw, min_hw = max(image.size), min(image.size)
aspect_ratio = max_hw / min_hw
max_len, min_len = 800, 400
shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw))
longest_edge = int(shortest_edge * aspect_ratio)
W, H = image.size
if H > W:
H, W = longest_edge, shortest_edge
else:
H, W = shortest_edge, longest_edge
image = image.resize((W, H))
buffered = BytesIO()
image.save(buffered, format="JPEG")
img_b64_str = base64.b64encode(buffered.getvalue()).decode()
img_b64_str = self.extract_base64encoded_image_from_message(msg)
img_str = f'<img src="data:image/png;base64,{img_b64_str}" alt="user upload image" />'
msg = img_str + msg.replace("<image>", "").strip()
ret.append([msg, None])
Expand Down
2 changes: 1 addition & 1 deletion fastchat/serve/gradio_web_server_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def add_text(state, model_selector, text, image, request: gr.Request):
no_change_btn,
) * 5

if image is not None and len(state.conv.get_images(return_pil=True)) > 0:
if image is not None and len(state.conv.get_images()) > 0:
# reset convo with new image
state.conv = get_conversation_template(state.model_name)

Expand Down
7 changes: 1 addition & 6 deletions fastchat/serve/multimodal_model_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ def __init__(
device: str,
num_gpus: int,
max_gpu_memory: str,
multimodal: bool,
dtype: Optional[torch.dtype] = None,
load_8bit: bool = False,
load_4bit: bool = False,
Expand All @@ -75,7 +74,7 @@ def __init__(
self.controller_addr = controller_addr
self.worker_addr = worker_addr
self.worker_id = worker_id
self.multimodal = multimodal
self.multimodal = True

logger.info(f"Loading the model {self.model_names} on worker {worker_id} ...")

Expand Down Expand Up @@ -157,14 +156,11 @@ def create_multimodal_model_worker():
parser.add_argument(
"--controller-address", type=str, default="http://localhost:21001"
)
# FOR PEFT (not supported yet): parser.add_argument("--model-base", type=str, default=None)
parser.add_argument("--embed-in-truncate", action="store_true")
parser.add_argument(
"--model-names",
type=lambda s: s.split(","),
help="Optional display comma separated names",
)
parser.add_argument("--multimodal", action="store_true", default=True)
parser.add_argument(
"--conv-template", type=str, default=None, help="Conversation prompt template."
)
Expand Down Expand Up @@ -196,7 +192,6 @@ def create_multimodal_model_worker():
device=args.device,
num_gpus=args.num_gpus,
max_gpu_memory=args.max_gpu_memory,
multimodal=args.multimodal,
dtype=str_to_torch_dtype(args.dtype),
load_8bit=args.load_8bit,
cpu_offloading=args.cpu_offloading,
Expand Down
3 changes: 2 additions & 1 deletion fastchat/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import warnings

import requests
from PIL import Image

from fastchat.constants import LOGDIR

Expand Down Expand Up @@ -336,6 +335,8 @@ def str_to_torch_dtype(dtype: str):


def load_image(image_file):
from PIL import Image

if image_file.startswith("http://") or image_file.startswith("https://"):
response = requests.get(image_file)
image = Image.open(BytesIO(response.content)).convert("RGB")
Expand Down

0 comments on commit 86039cf

Please sign in to comment.