Torch dp support (WIP) #3207

Draft · wants to merge 89 commits into base: main

Changes from all commits · 89 commits (all by grimoire):
0d006e2 better dist context · Jan 15, 2025
e5baae6 can not exit · Jan 16, 2025
e561e6d Merge branch 'main' into torch-multinode · Jan 16, 2025
8a61faf multinode support · Jan 17, 2025
ae7a742 better exception · Jan 17, 2025
2f75ee9 Merge branch 'main' into torch-multinode · Jan 21, 2025
709d293 merge main · Jan 21, 2025
cefbf98 refactor · Jan 22, 2025
ada2cc3 fix local rank · Jan 23, 2025
713aad5 replace group · Jan 24, 2025
2ec93c8 merge main · Feb 6, 2025
374b52a fix dist · Feb 6, 2025
727ea86 remove useless code · Feb 6, 2025
b454c39 remove finish flag · Feb 7, 2025
3baab2d Merge branch 'main' into torch-multinode · Feb 10, 2025
742af3b refactor engine and model agent · Feb 10, 2025
420ab0f uni executor · Feb 10, 2025
e2b82a7 wip · Feb 11, 2025
40251d5 tp · Feb 11, 2025
300263e fix · Feb 11, 2025
ec10731 less async · Feb 12, 2025
2ee3ca8 Merge branch 'main' into torch-multinode-v2 · Feb 12, 2025
ece1313 circle buf · Feb 12, 2025
911d9ab event per block · Feb 12, 2025
3b6fa54 fast mp · Feb 12, 2025
a560cd9 fix error handler · Feb 12, 2025
6071e55 remove safe wait · Feb 12, 2025
13d1187 context in model agent · Feb 12, 2025
894edb8 fix on stop · Feb 13, 2025
1b8d35e check before init · Feb 13, 2025
6db53ab support close · Feb 13, 2025
c3bf202 fix tp close · Feb 14, 2025
7ab7ff3 ray ver0 · Feb 14, 2025
8bfd5cb fix close · Feb 15, 2025
5596b53 fix remote code · Feb 16, 2025
343eb78 optimize ray · Feb 16, 2025
f3444ce better checker and logger · Feb 16, 2025
a35dc75 pack tensor · Feb 16, 2025
f47996a auto check dist · Feb 17, 2025
da27d28 fix mp gloo · Feb 17, 2025
fc02b67 Merge branch 'main' into torch-multinode-v2 · Feb 17, 2025
5366f5f add timer tools · Feb 17, 2025
230228d better scheduler · Feb 18, 2025
14bb0ae fix mp hang · Feb 18, 2025
f66510a fix mp · Feb 18, 2025
8ee9e86 fix chat · Feb 18, 2025
f293e64 less output · Feb 19, 2025
f41bc0b Merge branch 'main' into torch-multinode-v2 · Feb 19, 2025
b74e649 merge main · Feb 19, 2025
e8d1606 optimize ray get output · Feb 19, 2025
4ce9343 remove nsight runtime env · Feb 19, 2025
fbdffd2 dag · Feb 19, 2025
5871c27 optimize mp & lint · Feb 20, 2025
5c46f97 merge main · Feb 20, 2025
adfa3f3 optimize mp · Feb 20, 2025
0f63336 add base workerwrapper · Feb 21, 2025
ff64f11 fix gather, update flags · Feb 21, 2025
4ba2561 better return mask · Feb 21, 2025
026ebe4 add choice · Feb 21, 2025
a4639e9 enable mp,ray with worldsize=1 · Feb 21, 2025
c76dcfc fix mp exit · Feb 21, 2025
c5661ed fix mp vlm · Feb 21, 2025
af83ff2 chat exit · Feb 21, 2025
f1a8a08 add docs · Feb 21, 2025
730c9e9 lint · Feb 24, 2025
ef2811b doc · Feb 24, 2025
82f0f21 dp check · Feb 24, 2025
d28d690 fix blocked fp8 moe · Feb 24, 2025
95b2249 remove mask · Feb 24, 2025
efcb5df support dp, async · Feb 25, 2025
d34145d remove debug line · Feb 25, 2025
8960d29 fix model tp · Feb 25, 2025
411d12e Merge branch 'main' into torch-multinode-v2 · Feb 27, 2025
3481f7a support sync execute · Feb 27, 2025
8f8e708 Merge branch 'torch-multinode-v2' into torch-dp-support · Feb 27, 2025
3647cae fix chat stopwords · Feb 28, 2025
e371647 refactor chat · Feb 28, 2025
fae62ad merge main · Mar 3, 2025
ade7bce add warmup · Mar 3, 2025
8d65226 Merge branch 'torch-multinode-v2' into torch-dp-support · Mar 3, 2025
49e7f4c merge main · Mar 4, 2025
e4339e7 disable warmup · Mar 4, 2025
f7515e4 dp support · Mar 6, 2025
1f10133 Merge branch 'main' into torch-dp-support · Mar 6, 2025
32cd198 fix ut, merge main, force eager · Mar 6, 2025
d4a42ee support qwen2/internlm2/internlm3 · Mar 6, 2025
db8b965 merge main · Mar 9, 2025
4ab0ce2 support blocked fp8 all gather · Mar 9, 2025
c0cd790 add more model support · Mar 9, 2025

8 changes: 8 additions & 0 deletions lmdeploy/messages.py
@@ -248,6 +248,8 @@ class PytorchEngineConfig:
The `auto` option will use FP16 precision for FP32 and FP16
models, and BF16 precision for BF16 models.
tp (int): Tensor Parallelism. default 1.
dp (int): Data Parallelism. default 1.
dp_rank (int): rank of the dp group. default 0.
session_len (int): Max session length. Default None.
max_batch_size (int): Max batch size. If it is not specified,
the engine will automatically set it according to the device
@@ -280,9 +282,13 @@ class PytorchEngineConfig:
bit, set it to 4 or 8, respectively
distributed_executor_backend (str): backend of the distributed executor,
options: ['uni', 'mp', 'ray']
should_execute_dummy_batch (bool): execute a dummy batch when this dp
rank has no request.
"""
dtype: str = 'auto'
tp: int = 1
dp: int = 1
dp_rank: int = 0
session_len: int = None
max_batch_size: int = None
cache_max_entry_count: float = 0.8
@@ -301,11 +307,13 @@ class PytorchEngineConfig:
revision: str = None
quant_policy: Literal[0, 4, 8] = 0
distributed_executor_backend: str = None
should_execute_dummy_batch: bool = False

def __post_init__(self):
"""Check input validation."""
assert self.dtype in ['auto', 'float16', 'bfloat16']
assert self.tp >= 1, 'invalid tp'
assert self.dp >= 1, 'invalid dp'
assert 0 < self.cache_max_entry_count < 1, \
'invalid cache_max_entry_count'
assert self.num_cpu_blocks >= 0, 'invalid num_cpu_blocks'
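For context, a minimal sketch of how the new fields compose (the model name and the one-process-per-rank launch are assumptions for illustration; the diff itself only adds the config fields):

```python
# Hypothetical usage: one engine process per dp rank, each with its own
# dp_rank; tensor parallelism is applied within each dp rank.
from lmdeploy import pipeline, PytorchEngineConfig

config = PytorchEngineConfig(
    tp=2,       # tensor parallelism inside this dp rank
    dp=2,       # total number of data-parallel ranks
    dp_rank=0,  # this process serves dp rank 0; a second process would use 1
)
pipe = pipeline('internlm/internlm2_5-7b-chat', backend_config=config)
```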
6 changes: 4 additions & 2 deletions lmdeploy/pytorch/backends/blockedf8_modules.py
@@ -1,6 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
from abc import ABC, abstractmethod
from typing import Optional
from typing import List, Optional

import torch

@@ -18,7 +18,9 @@ def forward(self,
weight: torch.Tensor,
scale: torch.Tensor,
bias: Optional[torch.Tensor] = None,
all_reduce: bool = False):
all_reduce: bool = False,
rank: int = 0,
scatter_size: List[int] = None):
"""forward."""
raise NotImplementedError

4 changes: 2 additions & 2 deletions lmdeploy/pytorch/backends/cuda/attention.py
@@ -5,7 +5,7 @@

import torch

from lmdeploy.pytorch.distributed import get_world_rank
from lmdeploy.pytorch.distributed import get_tp_world_rank

from ..attention import AttentionBuilder, AttentionImpl, AttentionMetadata

@@ -69,7 +69,7 @@ def __init__(
self.flash_attention_fwd = flash_attention_fwd

# for alibi attention
world_size, rank = get_world_rank()
world_size, rank = get_tp_world_rank()
self.alibi_head_offset = self.num_heads * rank
self.alibi_num_heads = self.num_heads * world_size

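The switch from get_world_rank to get_tp_world_rank scopes the alibi head partition to the tensor-parallel group rather than the full world, which may now include dp ranks. A sketch of the offset arithmetic with illustrative numbers (head counts assumed, not from the PR):

```python
# Illustrative only: how the alibi offsets above partition heads across a
# tp group of size 2 when each rank holds 8 local heads.
num_heads = 8       # heads owned by this rank
world_size = 2      # size of the tp group
for rank in range(world_size):
    alibi_head_offset = num_heads * rank       # global index of first local head
    alibi_num_heads = num_heads * world_size   # total heads across the tp group
    print(rank, alibi_head_offset, alibi_num_heads)
# rank 0 -> offset 0, rank 1 -> offset 8; both see 16 total heads
```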
20 changes: 17 additions & 3 deletions lmdeploy/pytorch/backends/cuda/blockedf8_modules.py
@@ -1,5 +1,5 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional
from typing import List, Optional

import torch

@@ -9,6 +9,15 @@
from ..blockedf8_modules import LinearBlockedF8Builder, LinearBlockedF8Impl


def _reduce_scatter_input(out: torch.Tensor, rank: int, tp_sizes: List[int]):
"""reduce scatter."""
outs = out.split(tp_sizes, -2)
out = outs[rank]
outs = list(outs)
dist.reduce_scatter(out, outs)
return out


class TritonLinearBlockedF8Impl(LinearBlockedF8Impl):
"""triton linear blocked f8 implementation."""

@@ -23,7 +32,9 @@ def forward(self,
weight: torch.Tensor,
scale: torch.Tensor,
bias: Optional[torch.Tensor] = None,
all_reduce: bool = False):
all_reduce: bool = False,
rank: int = 0,
scatter_size: List[int] = None):
"""forward."""
x_shape = x.shape
x = x.flatten(0, -2)
@@ -34,7 +45,10 @@
out += bias

if all_reduce:
dist.all_reduce(out)
if scatter_size is not None:
out = _reduce_scatter_input(out, rank, scatter_size)
else:
dist.all_reduce(out)

out = out.unflatten(0, x_shape[:-1])
return out
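The `_reduce_scatter_input` helper above fuses the tp all-reduce with a scatter over possibly uneven per-rank slices (`scatter_size` can differ per dp rank because each rank may hold a different number of tokens). A standalone sketch of the same pattern, assuming an NCCL process group launched via torchrun; the helper name here is ours, not the PR's:

```python
# Minimal sketch of uneven reduce-scatter (run with: torchrun --nproc-per-node=2).
# torch.distributed.reduce_scatter accepts unequal chunk sizes as long as each
# rank's output tensor matches input_list[rank] on every rank.
import torch
import torch.distributed as dist

def reduce_scatter_uneven(out: torch.Tensor, rank: int, sizes: list):
    """Sum `out` across ranks, keeping only this rank's slice along dim -2."""
    outs = list(out.split(sizes, -2))  # one chunk per rank, sizes may differ
    local = outs[rank]                 # this rank's output buffer (a view)
    dist.reduce_scatter(local, outs)
    return local

if __name__ == '__main__':
    dist.init_process_group('nccl')
    rank = dist.get_rank()
    torch.cuda.set_device(rank)
    sizes = [3, 5]                     # uneven token counts per rank (assumed)
    x = torch.ones(sum(sizes), 8, device='cuda')
    y = reduce_scatter_uneven(x, rank, sizes)
    print(rank, y.shape)               # rank 0: (3, 8); rank 1: (5, 8)
```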
24 changes: 21 additions & 3 deletions lmdeploy/pytorch/backends/default/linear.py
@@ -1,5 +1,5 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional
from typing import List, Optional

import torch
import torch.nn.functional as F
@@ -9,14 +9,32 @@
from ..linear import LinearBuilder, LinearImpl


def _reduce_scatter_input(out: torch.Tensor, rank: int, tp_sizes: List[int]):
"""reduce scatter."""
outs = out.split(tp_sizes, -2)
out = outs[rank]
outs = list(outs)
dist.reduce_scatter(out, outs)
return out


class DefaultLinearImpl(LinearImpl):
"""Linear implementation api."""

def forward(self, x, weight: torch.Tensor, bias: Optional[torch.Tensor] = None, all_reduce: bool = False):
def forward(self,
x,
weight: torch.Tensor,
bias: Optional[torch.Tensor] = None,
all_reduce: bool = False,
rank: int = 0,
scatter_size: List[int] = None):
"""forward."""
out = F.linear(x, weight, bias)
if all_reduce:
dist.all_reduce(out)
if scatter_size is not None:
out = _reduce_scatter_input(out, rank, scatter_size)
else:
dist.all_reduce(out)
return out


10 changes: 8 additions & 2 deletions lmdeploy/pytorch/backends/dlinfer/linear.py
@@ -1,6 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
from typing import Optional
from typing import List, Optional

import torch

@@ -18,7 +18,13 @@ def update_weights(self, weight: torch.Tensor, bias: Optional[torch.Tensor] = No
weight = weight.data.t().contiguous()
return weight, bias

def forward(self, x, weight: torch.Tensor, bias: Optional[torch.Tensor] = None, all_reduce: bool = False):
def forward(self,
x,
weight: torch.Tensor,
bias: Optional[torch.Tensor] = None,
all_reduce: bool = False,
rank: int = 0,
scatter_size: List[int] = None):
"""forward."""
return linear(x, weight, bias, all_reduce)

10 changes: 8 additions & 2 deletions lmdeploy/pytorch/backends/linear.py
@@ -1,6 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
from abc import ABC, abstractmethod
from typing import Optional
from typing import List, Optional

import torch

@@ -13,7 +13,13 @@ def update_weights(self, weight: torch.Tensor, bias: Optional[torch.Tensor] = No
return weight, bias

@abstractmethod
def forward(self, x, weight: torch.Tensor, bias: Optional[torch.Tensor] = None, all_reduce: bool = False):
def forward(self,
x,
weight: torch.Tensor,
bias: Optional[torch.Tensor] = None,
all_reduce: bool = False,
rank: int = 0,
scatter_size: List[int] = None):
"""forward."""
raise NotImplementedError

5 changes: 4 additions & 1 deletion lmdeploy/pytorch/check_env/dist.py
@@ -1,5 +1,7 @@
# Copyright (c) OpenMMLab. All rights reserved.

from lmdeploy.pytorch.config import DistConfig

from .base import BaseChecker


@@ -10,7 +12,8 @@ def __init__(self, tp: int, dp: int, distributed_executor_backend: str, device_t
super().__init__(logger)
self.tp = tp
self.dp = dp
self.world_size = tp * dp
self.dist_config = DistConfig(dp=dp, tp=tp)
self.world_size = self.dist_config.world_size
self.distributed_executor_backend = distributed_executor_backend
self.device_type = device_type

45 changes: 42 additions & 3 deletions lmdeploy/pytorch/config.py
@@ -89,6 +89,35 @@ def __post_init__(self):
self.enable_prefix_caching = False


@dataclass
class DistConfig:
dp: int = 1
tp: int = 1
ep: int = 1
dp_rank: int = 0
world_size: int = None
attn_config: 'DistConfig' = None

def __post_init__(self):
"""post init."""
assert self.dp_rank < self.dp
assert self.dp >= 1
if self.dp == 1:
world_size = max(self.tp, self.ep)
attn_config = self
else:
world_size = self.dp
attn_config = DistConfig(dp=1, tp=1, ep=1, dp_rank=0)
self.world_size = world_size
self.attn_config = attn_config

def need_dummy_batch(self):
"""need dummy batch."""
if self.dp == 1:
return False
return self.tp > 1 or self.ep > 1


@dataclass
class ModelConfig:
"""Config of model."""
@@ -118,7 +147,7 @@ def from_pretrained(cls,
pretrained_model_name_or_path: str,
trust_remote_code: bool = True,
dtype: str = 'auto',
tp: int = 1):
dist_config: DistConfig = None):
"""Instantiate one of the configuration classes of the library from a
pretrained model configuration.

@@ -134,12 +163,22 @@
if getattr(hf_config, 'model_type', None) in ['phi3']:
# phi3 + trust_remote_code leads to error when tp.
hf_config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
return cls.from_hf_config(hf_config, pretrained_model_name_or_path, dtype=dtype, tp=tp)
return cls.from_hf_config(hf_config, pretrained_model_name_or_path, dtype=dtype, dist_config=dist_config)

@classmethod
def from_hf_config(cls, hf_config: Any, model_path: str = None, dtype: str = 'auto', tp: int = 1):
def from_hf_config(cls,
hf_config: Any,
model_path: str = None,
dtype: str = 'auto',
dist_config: DistConfig = None):
"""from huggingface config."""
from lmdeploy.pytorch.configurations import AutoModelConfigBuilder
if dist_config is None:
dist_config = DistConfig()
if dist_config.dp == 1:
tp = dist_config.tp
else:
tp = 1

model_config = AutoModelConfigBuilder.build(hf_config, model_path, tp=tp)

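The DistConfig added above derives world_size and the attention-group config from dp/tp/ep: with dp == 1 the world size is max(tp, ep) and attention shares the same config; with dp > 1 the world size equals dp and each dp rank gets a private tp=1 attention config. A quick check of those semantics as read from the diff (asserted values follow the code above, not a run of the PR):

```python
from lmdeploy.pytorch.config import DistConfig

plain_tp = DistConfig(dp=1, tp=4)
assert plain_tp.world_size == 4          # max(tp, ep) when dp == 1
assert plain_tp.attn_config is plain_tp  # attention shares this config
assert not plain_tp.need_dummy_batch()

dp_tp = DistConfig(dp=2, tp=2, dp_rank=0)
assert dp_tp.world_size == 2             # world size == dp when dp > 1
assert dp_tp.attn_config.tp == 1         # per-dp-rank attention runs without tp
assert dp_tp.need_dummy_batch()          # tp > 1 under dp needs dummy batches
```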