Commit: WIP
xrsrke committed Sep 21, 2023
1 parent 43334d3 commit 1ffb1b1
Showing 3 changed files with 51 additions and 10 deletions.
pipegoose/nn/pipeline_parallel2/partitioner.py (2 additions, 2 deletions)

@@ -20,7 +20,7 @@ def split(self) -> List[nn.Module]:
         raise NotImplementedError
 
 
-class _UniformPartitioner(BasePartitioner):
+class UniformPartitioner(BasePartitioner):
     def __init__(self, module: nn.Module, parallel_context: ParallelContext):
         self.module = module
         self.parallel_context = parallel_context
@@ -61,7 +61,7 @@ def split(self) -> List[nn.Module]:
 def _get_partitioner(policy: PartitionPolicy) -> BasePartitioner:
     """Return the corresponding partitioner based on the policy."""
     policy_to_partitioner = {
-        PartitionPolicy.UNIFORM: _UniformPartitioner,
+        PartitionPolicy.UNIFORM: UniformPartitioner,
     }
 
     return policy_to_partitioner[policy]
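
The rename above makes UniformPartitioner part of the module's public surface, while _get_partitioner still resolves it from PartitionPolicy.UNIFORM. A minimal usage sketch with the renamed class (module and parallel_context are assumed to be an nn.Module and an initialized ParallelContext, as in the tests below; a sketch, not the repository's documented API):

    from pipegoose.nn.pipeline_parallel2.partitioner import UniformPartitioner

    # sketch: split the model into one stage per pipeline rank
    partitions = UniformPartitioner(module, parallel_context).split()
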
tests/nn/expert_parallel/test_layer.py (new file, 28 additions)

@@ -0,0 +1,28 @@
+import torch
+from torch import nn
+
+from pipegoose.nn.expert_parallel.layers import MoELayer
+from pipegoose.nn.expert_parallel.routers import Top1Router
+
+
+def test_moe_layer():
+    BATCH_SIZE = 10
+    SEQ_LEN = 5
+    HIDDEN_DIM = 64
+    N_EXPERTS = 10
+
+    inputs = torch.randn(BATCH_SIZE, SEQ_LEN, HIDDEN_DIM)
+    # the expert must map HIDDEN_DIM to HIDDEN_DIM for the
+    # output-shape assertion below to hold
+    expert = nn.Linear(HIDDEN_DIM, HIDDEN_DIM)
+
+    router = Top1Router(n_experts=N_EXPERTS)
+
+    layer = MoELayer(
+        expert=expert,
+        n_experts=N_EXPERTS,
+        router=router,
+    )
+
+    outputs = layer(inputs)
+
+    assert isinstance(outputs, torch.Tensor)
+    assert outputs.shape == (BATCH_SIZE, SEQ_LEN, HIDDEN_DIM)
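
For orientation, top-1 routing sends each token to the single highest-scoring expert chosen by a learned gate. A self-contained sketch in plain PyTorch (not pipegoose's Top1Router, whose internals this commit does not show), with shapes mirroring the test above:

    import torch
    import torch.nn.functional as F

    BATCH_SIZE, SEQ_LEN, HIDDEN_DIM, N_EXPERTS = 10, 5, 64, 10

    inputs = torch.randn(BATCH_SIZE, SEQ_LEN, HIDDEN_DIM)
    gate = torch.nn.Linear(HIDDEN_DIM, N_EXPERTS)   # learned router weights

    logits = gate(inputs)                           # (batch, seq, n_experts)
    expert_idx = logits.argmax(dim=-1)              # top-1 expert id per token
    dispatch = F.one_hot(expert_idx, N_EXPERTS)     # one-hot dispatch mask
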
tests/nn/pipeline_parallel_2/test_partitioner.py (21 additions, 8 deletions)

@@ -1,27 +1,40 @@
 import pytest
 from torch import nn
-from transformers import AutoModelForCausalLM
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from pipegoose.nn.pipeline_parallel2.partitioner import (
-    PartitionPolicy,
-    get_model_partition,
-)
+from pipegoose.nn.pipeline_parallel2.partitioner import UniformPartitioner
 from pipegoose.testing.utils import init_parallel_context, spawn
 
 MODEL_NAME = "sshleifer/tiny-gpt2"
 
 
-@pytest.mark.skip("implement this")
 def run_model_partitioner(rank, world_size, port, tensor_parallel_size, pipeline_parallel_size, data_parallel_size):
     parallel_context = init_parallel_context(
         rank, world_size, port, tensor_parallel_size, pipeline_parallel_size, data_parallel_size
     )
     module = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+    text = ["Hello world", "How are you?"]
+    inputs = tokenizer(text, return_tensors="pt", padding=True)
 
-    policy = PartitionPolicy.UNIFORM
-    partition = get_model_partition(module, policy, parallel_context)
+    partitions = UniformPartitioner(module, parallel_context).split()
 
-    assert isinstance(partition, nn.Module)
-    assert partition != module
+    assert isinstance(partitions, list)
+    assert len(partitions) == pipeline_parallel_size
+
+    for partition in partitions:
+        assert isinstance(partition, nn.Module)
+        assert partition != module
+
+    # run the token ids through the stages in order; assumes the first
+    # stage begins at the embedding layer
+    outputs = inputs["input_ids"]
+    for partition in partitions:
+        outputs = partition(outputs)
 
 
 @pytest.mark.parametrize("pipeline_parallel_size", [1, 2])
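
The diff is truncated after the parametrize marker, so the test body itself is not shown in this view. A hypothetical reconstruction of how such a driver usually looks in pipegoose's tests (the test name and the spawn keyword arguments are assumed from run_model_partitioner's parameters, not confirmed by this commit):

    @pytest.mark.parametrize("pipeline_parallel_size", [1, 2])
    def test_uniform_partitioner(pipeline_parallel_size):
        TENSOR_PARALLEL_SIZE, DATA_PARALLEL_SIZE = 1, 1
        WORLD_SIZE = TENSOR_PARALLEL_SIZE * pipeline_parallel_size * DATA_PARALLEL_SIZE

        # hypothetical driver: spawns WORLD_SIZE processes running the check above
        spawn(
            run_model_partitioner,
            world_size=WORLD_SIZE,
            tensor_parallel_size=TENSOR_PARALLEL_SIZE,
            pipeline_parallel_size=pipeline_parallel_size,
            data_parallel_size=DATA_PARALLEL_SIZE,
        )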
