Changes from all 37 commits:
4f62c98  Fix torch FutureWarning (MGAMZ, Jul 21, 2024)
b6b4224  Fix torch FutureWarning (MGAMZ, Jul 21, 2024)
4c7a5d4  Optimize the prompt for compile (MGAMZ, Jul 26, 2024)
28d47f8  Fix Incorrect Optim Param Resume Method (MGAMZ, Aug 21, 2024)
91d945f  Update runner.py to support pure-python style model wrapper configura… (MGAMZ, Aug 28, 2024)
0934d75  Merge branch 'open-mmlab:main' into main (MGAMZ, Sep 22, 2024)
7103c3e  reconstruct (MGAMZ, Sep 23, 2024)
eecaa92  PyTorch Profiler within IterBasedTrainLoop (MGAMZ, Nov 3, 2024)
698ad5e  enable hook error exception traceback (MGAMZ, Nov 3, 2024)
8c80332  Merge branch 'main' of github.com:MGAMZ/mmengine (MGAMZ, Nov 3, 2024)
3cf1003  Merge branch 'open-mmlab:main' into main (MGAMZ, Nov 4, 2024)
1e4c2ed  improve codes (MGAMZ, Nov 15, 2024)
2a5a1fe  Merge branch 'open-mmlab:main' into main (MGAMZ, Nov 23, 2024)
29e3a08  KeyError: 'Adafactor is already registered in optimizer at torch.optim'. (MGAMZ, Jan 3, 2025)
896576b  Merge branch 'main' of https://github.com/MGAMZ/mmengine (MGAMZ, Jan 3, 2025)
be86710  Update support for deep speed and multiple improvements. (MGAMZ, Jan 11, 2025)
dadedbb  Merge branch 'main' of gitee.com:MGAM/mmengine (MGAMZ, Jan 11, 2025)
861fc1b  improve multiple mmengine undeveloped issues. (MGAMZ, Jan 12, 2025)
8f37dd2  Multiple improvements (MGAMZ, Jan 17, 2025)
bed2660  Merge branch 'open-mmlab:main' into main (MGAMZ, Jan 17, 2025)
d45205c  update dependency and bump versions (MGAMZ, Feb 22, 2025)
c472f2b  fix wrong pyproject config. (MGAMZ, Mar 3, 2025)
2cacfc0  Merge branch 'open-mmlab:main' into main (MGAMZ, Mar 5, 2025)
4b3627a  sync version (MGAMZ, Mar 16, 2025)
c5f5ca7  disable HistoryBuffer's torch compile (MGAMZ, Mar 16, 2025)
438eb64  Fix histort buffer bug when using torch.compile (MGAMZ, Mar 17, 2025)
de1eaf9  Merge branch 'main' of https://gitee.com/MGAM/mmengine (MGAMZ, Mar 17, 2025)
6d618bc  1. Undo changes made to history buffer. (MGAMZ, Mar 30, 2025)
eaf6d3c  Merge branch 'main' of gitee.com:MGAM/mmengine (MGAMZ, Mar 30, 2025)
6149316  1. remove unnecessary distributed warp. (MGAMZ, Apr 25, 2025)
6939758  Merge branch 'main' of https://gitee.com/MGAM/mmengine (MGAMZ, Apr 25, 2025)
385d029  Remove setup.py to fix installation bug using python>=3.13 (MGAMZ, Jul 21, 2025)
07518d9  AllinOne Commit (MGAMZ, Oct 17, 2025)
c62a408  Change the pip install method (MGAMZ, Oct 17, 2025)
ebcee9c  Fix code style and type annotations in builder.py (MGAMZ, Oct 17, 2025)
428bd84  Packing according to PyPA. (MGAMZ, Oct 17, 2025)
c19a00c  Make Lint Happy. (MGAMZ, Oct 17, 2025)
8 changes: 6 additions & 2 deletions .pre-commit-config.yaml
@@ -12,10 +12,14 @@ repos:
rev: 5.11.5
hooks:
- id: isort
- repo: https://github.com/pre-commit/mirrors-yapf
rev: v0.32.0
- repo: local
hooks:
- id: yapf
name: yapf
entry: yapf
language: system
types: [python]
args: ["-i"]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
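For reference, piecing the added lines of this hunk back together, the new local yapf hook entry in .pre-commit-config.yaml would read roughly as follows (a sketch of the post-change configuration only; the surrounding isort and pre-commit-hooks entries are untouched):

  - repo: local
    hooks:
      - id: yapf
        name: yapf            # display name of the hook
        entry: yapf           # run the yapf executable available in the environment
        language: system      # use the locally installed yapf rather than a pinned mirror
        types: [python]       # only run on Python files
        args: ["-i"]          # rewrite files in place

With such an entry, running pre-commit run yapf --all-files would presumably format the tree with whatever yapf version is installed locally, instead of the mirrors-yapf v0.32.0 pin that this change removes.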
27 changes: 12 additions & 15 deletions examples/distributed_training.py
@@ -42,11 +42,10 @@ def compute_metrics(self, results):

def parse_args():
parser = argparse.ArgumentParser(description='Distributed Training')
parser.add_argument(
'--launcher',
choices=['none', 'pytorch', 'slurm', 'mpi'],
default='none',
help='job launcher')
parser.add_argument('--launcher',
choices=['none', 'pytorch', 'slurm', 'mpi'],
default='none',
help='job launcher')
parser.add_argument('--local_rank', type=int, default=0)

args = parser.parse_args()
@@ -73,16 +72,14 @@ def main():
transform=transforms.Compose(
[transforms.ToTensor(),
transforms.Normalize(**norm_cfg)]))
train_dataloader = dict(
batch_size=32,
dataset=train_set,
sampler=dict(type='DefaultSampler', shuffle=True),
collate_fn=dict(type='default_collate'))
val_dataloader = dict(
batch_size=32,
dataset=valid_set,
sampler=dict(type='DefaultSampler', shuffle=False),
collate_fn=dict(type='default_collate'))
train_dataloader = dict(batch_size=32,
dataset=train_set,
sampler=dict(type='DefaultSampler', shuffle=True),
collate_fn=dict(type='default_collate'))
val_dataloader = dict(batch_size=32,
dataset=valid_set,
sampler=dict(type='DefaultSampler', shuffle=False),
collate_fn=dict(type='default_collate'))
runner = Runner(
model=MMResNet50(),
work_dir='./work_dirs',
77 changes: 37 additions & 40 deletions examples/distributed_training_with_flexible_runner.py
@@ -70,16 +70,14 @@ def main():
transform=transforms.Compose(
[transforms.ToTensor(),
transforms.Normalize(**norm_cfg)]))
train_dataloader = dict(
batch_size=128,
dataset=train_set,
sampler=dict(type='DefaultSampler', shuffle=True),
collate_fn=dict(type='default_collate'))
val_dataloader = dict(
batch_size=128,
dataset=valid_set,
sampler=dict(type='DefaultSampler', shuffle=False),
collate_fn=dict(type='default_collate'))
train_dataloader = dict(batch_size=128,
dataset=train_set,
sampler=dict(type='DefaultSampler', shuffle=True),
collate_fn=dict(type='default_collate'))
val_dataloader = dict(batch_size=128,
dataset=valid_set,
sampler=dict(type='DefaultSampler', shuffle=False),
collate_fn=dict(type='default_collate'))

if args.use_deepspeed:
strategy = dict(
@@ -97,30 +95,28 @@
# bf16=dict(
# enabled=True,
# ),
zero_optimization=dict(
stage=3,
allgather_partitions=True,
reduce_scatter=True,
allgather_bucket_size=50000000,
reduce_bucket_size=50000000,
overlap_comm=True,
contiguous_gradients=True,
cpu_offload=False),
zero_optimization=dict(stage=3,
allgather_partitions=True,
reduce_scatter=True,
allgather_bucket_size=50000000,
reduce_bucket_size=50000000,
overlap_comm=True,
contiguous_gradients=True,
cpu_offload=False),
)
optim_wrapper = dict(
type='DeepSpeedOptimWrapper',
optimizer=dict(type='AdamW', lr=1e-3))
optim_wrapper = dict(type='DeepSpeedOptimWrapper',
optimizer=dict(type='AdamW', lr=1e-3))
elif args.use_fsdp:
from functools import partial

from torch.distributed.fsdp.wrap import size_based_auto_wrap_policy
size_based_auto_wrap_policy = partial(
size_based_auto_wrap_policy, min_num_params=1e7)
size_based_auto_wrap_policy = partial(size_based_auto_wrap_policy,
min_num_params=1e7)
strategy = dict(
type='FSDPStrategy',
model_wrapper=dict(auto_wrap_policy=size_based_auto_wrap_policy))
optim_wrapper = dict(
type='AmpOptimWrapper', optimizer=dict(type='AdamW', lr=1e-3))
optim_wrapper = dict(type='AmpOptimWrapper',
optimizer=dict(type='AdamW', lr=1e-3))
elif args.use_colossalai:
from colossalai.tensor.op_wrapper import colo_op_impl

@@ -142,20 +138,21 @@ def main():
optim_wrapper = dict(optimizer=dict(type='HybridAdam', lr=1e-3))
else:
strategy = None
optim_wrapper = dict(
type='AmpOptimWrapper', optimizer=dict(type='AdamW', lr=1e-3))

runner = FlexibleRunner(
model=MMResNet50(),
work_dir='./work_dirs',
strategy=strategy,
train_dataloader=train_dataloader,
optim_wrapper=optim_wrapper,
param_scheduler=dict(type='LinearLR'),
train_cfg=dict(by_epoch=True, max_epochs=10, val_interval=1),
val_dataloader=val_dataloader,
val_cfg=dict(),
val_evaluator=dict(type=Accuracy))
optim_wrapper = dict(type='AmpOptimWrapper',
optimizer=dict(type='AdamW', lr=1e-3))

runner = FlexibleRunner(model=MMResNet50(),
work_dir='./work_dirs',
strategy=strategy,
train_dataloader=train_dataloader,
optim_wrapper=optim_wrapper,
param_scheduler=dict(type='LinearLR'),
train_cfg=dict(by_epoch=True,
max_epochs=10,
val_interval=1),
val_dataloader=val_dataloader,
val_cfg=dict(),
val_evaluator=dict(type=Accuracy))
runner.train()


40 changes: 18 additions & 22 deletions examples/llama2/fsdp_finetune.py
@@ -92,17 +92,14 @@ def parse_args():
def train():
args = parse_args()
# Setup distributed related component in Strategy.
strategy = FSDPStrategy(
model_wrapper=dict(
auto_wrap_policy=partial(
transformer_auto_wrap_policy,
transformer_layer_cls={LlamaDecoderLayer})),
state_dict_cfg='full',
env_kwargs=dict(randomness=dict(seed=42)))
visualizer = Visualizer(
name='mmengine',
save_dir=args.output_dir,
vis_backends=[dict(type=WandbVisBackend)])
strategy = FSDPStrategy(model_wrapper=dict(
auto_wrap_policy=partial(transformer_auto_wrap_policy,
transformer_layer_cls={LlamaDecoderLayer})),
state_dict_cfg='full',
env_kwargs=dict(randomness=dict(seed=42)))
visualizer = Visualizer(name='mmengine',
save_dir=args.output_dir,
vis_backends=[dict(type=WandbVisBackend)])

# Prepare model
tokenizer = LlamaTokenizer.from_pretrained(args.checkpoint)
@@ -112,21 +109,20 @@ def train():
model.train()

# Prepare dataset
train_dataset = AlpacaDataset(
tokenizer=tokenizer, data_path=args.data_root)
train_dataloader = DataLoader(
train_dataset,
batch_size=args.batch_size,
sampler=DefaultSampler(train_dataset, seed=0),
collate_fn=default_data_collator,
drop_last=True)
train_dataset = AlpacaDataset(tokenizer=tokenizer,
data_path=args.data_root)
train_dataloader = DataLoader(train_dataset,
batch_size=args.batch_size,
sampler=DefaultSampler(train_dataset,
seed=0),
collate_fn=default_data_collator,
drop_last=True)

# Get the prepared model, scheduler and optimizer from strategy
epoch_length = len(train_dataloader)
max_iters = epoch_length * args.max_epoch
optim_cfg = dict(
optimizer=dict(type=AdamW, lr=1e-4, weight_decay=0.0),
accumulative_counts=ORI_BATCH_SIZE / args.batch_size)
optim_cfg = dict(optimizer=dict(type=AdamW, lr=1e-4, weight_decay=0.0),
accumulative_counts=ORI_BATCH_SIZE / args.batch_size)
scheduler_cfgs = [dict(type=StepLR, step_size=1, gamma=0.85)]
model, optimizer, schedulers = strategy.prepare(
model,
7 changes: 3 additions & 4 deletions examples/llama2/generate.py
@@ -30,7 +30,6 @@ def parse_args():
with torch.no_grad():
generate_ids = model.generate(inputs.input_ids.cuda(), max_length=300)
print(
tokenizer.batch_decode(
generate_ids,
skip_special_tokens=True,
clean_up_tokenization_spaces=False)[0])
tokenizer.batch_decode(generate_ids,
skip_special_tokens=True,
clean_up_tokenization_spaces=False)[0])
76 changes: 36 additions & 40 deletions examples/segmentation/train.py
@@ -40,8 +40,9 @@ def __init__(self,
mask_folder,
transform=None,
target_transform=None):
super().__init__(
root, transform=transform, target_transform=target_transform)
super().__init__(root,
transform=transform,
target_transform=target_transform)
self.img_folder = img_folder
self.mask_folder = mask_folder
self.images = list(
@@ -72,8 +73,9 @@ def __getitem__(self, index):

if self.target_transform is not None:
labels = self.target_transform(labels)
data_samples = dict(
labels=labels, img_path=img_path, mask_path=mask_path)
data_samples = dict(labels=labels,
img_path=img_path,
mask_path=mask_path)
return img, data_samples

def __len__(self):
@@ -102,8 +104,8 @@ def process(self, data_batch, data_samples):
intersect = (labels == preds).sum()
union = (torch.logical_or(preds, labels)).sum()
iou = (intersect / union).cpu()
self.results.append(
dict(batch_size=len(labels), iou=iou * len(labels)))
self.results.append(dict(batch_size=len(labels),
iou=iou * len(labels)))

def compute_metrics(self, results):
total_iou = sum(result['iou'] for result in self.results)
@@ -151,18 +153,16 @@ def after_val_iter(self,
osp.join(saved_dir, osp.basename(img_path)))
shutil.copyfile(mask_path,
osp.join(saved_dir, osp.basename(mask_path)))
cv2.imwrite(
osp.join(saved_dir, f'pred_{osp.basename(img_path)}'),
pred_mask)
cv2.imwrite(osp.join(saved_dir, f'pred_{osp.basename(img_path)}'),
pred_mask)


def parse_args():
parser = argparse.ArgumentParser(description='Distributed Training')
parser.add_argument(
'--launcher',
choices=['none', 'pytorch', 'slurm', 'mpi'],
default='none',
help='job launcher')
parser.add_argument('--launcher',
choices=['none', 'pytorch', 'slurm', 'mpi'],
default='none',
help='job launcher')
parser.add_argument('--local_rank', type=int, default=0)

args = parser.parse_args()
@@ -181,37 +181,33 @@ def main():
target_transform = transforms.Lambda(
lambda x: torch.tensor(np.array(x), dtype=torch.long))

train_set = CamVid(
'data/CamVid',
img_folder='train',
mask_folder='train_labels',
transform=transform,
target_transform=target_transform)

valid_set = CamVid(
'data/CamVid',
img_folder='val',
mask_folder='val_labels',
transform=transform,
target_transform=target_transform)

train_dataloader = dict(
batch_size=3,
dataset=train_set,
sampler=dict(type='DefaultSampler', shuffle=True),
collate_fn=dict(type='default_collate'))
val_dataloader = dict(
batch_size=3,
dataset=valid_set,
sampler=dict(type='DefaultSampler', shuffle=False),
collate_fn=dict(type='default_collate'))
train_set = CamVid('data/CamVid',
img_folder='train',
mask_folder='train_labels',
transform=transform,
target_transform=target_transform)

valid_set = CamVid('data/CamVid',
img_folder='val',
mask_folder='val_labels',
transform=transform,
target_transform=target_transform)

train_dataloader = dict(batch_size=3,
dataset=train_set,
sampler=dict(type='DefaultSampler', shuffle=True),
collate_fn=dict(type='default_collate'))
val_dataloader = dict(batch_size=3,
dataset=valid_set,
sampler=dict(type='DefaultSampler', shuffle=False),
collate_fn=dict(type='default_collate'))

runner = Runner(
model=MMDeeplabV3(num_classes),
work_dir='./work_dir',
train_dataloader=train_dataloader,
optim_wrapper=dict(
type=AmpOptimWrapper, optimizer=dict(type=AdamW, lr=2e-4)),
optim_wrapper=dict(type=AmpOptimWrapper,
optimizer=dict(type=AdamW, lr=2e-4)),
train_cfg=dict(by_epoch=True, max_epochs=10, val_interval=10),
val_dataloader=val_dataloader,
val_cfg=dict(),
17 changes: 8 additions & 9 deletions examples/test_time_augmentation.py
@@ -28,15 +28,14 @@ def _merge_single_sample(self, data_samples):
cfg.work_dir = 'work_dirs/resnet50_8xb16_cifar10'
cfg.model = dict(type='ClsTTAModel', module=cfg.model)
test_pipeline = deepcopy(cfg.test_dataloader.dataset.pipeline)
flip_tta = dict(
type='TestTimeAug',
transforms=[
[
dict(type='RandomFlip', prob=1.),
dict(type='RandomFlip', prob=0.)
],
[test_pipeline[-1]],
])
flip_tta = dict(type='TestTimeAug',
transforms=[
[
dict(type='RandomFlip', prob=1.),
dict(type='RandomFlip', prob=0.)
],
[test_pipeline[-1]],
])
# Replace the last transform with `TestTimeAug`
cfg.test_dataloader.dataset.pipeline[-1] = flip_tta
cfg.load_from = 'https://download.openmmlab.com/mmclassification/v0' \