Merge pull request #10 from open-mmlab/main

merge
bowenroom · Sep 21, 2023 · 4178c1e · 4178c1e
2 parents 7b6d587 + 1471d1e
commit 4178c1e
Show file tree

Hide file tree

Showing 134 changed files with 9,979 additions and 398 deletions.
diff --git a/.circleci/test.yml b/.circleci/test.yml
@@ -73,7 +73,7 @@ jobs:
       - run:
           name: Skip timm unittests and generate coverage report
           command: |
-            python -m coverage run --branch --source mmseg -m pytest tests/ --ignore tests/test_models/test_backbones/test_timm_backbone.py
+            python -m coverage run --branch --source mmseg -m pytest tests/ --ignore tests/test_models/test_backbones/test_timm_backbone.py --ignore tests/test_apis/test_rs_inferencer.py
             python -m coverage xml
             python -m coverage report -m
   build_cuda:
@@ -110,6 +110,8 @@ jobs:
             docker exec mmseg mim install "mmcv>=2.0.0"  # quoted: a bare > is a shell redirect
             docker exec mmseg pip install "mmpretrain>=1.0.0rc7"
             docker exec mmseg mim install "mmdet>=3.0.0"
+            docker exec mmseg apt-get update
+            docker exec mmseg apt-get install -y git
             docker exec mmseg pip install -r requirements/tests.txt -r requirements/optional.txt
             docker exec mmseg python -m pip install "albumentations>=0.3.2" --no-binary qudida,albumentations
       - run:
@@ -119,7 +121,7 @@ jobs:
       - run:
           name: Run unittests but skip timm unittests
           command: |
-            docker exec mmseg pytest tests/ --ignore tests/test_models/test_backbones/test_timm_backbone.py
+            docker exec mmseg pytest tests/ --ignore tests/test_models/test_backbones/test_timm_backbone.py --ignore tests/test_apis/test_rs_inferencer.py
 workflows:
   pr_stage_lint:
     when: << pipeline.parameters.lint_only >>

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -42,15 +42,17 @@ repos:
     hooks:
       - id: docformatter
         args: ["--in-place", "--wrap-descriptions", "79"]
-  - repo: local
-    hooks:
-      - id: update-model-index
-        name: update-model-index
-        description: Collect model information and update model-index.yml
-        entry: .dev_scripts/update_model_index.py
-        additional_dependencies: [pyyaml]
-        language: python
-        require_serial: true
+  # temporarily disable the update-model-index hook to avoid conflicts
+  # raised by depth estimator models
+  # - repo: local
+  #   hooks:
+  #     - id: update-model-index
+  #       name: update-model-index
+  #       description: Collect model information and update model-index.yml
+  #       entry: .dev_scripts/update_model_index.py
+  #       additional_dependencies: [pyyaml]
+  #       language: python
+  #       require_serial: true
   - repo: https://github.com/asottile/pyupgrade
     rev: v3.0.0
     hooks:

diff --git a/.readthedocs.yml b/.readthedocs.yml
@@ -1,10 +1,14 @@
 version: 2
 
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.7"
+
 formats:
     - epub
 
 python:
-  version: 3.7
   install:
     - requirements: requirements/docs.txt
     - requirements: requirements/readthedocs.txt
diff --git a/README.md b/README.md
@@ -26,6 +26,7 @@
 [![license](https://img.shields.io/github/license/open-mmlab/mmsegmentation.svg)](https://github.com/open-mmlab/mmsegmentation/blob/main/LICENSE)
 [![issue resolution](https://isitmaintained.com/badge/resolution/open-mmlab/mmsegmentation.svg)](https://github.com/open-mmlab/mmsegmentation/issues)
 [![open issues](https://isitmaintained.com/badge/open/open-mmlab/mmsegmentation.svg)](https://github.com/open-mmlab/mmsegmentation/issues)
+[![Open in OpenXLab](https://cdn-static.openxlab.org.cn/app-center/openxlab_demo.svg)](https://openxlab.org.cn/apps?search=mmseg)
 
 Documentation: <https://mmsegmentation.readthedocs.io/en/latest/>
 
@@ -88,12 +89,11 @@ MMSegmentation v1.x brings remarkable improvements over the 0.x release, offerin
 
 ## What's New
 
-v1.1.1 was released on 07/24/2023.
+v1.1.2 was released on 09/20/2023.
 Please refer to [changelog.md](docs/en/notes/changelog.md) for details and release history.
 
-- Support 24 medical image datasets in [projects](./projects/medical/).
-- Add GDAL backend and support remote sensing datasets [LEVIR-CD](https://github.com/open-mmlab/mmsegmentation/pull/2903).
-- Support [DDRNet](https://github.com/open-mmlab/mmsegmentation/pull/2855).
+- Support the monocular depth estimation task; please refer to [VPD](configs/vpd/README.md) for more details.
+- Add new projects: [CAT-Seg](projects/CAT-Seg/README.md), [PP-MobileSeg](projects/pp_mobileseg/README.md), [AdaBins](projects/Adabins/README.md)
 
 ## Installation
 
@@ -229,6 +229,7 @@ Results and models are available in the [model zoo](docs/en/model_zoo.md).
 - [x] [Mask2Former (CVPR'2022)](configs/mask2former)
 - [x] [PIDNet (ArXiv'2022)](configs/pidnet)
 - [x] [DDRNet (T-ITS'2022)](configs/ddrnet)
+- [x] [VPD (ICCV'2023)](configs/vpd)
 
 </details>
 
@@ -254,6 +255,7 @@ Results and models are available in the [model zoo](docs/en/model_zoo.md).
 - [x] [Mapillary Vistas](https://github.com/open-mmlab/mmsegmentation/blob/main/docs/en/user_guides/2_dataset_prepare.md#mapillary-vistas-datasets)
 - [x] [LEVIR-CD](https://github.com/open-mmlab/mmsegmentation/blob/main/docs/en/user_guides/2_dataset_prepare.md#levir-cd)
 - [x] [BDD100K](https://github.com/open-mmlab/mmsegmentation/blob/main/docs/en/user_guides/2_dataset_prepare.md#bdd100K)
+- [x] [NYU](https://github.com/open-mmlab/mmsegmentation/blob/main/docs/en/user_guides/2_dataset_prepare.md#nyu)
 
 </details>
 

diff --git a/README_zh-CN.md b/README_zh-CN.md
@@ -26,6 +26,7 @@
 [![license](https://img.shields.io/github/license/open-mmlab/mmsegmentation.svg)](https://github.com/open-mmlab/mmsegmentation/blob/main/LICENSE)
 [![issue resolution](https://isitmaintained.com/badge/resolution/open-mmlab/mmsegmentation.svg)](https://github.com/open-mmlab/mmsegmentation/issues)
 [![open issues](https://isitmaintained.com/badge/open/open-mmlab/mmsegmentation.svg)](https://github.com/open-mmlab/mmsegmentation/issues)
+[![Open in OpenXLab](https://cdn-static.openxlab.org.cn/app-center/openxlab_demo.svg)](https://openxlab.org.cn/apps?search=mmseg)
 
 文档: <https://mmsegmentation.readthedocs.io/zh_CN/latest>
 
@@ -87,7 +88,7 @@ MMSegmentation v1.x 在 0.x 版本的基础上有了显著的提升，提供了
 
 ## 更新日志
 
-最新版本 v1.1.1 在 2023.07.24 发布。
+最新版本 v1.1.2 在 2023.09.20 发布。
 如果想了解更多版本更新细节和历史信息，请阅读[更新日志](docs/en/notes/changelog.md)。
 
 ## 安装
@@ -223,6 +224,7 @@ MMSegmentation v1.x 在 0.x 版本的基础上有了显著的提升，提供了
 - [x] [Mask2Former (CVPR'2022)](configs/mask2former)
 - [x] [PIDNet (ArXiv'2022)](configs/pidnet)
 - [x] [DDRNet (T-ITS'2022)](configs/ddrnet)
+- [x] [VPD (ICCV'2023)](configs/vpd)
 
 </details>
 
@@ -248,6 +250,7 @@ MMSegmentation v1.x 在 0.x 版本的基础上有了显著的提升，提供了
 - [x] [Mapillary Vistas](https://github.com/open-mmlab/mmsegmentation/blob/main/docs/zh_cn/user_guides/2_dataset_prepare.md#mapillary-vistas-datasets)
 - [x] [LEVIR-CD](https://github.com/open-mmlab/mmsegmentation/blob/main/docs/zh_cn/user_guides/2_dataset_prepare.md#levir-cd)
 - [x] [BDD100K](https://github.com/open-mmlab/mmsegmentation/blob/main/docs/zh_cn/user_guides/2_dataset_prepare.md#bdd100K)
+- [x] [NYU](https://github.com/open-mmlab/mmsegmentation/blob/main/docs/zh_cn/user_guides/2_dataset_prepare.md#nyu)
 
 </details>
 
@@ -308,11 +311,11 @@ MMSegmentation 是一个由来自不同高校和企业的研发人员共同参
 
 ## 欢迎加入 OpenMMLab 社区
 
-扫描下方的二维码可关注 OpenMMLab 团队的 [知乎官方账号](https://www.zhihu.com/people/openmmlab)，加入 [OpenMMLab 团队](https://jq.qq.com/?_wv=1027&k=aCvMxdr3) 以及 [MMSegmentation](https://jq.qq.com/?_wv=1027&k=ukevz6Ie) 的 QQ 群。
+扫描下方的二维码可关注 OpenMMLab 团队的 [知乎官方账号](https://www.zhihu.com/people/openmmlab)，扫描下方微信二维码添加喵喵好友，进入 MMSegmentation 微信交流社群。【加好友申请格式：研究方向+地区+学校/公司+姓名】
 
 <div align="center">
- <img src="docs/zh_cn/imgs/zhihu_qrcode.jpg" height="400" />  <img src="docs/zh_cn/imgs/qq_group_qrcode.jpg" height="400" />  <img src="docs/zh_cn/imgs/seggroup_qrcode.jpg" height="400" />
- </div>
+<img src="docs/zh_cn/imgs/zhihu_qrcode.jpg" height="400" />  <img src="resources/miaomiao_qrcode.jpg" height="400" />
+</div>
 
 我们会在 OpenMMLab 社区为大家
 

diff --git a/configs/_base_/datasets/nyu.py b/configs/_base_/datasets/nyu.py
@@ -0,0 +1,67 @@
+# dataset settings for NYUDataset depth estimation (480x480 training crops)
+dataset_type = 'NYUDataset'
+data_root = 'data/nyu'
+
+train_pipeline = [  # load image + depth map, augment, crop, then pack
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3),
+    dict(type='RandomDepthMix', prob=0.25),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='RandomCrop', crop_size=(480, 480)),
+    dict(
+        type='Albu',
+        transforms=[
+            dict(type='RandomBrightnessContrast'),
+            dict(type='RandomGamma'),
+            dict(type='HueSaturationValue'),
+        ]),
+    dict(
+        type='PackSegInputs',
+        meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
+                   'pad_shape', 'scale_factor', 'flip', 'flip_direction',
+                   'category_id')),
+]
+
+test_pipeline = [  # depth map is loaded after the image Resize step
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(2000, 480), keep_ratio=True),
+    dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3),
+    dict(
+        type='PackSegInputs',
+        meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
+                   'pad_shape', 'scale_factor', 'flip', 'flip_direction',
+                   'category_id'))
+]
+
+train_dataloader = dict(
+    batch_size=8,
+    num_workers=8,
+    persistent_workers=True,
+    sampler=dict(type='InfiniteSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/train', depth_map_path='annotations/train'),
+        pipeline=train_pipeline))
+
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        test_mode=True,
+        data_prefix=dict(
+            img_path='images/test', depth_map_path='annotations/test'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader  # test reuses the validation dataloader
+
+val_evaluator = dict(
+    type='DepthMetric',
+    min_depth_eval=0.001,
+    max_depth_eval=10.0,
+    crop_type='nyu_crop')
+test_evaluator = val_evaluator  # test reuses the validation metric
diff --git a/configs/_base_/datasets/nyu_512x512.py b/configs/_base_/datasets/nyu_512x512.py
@@ -0,0 +1,72 @@
+# dataset settings for NYUDataset depth estimation (512x512 training crops)
+dataset_type = 'NYUDataset'
+data_root = 'data/nyu'
+
+train_pipeline = [  # load image + depth map, augment, resize/crop, then pack
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3),
+    dict(type='RandomDepthMix', prob=0.25),
+    dict(type='RandomFlip', prob=0.5),
+    dict(
+        type='RandomResize',
+        scale=(768, 512),
+        ratio_range=(0.8, 1.5),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=(512, 512)),
+    dict(
+        type='Albu',
+        transforms=[
+            dict(type='RandomBrightnessContrast'),
+            dict(type='RandomGamma'),
+            dict(type='HueSaturationValue'),
+        ]),
+    dict(
+        type='PackSegInputs',
+        meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
+                   'pad_shape', 'scale_factor', 'flip', 'flip_direction',
+                   'category_id')),
+]
+
+test_pipeline = [  # depth map is loaded after the image Resize step
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+    dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3),
+    dict(
+        type='PackSegInputs',
+        meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
+                   'pad_shape', 'scale_factor', 'flip', 'flip_direction',
+                   'category_id'))
+]
+
+train_dataloader = dict(
+    batch_size=8,
+    num_workers=8,
+    persistent_workers=True,
+    sampler=dict(type='InfiniteSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/train', depth_map_path='annotations/train'),
+        pipeline=train_pipeline))
+
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        test_mode=True,
+        data_prefix=dict(
+            img_path='images/test', depth_map_path='annotations/test'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader  # test reuses the validation dataloader
+
+val_evaluator = dict(
+    type='DepthMetric',
+    min_depth_eval=0.001,
+    max_depth_eval=10.0,
+    crop_type='nyu_crop')
+test_evaluator = val_evaluator  # test reuses the validation metric
diff --git a/configs/_base_/models/vpd_sd.py b/configs/_base_/models/vpd_sd.py
@@ -0,0 +1,86 @@
+# model settings: DepthEstimator with a VPD backbone (stable_diffusion_cfg)
+data_preprocessor = dict(
+    type='SegDataPreProcessor',
+    mean=[127.5, 127.5, 127.5],
+    std=[127.5, 127.5, 127.5],  # presumably maps 0-255 to [-1, 1]; confirm
+    bgr_to_rgb=True,
+    pad_val=0,
+    seg_pad_val=0)
+
+# adapted from stable-diffusion/configs/stable-diffusion/v1-inference.yaml
+stable_diffusion_cfg = dict(
+    base_learning_rate=0.0001,
+    target='ldm.models.diffusion.ddpm.LatentDiffusion',
+    checkpoint='https://download.openmmlab.com/mmsegmentation/v0.5/'
+    'vpd/stable_diffusion_v1-5_pretrain_third_party.pth',
+    params=dict(
+        linear_start=0.00085,
+        linear_end=0.012,
+        num_timesteps_cond=1,
+        log_every_t=200,
+        timesteps=1000,
+        first_stage_key='jpg',
+        cond_stage_key='txt',
+        image_size=64,
+        channels=4,
+        cond_stage_trainable=False,
+        conditioning_key='crossattn',
+        monitor='val/loss_simple_ema',
+        scale_factor=0.18215,
+        use_ema=False,
+        scheduler_config=dict(
+            target='ldm.lr_scheduler.LambdaLinearScheduler',
+            params=dict(
+                warm_up_steps=[10000],
+                cycle_lengths=[10000000000000],
+                f_start=[1e-06],
+                f_max=[1.0],
+                f_min=[1.0])),
+        unet_config=dict(
+            target='ldm.modules.diffusionmodules.openaimodel.UNetModel',
+            params=dict(
+                image_size=32,
+                in_channels=4,
+                out_channels=4,
+                model_channels=320,
+                attention_resolutions=[4, 2, 1],
+                num_res_blocks=2,
+                channel_mult=[1, 2, 4, 4],
+                num_heads=8,
+                use_spatial_transformer=True,
+                transformer_depth=1,
+                context_dim=768,
+                use_checkpoint=True,
+                legacy=False)),
+        first_stage_config=dict(
+            target='ldm.models.autoencoder.AutoencoderKL',
+            params=dict(
+                embed_dim=4,
+                monitor='val/rec_loss',
+                ddconfig=dict(
+                    double_z=True,
+                    z_channels=4,
+                    resolution=256,
+                    in_channels=3,
+                    out_ch=3,
+                    ch=128,
+                    ch_mult=[1, 2, 4, 4],
+                    num_res_blocks=2,
+                    attn_resolutions=[],
+                    dropout=0.0),
+                lossconfig=dict(target='torch.nn.Identity'))),
+        cond_stage_config=dict(
+            target='ldm.modules.encoders.modules.AbstractEncoder')))
+
+model = dict(  # only the backbone is set here; no decode head in this file
+    type='DepthEstimator',
+    data_preprocessor=data_preprocessor,
+    backbone=dict(
+        type='VPD',
+        diffusion_cfg=stable_diffusion_cfg,
+    ),
+)
+
+# some of the parameters in stable-diffusion model will not be updated
+# during training
+find_unused_parameters = True