From c088a191ec6840295b3852c4ce7f05aa51a4d6b9 Mon Sep 17 00:00:00 2001 From: Ashutosh Singh <40604544+ashutoshsingh0223@users.noreply.github.com> Date: Sun, 25 Feb 2024 23:00:24 +0100 Subject: [PATCH 1/3] Update simmim_neck.py Add a `target_channels` argument to SimMIMLinearDecoder. The downstream loss for SimMIM, i.e. `PixelReconstructionLoss`, already allows the user to set the number of channels through its `channel` argument. This is useful when reconstructing non-RGB images. --- mmpretrain/models/necks/simmim_neck.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mmpretrain/models/necks/simmim_neck.py b/mmpretrain/models/necks/simmim_neck.py index cb1e29bcf19..4f12312e2e4 100644 --- a/mmpretrain/models/necks/simmim_neck.py +++ b/mmpretrain/models/necks/simmim_neck.py @@ -15,14 +15,15 @@ class SimMIMLinearDecoder(BaseModule): Args: in_channels (int): Channel dimension of the feature map. encoder_stride (int): The total stride of the encoder. + target_channels (int): Channel dimensions of original image. 
""" - def __init__(self, in_channels: int, encoder_stride: int) -> None: + def __init__(self, in_channels: int, encoder_stride: int, target_channels: int = 3) -> None: super().__init__() self.decoder = nn.Sequential( nn.Conv2d( in_channels=in_channels, - out_channels=encoder_stride**2 * 3, + out_channels=encoder_stride**2 * target_channels, kernel_size=1), nn.PixelShuffle(encoder_stride), ) From ecf83c40076246097d5aa7f33e0d6357b20165f5 Mon Sep 17 00:00:00 2001 From: Ashutosh Singh <40604544+ashutoshsingh0223@users.noreply.github.com> Date: Mon, 26 Feb 2024 15:48:38 +0100 Subject: [PATCH 2/3] Update simmim_neck.py Lint errors --- mmpretrain/models/necks/simmim_neck.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mmpretrain/models/necks/simmim_neck.py b/mmpretrain/models/necks/simmim_neck.py index 4f12312e2e4..e75a4a2ad26 100644 --- a/mmpretrain/models/necks/simmim_neck.py +++ b/mmpretrain/models/necks/simmim_neck.py @@ -18,7 +18,8 @@ class SimMIMLinearDecoder(BaseModule): target_channels (int): Channel dimensions of original image. """ - def __init__(self, in_channels: int, encoder_stride: int, target_channels: int = 3) -> None: + def __init__(self, in_channels: int, encoder_stride: int, + target_channels: int = 3) -> None: super().__init__() self.decoder = nn.Sequential( nn.Conv2d( From 8d027df229c4961b4661d803e7e39ccebb0019c1 Mon Sep 17 00:00:00 2001 From: Ashutosh Date: Mon, 26 Feb 2024 15:56:13 +0100 Subject: [PATCH 3/3] Pre-commit fixes --- mmpretrain/models/necks/simmim_neck.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mmpretrain/models/necks/simmim_neck.py b/mmpretrain/models/necks/simmim_neck.py index e75a4a2ad26..5e8ced2fd67 100644 --- a/mmpretrain/models/necks/simmim_neck.py +++ b/mmpretrain/models/necks/simmim_neck.py @@ -18,7 +18,9 @@ class SimMIMLinearDecoder(BaseModule): target_channels (int): Channel dimensions of original image. 
""" - def __init__(self, in_channels: int, encoder_stride: int, + def __init__(self, + in_channels: int, + encoder_stride: int, target_channels: int = 3) -> None: super().__init__() self.decoder = nn.Sequential(