From da9c3ccb0ff7dd9f301873803ef9335f1363a50c Mon Sep 17 00:00:00 2001 From: kprokofi Date: Wed, 13 Nov 2024 04:29:21 +0900 Subject: [PATCH 1/2] fix linter --- pyproject.toml | 2 +- .../instance_segmentation/heads/__init__.py | 2 +- .../heads/maskdino_encoder.py | 123 +++++++++--------- 3 files changed, 63 insertions(+), 64 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 00d2e3c2ba..778a266ee2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,7 +74,7 @@ docs = [ xpu = [ "torch==2.6.0.dev20241104+xpu", "torchvision==0.20.0.dev20241104+xpu", - "pytorch-triton-xpu==3.1.0+91b14bf559" + "pytorch-triton-xpu==3.1.0+91b14bf559", "oneccl_bind_pt==2.1.300+xpu", "lightning==2.2", "pytorchcv==0.0.67", diff --git a/src/otx/algo/instance_segmentation/heads/__init__.py b/src/otx/algo/instance_segmentation/heads/__init__.py index 42bf288ed9..c121071928 100644 --- a/src/otx/algo/instance_segmentation/heads/__init__.py +++ b/src/otx/algo/instance_segmentation/heads/__init__.py @@ -24,4 +24,4 @@ "MaskDINODecoderHeadModule", "MaskDINOEncoderHeadModule", "MaskDINOHead", -] \ No newline at end of file +] diff --git a/src/otx/algo/instance_segmentation/heads/maskdino_encoder.py b/src/otx/algo/instance_segmentation/heads/maskdino_encoder.py index 52767e5ec1..f3dcb6b7ef 100644 --- a/src/otx/algo/instance_segmentation/heads/maskdino_encoder.py +++ b/src/otx/algo/instance_segmentation/heads/maskdino_encoder.py @@ -6,12 +6,11 @@ from __future__ import annotations -from typing import Callable +from typing import Any, Callable, ClassVar import numpy as np import torch from torch import Tensor, nn -from torch.amp import autocast from torch.nn import functional as f from torch.nn.init import normal_ @@ -310,66 +309,66 @@ def __init__( def forward_features(self, features: dict[str, Tensor]) -> tuple[Tensor, Tensor, list[Tensor]]: """Forward pass of the encoder.""" - with autocast(device_type=features[self.transformer_in_features[0]].device.type, enabled=False): - # backbone features - srcs = [] - pos = [] - # additional downsampled features - srcsl: list[Tensor] = [] - posl = [] - if self.total_num_feature_levels > self.transformer_num_feature_levels: - smallest_feat = features[self.transformer_in_features[self.low_resolution_index]].float() - _len_srcs = self.transformer_num_feature_levels - for lvl in range(_len_srcs, self.total_num_feature_levels): - src = self.input_proj[lvl](smallest_feat) if lvl == _len_srcs else self.input_proj[lvl](srcsl[-1]) - srcsl.append(src) - posl.append(self.pe_layer(src)) - srcsl = srcsl[::-1] - # Reverse feature maps - for idx, feat in enumerate(self.transformer_in_features[::-1]): - x = features[feat].float() # deformable detr does not support half precision - srcs.append(self.input_proj[idx](x)) - pos.append(self.pe_layer(x)) - srcs.extend(srcsl) - pos.extend(posl) - y, spatial_shapes, level_start_index = self.transformer(srcs, pos) - bs = y.shape[0] - - split_size_or_sections = [None] * self.total_num_feature_levels - for i in range(self.total_num_feature_levels): - if i < self.total_num_feature_levels - 1: - split_size_or_sections[i] = level_start_index[i + 1] - level_start_index[i] - else: - split_size_or_sections[i] = y.shape[1] - level_start_index[i] - y = torch.split(y, split_size_or_sections, dim=1) - - out = [] - multi_scale_features = [] - num_cur_levels = 0 - for i, z in enumerate(y): - out.append(z.transpose(1, 2).view(bs, -1, spatial_shapes[i][0], spatial_shapes[i][1])) - - # append `out` with extra FPN levels - # Reverse feature maps into top-down order (from low to high resolution) - for idx, feat in enumerate(self.in_features[: self.num_fpn_levels][::-1]): - x = features[feat].float() - lateral_conv = self.lateral_convs[idx] - output_conv = self.output_convs[idx] - cur_fpn = lateral_conv(x) - # Following FPN implementation, we use nearest upsampling here - y = cur_fpn + f.interpolate( - out[self.high_resolution_index], - size=cur_fpn.shape[-2:], - mode="bilinear", - align_corners=False, - ) - y = output_conv(y) - out.append(y) - for o in out: - if num_cur_levels < self.total_num_feature_levels: - multi_scale_features.append(o) - num_cur_levels += 1 - return self.mask_features(out[-1]), out[0], multi_scale_features + # backbone features + srcs = [] + pos = [] + # additional downsampled features + srcsl: list[Tensor] = [] + posl = [] + if self.total_num_feature_levels > self.transformer_num_feature_levels: + smallest_feat = features[self.transformer_in_features[self.low_resolution_index]].float() + _len_srcs = self.transformer_num_feature_levels + for lvl in range(_len_srcs, self.total_num_feature_levels): + src = self.input_proj[lvl](smallest_feat) if lvl == _len_srcs else self.input_proj[lvl](srcsl[-1]) + srcsl.append(src) + posl.append(self.pe_layer(src)) + srcsl = srcsl[::-1] + # Reverse feature maps + for idx, feat in enumerate(self.transformer_in_features[::-1]): + x = features[feat].float() # deformable detr does not support half precision + srcs.append(self.input_proj[idx](x)) + pos.append(self.pe_layer(x)) + srcs.extend(srcsl) + pos.extend(posl) + y, spatial_shapes, level_start_index = self.transformer(srcs, pos) + bs = y.shape[0] + + split_size_or_sections = [None] * self.total_num_feature_levels + for i in range(self.total_num_feature_levels): + if i < self.total_num_feature_levels - 1: + split_size_or_sections[i] = level_start_index[i + 1] - level_start_index[i] + else: + split_size_or_sections[i] = y.shape[1] - level_start_index[i] + y = torch.split(y, split_size_or_sections, dim=1) + + out = [] + multi_scale_features = [] + num_cur_levels = 0 + for i, z in enumerate(y): + out.append(z.transpose(1, 2).view(bs, -1, spatial_shapes[i][0], spatial_shapes[i][1])) + + # append `out` with extra FPN levels + # Reverse feature maps into top-down order (from low to high resolution) + for idx, feat in enumerate(self.in_features[: self.num_fpn_levels][::-1]): + x = features[feat].float() + lateral_conv = self.lateral_convs[idx] + output_conv = self.output_convs[idx] + cur_fpn = lateral_conv(x) + # Following FPN implementation, we use nearest upsampling here + y = cur_fpn + f.interpolate( + out[self.high_resolution_index], + size=cur_fpn.shape[-2:], + mode="bilinear", + align_corners=False, + ) + y = output_conv(y) + out.append(y) + for o in out: + if num_cur_levels < self.total_num_feature_levels: + multi_scale_features.append(o) + num_cur_levels += 1 + return self.mask_features(out[-1]), out[0], multi_scale_features + class MaskDINOEncoderHead: """MaskDINO Encoder Head Factory Selector.""" From 7be2712e17a6bbd3f5ffa597ff5e8e96f1c50937 Mon Sep 17 00:00:00 2001 From: kprokofi Date: Wed, 13 Nov 2024 04:31:16 +0900 Subject: [PATCH 2/2] remove warning --- src/otx/engine/engine.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/otx/engine/engine.py b/src/otx/engine/engine.py index ff22512a06..9e6664cbcb 100644 --- a/src/otx/engine/engine.py +++ b/src/otx/engine/engine.py @@ -1147,8 +1147,6 @@ def _build_trainer(self, **kwargs) -> None: self._cache.update(strategy="xpu_single") # add plugin for Automatic Mixed Precision on XPU if self._cache.args.get("precision", 32) == 16: - msg = "XPU doesn't support fp16 now, so bfp16 will be used instead." - warn(msg, stacklevel=1) self._cache.update( plugins=[ MixedPrecision(