Skip to content

Commit

Permalink
fix unit tests 2
Browse files Browse the repository at this point in the history
  • Loading branch information
kprokofi committed Dec 18, 2024
1 parent 045e3ed commit c92bda3
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 90 deletions.
9 changes: 6 additions & 3 deletions src/otx/algo/classification/efficientnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from otx.algo.classification.backbones.efficientnet import EFFICIENTNET_VERSION, EfficientNetBackbone
from otx.algo.classification.classifier import HLabelClassifier, ImageClassifier, SemiSLClassifier
from otx.algo.classification.heads import (
HierarchicalLinearClsHead,
HierarchicalCBAMClsHead,
LinearClsHead,
MultiLabelLinearClsHead,
SemiSLLinearClsHead,
Expand Down Expand Up @@ -272,8 +272,11 @@ def _build_model(self, head_config: dict) -> nn.Module:

return HLabelClassifier(
backbone=backbone,
neck=GlobalAveragePooling(dim=2),
head=HierarchicalLinearClsHead(**copied_head_config, in_channels=backbone.num_features),
neck=nn.Identity(),
head=HierarchicalCBAMClsHead(
in_channels=backbone.num_features,
**copied_head_config,
),
multiclass_loss=nn.CrossEntropyLoss(),
multilabel_loss=AsymmetricAngularLossWithIgnore(gamma_pos=0.0, gamma_neg=1.0, reduction="sum"),
)
Expand Down
9 changes: 6 additions & 3 deletions src/otx/algo/classification/torchvision_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
from otx.algo.classification.backbones.torchvision import TorchvisionBackbone, TVModelType
from otx.algo.classification.classifier import HLabelClassifier, ImageClassifier, SemiSLClassifier
from otx.algo.classification.heads import (
HierarchicalCBAMClsHead,
LinearClsHead,
MultiLabelLinearClsHead,
SemiSLLinearClsHead,
)
from otx.algo.classification.losses import AsymmetricAngularLossWithIgnore
from otx.algo.classification.mobilenet_v3 import HierarchicalCBAMClsHead
from otx.algo.classification.necks.gap import GlobalAveragePooling
from otx.algo.classification.utils import get_classification_layers
from otx.core.data.entity.classification import (
Expand Down Expand Up @@ -315,8 +315,11 @@ def _build_model(self, head_config: dict) -> nn.Module:
backbone = TorchvisionBackbone(backbone=self.backbone, pretrained=self.pretrained)
return HLabelClassifier(
backbone=backbone,
neck=GlobalAveragePooling(dim=2),
head=HierarchicalCBAMClsHead(**head_config, in_channels=backbone.in_features),
neck=nn.Identity(),
head=HierarchicalCBAMClsHead(
in_channels=backbone.in_features,
**head_config,
),
multiclass_loss=nn.CrossEntropyLoss(),
multilabel_loss=AsymmetricAngularLossWithIgnore(gamma_pos=0.0, gamma_neg=1.0, reduction="sum"),
)
Expand Down
8 changes: 6 additions & 2 deletions src/otx/algo/classification/vit.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@
from otx.algo.classification.backbones.vision_transformer import VIT_ARCH_TYPE, VisionTransformer
from otx.algo.classification.classifier import HLabelClassifier, ImageClassifier, SemiSLClassifier
from otx.algo.classification.heads import (
HierarchicalCBAMClsHead,
MultiLabelLinearClsHead,
SemiSLVisionTransformerClsHead,
VisionTransformerClsHead,
)
from otx.algo.classification.losses import AsymmetricAngularLossWithIgnore
from otx.algo.classification.mobilenet_v3 import HierarchicalCBAMClsHead
from otx.algo.classification.utils import get_classification_layers
from otx.algo.explain.explain_algo import ViTReciproCAM, feature_vector_fn
from otx.algo.utils.support_otx_v1 import OTXv1Helper
Expand Down Expand Up @@ -466,7 +466,11 @@ def _build_model(self, head_config: dict) -> nn.Module:
return HLabelClassifier(
backbone=vit_backbone,
neck=None,
head=HierarchicalCBAMClsHead(**head_config, in_channels=vit_backbone.embed_dim),
head=HierarchicalCBAMClsHead(
in_channels=vit_backbone.embed_dim,
step_size=1,
**head_config,
),
multiclass_loss=nn.CrossEntropyLoss(),
multilabel_loss=AsymmetricAngularLossWithIgnore(gamma_pos=0.0, gamma_neg=1.0, reduction="sum"),
init_cfg=init_cfg,
Expand Down
2 changes: 2 additions & 0 deletions src/otx/core/data/dataset/keypoint_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def __init__(
image_color_channel: ImageColorChannel = ImageColorChannel.RGB,
stack_images: bool = True,
to_tv_image: bool = True,
data_format: str = "",
) -> None:
super().__init__(
dm_subset,
Expand All @@ -49,6 +50,7 @@ def __init__(
image_color_channel,
stack_images,
to_tv_image,
data_format
)

self.dm_subset = self._get_single_bbox_dataset(dm_subset)
Expand Down
2 changes: 2 additions & 0 deletions src/otx/core/data/dataset/object_detection_3d.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def __init__(
image_color_channel: ImageColorChannel = ImageColorChannel.RGB,
stack_images: bool = True,
to_tv_image: bool = False,
data_format: str = "",
max_objects: int = 50,
) -> None:
super().__init__(
Expand All @@ -51,6 +52,7 @@ def __init__(
image_color_channel,
stack_images,
to_tv_image,
data_format
)
self.max_objects = max_objects
self.subset_type = list(self.dm_subset.get_subset_info())[-1].split(":")[0]
Expand Down
116 changes: 34 additions & 82 deletions src/otx/core/data/dataset/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,16 @@

import logging as log
import operator
import warnings
from collections import defaultdict
from copy import deepcopy
from itertools import product
from typing import TYPE_CHECKING, Callable

import numpy as np
import shapely.geometry as sg
import torch
from datumaro import Bbox, DatasetItem, Ellipse, Image, Polygon
from datumaro import Dataset as DmDataset
from datumaro.components.annotation import AnnotationType
from datumaro import DatasetItem, Image
from datumaro.components.annotation import AnnotationType, Bbox, ExtractedMask, Polygon
from datumaro.plugins.tiling import Tile
from datumaro.plugins.tiling.tile import _apply_offset
from datumaro.plugins.tiling.util import (
Expand Down Expand Up @@ -98,7 +96,7 @@ def __init__(
)
self._tile_size = tile_size
self._tile_ann_func_map[AnnotationType.polygon] = OTXTileTransform._tile_polygon
self._tile_ann_func_map[AnnotationType.ellipse] = OTXTileTransform._tile_ellipse
self._tile_ann_func_map[AnnotationType.mask] = OTXTileTransform._tile_masks
self.with_full_img = with_full_img

@staticmethod
Expand Down Expand Up @@ -140,42 +138,27 @@ def _tile_polygon(
)

@staticmethod
def _tile_ellipse(
ann: Ellipse,
roi_box: sg.Polygon,
threshold_drop_ann: float = 0.8,
def _tile_masks(
ann: ExtractedMask,
roi_int: BboxIntCoords,
*args, # noqa: ARG004
**kwargs, # noqa: ARG004
) -> Polygon | None:
polygon = sg.Polygon(ann.get_points(num_points=10))

# NOTE: polygon may be invalid, e.g. self-intersecting
if not roi_box.intersects(polygon) or not polygon.is_valid:
return None

# NOTE: intersection may return a GeometryCollection or MultiPolygon
inter = polygon.intersection(roi_box)
if isinstance(inter, (sg.GeometryCollection, sg.MultiPolygon)):
shapes = [(geom, geom.area) for geom in list(inter.geoms) if geom.is_valid]
if not shapes:
return None

inter, _ = max(shapes, key=operator.itemgetter(1))

if not isinstance(inter, sg.Polygon) and not inter.is_valid:
return None
) -> ExtractedMask:
"""Extracts a tile mask from the given annotation.
prop_area = inter.area / polygon.area
Note: Original Datumaro _tile_masks does not work with ExtractedMask.
if prop_area < threshold_drop_ann:
return None

inter = _apply_offset(inter, roi_box)
Args:
ann (ExtractedMask): datumaro ExtractedMask annotation.
roi_int (BboxIntCoords): ROI coordinates.
return Polygon(
points=[p for xy in inter.exterior.coords for p in xy],
Returns:
ExtractedMask: ExtractedMask annotation.
"""
x, y, w, h = roi_int
return ann.wrap(
index_mask=ann.index_mask()[y : y + h, x : x + w],
attributes=deepcopy(ann.attributes),
label=ann.label,
)

def _extract_rois(self, image: Image) -> list[BboxIntCoords]:
Expand Down Expand Up @@ -271,7 +254,6 @@ def __init__(self, dataset: OTXDataset, tile_config: TileConfig) -> None:
dataset.image_color_channel,
dataset.stack_images,
dataset.to_tv_image,
data_format=dataset.data_format,
)
self.tile_config = tile_config
self._dataset = dataset
Expand Down Expand Up @@ -434,17 +416,14 @@ def _get_item_impl(self, index: int) -> TileDetDataEntity: # type: ignore[overr
img = item.media_as(Image)
img_data, img_shape, _ = self._get_img_data_and_shape(img)

gt_bboxes = [ann for ann in item.annotations if isinstance(ann, Bbox)]

if empty_anno := len(gt_bboxes) == 0:
warnings.warn(f"Empty annotation for image {item.id}!", stacklevel=2)
bbox_anns = [ann for ann in item.annotations if isinstance(ann, Bbox)]

bboxes = (
np.empty((0, 4), dtype=np.float32)
if empty_anno
else np.stack([ann.points for ann in gt_bboxes], axis=0).astype(np.float32)
np.stack([ann.points for ann in bbox_anns], axis=0).astype(np.float32)
if len(bbox_anns) > 0
else np.zeros((0, 4), dtype=np.float32)
)
labels = torch.as_tensor([ann.label for ann in gt_bboxes])
labels = torch.as_tensor([ann.label for ann in bbox_anns])

tile_entities, tile_attrs = self.get_tiles(img_data, item, index)

Expand Down Expand Up @@ -528,51 +507,24 @@ def _get_item_impl(self, index: int) -> TileInstSegDataEntity: # type: ignore[o
img = item.media_as(Image)
img_data, img_shape, _ = self._get_img_data_and_shape(img)

anno_collection: dict[str, list] = defaultdict(list)
for anno in item.annotations:
anno_collection[anno.__class__.__name__].append(anno)

gt_bboxes, gt_labels, gt_masks, gt_polygons = [], [], [], []

# TODO(Eugene): https://jira.devtools.intel.com/browse/CVS-159363
# Temporary solution to handle multiple annotation types.
# Ideally, we should pre-filter annotations during initialization of the dataset.

if Polygon.__name__ in anno_collection: # Polygon for InstSeg has higher priority
for poly in anno_collection[Polygon.__name__]:
bbox = Bbox(*poly.get_bbox()).points
for annotation in item.annotations:
if isinstance(annotation, Polygon):
bbox = np.array(annotation.get_bbox(), dtype=np.float32)
gt_bboxes.append(bbox)
gt_labels.append(poly.label)
gt_labels.append(annotation.label)

if self._dataset.include_polygons:
gt_polygons.append(poly)
else:
gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0])
elif Bbox.__name__ in anno_collection:
boxes = anno_collection[Bbox.__name__]
gt_bboxes = [ann.points for ann in boxes]
gt_labels = [ann.label for ann in boxes]
for box in boxes:
poly = Polygon(box.as_polygon())
if self._dataset.include_polygons:
gt_polygons.append(poly)
gt_polygons.append(annotation)
else:
gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0])
elif Ellipse.__name__ in anno_collection:
for ellipse in anno_collection[Ellipse.__name__]:
bbox = Bbox(*ellipse.get_bbox()).points
gt_bboxes.append(bbox)
gt_labels.append(ellipse.label)
poly = Polygon(ellipse.as_polygon(num_points=10))
if self._dataset.include_polygons:
gt_polygons.append(poly)
else:
gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0])
else:
warnings.warn(f"No valid annotations found for image {item.id}!", stacklevel=2)
gt_masks.append(polygon_to_bitmap([annotation], *img_shape)[0])

# convert xywh to xyxy format
bboxes = np.array(gt_bboxes, dtype=np.float32)
bboxes[:, 2:] += bboxes[:, :2]

bboxes = np.stack(gt_bboxes, dtype=np.float32) if gt_bboxes else np.empty((0, 4), dtype=np.float32)
masks = np.stack(gt_masks, axis=0) if gt_masks else np.empty((0, *img_shape), dtype=bool)
masks = np.stack(gt_masks, axis=0) if gt_masks else np.zeros((0, *img_shape), dtype=bool)
labels = np.array(gt_labels, dtype=np.int64)

tile_entities, tile_attrs = self.get_tiles(img_data, item, index)
Expand Down

0 comments on commit c92bda3

Please sign in to comment.