From c1db46471eac6e23f03da22fc832cf5743bceaef Mon Sep 17 00:00:00 2001
From: ktro2828 <kotaro.uetake@tier4.jp>
Date: Mon, 3 Feb 2025 09:22:08 +0900
Subject: [PATCH] fix: use (height, width) order for segmentation mask shape and size

Signed-off-by: ktro2828 <kotaro.uetake@tier4.jp>
---
 .../t4_dataset/annotation_files_generator.py           | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/perception_dataset/t4_dataset/annotation_files_generator.py b/perception_dataset/t4_dataset/annotation_files_generator.py
index 4d6ad554..3013e6ca 100644
--- a/perception_dataset/t4_dataset/annotation_files_generator.py
+++ b/perception_dataset/t4_dataset/annotation_files_generator.py
@@ -126,10 +126,10 @@ def convert_one_scene(
                         {frame_index: sample_nuim["token"]}
                     )
 
-                    wid_hgt = (sample_nuim["width"], sample_nuim["height"])
-                    if wid_hgt != prev_wid_hgt:
-                        prev_wid_hgt = wid_hgt
-                        object_mask = np.zeros(wid_hgt, dtype=np.uint8)
+                    hgt_wid = (sample_nuim["height"], sample_nuim["width"])
+                    if hgt_wid != prev_wid_hgt:
+                        prev_wid_hgt = hgt_wid
+                        object_mask = np.zeros(hgt_wid, dtype=np.uint8)
                         object_mask = cocomask.encode(np.asfortranarray(object_mask))
                         object_mask["counts"] = base64.b64encode(object_mask["counts"]).decode(
                             "ascii"
@@ -338,7 +338,7 @@ def _convert_to_t4_format(
     def _clip_bbox(self, bbox: List[float], mask: Dict[str, Any]) -> List[float]:
         """Clip the bbox to the image size."""
         try:
-            width, height = mask["size"]
+            height, width = mask["size"]
             bbox[0] = max(0, bbox[0])
             bbox[1] = max(0, bbox[1])
             bbox[2] = min(width, bbox[2])