feature(caption): import caption from coco format and generate image …

…caption dynamically
IDEA-Research · Sep 22, 2023 · 184ae40 · 184ae40
1 parent 9388b0b
commit 184ae40
Show file tree

Hide file tree

Showing 8 changed files with 158 additions and 58 deletions.
diff --git a/deepdataspace/constants.py b/deepdataspace/constants.py
@@ -239,15 +239,16 @@ class ContentEncoding:
     ALL_ = {Plain, Base64}
 
 
-class TSVFileType:
+class DatasetFileType:
     """
     | TSV dataset related file types.
     | TSV dataset format may contain multiple files, each of these types:
     """
 
-    Embedding = "Embedding"  #: .embd file, used by :class:`deepdataspace.plugins.tsv.process.RankByFlags`.
-    Prediction = "Pred"  #: .pred file, used by :class:`deepdataspace.plugins.tsv.importer.TSVImporter`.
-    GroundTruth = LabelName.GroundTruth  #: .tsv file, used by :class:`deepdataspace.plugins.tsv.importer.TSVImporter`.
+    GroundTruth = LabelName.GroundTruth  #: Tag of the ground truth file of a dataset.
+    Prediction = "Pred"  #: Tag prefix of a prediction file; importers register it as "Pred/<name>".
+    Embedding = "Embedding"  #: Tag of the .embd file collected by the TSV importer.
+    Meta = "Meta"  #: Tag of the meta file collected by the COCO2017 importer.
 
 
 class LabelProjectStatus:
@@ -286,12 +287,12 @@ class LabelProjectRoles:
     ReviewKinds_ = {Reviewer, ReviewLeader}  #: Roles that take part in the reviewing process.
 
     Levels_ = {
-        Owner: 0,
-        Manager: 1,
-        LabelLeader: 2,
+        Owner       : 0,
+        Manager     : 1,
+        LabelLeader : 2,
         ReviewLeader: 3,
-        Labeler: 4,
-        Reviewer: 5
+        Labeler     : 4,
+        Reviewer    : 5
     }  #: The level of every role, smaller number means higher level.
 
 

diff --git a/deepdataspace/io/importer.py b/deepdataspace/io/importer.py
@@ -18,6 +18,7 @@
 from tqdm import tqdm
 
 from deepdataspace import constants
+from deepdataspace.constants import DatasetFileType
 from deepdataspace.constants import LabelName
 from deepdataspace.constants import LabelType
 from deepdataspace.model import Category
@@ -66,6 +67,7 @@ def format_annotation(category: str,
                           keypoint_colors: List[int] = None,
                           keypoint_skeleton: List[int] = None,
                           keypoint_names: List[str] = None,
+                          caption: str = None,
                           confirm_type: int = 0, ):
         """
         A helper function to format annotation data.
@@ -83,6 +85,7 @@ def format_annotation(category: str,
                     keypoint_colors=keypoint_colors,
                     keypoint_skeleton=keypoint_skeleton,
                     keypoint_names=keypoint_names,
+                    caption=caption,
                     confirm_type=confirm_type, )
 
 
@@ -284,7 +287,7 @@ def collect_files(self) -> dict:
         Collect the files related to this dataset, {file_tag: file_path}.
         """
 
-        return {LabelName.GroundTruth: self.path}
+        return {DatasetFileType.GroundTruth: self.path}
 
     @staticmethod
     @abc.abstractmethod

diff --git a/deepdataspace/model/image.py b/deepdataspace/model/image.py
@@ -404,6 +404,7 @@ def batch_add_annotation(self,
                              keypoint_colors: List[int] = None,
                              keypoint_skeleton: List[int] = None,
                              keypoint_names: List[str] = None,
+                             caption: str = None,
                              confirm_type: int = 0, ):
         """
         The batch version of add_annotation.
@@ -448,7 +449,8 @@ def batch_add_annotation(self,
             alpha_uri = create_file_url(file_path=alpha_path,
                                         read_mode=FileReadMode.Binary)
 
-        anno_obj = Object(label_name=label, label_type=label_type, category_name=category,
+        anno_obj = Object(label_name=label, label_type=label_type,
+                          category_name=category, caption=caption,
                           bounding_box=bbox, segmentation=segmentation, alpha=alpha_uri,
                           points=points, lines=lines, point_colors=colors, point_names=names,
                           conf=conf, is_group=is_group, confirm_type=confirm_type)

diff --git a/deepdataspace/model/object.py b/deepdataspace/model/object.py
@@ -50,6 +50,8 @@ class Object(BaseModel):
         The point colors of the object.
     point_names: list
         The point names of the object.
+    caption: str
+        The caption of the object.
     confirm_type: int
         The image confirm type, 0 for unconfirmed, 1 for confirmed, 2 for rejected.
     compare_result: dict
@@ -81,8 +83,9 @@ def get_collection(cls, *args, **kwargs):
     alpha: Optional[str] = ""
     points: Optional[List[Union[float, int]]] = []
     lines: Optional[List[int]] = []
-    point_colors: Optional[List[str]] = []
+    point_colors: Optional[List[int]] = []
     point_names: Optional[List[str]] = []
+    caption: Optional[str] = ""
     confirm_type: Optional[int] = 0  # the image confirm type, 0 no confirm required, 1 gt may be fn, 2 pred may be fp
     compare_result: Optional[Dict[str, str]] = {}  # {"90": "FP", ..., "10": "OK"}
     matched_det_idx: Optional[int] = None  # The matched ground truth index, for prediction objects only.

diff --git a/deepdataspace/plugins/coco2017/importer.py b/deepdataspace/plugins/coco2017/importer.py
@@ -5,12 +5,11 @@
 import json
 import logging
 import os
-from multiprocessing import Manager
-from multiprocessing import Process
 from typing import Dict
 from typing import List
 from typing import Tuple
 
+from deepdataspace.constants import DatasetFileType
 from deepdataspace.constants import DatasetType
 from deepdataspace.constants import LabelName
 from deepdataspace.constants import LabelType
@@ -80,10 +79,12 @@ def _parse_meta(meta_path: str):
             assert os.path.isdir(image_root) and os.path.exists(image_root)
 
         info = {
-            "dataset_name": dataset_name,
-            "ground_truth": ground_truth,
-            "predictions" : predictions,
-            "image_root"  : image_root
+            "dataset_name"     : dataset_name,
+            "ground_truth"     : ground_truth,
+            "predictions"      : predictions,
+            "image_root"       : image_root,
+            "dynamic_caption"  : getattr(module, "dynamic_caption", False),
+            "caption_generator": getattr(module, "caption_generator", None),
         }
 
         return info
@@ -102,9 +103,9 @@ def load_ground_truth(self):
             coco_data = json.load(fp)
 
         images = coco_data["images"]
-        images = {i["id"]: i for i in images}
+        self._images = {i["id"]: i for i in images}
 
-        categories = coco_data["categories"]
+        categories = coco_data.get("categories", [])
         self._categories = {c["id"]: c for c in categories}
 
         annotations = coco_data["annotations"]
@@ -115,8 +116,6 @@ def load_ground_truth(self):
             anno_list = self._annotations.setdefault(image_id, [])
             anno_list.append(annotation)
 
-            self._images[image_id] = images[image_id]
-
     def load_predictions(self):
         for file_tag, file_path in self.dataset.files.items():
             if not file_tag.startswith("PRED/"):
@@ -158,25 +157,23 @@ def __iter__(self) -> Tuple[Dict, List[Dict]]:
             image_id = coco_image_data["id"]
             coco_anno_list = self._annotations.get(image_id, [])
             # list_sample = [
-            #     {'segmentation'  : [
-            #         [240.86, 211.31, 240.16, 197.19, 236.98, 192.26, 237.34, 187.67, 245.8, 188.02, 243.33, 176.02,
-            #          250.39,
-            #          186.96, 251.8, 166.85, 255.33, 142.51, 253.21, 190.49, 261.68, 183.08, 258.86, 191.2, 260.98,
-            #          206.37,
-            #          254.63, 199.66, 252.51, 201.78, 251.8, 212.01]],
-            #         'area'       : 531.8071000000001,
-            #         'iscrowd'    : 0,
-            #         'image_id'   : 139,
-            #         'bbox'       : [236.98, 142.51, 24.7, 69.5],
-            #         'category_id': 64,
-            #         'id'         : 26547,
-            #         # 'label_name' : 'GroundTruth',
-            #         # 'label_type' : 'GT'
-            #     }
+            #     {
+            #       'segmentation'  : [
+            #           [x1, y1, x2, y2 ...],
+            #       ],
+            #       'area'          : 531.8071000000001,
+            #       'iscrowd'       : 0,
+            #       'image_id'      : 139,
+            #       'bbox'          : [x, y, w, h],
+            #       'category_id'   : 64,
+            #       'keypoints'     : [x1, y1, v1, conf1, x2, y2, v2, conf2, ...],
+            #       'caption'       : 'A giraffe eating food from the top of a tree.',
+            #       'id'            : 26547,
+            #     },
+            #     ...
             # ]
 
             # prepare image uri
-
             uri = None
 
             # trying to find the image file in local file system
@@ -211,9 +208,13 @@ def __iter__(self) -> Tuple[Dict, List[Dict]]:
                 label_type = anno_data.pop("label_type", LabelType.GroundTruth)
 
                 # prepare category
-                category_id = anno_data.pop("category_id")
-                category = self._categories[category_id]
-                category_name = category["name"]
+                category_id = anno_data.pop("category_id", None)
+                if category_id is not None:  # only a missing id means "no category"; 0 is falsy but valid
+                    category = self._categories[category_id]
+                    category_name = category["name"]
+                else:
+                    category = {}
+                    category_name = ""
 
                 # prepare bbox
                 bbox = anno_data.pop("bbox", None)
@@ -250,7 +251,8 @@ def __iter__(self) -> Tuple[Dict, List[Dict]]:
                         length = len(raw_keypoints) // 4
                         for idx in range(length):
                             idx *= 4
-                            x, y, v, conf = raw_keypoints[idx], raw_keypoints[idx + 1], raw_keypoints[idx + 2], raw_keypoints[idx + 3]
+                            x, y, v, conf = raw_keypoints[idx], raw_keypoints[idx + 1], raw_keypoints[idx + 2], \
+                                raw_keypoints[idx + 3]
                             keypoints.extend([float(x), float(y), int(v), conf])  # x, y, v, conf
 
                 # prepare is_group
@@ -261,6 +263,9 @@ def __iter__(self) -> Tuple[Dict, List[Dict]]:
                 if label_type == LabelType.GroundTruth:
                     conf = 1.0
 
+                # prepare caption
+                caption = anno_data.pop("caption", None)
+
                 # finally, add the annotation
                 anno_data = self.format_annotation(category_name,
                                                    label_name,
@@ -273,6 +278,7 @@ def __iter__(self) -> Tuple[Dict, List[Dict]]:
                                                    keypoint_colors=keypoint_colors,
                                                    keypoint_skeleton=keypoint_skeleton,
                                                    keypoint_names=keypoint_names,
+                                                   caption=caption
                                                    )
                 anno_list.append(anno_data)
             yield image, anno_list
@@ -298,6 +304,7 @@ def collect_files(self) -> dict:
         for pred in self.predictions:
             pred_name = pred["name"]
             pred_file = pred["file"]
-            files[f"PRED/{pred_name}"] = pred_file
+            files[f"{DatasetFileType.Prediction}/{pred_name}"] = pred_file
 
+        files[DatasetFileType.Meta] = self.meta_path
         return files
diff --git a/deepdataspace/plugins/tsv/importer.py b/deepdataspace/plugins/tsv/importer.py
@@ -16,7 +16,7 @@
 from deepdataspace.constants import DatasetType
 from deepdataspace.constants import LabelName
 from deepdataspace.constants import LabelType
-from deepdataspace.constants import TSVFileType
+from deepdataspace.constants import DatasetFileType
 from deepdataspace.io.importer import FileImporter
 from deepdataspace.utils.file import create_file_range_url
 
@@ -43,14 +43,14 @@ def __init__(self, dataset_path: str, enforce: bool = False):
 
     def open_files(self):
         for file_tag, file_path in self.dataset.files.items():
-            if file_tag == TSVFileType.GroundTruth or file_tag.startswith(f"{TSVFileType.Prediction}/"):
+            if file_tag == DatasetFileType.GroundTruth or file_tag.startswith(f"{DatasetFileType.Prediction}/"):
                 self._files[file_tag] = {
                     "fp": open(file_path, "r", encoding="utf8"),
                     "line_idx": 0,
                     "byte_idx": 0,
                     "path": file_path
                 }
-            elif file_tag == TSVFileType.Embedding:
+            elif file_tag == DatasetFileType.Embedding:
                 self._files[file_tag] = {
                     "path": file_path
                 }
@@ -189,7 +189,7 @@ def read_line(file_data: dict):
         return image_data_str, image_content_str, line_idx, byte_idx, image_data_off
 
     def load_groundtruth(self) -> Tuple[Union[Dict, None], Union[List[Dict], None]]:
-        file = self._files[TSVFileType.GroundTruth]
+        file = self._files[DatasetFileType.GroundTruth]
         image_data_str, image_content_str, line_idx, byte_idx, image_data_off = self.read_line(file)
         if image_data_str is None:
             return None, None
@@ -247,7 +247,7 @@ def load_prediction(self, image: Dict, pred_name: str):
     def load_predictions(self, image: Dict) -> List[Dict]:
         objects = []
         for file_key in self._files.keys():
-            if not file_key.startswith(f"{TSVFileType.Prediction}/"):
+            if not file_key.startswith(f"{DatasetFileType.Prediction}/"):
                 continue
             obj_list = self.load_prediction(image, file_key)
             objects.extend(obj_list)
@@ -281,10 +281,10 @@ def collect_files(self) -> dict:
             if item.endswith(".pred"):
                 pred_name = item.replace(self.dataset.name, "")[1:]
                 pred_name = os.path.splitext(pred_name)[0]
-                pred_name = f"{TSVFileType.Prediction}/{pred_name}"
+                pred_name = f"{DatasetFileType.Prediction}/{pred_name}"
                 files[pred_name] = file_path
 
             if item.endswith(".embd"):
-                files[TSVFileType.Embedding] = file_path
+                files[DatasetFileType.Embedding] = file_path
 
         return files
diff --git a/deepdataspace/server/resources/api_v1/images.py b/deepdataspace/server/resources/api_v1/images.py
@@ -5,17 +5,23 @@
 """
 
 import json
+import logging
 
+from deepdataspace.constants import DatasetFileType
 from deepdataspace.constants import DatasetStatus
 from deepdataspace.constants import ErrCode
 from deepdataspace.constants import LabelType
 from deepdataspace.model import DataSet
 from deepdataspace.model.image import Image
+from deepdataspace.plugins.coco2017 import COCO2017Importer
 from deepdataspace.utils.http import Argument
 from deepdataspace.utils.http import BaseAPIView
 from deepdataspace.utils.http import format_response
 from deepdataspace.utils.http import parse_arguments
 from deepdataspace.utils.http import raise_exception
+from deepdataspace.constants import DatasetType
+
+logger = logging.getLogger("django")
 
 
 def concat_url(prefix, path):
@@ -26,6 +32,33 @@ def concat_url(prefix, path):
     return f"{prefix}/{path}"
 
 
+def get_meta_module(dataset):
+    """Return what COCO2017Importer.parse_meta yields for the dataset's Meta file, else None."""
+    if dataset.type != DatasetType.COCO2017:
+        return None
+
+    meta_file = dataset.files.get(DatasetFileType.Meta, None)
+    if meta_file is None:
+        return None
+
+    try:
+        return COCO2017Importer.parse_meta(meta_file)
+    except Exception as err:  # a meta file that fails to parse is logged, not raised
+        logger.error(f"parse meta file[{meta_file}] failed: {err}")
+        return None
+
+
+def get_caption_func(dataset):
+    """Return the meta's "caption_generator" when its "dynamic_caption" is truthy, else None."""
+    meta = get_meta_module(dataset)
+    if meta is None:
+        return None
+
+    if not meta["dynamic_caption"]:
+        return None
+    return meta["caption_generator"]
+
+
 class ImagesView(BaseAPIView):
     """
     - GET /api/v1/images
@@ -56,6 +89,8 @@ def get(self, request):
             raise_exception(ErrCode.DatasetNotReadable,
                             f"dataset_id[{dataset_id}] is in status [{dataset.status}] now, try again later")
 
+        caption_generator = get_caption_func(dataset)
+
         filters = {}
         if category_id is not None:
             filters = {"objects": {
@@ -104,6 +139,8 @@ def get(self, request):
                     if obj["segmentation"] is None:
                         obj["segmentation"] = ""
 
+                    obj["caption"] = obj.get("caption") or ""  # tolerate objects that carry no "caption" key
+
                     obj.pop("compare_result", None)
 
                 image_url = image["url"]
@@ -121,6 +158,10 @@ def get(self, request):
                     "url_full_res": image_url_full_res
                 })
 
+                image["caption"] = ""
+                if caption_generator:
+                    image["caption"] = caption_generator(image)
+
                 image_list.append(image)
 
         data = {