diff --git a/deepdataspace/constants.py b/deepdataspace/constants.py
index a8a191c..d11686b 100644
--- a/deepdataspace/constants.py
+++ b/deepdataspace/constants.py
@@ -239,15 +239,16 @@ class ContentEncoding:
     ALL_ = {Plain, Base64}
 
 
-class TSVFileType:
+class DatasetFileType:
     """
     | TSV dataset related file types.
    | TSV dataset format may contain multiple files, each of these types:
     """
 
-    Embedding = "Embedding"  #: .embd file, used by :class:`deepdataspace.plugins.tsv.process.RankByFlags`.
-    Prediction = "Pred"  #: .pred file, used by :class:`deepdataspace.plugins.tsv.importer.TSVImporter`.
-    GroundTruth = LabelName.GroundTruth  #: .tsv file, used by :class:`deepdataspace.plugins.tsv.importer.TSVImporter`.
+    GroundTruth = LabelName.GroundTruth
+    Prediction = "Pred"
+    Embedding = "Embedding"
+    Meta = "Meta"
 
 
 class LabelProjectStatus:
@@ -286,12 +287,12 @@ class LabelProjectRoles:
     ReviewKinds_ = {Reviewer, ReviewLeader}  #: Roles that take part in the reviewing process.
 
     Levels_ = {
-        Owner: 0,
-        Manager: 1,
-        LabelLeader: 2,
+        Owner       : 0,
+        Manager     : 1,
+        LabelLeader : 2,
         ReviewLeader: 3,
-        Labeler: 4,
-        Reviewer: 5
+        Labeler     : 4,
+        Reviewer    : 5
     }  #: The level of every role, smaller number means higher level.
 
 
diff --git a/deepdataspace/io/importer.py b/deepdataspace/io/importer.py
index 3ddf0d7..0ebe96d 100644
--- a/deepdataspace/io/importer.py
+++ b/deepdataspace/io/importer.py
@@ -18,6 +18,7 @@
 from tqdm import tqdm
 
 from deepdataspace import constants
+from deepdataspace.constants import DatasetFileType
 from deepdataspace.constants import LabelName
 from deepdataspace.constants import LabelType
 from deepdataspace.model import Category
@@ -66,6 +67,7 @@ def format_annotation(category: str,
                       keypoint_colors: List[int] = None,
                       keypoint_skeleton: List[int] = None,
                       keypoint_names: List[str] = None,
+                      caption: str = None,
                       confirm_type: int = 0,
                       ):
     """ A helper function to format annotation data.
@@ -83,6 +85,7 @@ def format_annotation(category: str,
                 keypoint_colors=keypoint_colors,
                 keypoint_skeleton=keypoint_skeleton,
                 keypoint_names=keypoint_names,
+                caption=caption,
                 confirm_type=confirm_type,
                 )
 
@@ -284,7 +287,7 @@ def collect_files(self) -> dict:
         Collect the files related to this dataset, {file_tag: file_path}.
         """
 
-        return {LabelName.GroundTruth: self.path}
+        return {DatasetFileType.GroundTruth: self.path}
 
     @staticmethod
     @abc.abstractmethod
diff --git a/deepdataspace/model/image.py b/deepdataspace/model/image.py
index 2471d05..3ff89fd 100644
--- a/deepdataspace/model/image.py
+++ b/deepdataspace/model/image.py
@@ -404,6 +404,7 @@ def batch_add_annotation(self,
                              keypoint_colors: List[int] = None,
                              keypoint_skeleton: List[int] = None,
                              keypoint_names: List[str] = None,
+                             caption: str = None,
                              confirm_type: int = 0,
                              ):
         """ The batch version of add_annotation.
@@ -448,7 +449,8 @@ def batch_add_annotation(self,
             alpha_uri = create_file_url(file_path=alpha_path,
                                         read_mode=FileReadMode.Binary)
 
-            anno_obj = Object(label_name=label, label_type=label_type, category_name=category,
+            anno_obj = Object(label_name=label, label_type=label_type,
+                              category_name=category, caption=caption,
                               bounding_box=bbox, segmentation=segmentation, alpha=alpha_uri,
                               points=points, lines=lines, point_colors=colors, point_names=names,
                               conf=conf, is_group=is_group, confirm_type=confirm_type)
diff --git a/deepdataspace/model/object.py b/deepdataspace/model/object.py
index 4b27b01..55133a9 100644
--- a/deepdataspace/model/object.py
+++ b/deepdataspace/model/object.py
@@ -50,6 +50,8 @@ class Object(BaseModel):
         The point colors of the object.
     point_names: list
         The point names of the object.
+    caption: str
+        The caption of the object.
     confirm_type: int
         The image confirm type, 0 for unconfirmed, 1 for confirmed, 2 for rejected.
     compare_result: dict
@@ -81,8 +83,9 @@ def get_collection(cls, *args, **kwargs):
     alpha: Optional[str] = ""
     points: Optional[List[Union[float, int]]] = []
     lines: Optional[List[int]] = []
-    point_colors: Optional[List[str]] = []
+    point_colors: Optional[List[int]] = []
     point_names: Optional[List[str]] = []
+    caption: Optional[str] = ""
     confirm_type: Optional[int] = 0  # the image confirm type, 0 no confirm required, 1 gt may be fn, 2 pred may be fp
     compare_result: Optional[Dict[str, str]] = {}  # {"90": "FP", ..., "10": "OK"}
     matched_det_idx: Optional[int] = None  # The matched ground truth index, for prediction objects only.
diff --git a/deepdataspace/plugins/coco2017/importer.py b/deepdataspace/plugins/coco2017/importer.py
index 6815f65..c300d57 100644
--- a/deepdataspace/plugins/coco2017/importer.py
+++ b/deepdataspace/plugins/coco2017/importer.py
@@ -5,12 +5,11 @@
 import json
 import logging
 import os
-from multiprocessing import Manager
-from multiprocessing import Process
 from typing import Dict
 from typing import List
 from typing import Tuple
 
+from deepdataspace.constants import DatasetFileType
 from deepdataspace.constants import DatasetType
 from deepdataspace.constants import LabelName
 from deepdataspace.constants import LabelType
@@ -80,10 +79,12 @@ def _parse_meta(meta_path: str):
         assert os.path.isdir(image_root) and os.path.exists(image_root)
 
         info = {
-            "dataset_name": dataset_name,
-            "ground_truth": ground_truth,
-            "predictions" : predictions,
-            "image_root"  : image_root
+            "dataset_name"     : dataset_name,
+            "ground_truth"     : ground_truth,
+            "predictions"      : predictions,
+            "image_root"       : image_root,
+            "dynamic_caption"  : getattr(module, "dynamic_caption", False),
+            "caption_generator": getattr(module, "caption_generator", None),
         }
         return info
 
@@ -102,9 +103,9 @@ def load_ground_truth(self):
             coco_data = json.load(fp)
 
         images = coco_data["images"]
-        images = {i["id"]: i for i in images}
+        self._images = {i["id"]: i for i in images}
 
-        categories = coco_data["categories"]
+        categories = coco_data.get("categories", [])
         self._categories = {c["id"]: c for c in categories}
 
         annotations = coco_data["annotations"]
@@ -115,8 +116,6 @@ def load_ground_truth(self):
             anno_list = self._annotations.setdefault(image_id, [])
             anno_list.append(annotation)
 
-            self._images[image_id] = images[image_id]
-
     def load_predictions(self):
         for file_tag, file_path in self.dataset.files.items():
             if not file_tag.startswith("PRED/"):
@@ -158,25 +157,23 @@ def __iter__(self) -> Tuple[Dict, List[Dict]]:
             image_id = coco_image_data["id"]
             coco_anno_list = self._annotations.get(image_id, [])
             # list_sample = [
-            #     {'segmentation' : [
-            #         [240.86, 211.31, 240.16, 197.19, 236.98, 192.26, 237.34, 187.67, 245.8, 188.02, 243.33, 176.02,
-            #          250.39,
-            #          186.96, 251.8, 166.85, 255.33, 142.51, 253.21, 190.49, 261.68, 183.08, 258.86, 191.2, 260.98,
-            #          206.37,
-            #          254.63, 199.66, 252.51, 201.78, 251.8, 212.01]],
-            #      'area'       : 531.8071000000001,
-            #      'iscrowd'    : 0,
-            #      'image_id'   : 139,
-            #      'bbox'       : [236.98, 142.51, 24.7, 69.5],
-            #      'category_id': 64,
-            #      'id'         : 26547,
-            #      # 'label_name' : 'GroundTruth',
-            #      # 'label_type' : 'GT'
-            #      }
+            #     {
+            #         'segmentation' : [
+            #             [x1, y1, x2, y2 ...],
+            #         ],
+            #         'area'         : 531.8071000000001,
+            #         'iscrowd'      : 0,
+            #         'image_id'     : 139,
+            #         'bbox'         : [x, y, w, h],
+            #         'category_id'  : 64,
+            #         'keypoints'    : [x1, y1, v1, conf1, x2, y2, v2, conf2, ...],
+            #         'caption'      : 'A giraffe eating food from the top of a tree.',
+            #         'id'           : 26547,
+            #     },
+            #     ...
             # ]
 
             # prepare image uri
-            uri = None
 
             # trying to find the image file in local file system
 
@@ -211,9 +208,13 @@ def __iter__(self) -> Tuple[Dict, List[Dict]]:
                 label_type = anno_data.pop("label_type", LabelType.GroundTruth)
 
                 # prepare category
-                category_id = anno_data.pop("category_id")
-                category = self._categories[category_id]
-                category_name = category["name"]
+                category_id = anno_data.pop("category_id", None)
+                if category_id:
+                    category = self._categories[category_id]
+                    category_name = category["name"]
+                else:
+                    category = {}
+                    category_name = ""
 
                 # prepare bbox
                 bbox = anno_data.pop("bbox", None)
@@ -250,7 +251,8 @@ def __iter__(self) -> Tuple[Dict, List[Dict]]:
                     length = len(raw_keypoints) // 4
                     for idx in range(length):
                         idx *= 4
-                        x, y, v, conf = raw_keypoints[idx], raw_keypoints[idx + 1], raw_keypoints[idx + 2], raw_keypoints[idx + 3]
+                        x, y, v, conf = raw_keypoints[idx], raw_keypoints[idx + 1], raw_keypoints[idx + 2], \
+                            raw_keypoints[idx + 3]
                         keypoints.extend([float(x), float(y), int(v), conf])  # x, y, v, conf
 
                 # prepare is_group
@@ -261,6 +263,9 @@ def __iter__(self) -> Tuple[Dict, List[Dict]]:
                 if label_type == LabelType.GroundTruth:
                     conf = 1.0
 
+                # prepare caption
+                caption = anno_data.pop("caption", None)
+
                 # finally, add the annotation
                 anno_data = self.format_annotation(category_name,
                                                    label_name,
@@ -273,6 +278,7 @@ def __iter__(self) -> Tuple[Dict, List[Dict]]:
                                                    keypoint_colors=keypoint_colors,
                                                    keypoint_skeleton=keypoint_skeleton,
                                                    keypoint_names=keypoint_names,
+                                                   caption=caption
                                                    )
                 anno_list.append(anno_data)
             yield image, anno_list
@@ -298,6 +304,7 @@ def collect_files(self) -> dict:
         for pred in self.predictions:
             pred_name = pred["name"]
             pred_file = pred["file"]
-            files[f"PRED/{pred_name}"] = pred_file
+            files[f"{DatasetFileType.Prediction}/{pred_name}"] = pred_file
 
+        files[DatasetFileType.Meta] = self.meta_path
         return files
diff --git a/deepdataspace/plugins/tsv/importer.py b/deepdataspace/plugins/tsv/importer.py
index 36950ad..e243ecb 100644
--- a/deepdataspace/plugins/tsv/importer.py
+++ b/deepdataspace/plugins/tsv/importer.py
@@ -16,7 +16,7 @@
 from deepdataspace.constants import DatasetType
 from deepdataspace.constants import LabelName
 from deepdataspace.constants import LabelType
-from deepdataspace.constants import TSVFileType
+from deepdataspace.constants import DatasetFileType
 from deepdataspace.io.importer import FileImporter
 from deepdataspace.utils.file import create_file_range_url
 
@@ -43,14 +43,14 @@ def __init__(self, dataset_path: str, enforce: bool = False):
 
     def open_files(self):
         for file_tag, file_path in self.dataset.files.items():
-            if file_tag == TSVFileType.GroundTruth or file_tag.startswith(f"{TSVFileType.Prediction}/"):
+            if file_tag == DatasetFileType.GroundTruth or file_tag.startswith(f"{DatasetFileType.Prediction}/"):
                 self._files[file_tag] = {
                     "fp": open(file_path, "r", encoding="utf8"),
                     "line_idx": 0,
                     "byte_idx": 0,
                     "path": file_path
                 }
-            elif file_tag == TSVFileType.Embedding:
+            elif file_tag == DatasetFileType.Embedding:
                 self._files[file_tag] = {
                     "path": file_path
                 }
@@ -189,7 +189,7 @@ def read_line(file_data: dict):
         return image_data_str, image_content_str, line_idx, byte_idx, image_data_off
 
     def load_groundtruth(self) -> Tuple[Union[Dict, None], Union[List[Dict], None]]:
-        file = self._files[TSVFileType.GroundTruth]
+        file = self._files[DatasetFileType.GroundTruth]
         image_data_str, image_content_str, line_idx, byte_idx, image_data_off = self.read_line(file)
         if image_data_str is None:
             return None, None
@@ -247,7 +247,7 @@ def load_prediction(self, image: Dict, pred_name: str):
     def load_predictions(self, image: Dict) -> List[Dict]:
         objects = []
         for file_key in self._files.keys():
-            if not file_key.startswith(f"{TSVFileType.Prediction}/"):
+            if not file_key.startswith(f"{DatasetFileType.Prediction}/"):
                 continue
             obj_list = self.load_prediction(image, file_key)
             objects.extend(obj_list)
@@ -281,10 +281,10 @@ def collect_files(self) -> dict:
             if item.endswith(".pred"):
                 pred_name = item.replace(self.dataset.name, "")[1:]
                 pred_name = os.path.splitext(pred_name)[0]
-                pred_name = f"{TSVFileType.Prediction}/{pred_name}"
+                pred_name = f"{DatasetFileType.Prediction}/{pred_name}"
                 files[pred_name] = file_path
 
             if item.endswith(".embd"):
-                files[TSVFileType.Embedding] = file_path
+                files[DatasetFileType.Embedding] = file_path
 
         return files
diff --git a/deepdataspace/server/resources/api_v1/images.py b/deepdataspace/server/resources/api_v1/images.py
index 9aaa91c..da93987 100644
--- a/deepdataspace/server/resources/api_v1/images.py
+++ b/deepdataspace/server/resources/api_v1/images.py
@@ -5,17 +5,23 @@
 """
 
 import json
+import logging
 
+from deepdataspace.constants import DatasetFileType
 from deepdataspace.constants import DatasetStatus
 from deepdataspace.constants import ErrCode
 from deepdataspace.constants import LabelType
 from deepdataspace.model import DataSet
 from deepdataspace.model.image import Image
+from deepdataspace.plugins.coco2017 import COCO2017Importer
 from deepdataspace.utils.http import Argument
 from deepdataspace.utils.http import BaseAPIView
 from deepdataspace.utils.http import format_response
 from deepdataspace.utils.http import parse_arguments
 from deepdataspace.utils.http import raise_exception
+from deepdataspace.constants import DatasetType
+
+logger = logging.getLogger("django")
 
 
 def concat_url(prefix, path):
@@ -26,6 +32,33 @@ def concat_url(prefix, path):
     return f"{prefix}/{path}"
 
 
+def get_meta_module(dataset):
+    is_coco_dataset = dataset.type == DatasetType.COCO2017
+    if not is_coco_dataset:
+        return None
+
+    meta_file = dataset.files.get(DatasetFileType.Meta, None)
+    if meta_file is None:
+        return None
+
+    try:
+        meta_module = COCO2017Importer.parse_meta(meta_file)
+    except Exception as err:
+        logger.error(f"parse meta file[{meta_file}] failed: {err}")
+    else:
+        return meta_module
+
+
+def get_caption_func(dataset):
+    meta_module = get_meta_module(dataset)
+    if meta_module is None:
+        return None
+
+    if meta_module["dynamic_caption"]:
+        return meta_module["caption_generator"]
+    return None
+
+
 class ImagesView(BaseAPIView):
     """
     - GET /api/v1/images
@@ -56,6 +89,8 @@ def get(self, request):
             raise_exception(ErrCode.DatasetNotReadable,
                             f"dataset_id[{dataset_id}] is in status [{dataset.status}] now, try again later")
 
+        caption_generator = get_caption_func(dataset)
+
         filters = {}
         if category_id is not None:
             filters = {"objects": {
@@ -104,6 +139,8 @@ def get(self, request):
                 if obj["segmentation"] is None:
                     obj["segmentation"] = ""
 
+                obj["caption"] = obj["caption"] or ""
+
                 obj.pop("compare_result", None)
 
             image_url = image["url"]
@@ -121,6 +158,10 @@ def get(self, request):
                 "url_full_res": image_url_full_res
             })
 
+            image["caption"] = ""
+            if caption_generator:
+                image["caption"] = caption_generator(image)
+
             image_list.append(image)
 
         data = {
diff --git a/samples/coco_dataset_meta.py b/samples/coco_dataset_meta.py
index 47a315d..d6eb890 100644
--- a/samples/coco_dataset_meta.py
+++ b/samples/coco_dataset_meta.py
@@ -1,7 +1,6 @@
""" -This file is a coco meta file, which is used to describe the basic information of a coco dataset. -Put it under the DATA_DIR directory specified when starting DDS, DDS will automatically scan this meta file -and import the dataset. +This file is a coco meta file instructing DDS how to import a coco dataset. +Put it under the DATA_DIR directory specified when starting DDS, DDS will recognize file and import the dataset. You can also use the ddsop command to import or delete the dataset: ```shell ddsop import_one /path/to/this/meta/file.py @@ -10,7 +9,7 @@ """ is_coco_meta = True # Mandatory. -# You MUST declare this variable in a coco meta file, otherwise DDS will ignore it. +# You must declare this variable, otherwise DDS will ignore it. dataset_name = "instances_val2017" # Mandatory. # The name of the dataset. @@ -35,17 +34,61 @@ # The DDS will try to locate the image file under this directory, according to the "file_name" field. dynamic_caption = False # Optional. + + # Indicating whether the caption is dynamic or not. # If it is True, DDS will call the `caption_generator` function to generate the caption for every image -# while your are browsing the dataset. +# while you are browsing the dataset. -def caption_generator(image, objects): +def caption_generator(image): """ This function is used to generate a caption for an image dynamically while browsing the dataset. It only works when `dynamic_caption = True` in this meta file. :param image: The image object. - :param objects: The objects in the image. + + ```json + { + "idx": 0, + "id": 179765, + "width": 640, + "height": 480, + "objects": [ + { + "label_name": "GroundTruth", + "label_id": "aa", + "category_id": "bb", + "category_name": "", + "conf": 1.0, + "is_group": null, + "bounding_box": { + "xmin": 0.0, + "ymin": 0.0, + "xmax": 1.0, + "ymax": 1.0 + }, + "segmentation": "", + "points": [x1, y1, x2, y2, x3, y3...], + "lines": [l1_beg, l1_end, l2_beg, l2_end, l3_beg, l3_end...], + "point_colors": [r1, g1, b1, r2, b2, g2, r3, g3, b3...], + "point_names": ["point1", "point2", "point3"...], + "caption": "A black Honda motorcycle parked in front of a garage." + } + ], + "url": "https://example.com/thumb.jpg", + "url_full_res": "https://example.com/picture.jpg", + "desc": "image description", + "metadata": { + "license": 3, + "date_captured": "2013-11-15 14:02:51", + "flickr_url": "http://farm3.staticflickr.com/2824/10213933686_6936eb402b_z.jpg", + "id": 179765 + } + } + ``` """ - return image["caption"] + objects = image["objects"] + if objects: + return objects[0]["caption"] + return "dynamic caption is working"