
Commit

Merge pull request #143 from IDEA-Research/dev
release: v0.11.0
imhuwq authored Mar 28, 2024
2 parents 802da5d + 042fffa commit afa5c2d
Showing 64 changed files with 755 additions and 655 deletions.
27 changes: 20 additions & 7 deletions deepdataspace/io/importer.py
@@ -17,8 +17,8 @@
from typing import Type
from typing import Union

from tqdm import tqdm
from pymongo import WriteConcern
from tqdm import tqdm

from deepdataspace import constants
from deepdataspace.constants import AnnotationType
@@ -325,9 +325,22 @@ def pre_run(self):

def post_run(self):
"""
A post-run hook for subclass importers to clean up data.
A post-run hook for subclass importers.
"""
logger.info(f"Add cover to dataset [{self.dataset.name}]@[{self.dataset.id}]")
self.dataset.add_cover()

logger.info(f"Add indices to dataset [{self.dataset.name}]@[{self.dataset.id}]")
dataset_id = self.dataset.id
Image(dataset_id).get_collection().create_index([
("objects.category_id", 1),
])

Image(dataset_id).get_collection().create_index([
("idx", 1)
])

logger.info(f"Set status ready for dataset [{self.dataset.name}]@[{self.dataset.id}]")
DataSet.update_one({"id": self.dataset.id}, {"status": DatasetStatus.Ready})
self.dataset = DataSet.find_one({"id": self.dataset.id})
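
Note: a minimal sketch of the query shapes the two new indexes above are meant to serve, assuming a pymongo collection equivalent to the one returned by Image(dataset_id).get_collection(); the connection details and filter values are placeholders.

from pymongo import MongoClient

# Placeholder connection; in deepdataspace this would be the per-dataset image collection.
collection = MongoClient()["deepdataspace"]["images_of_dataset"]

# Served by the ("objects.category_id", 1) index: count/list images that
# contain at least one object of a given category.
category_filter = {"objects.category_id": "example_category_id"}
matched = collection.count_documents(category_filter)

# Served by the ("idx", 1) index: offset-style pagination over the idx field,
# mirroring the filters["idx"] = {"$gte": offset} queries in the images API below.
page = collection.find({"idx": {"$gte": 0}}).sort("idx", 1).limit(100)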

@@ -348,13 +361,13 @@ def load_existing_user_data(self):
"""

pipeline = [
{"$project": {"flag": 1,
{"$project": {"flag" : 1,
"flag_ts": 1,
"objects": {
"$filter": {
"input": "$objects",
"as": "object",
"cond": {
"as" : "object",
"cond" : {
"$eq": ["$$object.label_type", LabelType.User]
}
}
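
Note: an illustrative, standalone way to exercise the $project/$filter pipeline above with pymongo, so only user-labelled objects survive; the collection is an assumption standing in for the per-dataset image collection.

from pymongo import MongoClient
from deepdataspace.constants import LabelType

collection = MongoClient()["deepdataspace"]["images_of_dataset"]   # placeholder collection

pipeline = [
    {"$project": {
        "flag": 1,
        "flag_ts": 1,
        "objects": {
            "$filter": {
                "input": "$objects",
                "as": "object",
                "cond": {"$eq": ["$$object.label_type", LabelType.User]},
            }
        },
    }},
]
for doc in collection.aggregate(pipeline):
    user_objects = doc["objects"] or []    # only user-labelled objects survive the $filter
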
@@ -374,7 +387,7 @@

self._user_data[image_id] = {
"objects": user_objects,
"flag": flag,
"flag" : flag,
"flag_ts": flag_ts,
}

@@ -400,7 +413,7 @@ def run_import(self):

desc = f"dataset[{self.dataset.name}@{self.dataset.id}] import progress"
for (image, anno_list) in tqdm(self, desc=desc, unit=" images"):
# for (image, anno_list) in self:
# for (image, anno_list) in self:
image = self.dataset_import_image(self.dataset, **image)
self.image_add_user_data(image)
for anno in anno_list:
7 changes: 4 additions & 3 deletions deepdataspace/plugins/coco2017/importer.py
@@ -34,6 +34,8 @@ def __init__(self, meta_path: str, enforce: bool = False):
info = self.parse_meta(meta_path)
if info is None:
raise RuntimeError(f"Cannot import coco dataset: {meta_path}")
else:
logger.info(f"Successfully parsed meta file {meta_path}: {info}")

dataset_name = info["dataset_name"]
self.ground_truth = info["ground_truth"]
@@ -100,9 +102,8 @@ def parse_meta(meta_path: str):
logger.error(traceback.format_exc())
logger.error(f"Failed to parse meta file {meta_path}: {err}")
return None

logger.info(f"Successfully parsed meta file {meta_path}: {info}")
return info
else:
return info
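
Note: a hedged sketch of the calling pattern this hunk implies, mirroring the constructor logic above; the meta path is a placeholder and parse_meta is assumed to be callable as a class-level helper.

meta_path = "/path/to/coco_meta_file"                         # placeholder path
info = COCO2017Importer.parse_meta(meta_path)                 # assumed static/class-level helper
if info is None:
    raise RuntimeError(f"Cannot import coco dataset: {meta_path}")
else:
    logger.info(f"Successfully parsed meta file {meta_path}: {info}")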

def load_ground_truth(self):
with open(self.ground_truth, "r", encoding="utf8") as fp:
127 changes: 81 additions & 46 deletions deepdataspace/server/resources/api_v1/images.py
@@ -6,11 +6,12 @@

import json
import logging
from random import randint

from deepdataspace.constants import DatasetFileType
from deepdataspace.constants import DatasetStatus
from deepdataspace.constants import DatasetType
from deepdataspace.constants import ErrCode
from deepdataspace.constants import LabelType
from deepdataspace.model import DataSet
from deepdataspace.model.image import Image
from deepdataspace.plugins.coco2017 import COCO2017Importer
@@ -19,7 +20,6 @@
from deepdataspace.utils.http import format_response
from deepdataspace.utils.http import parse_arguments
from deepdataspace.utils.http import raise_exception
from deepdataspace.constants import DatasetType

logger = logging.getLogger("django")

@@ -68,9 +68,9 @@ class ImagesView(BaseAPIView):
Argument("dataset_id", str, Argument.QUERY, required=True),
Argument("category_id", str, Argument.QUERY, required=False),
Argument("flag", int, Argument.QUERY, required=False),
Argument("label_id", str, Argument.QUERY, required=False),
Argument("page_num", Argument.PositiveInt, Argument.QUERY, default=1),
Argument("page_size", Argument.PositiveInt, Argument.QUERY, default=100)
Argument("page_size", Argument.PositiveInt, Argument.QUERY, default=100),
Argument("offset", int, Argument.QUERY, required=False, default=None),
]
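
Note: a usage sketch for the new offset query argument, with assumed host and dataset id; offset=-1 asks the server to pick a random starting idx, a non-negative offset starts from that idx, and omitting offset falls back to page_num/page_size paging.

import requests                                   # any HTTP client works; requests assumed

base_url = "http://localhost:8000/api/v1/images"  # host/port are assumptions
params = {
    "dataset_id": "<dataset_id>",                 # placeholder id
    "page_size": 100,
    "offset": -1,                                 # -1 => server picks a random idx offset
}
payload = requests.get(base_url, params=params).json()
# The view's data dict carries image_list, offset, page_size, page_num and total;
# the exact envelope around it depends on format_response().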

def get(self, request):
@@ -79,7 +79,7 @@ def get(self, request):
- GET /api/v1/images
"""

dataset_id, category_id, flag, label_id, page_num, page_size = parse_arguments(request, self.get_args)
dataset_id, category_id, flag, page_num, page_size, offset = parse_arguments(request, self.get_args)

dataset = DataSet.find_one({"_id": dataset_id})
if dataset is None:
@@ -92,73 +92,108 @@

filters = {}
if category_id is not None:
filters = {"objects": {
"$elemMatch": {
"category_id": category_id,
"label_type" : {"$in": [LabelType.User, LabelType.GroundTruth]}}}
}
filters["objects.category_id"] = category_id

if flag is not None:
filters["flag"] = flag

total = Image(dataset_id).count_num(filters)

image_list = []
offset = max(0, page_size * (page_num - 1))
if offset is None:
skip = max(0, page_size * (page_num - 1))
else:
skip = 0
page_num = None
if offset == -1: # generate a random offset
includes = {"_id": 1, "idx": 1}
max_idx = Image(dataset_id).find_many(filters, includes,
sort=[("idx", -1)],
skip=0, size=1,
to_dict=True)
max_idx = list(max_idx)[0]["idx"]

min_idx = Image(dataset_id).find_many(filters, includes,
sort=[("idx", 1)],
skip=0, size=1,
to_dict=True)
min_idx = list(min_idx)[0]["idx"]

offset = randint(min_idx, max_idx)

# try the best to return at least page_size objects
if max_idx - offset + 1 < page_size:
offset = max(min_idx, max_idx - page_size + 1)
filters["idx"] = {"$gte": offset}
elif offset >= 0: # query by specified offset
filters["idx"] = {"$gte": offset}
else:
raise_exception(ErrCode.BadRequest, f"invalid offset value[{offset}]")

if skip > total:
data = {
"image_list": [],
"offset" : offset,
"page_size" : page_size,
"page_num" : page_num,
"total" : total
}
return format_response(data, enable_cache=True)

includes = {"id", "idx", "flag", "objects", "metadata", "type", "width", "height", "url",
"url_full_res"}
includes = {"id", "idx", "flag", "objects", "metadata",
"type", "width", "height", "url", "url_full_res"}
includes = {i: 1 for i in includes}

req_scheme = request.scheme
req_host = request.META["HTTP_HOST"]
req_prefix = f"{req_scheme}://{req_host}"

if offset <= total:
for image in Image(dataset_id).find_many(filters, includes,
sort=[("idx", 1)],
skip=offset,
size=page_size,
to_dict=True):
for obj in image["objects"]:
obj["source"] = obj["label_type"] # TODO keep for compatibility, delete this in the future
image_list = []
for image in Image(dataset_id).find_many(filters,
includes,
sort=[("idx", 1)],
skip=skip,
size=page_size,
to_dict=True):
for obj in image["objects"]:
obj["source"] = obj["label_type"] # TODO keep for compatibility, delete this in the future

alpha = obj.get("alpha", "")
if alpha is None:
obj["alpha"] = ""
elif not alpha.startswith("http"):
obj["alpha"] = f"{req_prefix}{alpha}"
alpha = obj.get("alpha", "")
if alpha is None:
obj["alpha"] = ""
elif not alpha.startswith("http"):
obj["alpha"] = f"{req_prefix}{alpha}"

if obj["segmentation"] is None:
obj["segmentation"] = ""
if obj["segmentation"] is None:
obj["segmentation"] = ""

obj["caption"] = obj["caption"] or ""
obj["caption"] = obj["caption"] or ""

obj.pop("compare_result", None)
obj.pop("compare_result", None)

image_url = image["url"]
image_url = concat_url(req_prefix, image_url)
image_url = image["url"]
image_url = concat_url(req_prefix, image_url)

image_url_full_res = image["url_full_res"] or image_url
image_url_full_res = concat_url(req_prefix, image_url_full_res)
image_url_full_res = image["url_full_res"] or image_url
image_url_full_res = concat_url(req_prefix, image_url_full_res)

desc = image.pop("metadata") or "{}"
desc = image.pop("metadata") or "{}"

image.update({
"desc" : desc,
"metadata" : json.loads(desc),
"url" : image_url,
"url_full_res": image_url_full_res
})
image.update({
"desc" : desc,
"metadata" : json.loads(desc),
"url" : image_url,
"url_full_res": image_url_full_res
})

image["caption"] = ""
if caption_generator:
image["caption"] = caption_generator(image)
image["caption"] = ""
if caption_generator:
image["caption"] = caption_generator(image)

image_list.append(image)
image_list.append(image)

data = {
"image_list": image_list,
"offset" : offset,
"page_size" : page_size,
"page_num" : page_num,
"total" : total
1 change: 0 additions & 1 deletion deepdataspace/server/static/20.4f772983.async.js.map

This file was deleted.

1 change: 0 additions & 1 deletion deepdataspace/server/static/222.a6c6168c.async.js.map

This file was deleted.

1 change: 0 additions & 1 deletion deepdataspace/server/static/233.9b953a00.async.js.map

This file was deleted.

1 change: 0 additions & 1 deletion deepdataspace/server/static/422.622bc3b6.async.js.map

This file was deleted.

1 change: 0 additions & 1 deletion deepdataspace/server/static/742.57cebfa0.async.js.map

This file was deleted.
