release: v0.11.0 #143

Merged: 16 commits, Mar 28, 2024
27 changes: 20 additions & 7 deletions deepdataspace/io/importer.py
@@ -17,8 +17,8 @@
 from typing import Type
 from typing import Union
 
-from tqdm import tqdm
 from pymongo import WriteConcern
+from tqdm import tqdm
 
 from deepdataspace import constants
 from deepdataspace.constants import AnnotationType
@@ -325,9 +325,22 @@ def pre_run(self):
 
     def post_run(self):
         """
-        A post-run hook for subclass importers to clean up data.
+        A post-run hook for subclass importers.
         """
+        logger.info(f"Add cover to dataset [{self.dataset.name}]@[{self.dataset.id}]")
         self.dataset.add_cover()
+
+        logger.info(f"Add indices to dataset [{self.dataset.name}]@[{self.dataset.id}]")
+        dataset_id = self.dataset.id
+        Image(dataset_id).get_collection().create_index([
+            ("objects.category_id", 1),
+        ])
+
+        Image(dataset_id).get_collection().create_index([
+            ("idx", 1)
+        ])
+
+        logger.info(f"Set status ready for dataset [{self.dataset.name}]@[{self.dataset.id}]")
         DataSet.update_one({"id": self.dataset.id}, {"status": DatasetStatus.Ready})
         self.dataset = DataSet.find_one({"id": self.dataset.id})
 
@@ -348,13 +361,13 @@ def load_existing_user_data(self):
         """
 
         pipeline = [
-            {"$project": {"flag": 1,
+            {"$project": {"flag" : 1,
                           "flag_ts": 1,
                           "objects": {
                               "$filter": {
                                   "input": "$objects",
-                                  "as": "object",
-                                  "cond": {
+                                  "as" : "object",
+                                  "cond" : {
                                       "$eq": ["$$object.label_type", LabelType.User]
                                   }
                               }
@@ -374,7 +387,7 @@
 
             self._user_data[image_id] = {
                 "objects": user_objects,
-                "flag": flag,
+                "flag" : flag,
                 "flag_ts": flag_ts,
             }
 
@@ -400,7 +413,7 @@ def run_import(self):
 
         desc = f"dataset[{self.dataset.name}@{self.dataset.id}] import progress"
         for (image, anno_list) in tqdm(self, desc=desc, unit=" images"):
-        # for (image, anno_list) in self:
+            # for (image, anno_list) in self:
             image = self.dataset_import_image(self.dataset, **image)
             self.image_add_user_data(image)
             for anno in anno_list:
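Note on the importer changes above: post_run() now creates two MongoDB indexes before marking the dataset Ready, one on objects.category_id and one on idx. A minimal sketch of the query shapes those indexes serve, assuming a plain pymongo client for illustration (inside the project the handle would come from Image(dataset_id).get_collection(); the database and collection names below are hypothetical):

```python
from pymongo import MongoClient

# Hypothetical handle; in deepdataspace this is Image(dataset_id).get_collection().
images = MongoClient()["deepdataspace"]["images"]

# Category filtering in the images API now matches the flat dotted path,
# which the ("objects.category_id", 1) index serves directly.
by_category = images.find({"objects.category_id": "some_category_id"})

# Offset-style paging seeks on idx rather than skipping documents,
# so the ("idx", 1) index turns the lookup into a cheap range scan.
window = images.find({"idx": {"$gte": 1000}}).sort("idx", 1).limit(100)
```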
7 changes: 4 additions & 3 deletions deepdataspace/plugins/coco2017/importer.py
@@ -34,6 +34,8 @@ def __init__(self, meta_path: str, enforce: bool = False):
         info = self.parse_meta(meta_path)
         if info is None:
             raise RuntimeError(f"Cannot import coco dataset: {meta_path}")
+        else:
+            logger.info(f"Successfully parsed meta file {meta_path}: {info}")
 
         dataset_name = info["dataset_name"]
         self.ground_truth = info["ground_truth"]
@@ -100,9 +102,8 @@ def parse_meta(meta_path: str):
             logger.error(traceback.format_exc())
             logger.error(f"Failed to parse meta file {meta_path}: {err}")
             return None
-
-        logger.info(f"Successfully parsed meta file {meta_path}: {info}")
-        return info
+        else:
+            return info
 
     def load_ground_truth(self):
         with open(self.ground_truth, "r", encoding="utf8") as fp:
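The parse_meta() change above leans on the try statement's else clause, which runs only when the try body raised nothing, while the success log moves into __init__. A compact stand-in showing the pattern (hypothetical JSON-based parser, not the PR's actual meta format):

```python
import json

def parse_meta(meta_path: str):
    # Stand-in for the real parser: any exception takes the except path,
    # a clean parse falls through to the else branch and returns the info.
    try:
        with open(meta_path, "r", encoding="utf8") as fp:
            info = json.load(fp)
    except Exception as err:
        print(f"Failed to parse meta file {meta_path}: {err}")
        return None
    else:
        return info
```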
127 changes: 81 additions & 46 deletions deepdataspace/server/resources/api_v1/images.py
@@ -6,11 +6,12 @@
 
 import json
 import logging
+from random import randint
 
 from deepdataspace.constants import DatasetFileType
 from deepdataspace.constants import DatasetStatus
+from deepdataspace.constants import DatasetType
 from deepdataspace.constants import ErrCode
-from deepdataspace.constants import LabelType
 from deepdataspace.model import DataSet
 from deepdataspace.model.image import Image
 from deepdataspace.plugins.coco2017 import COCO2017Importer
@@ -19,7 +20,6 @@
 from deepdataspace.utils.http import format_response
 from deepdataspace.utils.http import parse_arguments
 from deepdataspace.utils.http import raise_exception
-from deepdataspace.constants import DatasetType
 
 logger = logging.getLogger("django")
 
@@ -68,9 +68,9 @@ class ImagesView(BaseAPIView):
         Argument("dataset_id", str, Argument.QUERY, required=True),
         Argument("category_id", str, Argument.QUERY, required=False),
         Argument("flag", int, Argument.QUERY, required=False),
-        Argument("label_id", str, Argument.QUERY, required=False),
         Argument("page_num", Argument.PositiveInt, Argument.QUERY, default=1),
-        Argument("page_size", Argument.PositiveInt, Argument.QUERY, default=100)
+        Argument("page_size", Argument.PositiveInt, Argument.QUERY, default=100),
+        Argument("offset", int, Argument.QUERY, required=False, default=None),
     ]
 
     def get(self, request):
@@ -79,7 +79,7 @@ def get(self, request):
         - GET /api/v1/images
         """
 
-        dataset_id, category_id, flag, label_id, page_num, page_size = parse_arguments(request, self.get_args)
+        dataset_id, category_id, flag, page_num, page_size, offset = parse_arguments(request, self.get_args)
 
         dataset = DataSet.find_one({"_id": dataset_id})
         if dataset is None:
@@ -92,73 +92,108 @@
 
         filters = {}
         if category_id is not None:
-            filters = {"objects": {
-                "$elemMatch": {
-                    "category_id": category_id,
-                    "label_type" : {"$in": [LabelType.User, LabelType.GroundTruth]}}}
-            }
+            filters["objects.category_id"] = category_id
 
         if flag is not None:
             filters["flag"] = flag
 
         total = Image(dataset_id).count_num(filters)
 
-        image_list = []
-        offset = max(0, page_size * (page_num - 1))
+        if offset is None:
+            skip = max(0, page_size * (page_num - 1))
+        else:
+            skip = 0
+            page_num = None
+            if offset == -1:  # generate a random offset
+                includes = {"_id": 1, "idx": 1}
+                max_idx = Image(dataset_id).find_many(filters, includes,
+                                                      sort=[("idx", -1)],
+                                                      skip=0, size=1,
+                                                      to_dict=True)
+                max_idx = list(max_idx)[0]["idx"]
+
+                min_idx = Image(dataset_id).find_many(filters, includes,
+                                                      sort=[("idx", 1)],
+                                                      skip=0, size=1,
+                                                      to_dict=True)
+                min_idx = list(min_idx)[0]["idx"]
+
+                offset = randint(min_idx, max_idx)
+
+                # try the best to return at least page_size objects
+                if max_idx - offset + 1 < page_size:
+                    offset = max(min_idx, max_idx - page_size + 1)
+                filters["idx"] = {"$gte": offset}
+            elif offset >= 0:  # query by specified offset
+                filters["idx"] = {"$gte": offset}
+            else:
+                raise_exception(ErrCode.BadRequest, f"invalid offset value[{offset}]")
+
+        if skip > total:
+            data = {
+                "image_list": [],
+                "offset" : offset,
+                "page_size" : page_size,
+                "page_num" : page_num,
+                "total" : total
+            }
+            return format_response(data, enable_cache=True)
 
-        includes = {"id", "idx", "flag", "objects", "metadata", "type", "width", "height", "url",
-                    "url_full_res"}
+        includes = {"id", "idx", "flag", "objects", "metadata",
+                    "type", "width", "height", "url", "url_full_res"}
         includes = {i: 1 for i in includes}
 
         req_scheme = request.scheme
         req_host = request.META["HTTP_HOST"]
         req_prefix = f"{req_scheme}://{req_host}"
 
-        if offset <= total:
-            for image in Image(dataset_id).find_many(filters, includes,
-                                                     sort=[("idx", 1)],
-                                                     skip=offset,
-                                                     size=page_size,
-                                                     to_dict=True):
-                for obj in image["objects"]:
-                    obj["source"] = obj["label_type"]  # TODO keep for compatibility, delete this in the future
+        image_list = []
+        for image in Image(dataset_id).find_many(filters,
+                                                 includes,
+                                                 sort=[("idx", 1)],
+                                                 skip=skip,
+                                                 size=page_size,
+                                                 to_dict=True):
+            for obj in image["objects"]:
+                obj["source"] = obj["label_type"]  # TODO keep for compatibility, delete this in the future
 
-                    alpha = obj.get("alpha", "")
-                    if alpha is None:
-                        obj["alpha"] = ""
-                    elif not alpha.startswith("http"):
-                        obj["alpha"] = f"{req_prefix}{alpha}"
+                alpha = obj.get("alpha", "")
+                if alpha is None:
+                    obj["alpha"] = ""
+                elif not alpha.startswith("http"):
+                    obj["alpha"] = f"{req_prefix}{alpha}"
 
-                    if obj["segmentation"] is None:
-                        obj["segmentation"] = ""
+                if obj["segmentation"] is None:
+                    obj["segmentation"] = ""
 
-                    obj["caption"] = obj["caption"] or ""
+                obj["caption"] = obj["caption"] or ""
 
-                    obj.pop("compare_result", None)
+                obj.pop("compare_result", None)
 
-                image_url = image["url"]
-                image_url = concat_url(req_prefix, image_url)
+            image_url = image["url"]
+            image_url = concat_url(req_prefix, image_url)
 
-                image_url_full_res = image["url_full_res"] or image_url
-                image_url_full_res = concat_url(req_prefix, image_url_full_res)
+            image_url_full_res = image["url_full_res"] or image_url
+            image_url_full_res = concat_url(req_prefix, image_url_full_res)
 
-                desc = image.pop("metadata") or "{}"
+            desc = image.pop("metadata") or "{}"
 
-                image.update({
-                    "desc" : desc,
-                    "metadata" : json.loads(desc),
-                    "url" : image_url,
-                    "url_full_res": image_url_full_res
-                })
+            image.update({
+                "desc" : desc,
+                "metadata" : json.loads(desc),
+                "url" : image_url,
+                "url_full_res": image_url_full_res
+            })
 
-                image["caption"] = ""
-                if caption_generator:
-                    image["caption"] = caption_generator(image)
+            image["caption"] = ""
+            if caption_generator:
+                image["caption"] = caption_generator(image)
 
-                image_list.append(image)
+            image_list.append(image)
 
         data = {
             "image_list": image_list,
+            "offset" : offset,
             "page_size" : page_size,
             "page_num" : page_num,
             "total" : total
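The rewritten GET handler above supports three paging modes: with offset omitted it keeps the old skip-based paging (skip = page_size * (page_num - 1)); with offset >= 0 it returns images whose idx is at least offset; with offset == -1 it draws a random start via randint(min_idx, max_idx), clamped so a full page usually remains. For example, with min_idx=0, max_idx=999 and page_size=100, a draw of 950 is pulled back to 900 so idx 900 through 999 still fills a page. Hypothetical client calls against the endpoint (host and dataset id are placeholders):

```python
import requests  # assumed client; endpoint and parameters come from this diff

BASE = "http://localhost:8765"  # placeholder host
params = {"dataset_id": "<dataset_id>", "page_size": 100}

# 1. Classic skip paging: offset omitted, page_num drives the skip.
r1 = requests.get(f"{BASE}/api/v1/images", params={**params, "page_num": 2})

# 2. Explicit offset: images with idx >= 500; the response echoes offset
#    and reports page_num as None.
r2 = requests.get(f"{BASE}/api/v1/images", params={**params, "offset": 500})

# 3. Random window: the server picks offset in [min_idx, max_idx] and
#    clamps it so roughly page_size images remain after the start point.
r3 = requests.get(f"{BASE}/api/v1/images", params={**params, "offset": -1})
```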
1 change: 0 additions & 1 deletion deepdataspace/server/static/20.4f772983.async.js.map

This file was deleted.

1 change: 0 additions & 1 deletion deepdataspace/server/static/222.a6c6168c.async.js.map

This file was deleted.

1 change: 0 additions & 1 deletion deepdataspace/server/static/233.9b953a00.async.js.map

This file was deleted.

1 change: 0 additions & 1 deletion deepdataspace/server/static/422.622bc3b6.async.js.map

This file was deleted.

1 change: 0 additions & 1 deletion deepdataspace/server/static/742.57cebfa0.async.js.map

This file was deleted.
