Skip to content

Commit

Permalink
Merge pull request #90 from IDEA-Research/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
xifanii authored Sep 15, 2023
2 parents 189a9e8 + ed913a8 commit 72d20f5
Show file tree
Hide file tree
Showing 20 changed files with 175 additions and 195 deletions.
30 changes: 14 additions & 16 deletions deepdataspace/io/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,19 @@
import time
from typing import Dict
from typing import List
from typing import Literal
from typing import Tuple
from typing import Type
from typing import Union

from tqdm import tqdm

from deepdataspace import constants
from deepdataspace.constants import ContentEncoding
from deepdataspace.constants import FileReadMode
from deepdataspace.constants import LabelName
from deepdataspace.constants import LabelType
from deepdataspace.globals import Redis
from deepdataspace.model import Category
from deepdataspace.model import DataSet
from deepdataspace.model import Label
from deepdataspace.model.image import Image
from deepdataspace.utils.file import create_file_range_url
from deepdataspace.utils.function import count_block_time
from deepdataspace.utils.string import get_str_md5

Expand Down Expand Up @@ -141,14 +136,14 @@ def load_existing_user_data(self):
"""

pipeline = [
{"$project": {"flag" : 1,
"flag_ts" : 1,
{"$project": {"flag": 1,
"flag_ts": 1,
"label_confirm": 1,
"objects" : {
"objects": {
"$filter": {
"input": "$objects",
"as" : "object",
"cond" : {
"as": "object",
"cond": {
"$eq": ["$$object.label_type", LabelType.User]
}
}
Expand All @@ -170,9 +165,9 @@ def load_existing_user_data(self):
label_confirm = image.get("label_confirm", {})

self._user_data[image_id] = {
"objects" : user_objects,
"flag" : flag,
"flag_ts" : flag_ts,
"objects": user_objects,
"flag": flag,
"flag_ts": flag_ts,
"label_confirm": label_confirm,
}

Expand Down Expand Up @@ -200,12 +195,15 @@ def run_import(self):
desc = f"dataset[{self.dataset.name}@{self.dataset.id}] import progress"
for (image, anno_list) in tqdm(self, desc=desc, unit=" images"):
beg = int(time.time() * 1000)
image = self.dataset.batch_add_image(**image)
# image = self.dataset.add_image(**image)
image, saved = self.dataset._batch_add_image(**image)
self.add_user_data(image)
for anno in anno_list:
image.batch_add_annotation(**anno)
# image.add_annotation(**anno)
if image and anno_list:
# the image is already saved and out of the batch queue,
# but the annotations are not empty,
# so we have to save the image again
image.save()
logger.debug(f"time cost of import one image: {int(time.time() * 1000) - beg}ms")
logger.debug(f"imported image, id={image.id}, url={image.url}")
self.dataset.finish_batch_add_image()
Expand Down
62 changes: 46 additions & 16 deletions deepdataspace/model/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import time
import uuid
from typing import Dict
from typing import Tuple

from pymongo.collection import Collection
from pymongo.typings import _DocumentType
Expand Down Expand Up @@ -241,12 +242,12 @@ def add_image(self,
if image is None:
image_id = id or self.num_images
image = Model(
id=image_id, idx=self.num_images,
type=self.type, dataset_id=self.id,
url=thumb_uri, url_full_res=full_uri,
width=width, height=height,
flag=flag, flag_ts=flag_ts,
metadata=metadata,
id=image_id, idx=self.num_images,
type=self.type, dataset_id=self.id,
url=thumb_uri, url_full_res=full_uri,
width=width, height=height,
flag=flag, flag_ts=flag_ts,
metadata=metadata,
)
else:
# please don't change idx in this case
Expand All @@ -273,15 +274,15 @@ def add_image(self,

return image

def batch_add_image(self,
uri: str,
thumb_uri: str = None,
width: int = None,
height: int = None,
id_: int = None,
metadata: dict = None,
flag: int = 0,
flag_ts: int = 0, ) -> ImageModel:
def _batch_add_image(self,
uri: str,
thumb_uri: str = None,
width: int = None,
height: int = None,
id_: int = None,
metadata: dict = None,
flag: int = 0,
flag_ts: int = 0, ) -> Tuple[ImageModel, bool]:
"""
This is the batch version of add_image, which optimizes database performance.
But this method is not thread safe, please make sure only one thread is calling this method.
Expand All @@ -295,7 +296,7 @@ def batch_add_image(self,
:param metadata: any information data need to be stored.
:param flag: the image flag, 0 for not flagged, 1 for positive, 2 for negative.
:param flag_ts: the image flag timestamp.
:return: the image object.
:return: the image object, the flag indicating whether the batch is saved to db.
"""

full_uri = uri
Expand Down Expand Up @@ -325,6 +326,35 @@ def batch_add_image(self,

if len(self._batch_queue) >= self._batch_size:
self._save_image_batch()
return image, True
return image, False

def batch_add_image(self,
                    uri: str,
                    thumb_uri: str = None,
                    width: int = None,
                    height: int = None,
                    id_: int = None,
                    metadata: dict = None,
                    flag: int = 0,
                    flag_ts: int = 0, ) -> ImageModel:
    """
    Batch-oriented counterpart of add_image that trades per-call durability
    for database throughput.

    Not thread safe: only a single thread may drive a batch at a time, and
    finish_batch_add_image must be called afterwards to flush pending
    changes to the database.

    :param uri: image uri; either a local path ("file://...") or a remote url ("http://...").
    :param thumb_uri: thumbnail uri, local path or remote url as above.
    :param width: full-resolution image width.
    :param height: full-resolution image height.
    :param id_: explicit image id; defaults to the dataset's current image count.
    :param metadata: arbitrary extra data to store with the image.
    :param flag: image flag — 0 not flagged, 1 positive, 2 negative.
    :param flag_ts: timestamp of the flag.
    :return: the image object.
    """

    # Delegate to the internal batch helper; callers of this legacy wrapper
    # don't need the "batch already saved" indicator, so it is dropped here.
    added, _batch_saved = self._batch_add_image(uri, thumb_uri, width, height,
                                                id_, metadata, flag, flag_ts)
    return added

@staticmethod
Expand Down
42 changes: 21 additions & 21 deletions deepdataspace/model/label_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,9 +292,9 @@ def _init_tasks(self):
continue

anno = {
"category_id": obj["category_name"],
"category_id" : obj["category_name"],
"category_name": obj["category_name"],
"bounding_box": obj["bounding_box"]
"bounding_box" : obj["bounding_box"]
}
annotations.append(anno)
category_names.add(obj["category_name"])
Expand Down Expand Up @@ -566,7 +566,7 @@ def export_project(self, label_set_name: str):

status = DatasetStatus.Ready
try:
update_data = {"status": DatasetStatus.Importing,
update_data = {"status" : DatasetStatus.Importing,
"detail_status.export_label_project": DatasetStatus.Importing}
DataSet.update_one({"id": dataset_id}, update_data)
logger.info(f"[{idx + 1}/{num}]exporting label project to dataset {dataset_id}")
Expand All @@ -580,7 +580,7 @@ def export_project(self, label_set_name: str):
status = DatasetStatus.Ready
logger.info(f"[{idx + 1}/{num}]export label project to dataset {dataset_id} success")
finally:
update_data = {"status": DatasetStatus.Ready,
update_data = {"status" : DatasetStatus.Ready,
"detail_status.export_label_project": status}
DataSet.update_one({"id": dataset_id}, update_data)

Expand Down Expand Up @@ -708,8 +708,8 @@ def is_leader(user: User, project_id: str):
"""

filters = {"project_id": project_id,
"user_id": user.id,
"role": {"$in": list(LabelProjectRoles.Leaders_)}}
"user_id" : user.id,
"role" : {"$in": list(LabelProjectRoles.Leaders_)}}
return ProjectRole.find_one(filters) is not None

@staticmethod
Expand All @@ -719,8 +719,8 @@ def is_gte_leader(user: User, project_id: str):
"""

filters = {"project_id": project_id,
"user_id": user.id,
"role": {"$in": list(LabelProjectRoles.GTELeaders_)}}
"user_id" : user.id,
"role" : {"$in": list(LabelProjectRoles.GTELeaders_)}}
return ProjectRole.find_one(filters) is not None

@staticmethod
Expand All @@ -730,8 +730,8 @@ def is_gt_leader(user: User, project_id: str):
"""

filters = {"project_id": project_id,
"user_id": user.id,
"role": {"$in": list(LabelProjectRoles.GTLeaders_)}}
"user_id" : user.id,
"role" : {"$in": list(LabelProjectRoles.GTLeaders_)}}
return ProjectRole.find_one(filters) is not None

@staticmethod
Expand Down Expand Up @@ -759,8 +759,8 @@ def is_worker(user: User, project_id: str):
"""

filters = {"project_id": project_id,
"user_id": user.id,
"role": {"$in": list(LabelProjectRoles.Workers_)}}
"user_id" : user.id,
"role" : {"$in": list(LabelProjectRoles.Workers_)}}
return ProjectRole.find_one(filters) is not None

@staticmethod
Expand Down Expand Up @@ -1080,7 +1080,7 @@ def _transfer_roles(task: "LabelTask", old_role: "TaskRole", new_role: "TaskRole

# delete old role from project
filters = {"project_id": task.project_id, "is_active": True,
"user_id": old_role.user_id, "role": old_role.role,
"user_id" : old_role.user_id, "role": old_role.role,
}
has_active_role = TaskRole.count_num(filters) > 0
if not has_active_role:
Expand Down Expand Up @@ -1116,13 +1116,13 @@ def _transfer_image_role_data(task: "LabelTask", old_role: "TaskRole", new_role:
f"labels.{new_user_id}": {
"$map": {
"input": f"$labels.{new_user_id}",
"as": "label",
"in": {
"as" : "label",
"in" : {
"$mergeObjects": [
"$$label",
{
"id": {"$concat": ["$$label.id", f"_{ts}", ts]},
"user_id": new_user_id,
"id" : {"$concat": ["$$label.id", f"_{ts}", ts]},
"user_id" : new_user_id,
"user_name": new_user_name
}
]
Expand All @@ -1137,13 +1137,13 @@ def _transfer_image_role_data(task: "LabelTask", old_role: "TaskRole", new_role:
f"reviews.{new_user_id}": {
"$map": {
"input": f"$reviews.{new_user_id}",
"as": "review",
"in": {
"as" : "review",
"in" : {
"$mergeObjects": [
"$$review",
{
"id": {"$concat": ["$$review.id", f"_{ts}"]},
"user_id": new_user_id,
"id" : {"$concat": ["$$review.id", f"_{ts}"]},
"user_id" : new_user_id,
"user_name": new_user_name
}
]
Expand Down
7 changes: 3 additions & 4 deletions deepdataspace/server/resources/api_v1/label_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,15 @@
from deepdataspace.constants import LabelImageQAActions
from deepdataspace.constants import LabelProjectQAActions
from deepdataspace.constants import LabelProjectRoles
from deepdataspace.constants import LabelProjectStatus
from deepdataspace.constants import LabelTaskImageStatus
from deepdataspace.constants import LabelTaskQAActions
from deepdataspace.constants import UserStatus
from deepdataspace.model.dataset import DataSet
from deepdataspace.model.label_task import LabelProject
from deepdataspace.model.label_task import LabelProjectError
from deepdataspace.model.label_task import LabelTask
from deepdataspace.model.label_task import LabelTaskError
from deepdataspace.model.label_task import LabelTaskImage
from deepdataspace.model.label_task import LabelTaskImageModel
from deepdataspace.model.label_task import ProjectRole
from deepdataspace.model.label_task import TaskRole
from deepdataspace.model.user import User
Expand Down Expand Up @@ -980,7 +979,7 @@ def post(self, request, task_image_id):
f"label_task[id={task_id}] is not found")

LTIModel = LabelTaskImage(task.dataset_id)
label_image = LTIModel.find_one({"id": task_image_id})
label_image: LabelTaskImageModel = LTIModel.find_one({"id": task_image_id})
if label_image is None:
raise_exception(ErrCode.LabelTaskImageNotFound,
f"label_image[id={task_image_id}] is not found")
Expand All @@ -990,7 +989,7 @@ def post(self, request, task_image_id):
raise_exception(ErrCode.UserCantLabelTaskImage,
f"user[id={user.id}] is not permitted to label image[id={task_image_id}]")

assert label_image.can_set_label(task, user)
assert label_image.ensure_status_for_labeling(task, user)

annotations = self._parse_annotations(request)
label_data = label_image.set_label(task, user, annotations)
Expand Down
2 changes: 1 addition & 1 deletion deepdataspace/server/static/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@
</head>
<body>
<div id="root"></div>
<script src="/static/umi.1d7dce09.js"></script>
<script src="/static/umi.910c5012.js"></script>

</body></html>
Loading

0 comments on commit 72d20f5

Please sign in to comment.