Skip to content

Commit

Permalink
Merge pull request #112 from IDEA-Research/refactor/delete_dataset
Browse files Browse the repository at this point in the history
refactor(delete dataset): add model API to cascade delete dataset
  • Loading branch information
imhuwq authored Dec 13, 2023
2 parents f676242 + fe44e36 commit 4c191d9
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 18 deletions.
23 changes: 23 additions & 0 deletions deepdataspace/model/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,3 +439,26 @@ def eval_description(self):
except:
logger.warning(f"Failed to eval description_func[{self.description_func}] for dataset[{self.id}]")
return self.description or self.path

@staticmethod
def cascade_delete(dataset: "DataSet"):
    """
    Delete a dataset together with the records keyed by its id.

    When ``dataset`` is ``None`` the call is a no-op. Otherwise, in order:
    categories and labels whose ``dataset_id`` equals the dataset's id are
    deleted, the collection returned by ``Image(dataset_id).get_collection()``
    is dropped, and finally the dataset record itself is deleted.
    Progress messages are printed to stdout along the way.

    :param dataset: the dataset to delete, or ``None`` to do nothing.
    """
    if dataset is None:
        # Nothing to delete; callers may pass the result of a failed lookup.
        return

    ds_id = dataset.id
    print(f"dataset [{ds_id}] is found, deleting...")

    # Categories are matched on their dataset_id field.
    print(f"dataset [{ds_id}] is found, deleting categories...")
    category_filter = {"dataset_id": ds_id}
    Category.delete_many(category_filter)

    # Labels are matched on their dataset_id field as well.
    print(f"dataset [{ds_id}] is found, deleting labels...")
    label_filter = {"dataset_id": ds_id}
    Label.delete_many(label_filter)

    # Images are removed by dropping the whole collection obtained for this id.
    print(f"dataset [{ds_id}] is found, deleting images...")
    image_collection = Image(ds_id).get_collection()
    image_collection.drop()

    # The dataset record goes last, after everything that refers to it.
    dataset_filter = {"id": ds_id}
    DataSet.delete_many(dataset_filter)
    print(f"dataset [{ds_id}] is deleted.")
19 changes: 1 addition & 18 deletions deepdataspace/scripts/dataset_cmds.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
"""

import os
import shutil

import pkg_resources

Expand All @@ -17,10 +16,7 @@
@ddsop.command("delete_one", help="Delete a dataset.")
@click.argument("dataset_dir")
def delete_one(dataset_dir):
from deepdataspace.model import Image
from deepdataspace.model import Label
from deepdataspace.model import DataSet
from deepdataspace.model import Category
from deepdataspace.utils.string import get_str_md5

dataset_dir = os.path.abspath(dataset_dir)
Expand All @@ -30,20 +26,7 @@ def delete_one(dataset_dir):
print(f"dataset [{dataset_dir}] is not imported before, skip...")
return

dataset_id = dataset.id
print(f"dataset [{dataset_id}] is found, deleting...")

print(f"dataset [{dataset_id}] is found, deleting categories...")
Category.delete_many({"dataset_id": dataset_id})

print(f"dataset [{dataset_id}] is found, deleting labels...")
Label.delete_many({"dataset_id": dataset_id})

print(f"dataset [{dataset_id}] is found, deleting images...")
Image(dataset_id).get_collection().drop()

DataSet.delete_many({"id": dataset_id})
print(f"dataset [{dataset_id}] is deleted.")
DataSet.cascade_delete(dataset)


@ddsop.command("import_all", help="Trigger a background task of importing all datasets in a data dir.")
Expand Down

0 comments on commit 4c191d9

Please sign in to comment.