feature(dataset indices): create necessary indices on startup
imhuwq committed Mar 28, 2024
1 parent 0686b0e commit b437801
Showing 1 changed file with 34 additions and 0 deletions.
34 changes: 34 additions & 0 deletions deepdataspace/task/__init__.py
@@ -13,6 +13,10 @@
from celery.signals import worker_ready

from deepdataspace import environs
from deepdataspace.model.dataset import DataSet
from deepdataspace.model.dataset import DatasetStatus
from deepdataspace.model.image import Image
from deepdataspace.task.celery import app
from deepdataspace.task.ping import ping

@@ -71,8 +75,38 @@ def import_and_process_data_dir(data_dir: str, enforce: bool = False, auto_trigg
logger.error(str(err))


@app.task
def create_dataset_index():
    """
    Create the necessary indices for all datasets.
    """

    logger.info("create_dataset_index starts")

    total = DataSet.count_num({})
    datasets = DataSet.find_many({})
    for idx, dataset in enumerate(datasets):
        logger.info(f"[{idx + 1}/{total}] creating index for dataset {dataset.id}")
        if dataset.status != DatasetStatus.Ready:
            logger.info(f"dataset status is not {DatasetStatus.Ready}, skipping it")
        else:
            ImageModel = Image(dataset.id)

            # single-field index on the embedded annotation category id
            ImageModel.get_collection().create_index([
                ("objects.category_id", 1),
            ])
            logger.info(f"dataset {dataset.id} created index on field: objects.category_id")

            # single-field index on the image's idx field
            ImageModel.get_collection().create_index([
                ("idx", 1),
            ])
            logger.info(f"dataset {dataset.id} created index on field: idx")


@worker_ready.connect
def startup_jobs(sender=None, headers=None, body=None, **kwargs):
    # ensure necessary indices
    create_dataset_index.apply_async()

    # import all datasets
    data_dir = environs.DATASET_DIR
    import_and_process_data_dir.apply_async(args=(data_dir, False, True))
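
For context, a hypothetical usage sketch (not part of this commit) of the two indices created above. It assumes, as the create_index calls in the diff suggest, that Image(dataset_id).get_collection() returns a pymongo-style collection; the helper names and argument values below are made up for illustration.

# Hypothetical sketch of queries that benefit from the new indices.
from deepdataspace.model.image import Image

def images_with_category(dataset_id, category_id):
    collection = Image(dataset_id).get_collection()
    # filter on the embedded annotation category; served by the "objects.category_id" index
    return list(collection.find({"objects.category_id": category_id}))

def images_in_order(dataset_id, skip=0, limit=100):
    collection = Image(dataset_id).get_collection()
    # ordered pagination; the "idx" index lets MongoDB return documents in sort
    # order without an in-memory sort stage
    return list(collection.find({}).sort("idx", 1).skip(skip).limit(limit))

Each query above uses one single-field index; if the common access pattern were to filter by category and sort by idx in the same query, a compound index on both fields would be the more direct fit.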
