Skip to content

Commit

Permalink
fix: mark images as deleted in DB when deleted on Product Opener
Browse files Browse the repository at this point in the history
  • Loading branch information
raphael0202 committed Oct 27, 2023
1 parent bd6a15a commit 997e989
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 2 deletions.
1 change: 1 addition & 0 deletions robotoff/app/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1217,6 +1217,7 @@ def on_post(self, req: falcon.Request, resp: falcon.Response):
settings.UPDATED_PRODUCT_WAIT,
job_kwargs={"result_ttl": 0},
product_id=product_id,
diffs=diffs,
)
elif action == "deleted":
enqueue_job(
Expand Down
69 changes: 68 additions & 1 deletion robotoff/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np
from PIL import Image

from robotoff.models import ImageModel
from robotoff.models import ImageModel, Prediction, ProductInsight
from robotoff.off import generate_image_path, generate_image_url
from robotoff.types import JSONType, ProductIdentifier
from robotoff.utils import get_image_from_url, get_logger, http_session
Expand Down Expand Up @@ -167,3 +167,70 @@ def generate_image_fingerprint(image: Image.Image) -> int:
# convert the 64-bit array to a 64 bits integer
fingerprint = int_array.dot(2 ** np.arange(int_array.size)[::-1])
return fingerprint


def delete_images(product_id: ProductIdentifier, image_ids: list[str]):
"""Delete images and related items in DB.
This function must be called when Robotoff gets notified of an image
deletion. It proceeds as follow:
- mark the image as `deleted` in the `image` table
- delete all predictions associated with the image (`prediction` table)
- delete all non-annotated insights associated with the image
(`product_insight` table). Annotated insights are kept for reference.
:param product_id: identifier of the product
:param image_ids: a list of image IDs to delete.
Each image ID must be a digit.
"""
server_type = product_id.server_type.name
# Perform batching as we don't know the number of images to delete
updated_models = []
source_images = []
for image_id in image_ids:
source_image = generate_image_path(product_id, image_id)
image_model = ImageModel.get_or_none(
source_image=source_image, server_type=server_type
)

if image_model is None:
logger.info(
"image to delete %s for product %s not found in DB, skipping",
image_id,
product_id,
)
continue

# set the `deleted` flag to True: image models are always kept in DB
image_model.deleted = True
updated_models.append(image_model)
source_images.append(source_image)

updated_image_models: int = ImageModel.bulk_update(
updated_models, fields=["deleted"]
)
deleted_predictions: int = (
Prediction.delete()
.where(
Prediction.source_image.in_(source_images),
Prediction.server_type == server_type,
)
.execute()
)
deleted_insights: int = (
ProductInsight.delete()
.where(
ProductInsight.source_image.in_(source_images),
ProductInsight.server_type == server_type,
ProductInsight.annotation.is_null(),
)
.execute()
)

logger.info(
"deleted %s image in DB, %s deleted predictions, %s deleted insights",
updated_image_models,
deleted_predictions,
deleted_insights,
)
13 changes: 12 additions & 1 deletion robotoff/workers/tasks/product_updated.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import requests

from robotoff.images import delete_images
from robotoff.insights.extraction import get_predictions_from_product_name
from robotoff.insights.importer import import_insights, refresh_insights
from robotoff.models import with_db
Expand All @@ -14,14 +15,18 @@


@with_db
def update_insights_job(product_id: ProductIdentifier):
def update_insights_job(product_id: ProductIdentifier, diffs: JSONType) -> None:
"""This job is triggered by the webhook API, when product information has
been updated.
When a product is updated, Robotoff will:
1. Generate new predictions related to the product's category and name.
2. Regenerate all insights from the product associated predictions.
:param product_id: identifier of the product
:param diffs: a dict containing a diff of the update, the format is
defined by Product Opener
"""
logger.info("Running `update_insights` for %s", product_id)

Expand All @@ -36,6 +41,12 @@ def update_insights_job(product_id: ProductIdentifier):
# reprocessing with another task arriving concurrently.
# The expire is there only in case the lock is not released
# (process killed)
deleted_images = diffs.get("uploaded_images", {}).get("delete")
if deleted_images:
# deleted_images is a list of image IDs that have been deleted
logger.info("images deleted: %s, launching DB update", deleted_images)
delete_images(product_id, deleted_images)

product_dict = get_product(product_id)

if product_dict is None:
Expand Down
87 changes: 87 additions & 0 deletions tests/integration/test_images.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import pytest

from robotoff.images import delete_images
from robotoff.models import ImageModel, Prediction, ProductInsight
from robotoff.off import generate_image_path
from robotoff.types import ProductIdentifier, ServerType

from .models_utils import (
ImageModelFactory,
PredictionFactory,
ProductInsightFactory,
clean_db,
)

DEFAULT_SERVER_TYPE = ServerType.off


@pytest.fixture(autouse=True)
def _set_up_and_tear_down(peewee_db):
with peewee_db:
clean_db()
# Run the test case.
yield

with peewee_db:
clean_db()


def test_delete_images(peewee_db, mocker):
with peewee_db:
barcode = "1"
image_id = "1"
product_id = ProductIdentifier(barcode, DEFAULT_SERVER_TYPE)
source_image = generate_image_path(product_id, image_id)
# to be deleted
image_1 = ImageModelFactory(barcode=barcode, image_id=image_id)
# to be kept (different image ID)
image_2 = ImageModelFactory(barcode=barcode, image_id="2")
# to be kept (different barcode)
image_3 = ImageModelFactory(barcode="2", image_id=image_id)
# to be kept (same barcode/image ID, but different server type)
image_4 = ImageModelFactory(
barcode=barcode, image_id=image_id, server_type="obf"
)
# to be deleted
prediction_1 = PredictionFactory(barcode=barcode, source_image=source_image)
# to be kept (different barcode)
prediction_2 = PredictionFactory(barcode="2")
# to be kept (different server type)
prediction_3 = PredictionFactory(
barcode=barcode, server_type="obf", source_image=source_image
)
# to be deleted
insight_1 = ProductInsightFactory(
barcode=barcode, source_image=source_image, annotation=None
)
# to be kept
insight_2 = ProductInsightFactory(
barcode=barcode, source_image=source_image, annotation=1
)
# to be kept
insight_3 = ProductInsightFactory(barcode="2", annotation=None)
# to be kept (different server type)
insight_4 = ProductInsightFactory(
barcode="1", server_type="obf", source_image=source_image, annotation=None
)
delete_images(product_id, [image_id])

assert ImageModel.get_or_none(id=image_1.id).deleted
assert not ImageModel.get_or_none(id=image_2.id).deleted
assert not ImageModel.get_or_none(id=image_3.id).deleted
assert not ImageModel.get_or_none(id=image_4.id).deleted

# the prediction associated with the image should be deleted
assert Prediction.get_or_none(id=prediction_1.id) is None
# but not the unrelated ones
assert Prediction.get_or_none(id=prediction_2.id) is not None
assert Prediction.get_or_none(id=prediction_3.id) is not None

# the unannotated product insight associated with the image should be
# deleted
assert ProductInsight.get_or_none(id=insight_1.id) is None
# but not the annotated one
assert ProductInsight.get_or_none(id=insight_2.id) is not None
# nor the ones unrelated to the deleted image
assert ProductInsight.get_or_none(id=insight_3.id) is not None
assert ProductInsight.get_or_none(id=insight_4.id) is not None

0 comments on commit 997e989

Please sign in to comment.