diff --git a/Makefile b/Makefile index 5092715cef..d2d2fce357 100644 --- a/Makefile +++ b/Makefile @@ -89,7 +89,7 @@ livecheck: log: @echo "🥫 Reading logs (docker-compose) …" - ${DOCKER_COMPOSE} logs -f triton api scheduler worker_high_1 worker_high_2 worker_low_1 worker_low_2 + ${DOCKER_COMPOSE} logs -f api scheduler worker_high_1 worker_high_2 worker_low_1 worker_low_2 #------------# # Management # diff --git a/doc/references/api.yml b/doc/references/api.yml index ad8982a98c..9c421ba766 100644 --- a/doc/references/api.yml +++ b/doc/references/api.yml @@ -326,10 +326,12 @@ paths: is an base64-encoded string of `user:password`. Don't provide an authentication header for anonymous users. - The annotation is an integer that can take 3 values: `0`, `1`, `-1`. `0` means the insight is incorrect + The annotation is an integer that can take 4 values: `0`, `1`, `2`, `-1`. `0` means the insight is incorrect (so it won't be applied), `1` means it is correct (so it will be applied) and `-1` means the insight - won't be returned to the user (_skip_). We use the voting mecanism system to remember which insight - to skip for a user (authenticated or not). + won't be returned to the user (_skip_). `2` is used when user submit some data to the annotate endpoint + (for example in some cases of category annotation). + + We use the voting mecanism system to remember which insight to skip for a user (authenticated or not). requestBody: required: true content: diff --git a/robotoff/app/api.py b/robotoff/app/api.py index a2ba218992..1f1c395ad9 100644 --- a/robotoff/app/api.py +++ b/robotoff/app/api.py @@ -333,12 +333,27 @@ class AnnotateInsightResource: def on_post(self, req: falcon.Request, resp: falcon.Response): insight_id = req.get_param_as_uuid("insight_id", required=True) annotation = req.get_param_as_int( - "annotation", required=True, min_value=-1, max_value=1 + "annotation", required=True, min_value=-1, max_value=2 ) update = req.get_param_as_bool("update", default=True) # This field is only needed for nutritional table structure insights. - data = req.get_param_as_json("data") + data: JSONType | None = req.get_param_as_json("data") + + if annotation == 2: + if data is None: + raise falcon.HTTPBadRequest( + description="`data` must be provided when annotation == 2" + ) + if not update: + raise falcon.HTTPBadRequest( + description="`update` must be true when annotation == 2" + ) + + if data is not None and annotation != 2: + raise falcon.HTTPBadRequest( + description="`annotation` must be 2 when `data` is provided" + ) auth = parse_auth(req) if auth is not None and auth.get_username() == "null": @@ -347,6 +362,11 @@ def on_post(self, req: falcon.Request, resp: falcon.Response): trusted_annotator = auth is not None + if not trusted_annotator and annotation == 2: + raise falcon.HTTPBadRequest( + description="`data` cannot be provided when the user is not authenticated" + ) + device_id = device_id_from_request(req) logger.info( "New annotation received from %s (annotation: %s, insight: %s)", diff --git a/robotoff/app/core.py b/robotoff/app/core.py index 994bbe8e9d..0b55d040ad 100644 --- a/robotoff/app/core.py +++ b/robotoff/app/core.py @@ -373,7 +373,7 @@ def save_annotation( available for annotation again. :param insight_id: The ID of the insight - :param annotation: The annotation, either -1, 0, or 1 + :param annotation: The annotation, either -1, 0, 1 or 2 :param device_id: Unique identifier of the device, see `device_id_from_request` :param update: If True, perform the update on Product Opener if diff --git a/robotoff/insights/annotate.py b/robotoff/insights/annotate.py index c46a357918..c75a6617c4 100644 --- a/robotoff/insights/annotate.py +++ b/robotoff/insights/annotate.py @@ -50,6 +50,8 @@ class AnnotationStatus(Enum): error_invalid_image = 8 vote_saved = 9 error_failed_update = 10 + error_invalid_data = 11 + user_input_updated = 12 SAVED_ANNOTATION_RESULT = AnnotationResult( @@ -62,6 +64,11 @@ class AnnotationStatus(Enum): status=AnnotationStatus.updated.name, description="the annotation was saved and sent to OFF", ) +USER_INPUT_UPDATED_ANNOTATION_RESULT = AnnotationResult( + status_code=AnnotationStatus.user_input_updated.value, + status=AnnotationStatus.user_input_updated.name, + description="the data provided by the user was saved and sent to OFF", +) MISSING_PRODUCT_RESULT = AnnotationResult( status_code=AnnotationStatus.error_missing_product.value, status=AnnotationStatus.error_missing_product.name, @@ -109,7 +116,7 @@ def annotate( to Product Opener if `update=True`. :param insight: the insight to annotate - :param annotation: the annotation as an integer, either -1, 0 or 1 + :param annotation: the annotation as an integer, either -1, 0, 1 or 2 :param update: if True, a write query is sent to Product Opener with the update, defaults to True :param data: additional data sent by the client, defaults to None @@ -160,7 +167,7 @@ def _annotate( insight.annotation = annotation insight.completed_at = datetime.datetime.utcnow() - if annotation == 1 and update: + if annotation in (1, 2) and update: # Save insight before processing the annotation insight.save() annotation_result = cls.process_annotation( @@ -172,6 +179,7 @@ def _annotate( if annotation_result.status_code in ( AnnotationStatus.saved.value, AnnotationStatus.updated.value, + AnnotationStatus.user_input_updated.value, AnnotationStatus.error_invalid_image.value, AnnotationStatus.error_missing_product.value, AnnotationStatus.error_updated_product.value, @@ -292,10 +300,31 @@ def process_annotation( categories_tags: list[str] = product.get("categories_tags") or [] - if insight.value_tag in categories_tags: + user_input = False + if data is None: + category_tag = insight.value_tag + else: + value_tag = data.get("value_tag") + if isinstance(value_tag, str): + user_input = True + category_tag = value_tag + else: + return AnnotationResult( + status_code=AnnotationStatus.error_invalid_data.value, + status=AnnotationStatus.error_invalid_data.name, + description="`data` is invalid, expected a single `value_tag` string field with the category tag", + ) + + if category_tag in categories_tags: return ALREADY_ANNOTATED_RESULT - category_tag = insight.value_tag + if user_input: + insight.data["original_value_tag"] = insight.value_tag + insight.data["user_input"] = True + insight.value_tag = value_tag + insight.value = None + insight.save() + add_category( product_id, category_tag, @@ -303,7 +332,11 @@ def process_annotation( auth=auth, is_vote=is_vote, ) - return UPDATED_ANNOTATION_RESULT + return ( + USER_INPUT_UPDATED_ANNOTATION_RESULT + if user_input + else UPDATED_ANNOTATION_RESULT + ) class ProductWeightAnnotator(InsightAnnotator): @@ -656,6 +689,21 @@ def annotate( auth: Optional[OFFAuthentication] = None, is_vote: bool = False, ) -> AnnotationResult: + """Annotate an insight: save the annotation in DB and send the update + to Product Opener if `update=True`. + + :param insight: the insight to annotate + :param annotation: the annotation as an integer, either -1, 0, 1 or 2 + :param update: if True, a write query is sent to Product Opener with + the update, defaults to True + :param data: additional data sent by the client, defaults to None + :param auth: user authentication data, should be None if the + annotation was triggered by an anonymous vote (in which case + `is_vote=True`) or if the insight is applied automatically. + :param is_vote: True if the annotation was triggered by an anonymous + vote, defaults to False + :return: the result of the annotation process + """ return ANNOTATOR_MAPPING[insight.type].annotate( insight=insight, annotation=annotation, diff --git a/robotoff/models.py b/robotoff/models.py index 88af9bccb3..a899cdf876 100644 --- a/robotoff/models.py +++ b/robotoff/models.py @@ -102,9 +102,10 @@ class ProductInsight(BaseModel): # The annotation of the given insight. Four possible values are possible: # null = This insight has not been annotated - # -1 = Rejected + # -1 = Rejected # 0 = 'I don't know' # 1 = Validated + # 2 = value provided by user annotation = peewee.IntegerField(null=True, index=True) # Saves the value returned by Annotator.annotate diff --git a/robotoff/types.py b/robotoff/types.py index c1fcf5e872..fddf76a488 100644 --- a/robotoff/types.py +++ b/robotoff/types.py @@ -338,4 +338,4 @@ class PackagingElementProperty(enum.Enum): LogoLabelType = tuple[str, Optional[str]] -InsightAnnotation = Literal[-1, 0, 1] +InsightAnnotation = Literal[-1, 0, 1, 2] diff --git a/tests/integration/insights/test_annotate.py b/tests/integration/insights/test_annotate.py index 35d67e9838..4e0d607d7c 100644 --- a/tests/integration/insights/test_annotate.py +++ b/tests/integration/insights/test_annotate.py @@ -1,6 +1,6 @@ import pytest -from robotoff.insights.annotate import CategoryAnnotator +from robotoff.insights.annotate import AnnotationResult, CategoryAnnotator from robotoff.models import ProductInsight from ..models_utils import ProductInsightFactory, clean_db @@ -32,3 +32,63 @@ def test_annotation_fails_is_rolledback(mocker): assert insight.annotation is None # while process_annotation was called assert mocked.called + + +class TestCategoryAnnotator: + def test_process_annotation(self, mocker): + insight = ProductInsightFactory(type="category", value_tag="en:cookies") + add_category_mocked = mocker.patch("robotoff.insights.annotate.add_category") + get_product_mocked = mocker.patch( + "robotoff.insights.annotate.get_product", return_value={} + ) + result = CategoryAnnotator.process_annotation(insight, is_vote=False) + add_category_mocked.assert_called_once() + get_product_mocked.assert_called_once() + assert result == AnnotationResult( + status_code=2, + status="updated", + description="the annotation was saved and sent to OFF", + ) + + def test_process_annotation_with_user_input_data(self, mocker): + original_value_tag = "en:cookies" + insight = ProductInsightFactory( + type="category", value_tag=original_value_tag, data={} + ) + user_data = {"value_tag": "en:cookie-dough"} + add_category_mocked = mocker.patch("robotoff.insights.annotate.add_category") + get_product_mocked = mocker.patch( + "robotoff.insights.annotate.get_product", return_value={} + ) + result = CategoryAnnotator.process_annotation(insight, user_data, is_vote=False) + add_category_mocked.assert_called_once() + get_product_mocked.assert_called_once() + assert result == AnnotationResult( + status_code=12, + status="user_input_updated", + description="the data provided by the user was saved and sent to OFF", + ) + assert insight.value_tag == user_data["value_tag"] + assert insight.data == { + "user_input": True, + "original_value_tag": original_value_tag, + } + + @pytest.mark.parametrize("user_data", [{}, {"invalid_key": "v"}, {"value_tag": 1}]) + def test_process_annotation_with_invalid_user_input_data(self, user_data, mocker): + original_value_tag = "en:cookies" + insight = ProductInsightFactory( + type="category", value_tag=original_value_tag, data={} + ) + add_category_mocked = mocker.patch("robotoff.insights.annotate.add_category") + get_product_mocked = mocker.patch( + "robotoff.insights.annotate.get_product", return_value={} + ) + result = CategoryAnnotator.process_annotation(insight, user_data, is_vote=False) + assert not add_category_mocked.called + get_product_mocked.assert_called_once() + assert result == AnnotationResult( + status_code=11, + status="error_invalid_data", + description="`data` is invalid, expected a single `value_tag` string field with the category tag", + ) diff --git a/tests/integration/test_api.py b/tests/integration/test_api.py index 5c491dfb53..3548a5f4df 100644 --- a/tests/integration/test_api.py +++ b/tests/integration/test_api.py @@ -666,6 +666,88 @@ def test_annotation_event(client, monkeypatch, httpserver): assert result.status_code == 200 +def test_annotate_category_with_user_input(client, mocker, peewee_db): + """We test category insight annotation with user input.""" + # mock because as we validate the insight, we will ask mongo for product + mocker.patch( + "robotoff.insights.annotate.get_product", return_value={"categories_tags": []} + ) + add_category = mocker.patch("robotoff.insights.annotate.add_category") + + with peewee_db: + insight = ProductInsightFactory(type="category", value_tag="en:seeds") + + # data must be provided when annotation == 2 + result = client.simulate_post( + "/api/v1/insights/annotate", + params={"insight_id": str(insight.id), "annotation": 2}, + ) + assert result.status_code == 400 + assert result.json == { + "description": "`data` must be provided when annotation == 2", + "title": "400 Bad Request", + } + + # update must be true when annotation == 2 + result = client.simulate_post( + "/api/v1/insights/annotate", + params={ + "insight_id": str(insight.id), + "annotation": 2, + "data": "{}", + "update": False, + }, + ) + assert result.status_code == 400 + assert result.json == { + "description": "`update` must be true when annotation == 2", + "title": "400 Bad Request", + } + + # user input during annotation is forbidden for unauthenticated users + result = client.simulate_post( + "/api/v1/insights/annotate", + params={ + "insight_id": str(insight.id), + "annotation": 2, + "data": '{"value_tag": "en:beef"}', + }, + ) + assert result.status_code == 400 + assert result.json == { + "description": "`data` cannot be provided when the user is not authenticated", + "title": "400 Bad Request", + } + + # authorized annotation with user input + auth = base64.b64encode(b"a:b").decode("ascii") + result = client.simulate_post( + "/api/v1/insights/annotate", + params={ + "insight_id": str(insight.id), + "annotation": 2, + "data": '{"value_tag": "en:beef"}', + }, + headers={"Authorization": "Basic " + auth}, + ) + assert result.status_code == 200 + assert result.json == { + "status_code": 12, + "status": "user_input_updated", + "description": "the data provided by the user was saved and sent to OFF", + } + add_category.assert_called_once() + + with peewee_db: + updated_insight = ProductInsight.get_or_none(id=insight.id) + assert updated_insight is not None + assert updated_insight.value_tag == "en:beef" + assert updated_insight.data == { + "original_value_tag": "en:seeds", + "user_input": True, + } + + def test_prediction_collection_no_result(client): result = client.simulate_get("/api/v1/predictions") assert result.status_code == 200