diff --git a/data/category/neural/v3/image_embeddings_model_thresholds.json.gz b/data/category/neural/v3/image_embeddings_model_thresholds.json.gz deleted file mode 100644 index 567b4959cb..0000000000 --- a/data/category/neural/v3/image_embeddings_model_thresholds.json.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a24c0f17bb63f4e1f1b047d10f18fc2228ce22c0cfe52f655f6e07334c05be49 -size 18783 diff --git a/robotoff/insights/importer.py b/robotoff/insights/importer.py index 81f8aa2002..3170802762 100644 --- a/robotoff/insights/importer.py +++ b/robotoff/insights/importer.py @@ -34,7 +34,6 @@ InsightImportResult, InsightType, JSONType, - NeuralCategoryClassifierModel, ObjectDetectionModel, PackagingElementProperty, Prediction, @@ -826,17 +825,9 @@ def generate_candidates( ] taxonomy = get_taxonomy(InsightType.category.name) - # Make sure we yield candidates with `above_threshold=True` even if - # there are candidates that are deepest in the category taxonomy yield from ( ProductInsight(**candidate.to_dict()) for candidate in select_deepest_taxonomized_candidates(candidates, taxonomy) - if candidate.data.get("above_threshold") is True - ) - yield from ( - ProductInsight(**candidate.to_dict()) - for candidate in select_deepest_taxonomized_candidates(candidates, taxonomy) - if candidate.data.get("above_threshold", False) is False ) @staticmethod @@ -869,17 +860,6 @@ def add_optional_fields(cls, insight: ProductInsight, product: Optional[Product] # campaign tag campaigns.append("agribalyse-category") - if ( - insight.predictor == "neural" - and insight.data.get("model_version") - == NeuralCategoryClassifierModel.keras_image_embeddings_3_0.value - and insight.data.get("above_threshold", False) - ): - # Add `v3-categorizer-automatic-processing` campaign to category - # insights that will be applied automatically soon - # (experimental phase) - campaigns.append("v3-categorizer-automatic-processing") - if product and not product.categories_tags: # Add a campaign to track products with no categories filled in campaigns.append("missing-category") diff --git a/robotoff/prediction/category/neural/category_classifier.py b/robotoff/prediction/category/neural/category_classifier.py index 013462be44..63d2073aeb 100644 --- a/robotoff/prediction/category/neural/category_classifier.py +++ b/robotoff/prediction/category/neural/category_classifier.py @@ -171,27 +171,12 @@ def predict( image_embeddings=image_embeddings, category_taxonomy=self.taxonomy, ) - - # Threshold for automatic detection, only available for - # `keras_image_embeddings_3_0` model. - # Currently we don't apply yet the category automatically, we only add - # a flag to add a specific annotation campaign during the insight - # import - thresholds = ( - (keras_category_classifier_3_0.get_automatic_processing_thresholds()) - if model_name.keras_image_embeddings_3_0 - else {} - ) - predictions = [] for category_id, score, neighbor_predictions in raw_predictions: if category_id not in self.taxonomy: # If the category no longer exist in the taxonomy, ignore it continue - # If the category is not in `thresholds` or if the score is - # below the threshold, set the above_threshold flag to False - above_threshold = score >= thresholds.get(category_id, 1.1) kwargs: dict[str, Any] = ( {} if neighbor_predictions is None @@ -203,13 +188,6 @@ def predict( score, model_name.value, product_id=product_id, - above_threshold=above_threshold, - # We need to set a higher priority (=lower digit) if - # above_threshold is True, as otherwise a deepest - # predicted category with `above_threshold=False` will - # take precedence, and we wouldn't generate any insight - # for the prediction with `above_threshold=True` - priority=0 if above_threshold else 1, **kwargs, ) ) diff --git a/robotoff/prediction/category/neural/keras_category_classifier_3_0/__init__.py b/robotoff/prediction/category/neural/keras_category_classifier_3_0/__init__.py index b60c34faed..01e74006f5 100644 --- a/robotoff/prediction/category/neural/keras_category_classifier_3_0/__init__.py +++ b/robotoff/prediction/category/neural/keras_category_classifier_3_0/__init__.py @@ -1,4 +1,3 @@ -import functools from typing import Literal, Optional import numpy as np @@ -16,13 +15,12 @@ serialize_byte_tensor, ) from robotoff.types import JSONType, NeuralCategoryClassifierModel, ProductIdentifier -from robotoff.utils import get_image_from_url, get_logger, http_session, load_json +from robotoff.utils import get_image_from_url, get_logger, http_session from .preprocessing import ( IMAGE_EMBEDDING_DIM, MAX_IMAGE_EMBEDDING, NUTRIMENT_NAMES, - V3_MODEL_DATA_DIR, generate_inputs_dict, ) @@ -225,20 +223,6 @@ def fetch_ocr_texts(product: JSONType, product_id: ProductIdentifier) -> list[st return ocr_texts -@functools.cache -def get_automatic_processing_thresholds() -> dict[str, float]: - """Return a dict mapping category ID to minimum detection threshold - required to be able to process the insight automatically. - Only available for the current default model, - `keras_image_embeddings_3_0`. - - The threshold was selected category-wise as the lowest threshold for which - we have a precision >= 0.99 on validation + test dataset for this - category. - """ - return load_json(V3_MODEL_DATA_DIR / "image_embeddings_model_thresholds.json.gz", compressed=True) # type: ignore - - # In NeighborPredictionType objects, we stores the score of parents, children # and sibling categories (relative to the predicted categories). Under each # type (siblings, children, parents), we store scores as a dict mapping the