Skip to content

Commit

Permalink
fix: remove above-threshold-campaign
Browse files Browse the repository at this point in the history
we have enough data if we wish to analyze it
  • Loading branch information
raphael0202 committed Aug 8, 2023
1 parent 460dae6 commit 326626a
Show file tree
Hide file tree
Showing 4 changed files with 1 addition and 62 deletions.

This file was deleted.

20 changes: 0 additions & 20 deletions robotoff/insights/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
InsightImportResult,
InsightType,
JSONType,
NeuralCategoryClassifierModel,
ObjectDetectionModel,
PackagingElementProperty,
Prediction,
Expand Down Expand Up @@ -826,17 +825,9 @@ def generate_candidates(
]
taxonomy = get_taxonomy(InsightType.category.name)

# Make sure we yield candidates with `above_threshold=True` even if
# there are candidates that are deepest in the category taxonomy
yield from (
ProductInsight(**candidate.to_dict())
for candidate in select_deepest_taxonomized_candidates(candidates, taxonomy)
if candidate.data.get("above_threshold") is True
)
yield from (
ProductInsight(**candidate.to_dict())
for candidate in select_deepest_taxonomized_candidates(candidates, taxonomy)
if candidate.data.get("above_threshold", False) is False
)

@staticmethod
Expand Down Expand Up @@ -869,17 +860,6 @@ def add_optional_fields(cls, insight: ProductInsight, product: Optional[Product]
# campaign tag
campaigns.append("agribalyse-category")

if (
insight.predictor == "neural"
and insight.data.get("model_version")
== NeuralCategoryClassifierModel.keras_image_embeddings_3_0.value
and insight.data.get("above_threshold", False)
):
# Add `v3-categorizer-automatic-processing` campaign to category
# insights that will be applied automatically soon
# (experimental phase)
campaigns.append("v3-categorizer-automatic-processing")

if product and not product.categories_tags:
# Add a campaign to track products with no categories filled in
campaigns.append("missing-category")
Expand Down
22 changes: 0 additions & 22 deletions robotoff/prediction/category/neural/category_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,27 +171,12 @@ def predict(
image_embeddings=image_embeddings,
category_taxonomy=self.taxonomy,
)

# Threshold for automatic detection, only available for
# `keras_image_embeddings_3_0` model.
# Currently we do not yet apply the category automatically; we only add
# a flag to add a specific annotation campaign during the insight
# import
thresholds = (
(keras_category_classifier_3_0.get_automatic_processing_thresholds())
if model_name.keras_image_embeddings_3_0
else {}
)

predictions = []

for category_id, score, neighbor_predictions in raw_predictions:
if category_id not in self.taxonomy:
# If the category no longer exists in the taxonomy, ignore it
continue
# If the category is not in `thresholds` or if the score is
# below the threshold, set the above_threshold flag to False
above_threshold = score >= thresholds.get(category_id, 1.1)
kwargs: dict[str, Any] = (
{}
if neighbor_predictions is None
Expand All @@ -203,13 +188,6 @@ def predict(
score,
model_name.value,
product_id=product_id,
above_threshold=above_threshold,
# We need to set a higher priority (=lower digit) if
# above_threshold is True, as otherwise a deepest
# predicted category with `above_threshold=False` will
# take precedence, and we wouldn't generate any insight
# for the prediction with `above_threshold=True`
priority=0 if above_threshold else 1,
**kwargs,
)
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import functools
from typing import Literal, Optional

import numpy as np
Expand All @@ -16,13 +15,12 @@
serialize_byte_tensor,
)
from robotoff.types import JSONType, NeuralCategoryClassifierModel, ProductIdentifier
from robotoff.utils import get_image_from_url, get_logger, http_session, load_json
from robotoff.utils import get_image_from_url, get_logger, http_session

from .preprocessing import (
IMAGE_EMBEDDING_DIM,
MAX_IMAGE_EMBEDDING,
NUTRIMENT_NAMES,
V3_MODEL_DATA_DIR,
generate_inputs_dict,
)

Expand Down Expand Up @@ -225,20 +223,6 @@ def fetch_ocr_texts(product: JSONType, product_id: ProductIdentifier) -> list[st
return ocr_texts


@functools.cache
def get_automatic_processing_thresholds() -> dict[str, float]:
    """Load the per-category thresholds used for automatic processing.

    The returned dict maps a category ID to the minimum detection threshold
    an insight must reach to be eligible for automatic processing.
    Thresholds are only available for the current default model,
    `keras_image_embeddings_3_0`.

    Each threshold was selected category-wise as the lowest value for which
    precision is >= 0.99 on the validation + test dataset for that category.

    The result is memoized with `functools.cache`, so repeated calls return
    the same dict object.
    """
    thresholds_path = V3_MODEL_DATA_DIR / "image_embeddings_model_thresholds.json.gz"
    return load_json(thresholds_path, compressed=True)  # type: ignore


# In NeighborPredictionType objects, we store the scores of parents, children
# and sibling categories (relative to the predicted categories). Under each
# type (siblings, children, parents), we store scores as a dict mapping the
Expand Down

0 comments on commit 326626a

Please sign in to comment.