Skip to content

Commit

Permalink
Add RSPO code detection (#161)
Browse files Browse the repository at this point in the history
  • Loading branch information
raphael0202 committed Jul 15, 2020
1 parent 38e7acf commit 9ffebab
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 13 deletions.
34 changes: 21 additions & 13 deletions robotoff/insights/ocr/packager_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,33 +63,41 @@ def process_fsc_match(match) -> str:
lowercase=True,
processing_func=process_de_packaging_match,
),
"rspo": OCRRegex(
re.compile(r"(?<!\w)RSPO-\d{7}(?!\d)"),
field=OCRField.full_text_contiguous,
lowercase=False,
),
}


def find_packager_codes_regex(ocr_result: Union[OCRResult, str]) -> List[RawInsight]:
    """Run every pattern in ``PACKAGER_CODE`` over the OCR text.

    For each registered ``OCRRegex`` the text is fetched with ``get_text``
    (honouring the pattern's own ``lowercase`` setting) and every regex match
    is turned into one ``RawInsight`` of type ``InsightType.packager_code``.

    Args:
        ocr_result: the OCR output to scan, either an ``OCRResult`` or a
            plain string; it is handed unchanged to ``get_text``.

    Returns:
        One ``RawInsight`` per match, in pattern order then match order.
        The list is empty when nothing matches.
    """
    results: List[RawInsight] = []

    for regex_code, ocr_regex in PACKAGER_CODE.items():
        # Case handling is decided per pattern: some codes are only valid in
        # their original case, so the pattern's flag is forwarded explicitly.
        text = get_text(ocr_result, ocr_regex, ocr_regex.lowercase)

        if not text:
            continue

        for match in ocr_regex.regex.finditer(text):
            # Patterns that define no processing function report the matched
            # text as-is; the others normalise the match first.
            if ocr_regex.processing_func is None:
                value = match.group(0)
            else:
                value = ocr_regex.processing_func(match)

            results.append(
                RawInsight(
                    value=value,
                    data={
                        "raw": match.group(0),
                        "type": regex_code,
                        "notify": ocr_regex.notify,
                    },
                    type=InsightType.packager_code,
                    automatic_processing=True,
                )
            )

    return results

Expand Down
19 changes: 19 additions & 0 deletions tests/insights/ocr/test_packager_code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from typing import List

import pytest

from robotoff.insights.ocr.packager_code import find_packager_codes


@pytest.mark.parametrize(
    "text,values",
    [
        # Well-formed code: "RSPO-" followed by exactly seven digits.
        ("Sustainable palm oil RSPO-5068502 ", ["RSPO-5068502"]),
        # Eight digits after the prefix: must not be reported.
        ("RSPO-50685022", []),
        # Prefix glued to a preceding letter: must not be reported.
        ("QRSPO-2404885", []),
    ],
)
def test_find_packager_codes(text: str, values: List[str]):
    """Check which packager-code values are detected in a raw text snippet.

    The comparison is done on sets, so ordering and duplicate detections are
    deliberately ignored.
    """
    expected = set(values)
    detected = {insight.value for insight in find_packager_codes(text)}
    assert detected == expected

0 comments on commit 9ffebab

Please sign in to comment.