diff --git a/lib/ProductOpener/Ingredients.pm b/lib/ProductOpener/Ingredients.pm index e20274a5eaa71..698dbeb6a03c3 100644 --- a/lib/ProductOpener/Ingredients.pm +++ b/lib/ProductOpener/Ingredients.pm @@ -852,17 +852,120 @@ my %min_regexp = ( # Words that can be ignored after a percent # e.g. 50% du poids total, 30% of the total weight +# groups need to be non-capturing: prefixed with (?: my %ignore_strings_after_percent = ( - en => "of (the )?(?:total weight|grain is wholegrain rye)", - es => "(en el chocolate( con leche)?)", + en => "of (?:the )?(?:total weight|grain is wholegrain rye)", + es => "(?:en el chocolate(?: con leche)?)", fi => "jauhojen määrästä", - fr => "(dans le chocolat( (blanc|noir|au lait))?)|(du poids total|du poids)", + fr => "(?:dans le chocolat(?: (?:blanc|noir|au lait))?)|(?:du poids total|du poids)", sv => "fetthalt", ); +=head2 parse_specific_ingredients_text ( product_ref, $text, $percent_regexp ) + +Lists of ingredients sometimes include extra mentions for specific ingredients +at the end of the ingredients list. e.g. "Prepared with 50g of fruits for 100g of finished product". + +This function extracts those mentions and adds them to a special specific_ingredients structure. + +=head3 Return values + +=head4 specific_ingredients structure + +Hash of specific ingredients, stored in $product_ref->{specific_ingredients} (deleted if no mention was found). + +=head4 $text with the matched mentions removed + +=cut + +sub parse_specific_ingredients_text($$$) { + + my $product_ref = shift; + my $text = shift; + my $percent_regexp = shift; + + my $product_lc = $product_ref->{lc}; + + $product_ref->{specific_ingredients} = {}; + + # Go through the ingredient lists multiple times + # as long as we have one match + my $ingredient = "start"; + + while ($ingredient) { + + # Initialize values + $ingredient = undef; + my $matched_text; + my $percent; + + # Note: in regular expressions below, use non-capturing groups (starting with (?: ) + # for all groups, except groups that capture actual data: ingredient name, percent, origins + + # Regexps should match until we reach a . 
; or the end of the text + + if ($product_lc eq "en") { + # examples: + # Total Milk Content 73%. + + if ($text =~ /\s*(?:total |min |minimum )?([^,.;-]+?)\s+content(?::| )+$percent_regexp\s*(?:per 100\s*(?:g)(?:[^,.;-]*?))?(?:;|\.| - |$)/i) { + $percent = $2; # $percent_regexp + $ingredient = $1; + $matched_text = $&; + # Remove the matched text + $text = $` . ' ' . $'; + } + + } + elsif ($product_lc eq "fr") { + + # examples: + # Teneur en lait 25% minimum. + # Teneur en lactose < 0,01 g/100 g. + # Préparée avec 50 g de fruits pour 100 g de produit fini. + # Teneur totale en sucres : 60 g pour 100 g de produit fini. + + if ($text =~ /\s*(?:(?:préparé|prepare)(?:e|s|es)? avec)(?: au moins)?(?::| )+$percent_regexp (?:de |d')?([^,.;-]+?)\s*(?:pour 100\s*(?:g)(?:[^,.;-]*?))?(?:;|\.| - |$)/i) { + $percent = $1; # $percent_regexp + $ingredient = $2; + $matched_text = $&; + # Remove the matched text + $text = $` . ' ' . $'; + } + elsif ($text =~ /\s*teneur(?: min| minimum| minimale| totale)?(?: en | de | d'| du )([^,.;-]+?)\s*(?:pour 100\s*(?:g)(?: de produit(?: fini)?)?)?(?::| )+$percent_regexp\s*(?:pour 100\s*(?:g|gr|grammes)(?:[^,.;-]*?))?(?:;|\.| - |$)/i) { + $percent = $2; # $percent_regexp + $ingredient = $1; + $matched_text = $&; + # Remove the matched text + $text = $` . ' ' . $'; + } + } + + # If we found an ingredient, save it in specific_ingredients + if (defined $ingredient) { + my $ingredient_id = canonicalize_taxonomy_tag($product_lc, "ingredients", $ingredient); + + # We might have an ingredient specified multiple times (e.g. 
once for percent, another for origins or labels) + defined $product_ref->{specific_ingredients}{$ingredient_id} or $product_ref->{specific_ingredients}{$ingredient_id} = {}; + $product_ref->{specific_ingredients}{$ingredient_id}{ingredient} = $ingredient; + $product_ref->{specific_ingredients}{$ingredient_id}{text} = $matched_text; + + defined $percent and $product_ref->{specific_ingredients}{$ingredient_id}{percent} = $percent; + } + } + + # Delete specific ingredients if empty + if (scalar keys %{$product_ref->{specific_ingredients}} == 0) { + delete $product_ref->{specific_ingredients}; + } + + return $text; +} + + =head2 parse_ingredients_text ( product_ref ) Parse the ingredients_text field to extract individual ingredients. @@ -918,14 +1021,6 @@ sub parse_ingredients_text($) { my $level = 0; - # Farine de blé 56 g* ; beurre concentré 25 g* (soit 30 g* en beurre reconstitué); sucre 22 g* ; œufs frais 2 g - # 56 g -> 56% - $text =~ s/(\d| )g(\*)/$1g/ig; - - # transform 0,2% into 0.2% - $text =~ s/(\d),(\d+)( )?(\%|g\b)/$1.$2\%/ig; - $text =~ s/—/-/g; - # assume commas between numbers are part of the name # e.g. en:2-Bromo-2-Nitropropane-1,3-Diol, Bronopol # replace by a lower comma ‚ @@ -943,7 +1038,10 @@ sub parse_ingredients_text($) { $ignore_strings_after_percent = $ignore_strings_after_percent{$product_lc}; } - my $percent_regexp = '(<|' . $min_regexp . '|\s|\.|:)*(\d+((\,|\.)\d+)?)\s*(\%|g)\s*(' . $min_regexp . '|' . $ignore_strings_after_percent . '|\s|\)|\]|\}|\*)*'; + my $percent_regexp = '(?:<|' . $min_regexp . '|\s|\.|:)*(\d+(?:(?:\,|\.)\d+)?)\s*(?:\%|g)\s*(?:' . $min_regexp . '|' . $ignore_strings_after_percent . 
'|\s|\)|\]|\}|\*)*'; + + # Extract phrases related to specific ingredients at the end of the ingredients list + $text = parse_specific_ingredients_text($product_ref, $text, $percent_regexp); my $analyze_ingredients_function = sub($$$$) { @@ -1022,7 +1120,7 @@ sub parse_ingredients_text($) { if (($between =~ $separators) and ($` =~ /^$percent_regexp$/i)) { - $percent = $2; + $percent = $1; # remove what is before the first separator $between =~ s/(.*?)$separators//; $debug_ingredients and $log->debug("separator found after percent", { between => $between, percent => $percent }) if $log->is_debug(); @@ -1048,7 +1146,7 @@ sub parse_ingredients_text($) { if ($between =~ /^$percent_regexp$/i) { - $percent = $2; + $percent = $1; $debug_ingredients and $log->debug("between is a percent", { between => $between, percent => $percent }) if $log->is_debug(); $between = ''; } @@ -1146,7 +1244,7 @@ sub parse_ingredients_text($) { } if ($after =~ /^$percent_regexp($separators|$)/i) { - $percent = $2; + $percent = $1; $after = $'; $debug_ingredients and $log->debug("after started with a percent", { after => $after, percent => $percent }) if $log->is_debug(); } @@ -1243,7 +1341,7 @@ sub parse_ingredients_text($) { # Strawberry 10.3% if ($ingredient =~ /\s$percent_regexp$/i) { - $percent = $2; + $percent = $1; $debug_ingredients and $log->debug("percent found after", { ingredient => $ingredient, percent => $percent, new_ingredient => $`}) if $log->is_debug(); $ingredient = $`; } @@ -4298,6 +4396,20 @@ sub preparse_ingredients_text($$) { # turn & to and $text =~ s/ \& /$and/g; + # number + gr / grams -> g + $text =~ s/(\d\s*)(gr|gram|grams)\b/$1g/ig; + if ($product_lc eq 'fr') { + $text =~ s/(\d\s*)(gramme|grammes)\b/$1g/ig; + } + + # Farine de blé 56 g* ; beurre concentré 25 g* (soit 30 g* en beurre reconstitué); sucre 22 g* ; œufs frais 2 g + # 56 g -> 56% + $text =~ s/(\d| )g(\*)/$1g/ig; + + # transform 0,2% into 0.2% + $text =~ s/(\d),(\d+)( )?(\%|g\b)/$1.$2\%/ig; + $text =~ 
s/—/-/g; + # abbreviations, replace language specific abbreviations first foreach my $abbreviations_lc ($product_lc, "all") { if (defined $abbreviations{$abbreviations_lc}) { diff --git a/t/expected_test_results/ingredients/en-specific-ingredients.json b/t/expected_test_results/ingredients/en-specific-ingredients.json new file mode 100644 index 0000000000000..4e3bb994eb566 --- /dev/null +++ b/t/expected_test_results/ingredients/en-specific-ingredients.json @@ -0,0 +1,87 @@ +{ + "ingredients" : [ + { + "id" : "en:milk", + "percent_estimate" : 66.6666666666667, + "percent_max" : 100, + "percent_min" : 33.3333333333333, + "text" : "Milk", + "vegan" : "no", + "vegetarian" : "yes" + }, + { + "id" : "en:cream", + "percent_estimate" : 16.6666666666667, + "percent_max" : 50, + "percent_min" : 0, + "text" : "cream", + "vegan" : "no", + "vegetarian" : "yes" + }, + { + "id" : "en:sugar", + "percent_estimate" : 16.6666666666667, + "percent_max" : 33.3333333333333, + "percent_min" : 0, + "text" : "sugar", + "vegan" : "yes", + "vegetarian" : "yes" + } + ], + "ingredients_analysis_tags" : [ + "en:palm-oil-free", + "en:non-vegan", + "en:vegetarian" + ], + "ingredients_hierarchy" : [ + "en:milk", + "en:dairy", + "en:cream", + "en:sugar", + "en:added-sugar", + "en:disaccharide" + ], + "ingredients_n" : 3, + "ingredients_n_tags" : [ + "3", + "1-10" + ], + "ingredients_original_tags" : [ + "en:milk", + "en:cream", + "en:sugar" + ], + "ingredients_percent_analysis" : 1, + "ingredients_tags" : [ + "en:milk", + "en:dairy", + "en:cream", + "en:sugar", + "en:added-sugar", + "en:disaccharide" + ], + "ingredients_text" : "Milk, cream, sugar. Sugar content: 3 %. 
Total milk content: 75.2g", + "ingredients_with_specified_percent_n" : 0, + "ingredients_with_specified_percent_sum" : 0, + "ingredients_with_unspecified_percent_n" : 3, + "ingredients_with_unspecified_percent_sum" : 100, + "known_ingredients_n" : 6, + "lc" : "en", + "nutriments" : { + "fruits-vegetables-nuts-estimate-from-ingredients_100g" : 0, + "fruits-vegetables-nuts-estimate-from-ingredients_serving" : 0 + }, + "specific_ingredients" : { + "en:milk" : { + "ingredient" : "milk", + "percent" : "75.2", + "text" : " Total milk content: 75.2g" + }, + "en:sugar" : { + "ingredient" : "Sugar", + "percent" : "3", + "text" : " Sugar content: 3 %." + } + }, + "unknown_ingredients_n" : 0 +} diff --git a/t/expected_test_results/ingredients/fr-specific-ingredients.json b/t/expected_test_results/ingredients/fr-specific-ingredients.json new file mode 100644 index 0000000000000..6b5331ba388a1 --- /dev/null +++ b/t/expected_test_results/ingredients/fr-specific-ingredients.json @@ -0,0 +1,147 @@ +{ + "ingredients" : [ + { + "id" : "en:cane-sugar", + "labels" : "en:organic", + "percent_estimate" : 62.5, + "percent_max" : 100, + "percent_min" : 25, + "text" : "Sucre de canne", + "vegan" : "yes", + "vegetarian" : "yes" + }, + { + "id" : "en:apricot", + "labels" : "en:organic", + "percent_estimate" : 18.75, + "percent_max" : 50, + "percent_min" : 0, + "text" : "abricots", + "vegan" : "yes", + "vegetarian" : "yes" + }, + { + "id" : "en:concentrated-lemon-juice", + "labels" : "en:organic", + "percent_estimate" : 9.375, + "percent_max" : 33.3333333333333, + "percent_min" : 0, + "text" : "jus de citrons concentré", + "vegan" : "yes", + "vegetarian" : "yes" + }, + { + "id" : "en:gelling-agent", + "ingredients" : [ + { + "id" : "en:fruit-pectin", + "percent_estimate" : 9.375, + "percent_max" : 25, + "percent_min" : 0, + "text" : "pectines de fruits", + "vegan" : "yes", + "vegetarian" : "yes" + } + ], + "percent_estimate" : 9.375, + "percent_max" : 25, + "percent_min" : 0, + "text" : 
"gélifiant" + } + ], + "ingredients_analysis_tags" : [ + "en:palm-oil-free", + "en:vegan", + "en:vegetarian" + ], + "ingredients_hierarchy" : [ + "en:cane-sugar", + "en:added-sugar", + "en:disaccharide", + "en:sugar", + "en:apricot", + "en:fruit", + "en:concentrated-lemon-juice", + "en:citrus-fruit", + "en:lemon", + "en:gelling-agent", + "en:fruit-pectin", + "en:e440a" + ], + "ingredients_n" : 5, + "ingredients_n_tags" : [ + "5", + "1-10" + ], + "ingredients_original_tags" : [ + "en:cane-sugar", + "en:apricot", + "en:concentrated-lemon-juice", + "en:gelling-agent", + "en:fruit-pectin" + ], + "ingredients_percent_analysis" : 1, + "ingredients_tags" : [ + "en:cane-sugar", + "en:added-sugar", + "en:disaccharide", + "en:sugar", + "en:apricot", + "en:fruit", + "en:concentrated-lemon-juice", + "en:citrus-fruit", + "en:lemon", + "en:gelling-agent", + "en:fruit-pectin", + "en:e440a" + ], + "ingredients_text" : "Sucre de canne*, abricots*, jus de citrons concentré*, gélifiant : pectines de fruits. *biologique.\nPréparée avec 50 grammes de fruits pour 100gr de produit fini.\nPréparé avec 32,5 % de légumes -\nPréparés avec 25,2g de tomates.\nPREPARE AVEC 30% DE TRUC INCONNU.\nTeneur totale en sucres : 60 g pour 100 g de produit fini.\nTeneur en lait: minimum 40%.\nTeneur minimum en jus de fruits 35 grammes pour 100 grammes de produit fini.\nPrésence exceptionnelle possible de noyaux ou de morceaux de noyaux.", + "ingredients_with_specified_percent_n" : 0, + "ingredients_with_specified_percent_sum" : 0, + "ingredients_with_unspecified_percent_n" : 4, + "ingredients_with_unspecified_percent_sum" : 100, + "known_ingredients_n" : 12, + "lc" : "fr", + "nutriments" : { + "fruits-vegetables-nuts-estimate-from-ingredients_100g" : 0, + "fruits-vegetables-nuts-estimate-from-ingredients_serving" : 0 + }, + "specific_ingredients" : { + "en:fruit" : { + "ingredient" : "fruits", + "percent" : "50", + "text" : " Préparée avec 50 g de fruits pour 100g de produit fini." 
+ }, + "en:fruit-juice" : { + "ingredient" : "jus de fruits", + "percent" : "35", + "text" : " Teneur minimum en jus de fruits 35 g pour 100 g de produit fini." + }, + "en:milk" : { + "ingredient" : "lait", + "percent" : "40", + "text" : " Teneur en lait: minimum 40%." + }, + "en:sugar" : { + "ingredient" : "sucres", + "percent" : "60", + "text" : " Teneur totale en sucres : 60 g pour 100 g de produit fini." + }, + "en:tomato" : { + "ingredient" : "tomates", + "percent" : "25.2", + "text" : " Préparés avec 25.2% de tomates." + }, + "en:vegetable" : { + "ingredient" : "légumes", + "percent" : "32.5", + "text" : " Préparé avec 32.5% de légumes - " + }, + "fr:TRUC INCONNU" : { + "ingredient" : "TRUC INCONNU", + "percent" : "30", + "text" : " PREPARE AVEC 30% DE TRUC INCONNU." + } + }, + "unknown_ingredients_n" : 0 +} diff --git a/t/expected_test_results/nutriscore/fr-gaspacho.json b/t/expected_test_results/nutriscore/fr-gaspacho.json new file mode 100644 index 0000000000000..7c9db8507aed6 --- /dev/null +++ b/t/expected_test_results/nutriscore/fr-gaspacho.json @@ -0,0 +1,305 @@ +{ + "categories" : "gaspachos", + "categories_hierarchy" : [ + "en:plant-based-foods-and-beverages", + "en:plant-based-foods", + "en:fruits-and-vegetables-based-foods", + "en:meals", + "en:soups", + "en:vegetable-soups", + "en:cold-soups", + "en:gazpacho" + ], + "categories_lc" : "fr", + "categories_properties" : { + "agribalyse_food_code:en" : "25967", + "agribalyse_proxy_food_code:en" : "25903", + "ciqual_food_code:en" : "25967" + }, + "categories_properties_tags" : [ + "all-products", + "categories-known", + "agribalyse-food-code-25967", + "agribalyse-food-code-known", + "agribalyse-proxy-food-code-25903", + "agribalyse-proxy-food-code-known", + "ciqual-food-code-25967", + "ciqual-food-code-known", + "agribalyse-known", + "agribalyse-25967" + ], + "categories_tags" : [ + "en:plant-based-foods-and-beverages", + "en:plant-based-foods", + "en:fruits-and-vegetables-based-foods", + "en:meals", + 
"en:soups", + "en:vegetable-soups", + "en:cold-soups", + "en:gazpacho" + ], + "food_groups" : "en:soups", + "food_groups_tags" : [ + "en:fruits-and-vegetables", + "en:soups" + ], + "ingredients" : [ + { + "id" : "en:tomato", + "percent_estimate" : 51.7954545454545, + "percent_max" : 94.5, + "percent_min" : 9.09090909090909, + "text" : "Tomate", + "vegan" : "yes", + "vegetarian" : "yes" + }, + { + "id" : "en:cucumber", + "percent_estimate" : 24.45, + "percent_max" : 47.8, + "percent_min" : 1.1, + "text" : "concombre", + "vegan" : "yes", + "vegetarian" : "yes" + }, + { + "id" : "en:bell-pepper", + "percent_estimate" : 12.4272727272727, + "percent_max" : 32.2333333333333, + "percent_min" : 1.1, + "text" : "poivron", + "vegan" : "yes", + "vegetarian" : "yes" + }, + { + "id" : "en:onion", + "percent_estimate" : 6.21363636363636, + "percent_max" : 24.45, + "percent_min" : 1.1, + "text" : "oignon", + "vegan" : "yes", + "vegetarian" : "yes" + }, + { + "id" : "en:water", + "percent_estimate" : 3.10681818181818, + "percent_max" : 19.78, + "percent_min" : 1.1, + "text" : "eau", + "vegan" : "yes", + "vegetarian" : "yes" + }, + { + "from_palm_oil" : "no", + "id" : "en:extra-virgin-olive-oil", + "percent" : 1.1, + "percent_estimate" : 1.1, + "percent_max" : 1.1, + "percent_min" : 1.1, + "text" : "huile d'olive vierge extra", + "vegan" : "yes", + "vegetarian" : "yes" + }, + { + "id" : "en:wine-vinegar", + "percent_estimate" : 0.453409090909091, + "percent_max" : 1.1, + "percent_min" : 0, + "text" : "vinaigre de vin", + "vegan" : "yes", + "vegetarian" : "yes" + }, + { + "id" : "fr:pain de riz", + "percent_estimate" : 0.226704545454545, + "percent_max" : 1.1, + "percent_min" : 0, + "text" : "pain de riz" + }, + { + "id" : "en:salt", + "percent_estimate" : 0.113352272727269, + "percent_max" : 1.1, + "percent_min" : 0, + "text" : "sel", + "vegan" : "yes", + "vegetarian" : "yes" + }, + { + "id" : "en:garlic", + "percent_estimate" : 0.0566761363636346, + "percent_max" : 1.1, + 
"percent_min" : 0, + "text" : "ail", + "vegan" : "yes", + "vegetarian" : "yes" + }, + { + "id" : "en:lemon-juice", + "percent_estimate" : 0.0566761363636346, + "percent_max" : 1.1, + "percent_min" : 0, + "text" : "jus de citron", + "vegan" : "yes", + "vegetarian" : "yes" + } + ], + "ingredients_analysis_tags" : [ + "en:palm-oil-content-unknown", + "en:vegan-status-unknown", + "en:vegetarian-status-unknown" + ], + "ingredients_hierarchy" : [ + "en:tomato", + "en:vegetable", + "en:cucumber", + "en:bell-pepper", + "en:onion", + "en:root-vegetable", + "en:water", + "en:extra-virgin-olive-oil", + "en:oil-and-fat", + "en:vegetable-oil-and-fat", + "en:vegetable-oil", + "en:olive-oil", + "en:virgin-olive-oil", + "en:wine-vinegar", + "en:vinegar", + "fr:pain de riz", + "en:salt", + "en:garlic", + "en:lemon-juice", + "en:fruit", + "en:fruit-juice" + ], + "ingredients_n" : 11, + "ingredients_n_tags" : [ + "11", + "11-20" + ], + "ingredients_original_tags" : [ + "en:tomato", + "en:cucumber", + "en:bell-pepper", + "en:onion", + "en:water", + "en:extra-virgin-olive-oil", + "en:wine-vinegar", + "fr:pain de riz", + "en:salt", + "en:garlic", + "en:lemon-juice" + ], + "ingredients_percent_analysis" : 1, + "ingredients_tags" : [ + "en:tomato", + "en:vegetable", + "en:cucumber", + "en:bell-pepper", + "en:onion", + "en:root-vegetable", + "en:water", + "en:extra-virgin-olive-oil", + "en:oil-and-fat", + "en:vegetable-oil-and-fat", + "en:vegetable-oil", + "en:olive-oil", + "en:virgin-olive-oil", + "en:wine-vinegar", + "en:vinegar", + "fr:pain-de-riz", + "en:salt", + "en:garlic", + "en:lemon-juice", + "en:fruit", + "en:fruit-juice" + ], + "ingredients_text" : "Tomate,concombre,poivron,oignon,eau,huile d'olive vierge extra (1,1%),vinaigre de vin,pain de riz,sel,ail,jus de citron,teneur en légumes: 89%", + "ingredients_with_specified_percent_n" : 1, + "ingredients_with_specified_percent_sum" : 1.1, + "ingredients_with_unspecified_percent_n" : 10, + "ingredients_with_unspecified_percent_sum" 
: 98.9, + "known_ingredients_n" : 20, + "lc" : "fr", + "misc_tags" : [ + "en:nutrition-fruits-vegetables-nuts-estimate-from-ingredients", + "en:nutrition-all-nutriscore-values-known", + "en:nutriscore-computed" + ], + "nutriments" : { + "energy_100g" : 148, + "fat_100g" : 10, + "fiber_100g" : 1.1, + "fruits-vegetables-nuts-estimate-from-ingredients_100g" : 13.4909090909091, + "fruits-vegetables-nuts-estimate-from-ingredients_serving" : 13.4909090909091, + "nutrition-score-fr" : 1, + "nutrition-score-fr_100g" : 1, + "proteins_100g" : 0.9, + "saturated-fat_100g" : 0.2, + "sodium_100g" : 0.2, + "sugars_100g" : 3 + }, + "nutriscore_data" : { + "energy" : 148, + "energy_points" : 0, + "energy_value" : 148, + "fiber" : 1.1, + "fiber_points" : 1, + "fiber_value" : 1.1, + "fruits_vegetables_nuts_colza_walnut_olive_oils" : 13.4909090909091, + "fruits_vegetables_nuts_colza_walnut_olive_oils_points" : 0, + "fruits_vegetables_nuts_colza_walnut_olive_oils_value" : 13.5, + "grade" : "b", + "is_beverage" : 0, + "is_cheese" : 0, + "is_fat" : 0, + "is_water" : 0, + "negative_points" : 2, + "positive_points" : 1, + "proteins" : 0.9, + "proteins_points" : 0, + "proteins_value" : 0.9, + "saturated_fat" : 0.2, + "saturated_fat_points" : 0, + "saturated_fat_ratio" : 2, + "saturated_fat_ratio_points" : 0, + "saturated_fat_ratio_value" : 2, + "saturated_fat_value" : 0.2, + "score" : 1, + "sodium" : 200, + "sodium_points" : 2, + "sodium_value" : 200, + "sugars" : 3, + "sugars_points" : 0, + "sugars_value" : 3 + }, + "nutriscore_grade" : "b", + "nutriscore_score" : 1, + "nutriscore_score_opposite" : -1, + "nutrition_grade_fr" : "b", + "nutrition_grades" : "b", + "nutrition_grades_tags" : [ + "b" + ], + "nutrition_score_beverage" : 0, + "nutrition_score_warning_fruits_vegetables_nuts_estimate_from_ingredients" : 1, + "nutrition_score_warning_fruits_vegetables_nuts_estimate_from_ingredients_value" : 13.4909090909091, + "pnns_groups_1" : "Fruits and vegetables", + "pnns_groups_1_tags" : [ + 
"fruits-and-vegetables", + "known" + ], + "pnns_groups_2" : "Soups", + "pnns_groups_2_tags" : [ + "soups", + "known" + ], + "specific_ingredients" : { + "fr:légumes" : { + "ingredient" : "légumes", + "percent" : "89", + "text" : "teneur en légumes: 89%" + } + }, + "unknown_ingredients_n" : 1 +} diff --git a/t/ingredients.t b/t/ingredients.t index 4a7a8e4ac9201..f940e85092fba 100755 --- a/t/ingredients.t +++ b/t/ingredients.t @@ -365,6 +365,31 @@ my @tests = ( } ], + # Specific ingredients mentions + [ + "fr-specific-ingredients", + { + lc => "fr", + ingredients_text => "Sucre de canne*, abricots*, jus de citrons concentré*, gélifiant : pectines de fruits. *biologique. +Préparée avec 50 grammes de fruits pour 100gr de produit fini. +Préparé avec 32,5 % de légumes - +Préparés avec 25,2g de tomates. +PREPARE AVEC 30% DE TRUC INCONNU. +Teneur totale en sucres : 60 g pour 100 g de produit fini. +Teneur en lait: minimum 40%. +Teneur minimum en jus de fruits 35 grammes pour 100 grammes de produit fini. +Présence exceptionnelle possible de noyaux ou de morceaux de noyaux.", + } + ], + + [ + "en-specific-ingredients", + { + lc => "en", + ingredients_text => "Milk, cream, sugar. Sugar content: 3 %. 
Total milk content: 75.2g", + }, + ] + ); diff --git a/t/ingredients_parsing.t b/t/ingredients_parsing.t index 04432fad0acb9..b09cd748de5f0 100755 --- a/t/ingredients_parsing.t +++ b/t/ingredients_parsing.t @@ -84,7 +84,7 @@ my @lists =( ["es","colores E (120, 124 y 125)", "colores E120, E124, E125"], ["es","Leche desnatada de vaca, enzima lactasa y vitaminas A, D, E y ácido fólico.","Leche desnatada de vaca, enzima lactasa y vitaminas, vitamina A, vitamina D, vitamina E, ácido fólico."], ["es","Leche desnatada, leche desnatada en polvo, zumo de lima, almidón de maíz, extracto de ginseng 0,19%, aromas, fermentos lácticos con Lcasei, colorante: caramelo natural, edulcorantes: sucralosa y acesulfamo K, estabilizante: goma xantana, vitaminas: D, B6, ácido fólico y B12 Origen de la feche. España. Preparación: Agitar antes de abrir.", - "Leche desnatada, leche desnatada en polvo, zumo de lima, almidón de maíz, extracto de ginseng 0,19%, aromas, fermentos lácticos con Lcasei, colorante: caramelo natural, edulcorantes: sucralosa y acesulfamo K, estabilizante: goma xantana, vitaminas, vitamina D, vitamina B6, ácido fólico, vitamina B12 Origen de la feche. España. Preparación: Agitar antes de abrir."], + "Leche desnatada, leche desnatada en polvo, zumo de lima, almidón de maíz, extracto de ginseng 0.19%, aromas, fermentos lácticos con Lcasei, colorante: caramelo natural, edulcorantes: sucralosa y acesulfamo K, estabilizante: goma xantana, vitaminas, vitamina D, vitamina B6, ácido fólico, vitamina B12 Origen de la feche. España. 
Preparación: Agitar antes de abrir."], ["es","edulcorantes (acesulfamo K y sucralosa) y vitaminas (riboflavina (vitamina B2) y cianocobalamina vitamina B12))", "edulcorantes (acesulfamo K y sucralosa), vitaminas (riboflavina (vitamina B2), cianocobalamina vitamina B12))"], ["es","aceites vegetales [aceite de girasol (70%) y aceite de oliva virgen (30%)] y sal", @@ -168,10 +168,10 @@ my @lists =( ["fr","p\x{e2}te de cacao* de Madagascar 75%, sucre de canne*, beurre de cacao*. * issus du commerce \x{e9}quitable et de l'agriculture biologique (100% du poids total).","pâte de cacao Commerce équitable Bio de Madagascar 75%, sucre de canne Commerce équitable Bio, beurre de cacao Commerce équitable Bio."], - ["fr","Céleri - rave 21% - Eau, légumes 33,6% (carottes, céleri - rave, poivrons rouges 5,8% - haricots - petits pois bio - haricots verts - courge - radis, pommes de terre - patates - fenouil - cerfeuil tubéreux - persil plat)","Céleri-rave 21% - Eau, légumes 33,6% (carottes, céleri-rave, poivrons rouges 5,8% - haricots - petits pois bio - haricots verts - courge - radis, pommes de terre - patates - fenouil - cerfeuil tubéreux - persil plat)"], + ["fr","Céleri - rave 21% - Eau, légumes 33,6% (carottes, céleri - rave, poivrons rouges 5,8% - haricots - petits pois bio - haricots verts - courge - radis, pommes de terre - patates - fenouil - cerfeuil tubéreux - persil plat)","Céleri-rave 21% - Eau, légumes 33.6% (carottes, céleri-rave, poivrons rouges 5.8% - haricots - petits pois bio - haricots verts - courge - radis, pommes de terre - patates - fenouil - cerfeuil tubéreux - persil plat)"], ["fr","poudres à lever : carbonates d'ammonium - carbonates de sodium - phosphates de calcium, farine, sel","poudres à lever : carbonates d'ammonium - carbonates de sodium - phosphates de calcium, farine, sel"], ["en","FD&C Red #40 Lake and silicon dioxide","FD&C Red #40 Lake and silicon dioxide"], - ["fr","Lait pasteurisé à 1,1% de Mat. 
Gr.","Lait pasteurisé à 1,1% de Matières Grasses"], + ["fr","Lait pasteurisé à 1,1% de Mat. Gr.","Lait pasteurisé à 1.1% de Matières Grasses"], ["fr","matière grasse végétale (palme) raffinée","matière grasse végétale de palme raffinée"], ["fr","huile d'olive vierge, origan", "huile d'olive vierge, origan"], ["fr","huile de tournesol, cacao maigre en poudre 5.2%", "huile de tournesol, cacao maigre en poudre 5.2%"], @@ -180,8 +180,8 @@ my @lists =( ["de","Wasser, Kohlensäure, Farbstoff Zuckerkulör E 150d, Süßungsmittel Aspartam* und Acesulfam-K, Säuerungsmittel Phosphorsäure und Citronensäure, Säureregulator Natriumcitrat, Aroma Koffein, Aroma. enthält eine Phenylalaninquelle", "Wasser, Kohlensäure, Farbstoff : Zuckerkulör e150d, Süßungsmittel : Aspartam* und Acesulfam-K, Säuerungsmittel : Phosphorsäure und Citronensäure, Säureregulator : Natriumcitrat, Aroma Koffein, Aroma. enthält eine Phenylalaninquelle"], ["de","Farbstoffe Betenrot, Paprikaextrakt, Kurkumin","farbstoffe : betenrot, paprikaextrakt, kurkumin"], - ["de","Zucker, Glukosesirup, Glukose-Fruktose-Sirup, Stärke, 8,5% Süßholzsaft, brauner Zuckersirup, modifizierte Stärke, Aromen, pflanzliches Öl (Sonnenblume), Überzugsmittel: Bienenwachs, weiß und gelb", "Zucker, Glukosesirup, Glukose-Fruktose-Sirup, Stärke, 8,5% Süßholzsaft, brauner Zuckersirup, modifizierte Stärke, Aromen, pflanzliches Öl (Sonnenblume), Überzugsmittel: Bienenwachs weiß und gelb"], - ["de","Zucker, Glukosesirup, Glukose-Fruktose-Sirup, Stärke, 8,5% Süßholzsaft, brauner Zuckersirup, modifizierte Stärke, Aromen, pflanzliches Öl (Sonnenblume), Überzugsmittel: Bienenwachs (weiß und gelb)", "Zucker, Glukosesirup, Glukose-Fruktose-Sirup, Stärke, 8,5% Süßholzsaft, brauner Zuckersirup, modifizierte Stärke, Aromen, pflanzliches Öl (Sonnenblume), Überzugsmittel: Bienenwachs weiß und gelb"], + ["de","Zucker, Glukosesirup, Glukose-Fruktose-Sirup, Stärke, 8,5% Süßholzsaft, brauner Zuckersirup, modifizierte Stärke, Aromen, pflanzliches Öl 
(Sonnenblume), Überzugsmittel: Bienenwachs, weiß und gelb", "Zucker, Glukosesirup, Glukose-Fruktose-Sirup, Stärke, 8.5% Süßholzsaft, brauner Zuckersirup, modifizierte Stärke, Aromen, pflanzliches Öl (Sonnenblume), Überzugsmittel: Bienenwachs weiß und gelb"], + ["de","Zucker, Glukosesirup, Glukose-Fruktose-Sirup, Stärke, 8,5% Süßholzsaft, brauner Zuckersirup, modifizierte Stärke, Aromen, pflanzliches Öl (Sonnenblume), Überzugsmittel: Bienenwachs (weiß und gelb)", "Zucker, Glukosesirup, Glukose-Fruktose-Sirup, Stärke, 8.5% Süßholzsaft, brauner Zuckersirup, modifizierte Stärke, Aromen, pflanzliches Öl (Sonnenblume), Überzugsmittel: Bienenwachs weiß und gelb"], ["fr","graisse végétale bio (colza)","graisse végétale bio de colza"], ["fr","huiles végétales* (huile de tournesol*, huile de colza*). *Ingrédients issus de l'agriculture biologique","huiles végétales bio (huile de tournesol bio, huile de colza bio )."], @@ -196,7 +196,7 @@ my @lists =( ["fr","riz de Camargue (1), sel. (1): IGP : Indication Géographique Protégée.", "riz de Camargue IGP, sel."], ["fr","cacao (1), sucre (2), beurre de cacao (1). (1) : Commerce équitable. (2) Issue de l'agriculture biologique.", "cacao Commerce équitable, sucre Bio, beurre de cacao Commerce équitable."], - ["fr","Céréales 63,7% (BLE complet 50,5%*, semoule de maïs*), sucre*, sirop de BLE*, cacao maigre en poudre 3,9%*, cacao en poudre 1,7%*, sel, arôme naturel. *Ingrédients issus de l'agriculture biologique.","Céréales 63,7% (BLE complet 50,5% Bio, semoule de maïs Bio ), sucre Bio, sirop de BLE Bio, cacao maigre en poudre 3,9% Bio, cacao en poudre 1,7% Bio, sel, arôme naturel."], + ["fr","Céréales 63,7% (BLE complet 50,5%*, semoule de maïs*), sucre*, sirop de BLE*, cacao maigre en poudre 3,9%*, cacao en poudre 1,7%*, sel, arôme naturel. 
*Ingrédients issus de l'agriculture biologique.","Céréales 63.7% (BLE complet 50.5% Bio, semoule de maïs Bio ), sucre Bio, sirop de BLE Bio, cacao maigre en poudre 3.9% Bio, cacao en poudre 1.7% Bio, sel, arôme naturel."], ["fr","émulsifiant : mono - et diglycérides d'acides gras.","émulsifiant : mono- et diglycérides d'acides gras."], @@ -207,7 +207,7 @@ my @lists =( ["en", "vegetable oil (coconut & rapeseed)", "vegetable oil (coconut and rapeseed)"], - ["fr", "Masse de cacao°, Quinoa° (1,8%). °Produits issus de l'agriculture biologique.", "Masse de cacao Bio, Quinoa Bio (1,8%)."], + ["fr", "Masse de cacao°, Quinoa° (1,8%). °Produits issus de l'agriculture biologique.", "Masse de cacao Bio, Quinoa Bio (1.8%)."], ["de", "Emulgator (Sojalecithine, Mono - und Diglyceride von Speisefettsäuren, Sorbitantristearat)", "Emulgator (Sojalecithine, mono- und Diglyceride von Speisefettsäuren, Sorbitantristearat)"], @@ -233,7 +233,7 @@ my @lists =( ["es", "Agua, aceite de girasol*. * Ingredientes ecológicos.", "Agua, aceite de girasol Ecológico."], ["es", "Agua, aceite de girasol*, arroz* (5 %). (*) Ingredientes ecológicos.", "Agua, aceite de girasol Ecológico, arroz Ecológico (5 %)."], ["es", "Tofu* 88% (agua, habas de soja*). *cumple con el reglamento de agricultura ecológica CE 2092/91", "Tofu Ecológico 88% (agua, habas de soja Ecológico )."], - ["es", "agua, almendra* (5,5%). *= procedentes de la agricultura ecológica", "agua, almendra Ecológico (5,5%)."], + ["es", "agua, almendra* (5,5%). *= procedentes de la agricultura ecológica", "agua, almendra Ecológico (5.5%)."], # test for bug #3273 that introduced unwanted separators before natural flavor ["en", "non-gmo natural flavor", "non-gmo natural flavor"], @@ -299,7 +299,7 @@ my @lists =( # ¹ and ² symbols ["fr", "Sel, sucre², graisse de palme¹, amidons¹ (maïs¹, pomme de terre¹), oignon¹ : 8,9%, ail¹, oignon grillé¹ : 1,4%, épices¹ et aromate¹ (livèche¹ : 0,4%, curcuma¹, noix de muscade¹), carotte¹ : 0,5%. 
Peut contenir : céleri, céréales contenant du gluten, lait, moutarde, œuf, soja. ¹Ingrédients issus de l'Agriculture Biologique. ² Ingrédients issus du commerce équitable", -"Sel, sucre Commerce équitable, graisse de palme Bio, amidons Bio (maïs Bio, pomme de terre Bio ), oignon Bio : 8,9%, ail Bio, oignon grillé Bio : 1,4%, épices Bio et aromate Bio (livèche Bio : 0,4%, curcuma Bio, noix de muscade Bio ), carotte Bio : 0,5%. Traces éventuelles : céleri, Traces éventuelles : céréales contenant du gluten, Traces éventuelles : lait, Traces éventuelles : moutarde, Traces éventuelles : œuf, Traces éventuelles : soja."], +"Sel, sucre Commerce équitable, graisse de palme Bio, amidons Bio (maïs Bio, pomme de terre Bio ), oignon Bio : 8.9%, ail Bio, oignon grillé Bio : 1.4%, épices Bio et aromate Bio (livèche Bio : 0.4%, curcuma Bio, noix de muscade Bio ), carotte Bio : 0.5%. Traces éventuelles : céleri, Traces éventuelles : céréales contenant du gluten, Traces éventuelles : lait, Traces éventuelles : moutarde, Traces éventuelles : œuf, Traces éventuelles : soja."], # Russian е character ["ru", "е322, Куркумины e100, е-1442, (е621)", "e322, куркумины e100, e1442, (e621)"], @@ -318,6 +318,9 @@ my @lists =( ["ru", "масло (Подсолнечное)", "масло Подсолнечное"], ["ru", "Масло (подсолнечное)", "Масло подсолнечное"], ["ru", "масло растительное (подсолнечное, соевое)","масло растительное подсолнечное, масло растительное соевое"], + + # grammes -> g + ["fr", "Teneur en fruits: 50gr pour 100 grammes", "Teneur en fruits: 50g pour 100 g"] ); foreach my $test_ref (@lists) { diff --git a/t/nutriscore.t b/t/nutriscore.t index 536b8ca2790d0..98f4540051aed 100644 --- a/t/nutriscore.t +++ b/t/nutriscore.t @@ -71,6 +71,18 @@ my @tests = ( ["mushrooms", { lc=>"fr", categories=>"meals", nutriments=>{energy_100g=>667, fat_100g=>8.4, "saturated-fat_100g"=>1.2, sugars_100g=>1.1, sodium_100g=>0.4, fiber_100g=>10.9, proteins_100g=>2.4}, ingredients_text=>"Pleurotes* 69% (Origine UE), chapelure de 
mais"}], +# fruit / vegetable content indicated at the end of the ingredients list +[ + "fr-gaspacho", + { + lc => "fr", + categories => "gaspachos", + ingredients_text => "Tomate,concombre,poivron,oignon,eau,huile d'olive vierge extra (1,1%),vinaigre de vin,pain de riz,sel,ail,jus de citron,teneur en légumes: 89%", + nutriments=>{energy_100g=>148, fat_100g=>10, "saturated-fat_100g"=>0.2, sugars_100g=>3, sodium_100g=>0.2, fiber_100g=>1.1, proteins_100g=>0.9}, + } + +], + );