diff --git a/Makefile b/Makefile index 4164c97bd8652..08af01b288a8a 100644 --- a/Makefile +++ b/Makefile @@ -381,17 +381,19 @@ check_critic: @echo "🥫 Checking with perlcritic" test -z "${TO_CHECK}" || ${DOCKER_COMPOSE} run --rm --no-deps backend perlcritic ${TO_CHECK} -TAXONOMIES_TO_CHECK := $(shell [ -x "`which git 2>/dev/null`" ] && git diff origin/main --name-only | grep 'taxonomies*/*\.txt$$' | grep -v '\.result.txt' | xargs ls -d 2>/dev/null | grep -v "^.$$") +TAXONOMIES_TO_CHECK := $(shell [ -x "`which git 2>/dev/null`" ] && git diff origin/main --name-only | grep 'taxonomies.*/.*\.txt$$' | grep -v '\.result.txt' | xargs ls -d 2>/dev/null | grep -v "^.$$") +# TODO remove --no-sort as soon as we have sorted taxonomies check_taxonomies: @echo "🥫 Checking taxonomies" test -z "${TAXONOMIES_TO_CHECK}" || \ - ${DOCKER_COMPOSE} run --rm --no-deps backend scripts/taxonomies/sort_each_taxonomy_entry.sh --check ${TAXONOMIES_TO_CHECK} + ${DOCKER_COMPOSE} run --rm --no-deps backend scripts/taxonomies/lint_taxonomy.pl --verbose --check --no-sort ${TAXONOMIES_TO_CHECK} +# TODO remove --no-sort as soon as we have sorted taxonomies lint_taxonomies: @echo "🥫 Linting taxonomies" test -z "${TAXONOMIES_TO_CHECK}" || \ - ${DOCKER_COMPOSE} run --rm --no-deps backend scripts/taxonomies/sort_each_taxonomy_entry.sh ${TAXONOMIES_TO_CHECK} + ${DOCKER_COMPOSE} run --rm --no-deps backend scripts/taxonomies/lint_taxonomy.pl --verbose --no-sort ${TAXONOMIES_TO_CHECK} check_openapi_v2: @@ -415,7 +417,11 @@ build_taxonomies: create_folders # GITHUB_TOKEN might be empty, but if it's a valid token it enables pushing taxonomies to build cache repository ${DOCKER_COMPOSE} run --no-deps --rm -e GITHUB_TOKEN=${GITHUB_TOKEN} backend /opt/product-opener/scripts/taxonomies/build_tags_taxonomy.pl ${name} -rebuild_taxonomies: build_taxonomies +# a version where we force building without using cache +# use it when you are developing in Tags.pm and want to iterate +# at the end, change the 
$BUILD_TAGS_VERSION in Tags.pm +rebuild_taxonomies: create_folders + ${DOCKER_COMPOSE} run --no-deps --rm -e TAXONOMY_NO_GET_FROM_CACHE=1 backend /opt/product-opener/scripts/taxonomies/build_tags_taxonomy.pl ${name} build_taxonomies_test: create_folders @echo "🥫 build taxonomies" diff --git a/lib/ProductOpener/Config_obf.pm b/lib/ProductOpener/Config_obf.pm index 4d058f7a46777..1970e3050838d 100644 --- a/lib/ProductOpener/Config_obf.pm +++ b/lib/ProductOpener/Config_obf.pm @@ -113,6 +113,8 @@ use ProductOpener::Config2; # lowercase: # - useful when the same word appears in lowercase, with a first capital letter, or in all caps. +# IMPORTANT: if you change it, you need to change $BUILD_TAGS_VERSION in Tags.pm + %string_normalization_for_lang = ( # no_language is used for strings that are not in a specific language (e.g. user names) no_language => { diff --git a/lib/ProductOpener/Config_off.pm b/lib/ProductOpener/Config_off.pm index 05a3fb2cc3863..f236f9a644a3b 100644 --- a/lib/ProductOpener/Config_off.pm +++ b/lib/ProductOpener/Config_off.pm @@ -112,6 +112,8 @@ use ProductOpener::Config2; # lowercase: # - useful when the same word appears in lowercase, with a first capital letter, or in all caps. +# IMPORTANT: if you change it, you need to change $BUILD_TAGS_VERSION in Tags.pm + %string_normalization_for_lang = ( # no_language is used for strings that are not in a specific language (e.g. user names) no_language => { diff --git a/lib/ProductOpener/Config_opf.pm b/lib/ProductOpener/Config_opf.pm index e2f76af9916e3..e344bc0332168 100644 --- a/lib/ProductOpener/Config_opf.pm +++ b/lib/ProductOpener/Config_opf.pm @@ -112,6 +112,8 @@ use ProductOpener::Config2; # lowercase: # - useful when the same word appears in lowercase, with a first capital letter, or in all caps. +# IMPORTANT: if you change it, you need to change $BUILD_TAGS_VERSION in Tags.pm + %string_normalization_for_lang = ( # no_language is used for strings that are not in a specific language (e.g. 
user names) no_language => { diff --git a/lib/ProductOpener/Config_opff.pm b/lib/ProductOpener/Config_opff.pm index 42a032988d50b..14782c49081ab 100644 --- a/lib/ProductOpener/Config_opff.pm +++ b/lib/ProductOpener/Config_opff.pm @@ -112,6 +112,8 @@ use ProductOpener::Config2; # lowercase: # - useful when the same word appears in lowercase, with a first capital letter, or in all caps. +# IMPORTANT: if you change it, you need to change $BUILD_TAGS_VERSION in Tags.pm + %string_normalization_for_lang = ( # no_language is used for strings that are not in a specific language (e.g. user names) no_language => { diff --git a/lib/ProductOpener/Store.pm b/lib/ProductOpener/Store.pm index fdf76c987ab0c..a0ba10a07eaf9 100644 --- a/lib/ProductOpener/Store.pm +++ b/lib/ProductOpener/Store.pm @@ -85,6 +85,9 @@ sub unac_string_perl ($s) { # 4. keep other UTF-8 characters (e.g. Chinese, Japanese, Korean, Arabic, Hebrew etc.) untouched # 5. remove leading and trailing -, turn multiple - to - +# IMPORTANT: if you change the behaviour of this method, +# you need to change $BUILD_TAGS_VERSION in Tags.pm + sub get_string_id_for_lang ($lc, $string) { defined $lc or die("Undef \$lc in call to get_string_id_for_lang (string: $string)\n"); diff --git a/lib/ProductOpener/Tags.pm b/lib/ProductOpener/Tags.pm index 14bc66a2a35a7..eadc689fa1436 100644 --- a/lib/ProductOpener/Tags.pm +++ b/lib/ProductOpener/Tags.pm @@ -53,6 +53,8 @@ BEGIN { &canonicalize_tag2 &canonicalize_tag_link + &sanitize_taxonomy_line + &has_tag &has_one_of_the_tags_from_the_list &add_tag @@ -73,6 +75,7 @@ BEGIN { %tags_texts %level %special_tags + %translations_from &get_taxonomyid @@ -179,6 +182,7 @@ use Clone qw(clone); use List::MoreUtils qw(uniq); use URI::Escape::XS; +use List::Util qw(first); use Log::Any qw($log); use Digest::SHA1; use File::Copy; @@ -261,8 +265,10 @@ To this initial list, taxonomized fields will be added by retrieve_tags_taxonomy weighers => 1, ); -# Fields that have an associated taxonomy 
-%taxonomy_fields = (); # populated by retrieve_tags_taxonomy +# Fields (i.e. properties) that have an associated taxonomy. +# Most of the time a field is associated with the taxonomy of the same name, +# but there might be other properties referring to a taxonomy under another name +%taxonomy_fields = (); # populated by retrieve_tags_taxonomy and init_taxonomies # Fields that can have different values by language %language_fields = ( @@ -304,7 +310,7 @@ my %just_tags = (); # does not include synonyms that are only synonyms my %synonyms = (); %synonyms_for = (); my %synonyms_for_extended = (); -my %translations_from = (); +%translations_from = (); %translations_to = (); %level = (); my %direct_parents = (); @@ -1036,6 +1042,12 @@ sub get_file_from_cache ($source, $target) { return 0; } +# Add a version string to the taxonomy data +# Change this version string if you want to force the taxonomies to be rebuilt +# e.g. if the taxonomy building algorithm or configuration has changed +# This needs to be done also when the unaccenting parameters for languages set in Config.pm are changed +my $BUILD_TAGS_VERSION = "20240329 - better taxonomy errors handling"; + sub get_from_cache ($tagtype, @files) { # If the full set of cached files can't be found then returns the hash to be used # when saving the new cached files. @@ -1044,12 +1056,8 @@ sub get_from_cache ($tagtype, @files) { my $sha1 = Digest::SHA1->new; - # Add a version string to the taxonomy data - # Change this version string if you want to force the taxonomies to be rebuilt - # e.g. 
if the taxonomy building algorithm or configuration has changed - # This needs to be done also when the unaccenting parameters for languages set in Config.pm are changed - - $sha1->add("20230316 - made xx: unaccented"); + # marker for code version + $sha1->add($BUILD_TAGS_VERSION); foreach my $source_file (@files) { # The source file can be prefixed by the product type @@ -1064,6 +1072,10 @@ sub get_from_cache ($tagtype, @files) { my $hash = $sha1->hexdigest; my $cache_prefix = "$tagtype.$hash"; + + # disable by env variable, useful when iterating over Tags.pm (see make rebuild_taxonomies) + return $cache_prefix if $ENV{TAXONOMY_NO_GET_FROM_CACHE}; + my $got_from_cache = get_file_from_cache("$cache_prefix.result.sto", "$tag_data_root.result.sto"); if ($got_from_cache) { $got_from_cache = get_file_from_cache("$cache_prefix.result.txt", "$tag_data_root.result.txt"); @@ -1124,11 +1136,31 @@ sub put_to_cache ($tagtype, $cache_prefix) { put_file_to_cache("$tag_www_root.full.json", "$cache_prefix.full.json"); put_file_to_cache("$tag_data_root.result.txt", "$cache_prefix.result.txt"); put_file_to_cache("$tag_data_root.result.sto", "$cache_prefix.result.sto"); + # note: we don't put errors to cache as it is a non sense, errors are to be fixed before + # and you need them only if you touch the taxonomy hence rebuild it (and thus have them locally) return; } -=head2 build_tags_taxonomy( $tagtype, $file, $publish ) +# create a struct for a taxonomy error +sub _taxonomy_error($severity, $type, $msg, $line = undef) { + return { + severity => $severity, + type => $type, + msg => $msg, + line_num => $line, + }; +} + +# error to string +sub _taxonomy_error_display($error_ref) { + my %error = %$error_ref; + my $line = ""; + $line = "$error{line_num} - " if $error{line_num}; + return "$error{severity} - $error{type} - $line$error{msg}"; +} + +=head2 build_tags_taxonomy( $tagtype, $file, $publish) Build taxonomy from the taxonomy file @@ -1275,7 +1307,7 @@ sub build_tags_taxonomy 
($tagtype, $publish) { # $tagtype -> $canon_tagid -> "$property:$lc" stores the value for property $properties{$tagtype} = {}; - my $errors = ''; + my @taxonomy_errors = (); if (open(my $IN, "<:encoding(UTF-8)", $file_path)) { @@ -1403,7 +1435,7 @@ sub build_tags_taxonomy ($tagtype, $publish) { . $translations_from{$tagtype}{"$lc:$lc_tagid"} . " (" . $tagtype . ")" . " - $lc:$lc_tagid cannot be mapped to entry $canon_tagid\n"; - $errors .= "ERROR - " . $msg; + push(@taxonomy_errors, _taxonomy_error("ERROR", "duplicate_synonym", $msg, $line_number)); next; } @@ -1443,7 +1475,7 @@ sub build_tags_taxonomy ($tagtype, $publish) { . $translations_from{$tagtype}{$lc . ":" . $synonyms{$tagtype}{$lc}{$tagid}} . " ($tagtype)" . " - $lc:$tagid cannot be mapped to entry $canon_tagid / $lc:$lc_tagid\n"; - $errors .= "ERROR - " . $msg; + push(@taxonomy_errors, _taxonomy_error("ERROR", "duplicate_synonym", $msg, $line_number)); next; } # add synonym to both tracking lists @@ -1465,7 +1497,7 @@ sub build_tags_taxonomy ($tagtype, $publish) { . $nutriscore_grade . " is incorrect\n"; - $errors .= "ERROR - " . $msg; + push(@taxonomy_errors, _taxonomy_error("ERROR", "unknown_nutriscore", $msg, $line_number)); } } elsif ($line =~ /^expected_ingredients:en:/) { @@ -1476,11 +1508,12 @@ sub build_tags_taxonomy ($tagtype, $publish) { my $msg = "expected_ingredients:en: in " . $tagtype - . " should contain a single letter " + . " should contain a single value " . $expected_ingredients . " is incorrect\n"; - $errors .= "ERROR - " . 
$msg; + push(@taxonomy_errors, + _taxonomy_error("ERROR", "multiple_expected_ingredients", $msg, $line_number)); } } else { @@ -1491,13 +1524,18 @@ sub build_tags_taxonomy ($tagtype, $publish) { close($IN); - if ($errors ne "") { + if (scalar @taxonomy_errors) { print STDERR "Errors in the $tagtype taxonomy definition:\n"; - print STDERR $errors; + print STDERR join("", map {_taxonomy_error_display($_)} @taxonomy_errors); + # do we only have duplicate synonyms errors ? + my $only_duplicate_errors = !(first {$_->{type} ne "duplicate_synonym"} @taxonomy_errors); # Disable die for the ingredients taxonomy that is merged with additives, minerals etc. # Disable die for the packaging taxonomy as some legit material and shape might have same name - unless (($tagtype eq "ingredients") or ($tagtype eq "packaging") or ($tagtype eq "inci_functions")) { + my $taxonomy_with_duplicate_tolerated + = (($tagtype eq "ingredients") or ($tagtype eq "packaging") or ($tagtype eq "inci_functions")); + unless ($only_duplicate_errors and $taxonomy_with_duplicate_tolerated) { + store("$result_dir/$tagtype.errors.sto", {errors => \@taxonomy_errors}); die("Errors in the $tagtype taxonomy definition"); } } @@ -1736,9 +1774,12 @@ sub build_tags_taxonomy ($tagtype, $publish) { $canon_tagid = undef; + $line_number = 0; + while (<$IN>) { my $line = sanitize_taxonomy_line($_); + $line_number++; # consider parenthesis as spaces $line =~ s/\(|\)/-/g; @@ -1807,7 +1848,8 @@ sub build_tags_taxonomy ($tagtype, $publish) { foreach my $parentid (sort keys %parents) { # Make sure the parent is not equal to the child if ($parentid eq $canon_tagid) { - $errors .= "ERROR - $canon_tagid is a parent of itself\n"; + my $msg = "$canon_tagid is a parent of itself\n"; + push(@taxonomy_errors, _taxonomy_error("ERROR", "circular_parent", $msg, $line_number)); next; } defined $direct_parents{$tagtype}{$canon_tagid} or $direct_parents{$tagtype}{$canon_tagid} = {}; @@ -1866,9 +1908,11 @@ sub build_tags_taxonomy 
($tagtype, $publish) { $canon_tagid = undef; + $line_number = 0; while (<$IN>) { my $line = sanitize_taxonomy_line($_); + $line_number++; # consider parenthesis as spaces $line =~ s/\(|\)/-/g; @@ -1948,7 +1992,8 @@ sub build_tags_taxonomy ($tagtype, $publish) { #print "- $parentid\n"; if ($parentid eq $tagid) { - $errors .= "ERROR - $tagid is a parent of itself\n"; + my $msg = "$tagid is a parent of itself\n"; + push(@taxonomy_errors, _taxonomy_error("ERROR", "circular_parent", $msg)); } elsif (not defined $seen{$parentid}) { defined $all_parents{$tagtype}{$tagid} or $all_parents{$tagtype}{$tagid} = []; @@ -2028,7 +2073,9 @@ sub build_tags_taxonomy ($tagtype, $publish) { my $lc = $parentid; $lc =~ s/^(\w\w):.*/$1/; if (not exists $translations_to{$tagtype}{$parentid}{$lc}) { - $errors .= "ERROR - $tagid has an undefined parent $parentid\n"; + my $msg = "$tagid has an undefined parent $parentid\n"; + push(@taxonomy_errors, _taxonomy_error("ERROR", "unknown_parent", $msg)); + } else { print $OUT "< $lc:" . $translations_to{$tagtype}{$parentid}{$lc} . "\n"; @@ -2125,13 +2172,18 @@ sub build_tags_taxonomy ($tagtype, $publish) { close $OUT; - if ($errors ne "") { + if (scalar @taxonomy_errors) { print STDERR "Errors in the $tagtype taxonomy definition:\n"; - print STDERR $errors; + print STDERR join("", map {_taxonomy_error_display($_)} @taxonomy_errors); + # do we only have duplicate synonyms errors ? (plain element access on the error hash ref) + my $only_duplicate_errors = !(first {$_->{type} ne "duplicate_synonym"} @taxonomy_errors); # Disable die for the ingredients taxonomy that is merged with additives, minerals etc. 
- # Disable also for packaging taxonomy for some shapes and materials shares same names - unless (($tagtype eq "ingredients") or ($tagtype eq "packaging") or ($tagtype eq "inci_functions")) { + # Disable die for the packaging taxonomy as some legit material and shape might have same name + my $taxonomy_with_duplicate_tolerated + = (($tagtype eq "ingredients") or ($tagtype eq "packaging") or ($tagtype eq "inci_functions")); + unless ($only_duplicate_errors and $taxonomy_with_duplicate_tolerated) { + store("$result_dir/$tagtype.errors.sto", {errors => \@taxonomy_errors}); die("Errors in the $tagtype taxonomy definition"); } } @@ -2163,7 +2215,7 @@ sub build_tags_taxonomy ($tagtype, $publish) { # system("gzip $BASE_DIRS{PUBLIC_DATA}/taxonomies/$tagtype.json"); } - $log->error("taxonomy errors", {errors => $errors}) if $log->is_error(); + $log->error("taxonomy errors", {errors => \@taxonomy_errors}) if $log->is_error(); my $taxonomy_ref = { stopwords => $stopwords{$tagtype}, @@ -2183,11 +2235,12 @@ sub build_tags_taxonomy ($tagtype, $publish) { if ($publish) { store("$result_dir/$tagtype.result.sto", $taxonomy_ref); + store("$result_dir/$tagtype.errors.sto", {errors => \@taxonomy_errors}); put_to_cache($tagtype, $cache_prefix); } } - return; + return @taxonomy_errors; } =head2 build_all_taxonomies ( $pubish) @@ -2201,15 +2254,17 @@ Build all taxonomies, including the test taxonomy =cut sub build_all_taxonomies ($publish) { + my %errors = (); foreach my $taxonomy (@taxonomy_fields, "test") { # traces and data_quality_xxx are not real taxonomy per se # (but built from allergens and data_quality) if ($taxonomy ne "traces" and rindex($taxonomy, 'data_quality_', 0) != 0) { - build_tags_taxonomy($taxonomy, $publish); + my @taxonomy_errors = build_tags_taxonomy($taxonomy, $publish); + $errors{$taxonomy} = \@taxonomy_errors if @taxonomy_errors; } } - return; + return \%errors; } =head2 generate_tags_taxonomy_extract ( $tagtype, $tags_ref, $options_ref, $lcs_ref) @@ -2661,6 
+2716,8 @@ sub init_taxonomies($die_if_some_taxonomies_cannot_be_loaded = 0) { } # ingredients_original uses the ingredients taxonomy $taxonomy_fields{"ingredients_original"} = "ingredients"; + # mandatory_additive_class uses additives_classes + $taxonomy_fields{"mandatory_additive_class"} = "additives_classes"; init_languages(); init_countries(); diff --git a/scripts/taxonomies/build_tags_taxonomy.pl b/scripts/taxonomies/build_tags_taxonomy.pl index 0d4701d2f0de7..1584504cdfd5d 100755 --- a/scripts/taxonomies/build_tags_taxonomy.pl +++ b/scripts/taxonomies/build_tags_taxonomy.pl @@ -32,10 +32,20 @@ print STDERR "tagtype: $tagtype\n"; if ($tagtype eq '*') { - ProductOpener::Tags::build_all_taxonomies($publish); + my $errors_ref = ProductOpener::Tags::build_all_taxonomies($publish); + foreach my $taxonomy (keys %{$errors_ref}) { + if (@{$errors_ref->{$taxonomy}}) { + print STDERR (scalar @{$errors_ref->{$taxonomy}}) . " errors while building $taxonomy taxonomy\n"; + } + } } else { - ProductOpener::Tags::build_tags_taxonomy($tagtype, $publish); + my @errors = ProductOpener::Tags::build_tags_taxonomy($tagtype, $publish); + if (@errors) { + print STDERR (scalar @errors) . " errors while building $tagtype taxonomy\n"; + } } +print STDERR "done building tags taxonomy\n"; + exit(0); diff --git a/scripts/taxonomies/lint_taxonomy.pl b/scripts/taxonomies/lint_taxonomy.pl new file mode 100755 index 0000000000000..051daad3ad1c6 --- /dev/null +++ b/scripts/taxonomies/lint_taxonomy.pl @@ -0,0 +1,508 @@ +#!/usr/bin/perl -w + +# This file is part of Product Opener. 
+# +# Product Opener +# Copyright (C) 2011-2024 Association Open Food Facts +# Contact: contact@openfoodfacts.org +# Address: 21 rue des Iles, 94100 Saint-Maur des Fossés, France +# +# Product Opener is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +use ProductOpener::PerlStandards; + +use Data::Compare; +use File::Basename qw/basename dirname/; +use File::Copy qw/move/; +use File::Temp; +use Getopt::Long qw/GetOptions/; +use List::Util qw/first/; + +use ProductOpener::Tags qw/%taxonomy_fields %translations_from canonicalize_taxonomy_tag sanitize_taxonomy_line/; + +# compare synonyms entries on language prefix with "xx" > "en" then alpha order +# also work for property name + language prefix +sub cmp_on_language : prototype($$) ($a, $b) { + if ((!defined $a) || (!defined $b)) { + return $a cmp $b; + } + $a = lc($a); + $b = lc($b); + my $a_prefix = undef; + my $b_prefix = undef; + # case of property name: : + if ($a =~ /^(\w+):(\w+)$/) { + $a_prefix = $1; + $a = $2; + } + if ($b =~ /^(\w+):(\w+)$/) { + $b_prefix = $1; + $b = $2; + } + if ($a_prefix && $b_prefix) { + # property name is the first item to sort on + return $a_prefix cmp $b_prefix if ($a_prefix ne $b_prefix); + } + # compare on language code + return 0 if ($a eq $b); + # en and xx takes precedence over all others + return -1 if ($a eq "xx"); + return 1 if ($b eq "xx"); + return -1 if ($a eq "en"); # because of lines above, $b ne xx + return 1 
if ($b eq "en"); # because of lines above, $a ne xx + return $a cmp $b; +} + +# simple iterator on lines, ensuring last line is empty (to simplify getting last entry) +sub iter_taxonomy_lines($fd) { + my $last_line; + # iterator + return sub { + while (my $line = <$fd>) { + $last_line = $line; + return $line; + } + # be sure to end with a blank line ($last_line is undef when no line was read at all) + if (defined $last_line and $last_line !~ /^\s*$/) { + $last_line = "\n"; # make next call terminate + return "\n"; + } + # end of iterator + return; + } +} + +# iter over the taxonomy entry by entry +# return a ref to a hash map with entry infos +sub iter_taxonomy_entries ($lines_iter) { + my $line_num = 0; # this is global + return sub { + # re-init at start and after returning each entry + my @parents = (); # lines defining parents + my $entry_id_line = undef; # line defining entry id, we don't want to change it's position + my %entries = (); # lines defining synonyms + my %props = (); # lines defining properties + my @original_lines = (); + # non meaningful lines above a meaningful line (entry, parent or porperty) + my @previous_lines = (); + my @errors = (); + my $entry_start_line = $line_num + 1; + while (defined(my $line = $lines_iter->())) { # defined(): a false-but-defined line such as "0" must not end the iteration + $line_num += 1; + push @original_lines, $line; # collect lines for comparison + + # blank line means we are changing entry, so let's return collected data + if ($line =~ /^\s*$/) { + my $entry = { + parents => \@parents, + entry_id_line => $entry_id_line, + entries => \%entries, + props => \%props, + original_lines => \@original_lines, + tail_lines => \@previous_lines, + start_line => $entry_start_line, + end_line => $line_num, + errors => \@errors, + }; + add_entry_id($entry, \@errors); + # return $entry + return $entry; + } + # parents + elsif ($line =~ /^ "Error", + type => "Correctness", + line => $line_num, + message => ( + "Parent in the middle of an entry, might mean erroneous merge of two entries:\n" + . 
"- $line" + ) + } + ); + } + push @parents, {line => $line, previous => [@previous_lines], line_num => $line_num}; + @previous_lines = (); + } + # synonym + elsif ($line =~ /^(\w+):[^:]*(,.*)*$/) { + if (!defined $entry_id_line) { + $entry_id_line = {line => $line, previous => [@previous_lines], lc => $1,, line_num => $line_num}; + } + else { + my $lc = $1; + if ((defined $entries{$lc}) || ($entry_id_line->{lc} eq $lc)) { + my $previous_lc_line; + if (defined $entries{$lc}) { + $previous_lc_line = $entries{$lc}{line}; + } + else { + $previous_lc_line = $entry_id_line->{line}; + } + + push @errors, + { + severity => "Error", + type => "Correctness", + line => $line_num, + message => ("duplicate language line for $lc:\n" . "- $previous_lc_line" . "- $line") + }; + } + # but try to do our best and continue + if (defined $entries{$lc}) { + $entries{$lc}{line} = $entries{$lc}{line} . $line; + push @{$entries{$lc}{previous}}, @previous_lines; + } + else { + $entries{$lc} = {line => $line, previous => [@previous_lines],, line_num => $line_num}; + } + } + @previous_lines = (); + } + # property + elsif ($line =~ /^(\w+):(\w+):(.*)$/) { + my $prop = $1; + my $lc = $2; + if (defined $props{"$prop:$lc"}) { + push( + @errors, + { + severity => "Error", + type => "Correctness", + line => $line_num, + message => ( + "duplicate property language line for $prop:$lc:\n" . "- " + . $props{"$prop:$lc"}->{line} + . 
"- $line" + ) + } + ); + } + # override to continue + $props{"$prop:$lc"} = {line => $line, previous => [@previous_lines], line_num => $line_num}; + @previous_lines = (); + } + # comments or undefined + else { + push @previous_lines, $line; + } + } + # end of iterator + return; + } +} + +# make entry properties that reference a taxonomy use the canonical id +sub canonicalize_entry_properties($entry_ref, $is_check) { + return unless (defined $entry_ref->{entry_id_line}); # not a regular entry + my @errors = (); + my %props = %{$entry_ref->{props}}; + for my $prop_name (keys %props) { + # If the property name matches the name of an already loaded taxonomy, + # canonicalize the property values for the corresponding synonym + # e.g. if an additive has a class additives_classes:en: en:stabilizer (a synonym), + # we can map it to en:stabiliser (the canonical name in the additives_classes taxonomy) + my ($property, $lc) = split(/:/, $prop_name); + my $prop_tagtype = $taxonomy_fields{$property}; + if ((defined $prop_tagtype) && (exists $translations_from{$prop_tagtype})) { + my $prop_value = substr($props{$prop_name}{line}, length($prop_name) + 1); + my $value = $prop_value; + $value =~ s/^\s*//; + $value = sanitize_taxonomy_line($value); + my @values = split(/\s*,\s*/, $value); + # check values exists in taxonomy and canonicalize + my @canon_values = (); + my @not_found = (); + my %different = (); # better track it to display only differing values + foreach my $v (@values) { + my $exists; + my $canon_value = canonicalize_taxonomy_tag($lc, $prop_tagtype, $v, \$exists); + push(@canon_values, $canon_value); + push(@not_found, $canon_value) unless $exists; + $different{$v} = $canon_value if $canon_value ne $v; + } + if (@not_found) { + my $not_found = join(",", @not_found); + push( + @errors, + { + severity => "Warning", + type => "Consistency", + entry_start_line => $entry_ref->{start_line}, + entry_id_line => $entry_ref->{entry_id_line}{line}, + message => ( + "Values $not_found 
do not exists in taxonomy, at $props{$prop_name}{line_num}\n" + . "- $props{$prop_name}{line}" + ), + } + ); + } + if (%different) { + if ($is_check) { + # values changed this is an error + push( + @errors, + { + severity => "Error", + type => "Linting", + entry_start_line => $entry_ref->{start_line}, + entry_id_line => $entry_ref->{entry_id_line}{line}, + message => ( + "Property $prop_name is not canonical, at $props{$prop_name}{line_num}\n" . "- " + . join(", ", keys %different) . "\n" . "- " + . join(", ", values %different) . "\n" + ), + } + ); + } + else { + # replace value to lint + $props{$prop_name}{line} = "$prop_name: " . join(", ", @canon_values) . "\n"; + } + } + } + } + return @errors; +} + +# add some info about entry in errors +sub add_entry_id($entry_ref, $errors_ref) { + my @errors = @$errors_ref; + foreach my $e (@errors) { + $e->{entry_start_line} = $entry_ref->{start_line}; + my $entry_id_line = $entry_ref->{entry_id_line}; + $e->{entry_id_line} = $entry_id_line->{line} if defined $entry_id_line; + } + return; +} + +# lint lines of an entry +sub lint_entry($entry_ref, $do_sort) { + my @parents = @{$entry_ref->{parents}}; + my $entry_id_line = $entry_ref->{entry_id_line}; + my %entries = %{$entry_ref->{entries}}; + my %props = %{$entry_ref->{props}}; + my @original_lines = @{$entry_ref->{original_lines}}; + my @tail_lines = @{$entry_ref->{tail_lines}}; + # eventual result + my @output_lines = (); + # sort items + my (@sorted_entries, @sorted_props); + if ($do_sort) { + @parents = sort {$a->{line} cmp $b->{line}} @parents; + @sorted_entries = sort cmp_on_language (keys %entries); + @sorted_props = sort cmp_on_language (keys %props); + } + else { + # keep the original order: sort numerically by line number (<=>, so that line 10 comes after line 9), no need to sort parents + @sorted_entries = sort {$entries{$a}{line_num} <=> $entries{$b}{line_num}} (keys %entries); + @sorted_props = sort {$props{$a}{line_num} <=> $props{$b}{line_num}} (keys %props); + } + # print parents, line id, synonyms, sorted props + for my 
$parent (@parents) { + push @output_lines, @{$parent->{previous}}; + push @output_lines, $parent->{line}; + } + if (defined $entry_id_line) { + push @output_lines, @{$entry_id_line->{previous}}; + push @output_lines, $entry_id_line->{line}; + } + for my $key (@sorted_entries) { + push @output_lines, @{$entries{$key}->{previous}}; + push @output_lines, $entries{$key}->{line}; + } + for my $key (@sorted_props) { + push @output_lines, @{$props{$key}->{previous}}; + push @output_lines, $props{$key}->{line}; + } + push @output_lines, @tail_lines; + # print a blank line + push @output_lines, "\n"; + return join("", @output_lines); +} + +# check that an entry is already linted, by comparing its original lines to $linted_output +sub check_linted($entry_ref, $linted_output) { + # compare with original lines + my $original = join("", @{$entry_ref->{original_lines}}); + my $entry_start_line = $entry_ref->{start_line}; + my $entry_end_line = $entry_ref->{end_line}; + # do not account for eventual added line at the end + my $trimed_original = $original; + $trimed_original =~ s/\n+$//; + my $trimed_linted = $linted_output; + $trimed_linted =~ s/\n+$//; + if ($trimed_original ne $trimed_linted) { + return { + severity => "Error", + type => "Linting", + entry_start_line => $entry_start_line, + entry_id_line => (defined $entry_ref->{entry_id_line} ? $entry_ref->{entry_id_line}{line} : undef), + message => ( + "output is not the same as original, line $entry_start_line..$entry_end_line\n" + . "Original --------------------\n" + . "$original\n" + . "Linted --------------------\n" + . 
"$linted_output\n" + ), + }; + } + return; +} + +# lint or check the taxonomy +sub lint_taxonomy($entries_iterator, $out, $is_check, $is_quiet, $do_sort) { + my @errors = (); + while (my $entry_ref = $entries_iterator->()) { + my @entry_errors = @{$entry_ref->{errors}}; + my @canon_errors = canonicalize_entry_properties($entry_ref, $is_check); + push(@entry_errors, @canon_errors) if @canon_errors; + # we will try to lint only if we don't have errors so far + my $linted_output; + if (!@entry_errors) { + $linted_output = lint_entry($entry_ref, $do_sort); + } + else { + # keep original lines + $linted_output = join("", @{$entry_ref->{original_lines}}); + } + if ($is_check) { + # search for linting error only if there is no othe errors + if (!@entry_errors) { + my $lint_error = check_linted($entry_ref, $linted_output); + push(@entry_errors, $lint_error) if $lint_error; + } + } + else { + # immediate output + print $out $linted_output; + if (@entry_errors) { + # signal it was not linted + push( + @entry_errors, + { + severity => "Warning", + type => "Linting", + entry_start_line => $entry_ref->{start_line}, + entry_id_line => $entry_ref->{entry_id_line}{line}, + message => ( + "Entry won't be linted because it as errors, " + . "line $entry_ref->{start_line}..$entry_ref->{end_line}\n" + ), + } + ); + } + } + # register errors globally + @errors = (@errors, @entry_errors); + display_errors(\@entry_errors) unless $is_quiet; + } + return \@errors; +} + +# display errors +sub display_errors($errors_ref) { + foreach my $error (@$errors_ref) { + my $entry_id_line = $error->{entry_id_line}; + my $entry_id = ""; + if ($entry_id_line) { + $entry_id = (split(/,/, $entry_id_line))[0]; + # trim + $entry_id =~ s/(^\s+|\s+$)//g; + if ($entry_id) { + $entry_id = " on $entry_id"; + } + } + my $entry_line = $error->{entry_start_line} ? 
" (line $error->{entry_start_line})" : ""; + print STDERR "$error->{severity}($error->{type}):$entry_id$entry_line\n"; + print STDERR "$error->{message}\n"; + } + return; +} + +# run the program only if called directly +unless (caller) { + # main + + my $usage = < \$is_check, "verbose" => \$is_verbose, "quiet" => \$is_quiet, "no-sort" => \$no_sort) + or die("Error in command line arguments.\n\n" . $usage); + + my @in_files = @ARGV; + my $tmp_dir = File::Temp->newdir(); + my $tmp_dirname = $tmp_dir->dirname(); + + if (!@in_files) { + # we will use stdin + @in_files = ("<",); + } + + binmode(STDIN, ":encoding(UTF-8)"); + binmode(STDOUT, ":encoding(UTF-8)"); + binmode(STDERR, ":encoding(UTF-8)"); + + my $error_code = 0; + + foreach my $file (@in_files) { + my $fd; + my $out; + my $out_path; + if ($file eq "<") { + $fd = *STDIN; + $out = *STDOUT; + } + else { + open($fd, "<:encoding(UTF-8)", $file) or die("can't open $file"); + # out to tempfile, will replace only if no errors + $out_path = "$tmp_dirname/" . basename($file); + open($out, ">:encoding(UTF-8)", $out_path) or die("can't write to $out_path"); + print("Processing $file =============\n\n") if $is_verbose; + } + my $entries_iterator = iter_taxonomy_entries(iter_taxonomy_lines($fd)); + my $errors_ref = lint_taxonomy($entries_iterator, $out, $is_check, $is_quiet, !$no_sort); + close($fd); + close($out); + if ((!$is_check) and $out_path) { + # $file = (getcwd() . 
"/$file") unless ($file =~ /^\//); + # replace file with the linted one + move($out_path, $file) or die("unable to move $out_path to $file: $!"); + } + # do we have errors (and not only warnings) + if ((first {lc($_->{severity}) eq "error"} @$errors_ref)) { + $error_code = 1; + } + } + exit($error_code); +} +1; diff --git a/scripts/taxonomies/sort_each_taxonomy_entry.pl b/scripts/taxonomies/sort_each_taxonomy_entry.pl deleted file mode 100755 index c677e33d6820d..0000000000000 --- a/scripts/taxonomies/sort_each_taxonomy_entry.pl +++ /dev/null @@ -1,184 +0,0 @@ -#!/usr/bin/perl -w - -# This file is part of Product Opener. -# -# Product Opener -# Copyright (C) 2011-2023 Association Open Food Facts -# Contact: contact@openfoodfacts.org -# Address: 21 rue des Iles, 94100 Saint-Maur des Fossés, France -# -# Product Opener is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
- -use Modern::Perl '2017'; -use utf8; - -use ProductOpener::Store qw/:all/; -use ProductOpener::Config qw/:all/; - -my $is_check = grep {$_ eq "--check"} @ARGV; -my $is_verbose = grep {$_ eq "-v"} @ARGV; -my $has_changes = 0; - -binmode(STDIN, ":encoding(UTF-8)"); -binmode(STDOUT, ":encoding(UTF-8)"); -binmode(STDERR, ":encoding(UTF-8)"); - -sub cmp_on_language ($$) { - my $a = shift; - my $b = shift; - if ((!defined $a) || (!defined $b)) { - return $a cmp $b; - } - $a = lc($a); - $b = lc($b); - my $a_prefix = undef; - my $b_prefix = undef; - if ($a =~ /^(\w+):(\w+)$/) { - $a_prefix = $1; - $a = $2; - } - if ($b =~ /^(\w+):(\w+)$/) { - $b_prefix = $1; - $b = $2; - } - if ($a_prefix && $b_prefix) { - return $a_prefix cmp $b_prefix if ($a_prefix ne $b_prefix); - } - return 0 if ($a eq $b); - # en and xx takes precedence over all others - return -1 if ($a eq "xx"); - return 1 if ($b eq "xx"); - return -1 if ($a eq "en"); # because of lines above, $b ne xx - return 1 if ($b eq "en"); # because of lines above, $a ne xx - return $a cmp $b; -} - -# read all in memory to take care of last line in a simple way -my @lines = (); - -# be sure to end with a blank line -push @lines, "\n" unless $lines[-1] =~ /^\s*$/; - -# structures for one entry -my @parents = (); # lines defining parents -my $entry_id_line = undef; # line defining entry id, we don't want to change it's position -my %entries = (); # lines defining synonyms -my %props = (); # lines defining properties -my @original_lines = (); -# non meaningful lines above a meaningful line (entry, parent or porperty) -my @previous_lines = (); -my $line_num = 0; -my $entry_start_line = 1; # tracking line number of the first line of an entry -foreach my $line (@lines) { - $line_num += 1; - push @original_lines, $line; # collect lines for comparison - - # blank line means we are changing entry, so let's print collected lines - if ($line =~ /^\s*$/) { - my @output_lines = (); - # sort items - @parents = sort {$a->{line} cmp 
$b->{line}} @parents; - my @sorted_entries = sort cmp_on_language (keys %entries); - my @sorted_props = sort cmp_on_language (keys %props); - # print parents, line id, synonyms, sorted props - for my $parent (@parents) { - push @output_lines, @{$parent->{previous}}; - push @output_lines, $parent->{line}; - } - if (defined $entry_id_line) { - push @output_lines, @{$entry_id_line->{previous}}; - push @output_lines, $entry_id_line->{line}; - } - for my $key (@sorted_entries) { - push @output_lines, @{$entries{$key}->{previous}}; - push @output_lines, $entries{$key}->{line}; - } - for my $key (@sorted_props) { - push @output_lines, @{$props{$key}->{previous}}; - push @output_lines, $props{$key}->{line}; - } - # print remaining previous_lines (if any) - push @output_lines, @previous_lines; - # print this blank line - push @output_lines, $line; - my $original = join("", @original_lines); - my $output = join("", @output_lines); - if ($is_check) { - # compare with original lines - if (not $original eq $output) { - $has_changes = 1; - if ($is_verbose) { - print "Error: output is not the same as original, line $entry_start_line..$line_num\n"; - print "Original --------------------\n"; - print "$original\n"; - print "Sorted --------------------\n"; - print "$output\n"; - } - } - } - else { - print "$output"; - } - # re-init - $entry_id_line = undef; - @parents = (); - %entries = (); - %props = (); - @previous_lines = (); - @original_lines = (); - $entry_start_line = $line_num; - } - # parents - elsif ($line =~ /^ $line, previous => [@previous_lines]}; - @previous_lines = (); - } - # synonym - elsif ($line =~ /^(\w+):[^:]*(,.*)*$/) { - if (!defined $entry_id_line) { - $entry_id_line = {line => $line, previous => [@previous_lines], lc => $1}; - } - else { - my $lc = $1; - if ((defined $entries{$lc}) || ($entry_id_line->{lc} eq $lc)) { - # emit a warning as this seems like a strange case - print STDERR "Warning: duplicate synonym for $lc, on entry line $line_num\n"; - print 
STDERR "- " . ($entries{$lc}{line} // $entry_id_line->{line}); - print STDERR "- " . $line; - } - # but try to do our best and continue - if (defined $entries{$lc}) { - $entries{$lc}{line} = $entries{$lc}{line} . $line; - push @{$entries{$lc}{previous}}, @previous_lines; - } - else { - $entries{$lc} = {line => $line, previous => [@previous_lines]}; - } - } - @previous_lines = (); - } - # property - elsif ($line =~ /^(\w+):(\w+):(.*)$/) { - my $prop = $1; - my $lc = $2; - $props{"$prop:$lc"} = {line => $line, previous => [@previous_lines]}; - @previous_lines = (); - } - # comments or undefined - else { - push @previous_lines, $line; - } -} - -exit($is_check and $has_changes); diff --git a/scripts/taxonomies/sort_each_taxonomy_entry.sh b/scripts/taxonomies/sort_each_taxonomy_entry.sh deleted file mode 100755 index 2497bd87effd4..0000000000000 --- a/scripts/taxonomies/sort_each_taxonomy_entry.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env bash - -ARGS=(); -FILES=(); - -ACTION="Linting" -# options are passed as arguments to the script -for arg in "$@" -do - if [[ "$arg" = -* ]] - then - ARGS+=( "$arg" ); - else - FILES+=( "$arg" ); - fi - if [[ "$arg" = "--check" ]] - then - IS_CHECK=1; - ACTION="Checking" - fi -done - -script=$(dirname $0 )"/sort_each_taxonomy_entry.pl" -FINAL_EXIT=0; -for taxonomy in "${FILES[@]}" -do - echo "$ACTION $taxonomy ===============" - rm -f $taxonomy.tmp - # redirect output only if we're not checking - ( \ - [[ -z "$IS_CHECK" ]] && exec >$taxonomy.tmp; \ - $script "${ARGS[@]}" <$taxonomy; \ - ) - [[ -s $taxonomy.tmp ]] && mv $taxonomy.tmp $taxonomy - EXIT=$? 
- if [[ $EXIT -ne 0 ]] - then - echo "=> Error in $taxonomy" - FINAL_EXIT=$EXIT; - fi -done - -exit $FINAL_EXIT; \ No newline at end of file diff --git a/taxonomies/additives.txt b/taxonomies/additives.txt index 26caf286dddd8..f75e7822787d1 100644 --- a/taxonomies/additives.txt +++ b/taxonomies/additives.txt @@ -3354,12 +3354,12 @@ wikipedia:en:https://en.wikipedia.org/wiki/Calcium_carbonate # ingredient/calcium-carbonate has 4220 products in 10 languages @2019-02-03 # mineral/calcium-carbonate has 4301 products @2019-02-03 e_number:en:170 -mandatory_additive_class:en: en:acidity-regulator, en:anti-caking-agent, en:stabilizer, en:firming-agent, en:flour-treatment-agent, en:glazing-agent, en:colour +mandatory_additive_class:en: en:acidity-regulator, en:anti-caking-agent, en:stabiliser, en:firming-agent, en:flour-treatment-agent, en:glazing-agent, en:colour efsa_evaluation_url:en:http://www.efsa.europa.eu/fr/efsajournal/doc/2318.pdf efsa_evaluation_date:en:2011/07/26 efsa_evaluation:en:Scientific Opinion on re‐evaluation of calcium carbonate (E 170) as a food additive efsa_evaluation_overexposure_risk:en: en:no -additives_classes:en: en:colour, en:stabilizer +additives_classes:en: en:colour, en:stabiliser organic_eu:en:authorized #“Eggshells, snail shells and most seashells are predominantly calcium carbonate and can be used as industrial sources of that chemical.” https://en.wikipedia.org/wiki/Calcium_carbonate#Biological_sources #Oyster shell source may not be needed to label in USA @@ -3774,7 +3774,7 @@ sv:E181, Tannin, Garvsyra xx:E181 e_number:en:181 wikidata:en:Q187607 -additives_classes:en: en:colour, en:emulsifier, en:stabilizer, en:thickener +additives_classes:en: en:colour, en:emulsifier, en:stabiliser, en:thickener vegan:en:yes vegetarian:en:yes @@ -6033,7 +6033,7 @@ wikipedia:en:https://en.wikipedia.org/wiki/Calcium_acetate # ingredient/fr:acétate-de-calcium has 63 products @2019-04-06 # additive/e263-calcium-acetate has 293 products @2019-04-06 
e_number:en:263 -additives_classes:en: en:preservative, en:stabilizer +additives_classes:en: en:preservative, en:stabiliser vegan:en:yes vegetarian:en:yes description:en:CALCIUM ACETATE is a chemical compound which is a calcium salt of acetic acid. Calcium acetate is used as a food additive, as a stabilizer, buffer and sequestrant, mainly in candy products. @@ -8200,7 +8200,7 @@ sv:E331, Natriumcitrater xx:E331 e_number:en:331 wikidata:en:Q6460572 -additives_classes:en: en:emulsifier, en:sequestrant, en:stabilizer +additives_classes:en: en:emulsifier, en:sequestrant, en:stabiliser organic_eu:en:authorized vegan:en:yes vegetarian:en:yes @@ -8233,7 +8233,7 @@ sv:E331(i), Mononatriumcitrat xx:E331(i) e_number:en:331 wikidata:en:Q6460572 -additives_classes:en: en:emulsifier, en:sequestrant, en:stabilizer +additives_classes:en: en:emulsifier, en:sequestrant, en:stabiliser organic_eu:en:authorized wikidata:en:Q4179018 wikipedia:en:https://en.wikipedia.org/wiki/Monosodium_citrate @@ -8269,7 +8269,7 @@ sv:E331(ii), Dinatriumcitrat xx:E331(ii) e_number:en:331 wikidata:en:Q6460572 -additives_classes:en: en:emulsifier, en:sequestrant, en:stabilizer +additives_classes:en: en:emulsifier, en:sequestrant, en:stabiliser organic_eu:en:authorized wikidata:en:Q4177124 wikipedia:en:https://en.wikipedia.org/wiki/Disodium_citrate @@ -8326,11 +8326,11 @@ ru:E332, Калиевые цитраты sk:E332, Citrán draselný sl:E332, Kalijevi citrati sv:E332, Kaliumcitrater -mandatory_additive_class:en: en:acidity-regulator, en:sequestrant, en:stabilizer +mandatory_additive_class:en: en:acidity-regulator, en:sequestrant, en:stabiliser xx:E332 e_number:en:332 wikidata:en:Q419921 -additives_classes:en: en:sequestrant, en:stabilizer +additives_classes:en: en:sequestrant, en:stabiliser vegan:en:yes vegetarian:en:yes @@ -8357,11 +8357,11 @@ ro:E332(i), Citrat monopotasic sk:E332(i). 
E332(i) food additive sl:E332(i), Monokalijev citrat sv:E332(i), Monokaliumcitrat -mandatory_additive_class:en: en:acidity-regulator, en:sequestrant, en:stabilizer +mandatory_additive_class:en: en:acidity-regulator, en:sequestrant, en:stabiliser xx:E332(i) e_number:en:332 wikidata:en:Q419921 -additives_classes:en: en:sequestrant, en:stabilizer +additives_classes:en: en:sequestrant, en:stabiliser wikidata:en:Q63255997 # additive/e332i-monopotassium-citrate has 3 products in french @2019-04-22 @@ -8414,10 +8414,10 @@ xx:E332(ii) #zh-sg:E332(ii), 柠檬酸钾 #zh-tw:E332(ii), 檸檬酸鉀 e_number:en:332 -mandatory_additive_class:en: en:acidity-regulator, en:sequestrant, en:stabilizer +mandatory_additive_class:en: en:acidity-regulator, en:sequestrant, en:stabiliser wikidata:en:Q419921 # last wikidatasynchronisation 4-sep-2020 -additives_classes:en: en:sequestrant, en:stabilizer +additives_classes:en: en:sequestrant, en:stabiliser wikipedia:en:https://en.wikipedia.org/wiki/Potassium_citrate # additive/e332ii-tripotassium-citrate has 23 products in 2 languages @2019-04-22 description:en:POTASSIUM CITRATE (also known as tripotassium citrate) is a potassium salt of citric acid with the molecular formula K3C6H5O7. As a food additive, potassium citrate is used to regulate acidity. It is also used in many soft drinks as a buffering agent. 
@@ -8449,11 +8449,11 @@ ru:E333, Цитрат кальция sk:E333, Citrát trivápenatý sl:E333, E333 food additive sv:E333, Kalciumcitrater -mandatory_additive_class:en: en:acidity-regulator, en:sequestrant, en:stabilizer, en:firming-agent +mandatory_additive_class:en: en:acidity-regulator, en:sequestrant, en:stabiliser, en:firming-agent xx:E333 e_number:en:333 wikidata:en:Q420280 -additives_classes:en: en:sequestrant, en:stabilizer +additives_classes:en: en:sequestrant, en:stabiliser organic_eu:en:authorized vegan:en:yes vegetarian:en:yes @@ -8482,11 +8482,11 @@ ro:E333(i), Citrat monocalcic sk:E333(i), Citran monovápenatý, Kalcium-citrát sl:E333(i), Monokalcijev citrat sv:E333(i), Monokalciumcitrat -mandatory_additive_class:en: en:acidity-regulator, en:sequestrant, en:stabilizer, en:firming-agent +mandatory_additive_class:en: en:acidity-regulator, en:sequestrant, en:stabiliser, en:firming-agent xx:E333(i) e_number:en:333 wikidata:en:Q420280 -additives_classes:en: en:sequestrant, en:stabilizer +additives_classes:en: en:sequestrant, en:stabiliser organic_eu:en:authorized #comment:en:E333 should be in plural, the specific form can be entered under i, ii or iii. In some languages the singular can refer to any of the three variants. The variants should either have a mono-, di- or tri- prefix. @@ -8515,7 +8515,7 @@ sv:E333(ii), Dikalciumcitrat xx:E333(ii) e_number:en:333 wikidata:en:Q420280 -additives_classes:en: en:sequestrant, en:stabilizer +additives_classes:en: en:sequestrant, en:stabiliser organic_eu:en:authorized #comment:en:E333 should be in plural, the specific form can be entered under i, ii or iii. In some languages the singular can refer to any of the three variants. The variants should either have a mono-, di- or tri- prefix. 
@@ -8545,11 +8545,11 @@ sk:E333(iii), Citran trivápenatý, dicitran trivápenatý sl:E333(iii), Trikalcijev citrat sv:E333(iii), Trikalciumcitrat zh:E333(iii), 柠檬酸钙 -mandatory_additive_class:en: en:acidity-regulator, en:sequestrant, en:stabilizer, en:firming-agent +mandatory_additive_class:en: en:acidity-regulator, en:sequestrant, en:stabiliser, en:firming-agent xx:E333(iii) e_number:en:333 wikidata:en:Q420280 -additives_classes:en: en:sequestrant, en:stabilizer +additives_classes:en: en:sequestrant, en:stabiliser organic_eu:en:authorized @@ -8651,7 +8651,7 @@ sv:E335, Natriumtartrater xx:E335 e_number:en:335 wikidata:en:Q58641683 -additives_classes:en: en:sequestrant, en:stabilizer +additives_classes:en: en:sequestrant, en:stabiliser organic_eu:en:authorized vegan:en:yes vegetarian:en:yes @@ -8683,7 +8683,7 @@ sv:E335(i), Mononatriumtartrat xx:E335(i) e_number:en:335 wikidata:en:Q836451 -additives_classes:en: en:sequestrant, en:stabilizer +additives_classes:en: en:sequestrant, en:stabiliser organic_eu:en:authorized anses_additives_of_interest:en:yes @@ -8713,7 +8713,7 @@ sv:E335(ii), Dinatriumtartrat xx:E335(ii) e_number:en:335 wikidata:en:Q17997701 -additives_classes:en: en:sequestrant, en:stabilizer +additives_classes:en: en:sequestrant, en:stabiliser organic_eu:en:authorized anses_additives_of_interest:en:yes @@ -8852,7 +8852,7 @@ sv:E337, Kaliumnatriumtartrat, Kaliumnatrium-L(+)-tartrat, rochellesalt xx:E337 e_number:en:337 wikidata:en:Q303489 -additives_classes:en: en:sequestrant, en:stabilizer +additives_classes:en: en:sequestrant, en:stabiliser vegan:en:yes vegetarian:en:yes anses_additives_of_interest:en:yes @@ -8930,7 +8930,7 @@ sl:E339, natrijev fosfat sv:E339, Natriumfosfater, Natriumfosfat vi:E339, Natri phosphat zh:E339, 磷酸钠 -mandatory_additive_class:en: en:acidity-regulator, en:sequestrant, en:stabilizer, en:emulsifier, en:thickener, en:humectant, en:raising-agent +mandatory_additive_class:en: en:acidity-regulator, en:sequestrant, en:stabiliser, 
en:emulsifier, en:thickener, en:humectant, en:raising-agent xx:E339 e_number:en:339 wikidata:en:Q3249706 @@ -8940,7 +8940,7 @@ efsa_evaluation:en: Re‐evaluation of phosphoric acid–phosphates – di‐, t efsa_evaluation_overexposure_risk:en: en:high efsa_evaluation_exposure_mean_greater_than_adi:en: en:children, en:toddlers, en:infants efsa_evaluation_exposure_95th_greater_than_adi:en: en:adolescents, en:children, en:toddlers, en:infants -additives_classes:en: en:emulsifier, en:humectant, en:preservative, en:sequestrant, en:stabilizer, en:thickener +additives_classes:en: en:emulsifier, en:humectant, en:preservative, en:sequestrant, en:stabiliser, en:thickener vegan:en:yes vegetarian:en:yes anses_additives_of_interest:en:yes @@ -8984,7 +8984,7 @@ xx:E339(i) # azb:E339(i), مونوسودیوم فوسفات e_number:en:339 wikidata:en:Q3249706 -additives_classes:en: en:emulsifier, en:humectant, en:preservative, en:sequestrant, en:stabilizer, en:thickener +additives_classes:en: en:emulsifier, en:humectant, en:preservative, en:sequestrant, en:stabiliser, en:thickener anses_additives_of_interest:en:yes wikidata:en:Q415877 wikipedia:en:https://en.wikipedia.org/wiki/Monosodium_phosphate @@ -9032,7 +9032,7 @@ xx:E339(ii) # zh-tw:E339(ii), 磷酸一氫鈉 e_number:en:339 wikidata:en:Q3249706 -additives_classes:en: en:emulsifier, en:humectant, en:preservative, en:sequestrant, en:stabilizer, en:thickener +additives_classes:en: en:emulsifier, en:humectant, en:preservative, en:sequestrant, en:stabiliser, en:thickener anses_additives_of_interest:en:yes wikidata:en:Q418448 wikipedia:en:https://en.wikipedia.org/wiki/Disodium_phosphate @@ -9072,7 +9072,7 @@ xx:E339(iii) # azb:E339(iii), تریسودیوم فوسفات e_number:en:339 wikidata:en:Q3249706 -additives_classes:en: en:emulsifier, en:humectant, en:preservative, en:sequestrant, en:stabilizer, en:thickener +additives_classes:en: en:emulsifier, en:humectant, en:preservative, en:sequestrant, en:stabiliser, en:thickener anses_additives_of_interest:en:yes 
wikipedia:en:https://en.wikipedia.org/wiki/Trisodium_phosphate # fr:phosphate-trisodique has 45 products @2018-11-04 @@ -9113,7 +9113,7 @@ efsa_evaluation:en: Re‐evaluation of phosphoric acid–phosphates – di‐, t efsa_evaluation_overexposure_risk:en: en:high efsa_evaluation_exposure_mean_greater_than_adi:en: en:children, en:toddlers, en:infants efsa_evaluation_exposure_95th_greater_than_adi:en: en:adolescents, en:children, en:toddlers, en:infants -additives_classes:en: en:emulsifier, en:humectant, en:sequestrant, en:stabilizer, en:thickener +additives_classes:en: en:emulsifier, en:humectant, en:sequestrant, en:stabiliser, en:thickener vegan:en:yes vegetarian:en:yes anses_additives_of_interest:en:yes @@ -9144,7 +9144,7 @@ sv:E340(i), Monokaliumfosfat, Monokaliummonofosfat, kaliumortofosfat xx:E340(i) e_number:en:340 wikidata:en:Q3381491 -additives_classes:en: en:emulsifier, en:humectant, en:sequestrant, en:stabilizer, en:thickener +additives_classes:en: en:emulsifier, en:humectant, en:sequestrant, en:stabiliser, en:thickener anses_additives_of_interest:en:yes {test_tags}, is(canonicalize_taxonomy_tag('es', 'ingredients', 'jugo de soya'), 'en:soy-base'); # check that properties are taxonomized if their name match a previously loaded taxonomy -is(get_property("additives", "en:e170i", "additives_classes:en"), "en:colour,en:stabiliser"); +is(get_property("additives", "en:e170i", "additives_classes:en"), "en:colour, en:stabiliser"); # test list_taxonomy_tags_in_language