Skip to content

Commit

Permalink
Merge pull request #2525 from openfoodfacts/data-quality
Browse files Browse the repository at this point in the history
Data quality warning for products with main language set to unknown + 5 consonants
  • Loading branch information
stephanegigandet authored Nov 1, 2019
2 parents a71eb57 + 42823c2 commit cdae4cf
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 2 deletions.
27 changes: 27 additions & 0 deletions lib/ProductOpener/DataQualityCommon.pm
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,37 @@ sub check_bugs($) {

my $product_ref = shift;

check_bug_missing_or_unknown_main_language($product_ref);

check_bug_code_missing($product_ref);
check_bug_created_t_missing($product_ref);
}

=head2 check_bug_missing_or_unknown_main_language( PRODUCT_REF )

Flags products that do not have the lc or lang field set, or that have
a lang field set to "xx" (unknown).

=over 4

=item * lc not defined: adds "en:main-language-code-missing" to data_quality_bugs_tags

=item * lang not defined: adds "en:main-language-missing" to data_quality_bugs_tags

=item * lang equal to "xx": adds "en:main-language-unknown" to data_quality_warnings_tags

=back

lc and lang fields should always be set, but there have been some bugs in the past
that caused them not to be set in certain conditions.

PRODUCT_REF is a reference to the product hash; the tag arrays are created
on demand if they do not exist yet.

=cut

sub check_bug_missing_or_unknown_main_language($) {

	my $product_ref = shift;

	if (not defined $product_ref->{lc}) {
		push @{$product_ref->{data_quality_bugs_tags}}, "en:main-language-code-missing";
	}

	if (not defined $product_ref->{lang}) {
		push @{$product_ref->{data_quality_bugs_tags}}, "en:main-language-missing";
	}
	elsif ($product_ref->{lang} eq 'xx') {
		# An unknown main language is reported as a warning. The tag must go
		# into one of the standard *_tags fields: the previous key
		# "data_quality_bugs_warnings" matched none of them.
		push @{$product_ref->{data_quality_warnings_tags}}, "en:main-language-unknown";
	}

	return;
}

sub check_bug_code_missing($) {

my $product_ref = shift;
Expand Down
4 changes: 2 additions & 2 deletions lib/ProductOpener/DataQualityFood.pm
Original file line number Diff line number Diff line change
Expand Up @@ -808,9 +808,9 @@ sub check_ingredients($) {

# Dutch and other languages can have 4 consecutive consonants
if ($display_lc !~ /de|nl/) {
if ($product_ref->{$ingredients_text_lc} =~ /[bcdfghjklmnpqrstvwxz]{4}/is) {
if ($product_ref->{$ingredients_text_lc} =~ /[bcdfghjklmnpqrstvwxz]{5}/is) {

push @{$product_ref->{data_quality_warnings_tags}}, "en:ingredients-" . $display_lc . "-4-consonants";
push @{$product_ref->{data_quality_warnings_tags}}, "en:ingredients-" . $display_lc . "-5-consonants";
}
}

Expand Down
9 changes: 9 additions & 0 deletions scripts/update_all_products.pl
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,15 @@
}
}

# When $fix_missing_lc is set and the product has no lang field,
# copy lc into lang, provided lc looks like a 2-letter lowercase code.
if (($fix_missing_lc) and (not defined $product_ref->{lang})) {
	my $lc = $product_ref->{lc};
	# lc can be missing too: print an empty value instead of concatenating
	# undef, which would raise an "uninitialized value" warning.
	print STDERR "lc: " . (defined $lc ? $lc : "") . "\n";
	if ((defined $lc) and ($lc =~ /^[a-z][a-z]$/)) {
		print STDERR "fixing missing lang, using lc: " . $lc . "\n";
		$product_ref->{lang} = $lc;
		# Mark the product as modified (flag is read outside this block).
		$product_values_changed = 1;
	}
}

# Fix ingredients_n that was set as string
if (defined $product_ref->{ingredients_n}) {
$product_ref->{ingredients_n} += 0;
Expand Down

0 comments on commit cdae4cf

Please sign in to comment.