Skip to content

Commit

Permalink
taxonomy: taxonomy_add_croatian_ingredients (#10108)
Browse files Browse the repository at this point in the history
taxonomy_add_croatian_ingredients
  • Loading branch information
benbenben2 authored May 9, 2024
1 parent d8276c7 commit b783bf0
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 19 deletions.
75 changes: 58 additions & 17 deletions lib/ProductOpener/Ingredients.pm
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ my %may_contain_regexps = (
rs => "može sadržati tragove",
ru => "Могут содержаться следы",
sk => "Môže obsahovať",
sl => "lahko vsebuje sledi",
sv => "kan innehålla små mängder|kan innehålla spår av|innehåller spår av|kan innehålla spår|kan innehålla",
);

Expand Down Expand Up @@ -2635,7 +2636,11 @@ sub parse_ingredients_text_service ($product_ref, $updated_product_fields_ref) {
'^Mælkechokoladen indeholder (?:også andre vegetabilske fedtstoffer end kakaosmør og )?mindst',
],

'de' => ['^in ver[äa]nderlichen Gewichtsanteilen$', '^Unter Schutzatmosph.re verpackt$',],
'de' => [
'^in ver[äa]nderlichen Gewichtsanteilen$',
'^Unter Schutzatmosph.re verpackt$',
'Fett gedruckte Zutaten enthalten allergene Inhaltsstoffe', # allergens are in bold
],

'en' => [
# breaking this regexp into the comma separated combinations (because each comma makes a new ingredient):
Expand Down Expand Up @@ -2722,13 +2727,17 @@ sub parse_ingredients_text_service ($product_ref, $updated_product_fields_ref) {
'može imati štetno djelovanje na aktivnosti pažnju djece'
, # can have a detrimental effect on children's attention activities (E122)
'označene podebljano', # marked in bold
'sastojci (su )otisnuti', # ingredients written in bold are allergens
'sastojci otisnuti'
, # ingredients written in bold are allergens: Alergeni sastojci su otisnuti debljim slovima
'savjet kod alergije', # allergy advice
'u čokoladi kakaovi dijelovi'
, # Cocoa parts in chocolate 48%. Usually at the end of the ingredients list. Chocolate can contain many sub-ingredients (cacao, milk, sugar, etc.)
'u promjenjivim omjerima|u promjenjivim udjelima|u promijenljivom udjelu'
, # in variable proportions
'uključujući žitarice koje sadrže gluten', # including grains containing gluten
'za alergene', # for allergens

],

'it' => ['^in proporzion[ei] variabil[ei]$',],
Expand Down Expand Up @@ -4646,11 +4655,9 @@ my %phrases_before_ingredients_list = (

ru => ['состав', 'coctab', 'Ингредиенты',],

si => ['sestavine',],

sk => ['obsahuje', 'zloženie',],

sl => ['vsebuje', '(S|s)estavine',],
sl => ['(S|s)estavine',],

sq => ['P[eë]rb[eë]r[eë]sit',],

Expand Down Expand Up @@ -4710,7 +4717,7 @@ my %phrases_before_ingredients_list_uppercase = (

ru => ['COCTАB',],

si => ['SESTAVINE',],
sl => ['SESTAVINE',],

sv => ['INGREDIENSER', 'INNEHÅLL(ER)?',],

Expand All @@ -4726,13 +4733,23 @@ my %phrases_after_ingredients_list = (

# TODO: Introduce a common list for kcal

al => [
'të ruhet në', # store in
],

bg => [
'да се съхранява (в закрити|на сухо)', # store in ...
'Неотворен', # before opening ...
],

ca => ['envasat en atmosfera protectora', 'conserveu-los en un lloc fresc i sec',],

cs => ['doporučeny způsob přípravy', 'V(ý|y)(ž|z)ivov(e|é) (ú|u)daje ve 100 g',],
cs => [
'doporučeny způsob přípravy',
'minimální trvanlivost do', # Expiration date
'po otevření', # After opening
'V(ý|y)(ž|z)ivov(e|é) (ú|u)daje ve 100 g',
],

da => [
'(?:gennemsnitlig )?n(æ|ae)rings(?:indhold|værdi|deklaration)', 'beskyttes',
Expand All @@ -4753,7 +4770,7 @@ my %phrases_after_ingredients_list = (
'Durchschnittliche N(â|a|ä)hrwerte',
'DURCHSCHNITTLICHE NÄHRWERTE',
'Durchschnittliche N(â|a|ä)hrwert(angaben|angabe)',
'Kakao: \d\d\s?% mindestens.',
# 'Kakao: \d\d\s?% mindestens.', # allergens can appear after.
'N(â|a|ä)hrwert(angaben|angabe|information|tabelle)', #Nährwertangaben pro 100g
'N(â|a|ä)hrwerte je',
'Nâhrwerte',
Expand All @@ -4764,6 +4781,7 @@ my %phrases_after_ingredients_list = (
'Unge(ö|o)ffnet bei max.',
'Unter Schutzatmosphäre verpackt',
'verbrauchen bis',
'Vor und nach dem Öffnen', # before and after opening (keep in a dry place)
'Vor Wärme geschützt (und trocken )?lagern',
'Vorbereitung Tipps',
'zu verbrauchen bis',
Expand All @@ -4781,7 +4799,9 @@ my %phrases_after_ingredients_list = (
'adds a trivial amount', # e.g. adds a trivial amount of added sugars per serving
'after opening',
#'Best before',
'Can be stored unopened at room temperature', # can be stored ...
'keep cool and dry',
'Can be stored unopened at room temperature',
'instruction',
'nutrition(al)? (as sold|facts|information|typical|value[s]?)',
# "nutrition advice" seems to appear before ingredients rather than after.
# "nutritional" on its own would match the ingredient "nutritional yeast" etc.
Expand All @@ -4807,7 +4827,9 @@ my %phrases_after_ingredients_list = (
'contiene azúcares naturalmente presentes',
'de los cuales az(u|ú)cares',
'de las cuales saturadas',
'envasado', # Packaging in protective atmosphere.
'Mantener en lugar fresco y seco',
'obtenga más información', # get more information
'protegido de la luz',
'conser(y|v)ar entre',
'una vez abierto',
Expand Down Expand Up @@ -4908,28 +4930,33 @@ my %phrases_after_ingredients_list = (
],

hr => [
'(č|Č)uvati|(č|Č)uvajte', # store in...
'[prije otvaranja ](č|Č|c|C|ć|Ć)uvati|(č|Č|c|C|ć|Ć)uvajte', # store in...
'izvaditi', # remove from the refrigerator half an hour before consumption
'najbolje upotrijebiti do', # best before
'način pripreme', # preparation
'(najbolje )upotrijebiti', # best before
'nakon otvaranja', # after opening
'neotvoreno', # not opened can be stored etc.
'pakirano u', # packed in a ... atmosphere
'neto koli(č|Č|c|C|ć|Ć)ina', # net weight
'nije potrebno kuhati', # no need to cook
'pakirano', # packed in a ... atmosphere (Pakirano/Pakovano u)
'pakiranje sadrži', # pack contains x portions
'prekomjerno konzumiranje', # excessive consumption can have a laxative effect
'preporučuje se', # preparation
'Prijedlog za serviranje', # Proposal for serving
'priprema obroka', # meal preparation
'proizvod je termički obrađen-pasteriziran', # pasteurized
'proizvod sadrži sumporni dioksid', # The product contains sulfur dioxide
'proizvođač', # producer
'prosječn(a|e) (hranjiva|hranjive|nutritivne) (vrijednost|vrijednosti)', # Average nutritional value
'protresti prije otvaranja', # shake before opening
'(protresti )prije (i poslije )otvaranja', # shake before opening
'suha tvar min', # dry matter min 9%
'unato(č|Č|c|C|ć|Ć) vi(š|Š|s|S)estrukim kontrolama', # despite numerous controls ...
'upotreba u jelima', # meal preparation
'upotrijebiti do datuma', # valid until
'upozorenje', # warning
'uputa', # instructions
'upute za upotrebu', # instructions
'uvjeti čuvanja', # storage conditions
'uvoznik za', # importer
'uvoznik [i distributer ]za', # importer
'vakuumirana', # Vacuumed
'vrijeme kuhanja', # Cooking time
'zbog (mutan|prisutnosti)', # Due to ...
Expand All @@ -4939,12 +4966,15 @@ my %phrases_after_ingredients_list = (
hu => [
'Atlagos tápérték 100g termékben',
'((száraz|hűvös|(közvetlen )?napfénytől védett)[, ]*)+helyen tárolandó', # store in cool/dry/etc
'bontatlan csomagolásban', # in unopened packaging (keep in a closed/dark place)
'tárolás', # conservation
],

is => ['n(æ|ae)ringargildi', 'geymi(st|ð) á', 'eftir opnum', 'aðferð',],

it => [
'Conservare in luogo fresco e asciutto',
'Confezionato in atmosfera protettiva', # Packaged in a protective atmosphere
'Conservare in luogo [fresco e ]asciutto',
'consigli per la preparazione',
'Da consumarsi', # best before
'di cui zuccheri',
Expand Down Expand Up @@ -5016,6 +5046,7 @@ my %phrases_after_ingredients_list = (
'conservar em lugar fresco',
'dos quais a(ç|c)(u|ü)ares',
'dos quais a(ç|c)(u|ü)cares',
'embalado', # Packaging in protective atmosphere.
'informa(ç|c)(a|ã)o nutricional',
'modo de prepara(ç|c)(a|ã)o',
'a consumir de prefer(e|ê)ncia antes do',
Expand All @@ -5033,14 +5064,24 @@ my %phrases_after_ingredients_list = (
],

rs => [
'čuvati na (hladnom i suvom mestu|temperaturi od)', # Store in a cool and dry place
'(č|Č|c|C|ć|Ć)uvati na (hladnom i suvom mestu|temperaturi od)', # Store in a cool and dry place
'napomena za potrošače', # note for consumers
'pakovano', # packed in a protective atmosphere
'proizvodi i puni', # Produced and filled
'upotrebljivo', # keep until
],

sk => [
'skladovanie', # store at
],

si => [
sl => [
'hraniti', # Store in a cool and dry place
'pakirano v kontrolirani atmosferi', # packed in a ... atmosphere
'porabiti', # keep until
'predlog za serviranje ', # serving suggestion
'prosječne hranjive vrijednosti 100 g proizvoda', # average nutritional value of 100 g of product
'uvoznik', # imported/distributed by
],

sv => [
Expand Down
5 changes: 3 additions & 2 deletions taxonomies/food/ingredients.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ stopwords:hr:bez glutena, i/ili, i, ili, iz, max, min, najmanje, najviše, obič
stopwords:hu:tartalmaz, változó arányban, min, zsírtartalom, összetevő, összetétel, amelyből, amiből
stopwords:id:mengandung
stopwords:is:úr
stopwords:it:contiene, nella
# example: COCCO in proporzione variabile
stopwords:it:contiene, nella, in proporzione variabile
# Japanese stopwords are matched without word boundaries, do not put as stopwords characters or words that could be part of an actual ingredient entry
stopwords:ja:等, その他
stopwords:lt:iš, su, su pridėtiniu, mažiausiai, įskaitant
Expand Down Expand Up @@ -62587,7 +62588,7 @@ ru:пряности натуральные
# Herbs (general)
#
###################################################################################################
en:herb, mixed herbs
en:herb, mixed herbs, aromatic plants
bg:билка, билки
bs:aromatično bilje
ca:Herbes, herbes aromàtiques, plantes aromàtiques
Expand Down
2 changes: 2 additions & 0 deletions taxonomies/labels.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1960,6 +1960,7 @@ de:Natürliche Kalziumquelle
es:Fuente de calcio natural
fi:Luonnollinen kalsiumin lähde
fr:Source naturelle de calcium
hr:Prirodni izvor kalcija
hu:Természetes Kalciumforrás
it:Fonte naturale di calcio
nl:Bron van natuurlijke Calcium
Expand Down Expand Up @@ -4531,6 +4532,7 @@ ca:Producte natural
de:Naturprodukt, Natürliches Produkt
es:Producto natural
fr:Produit naturel
hr:prirodan proizvod, ovo je prirodan proizvod
pt:Produto natural

<en:Natural product
Expand Down

0 comments on commit b783bf0

Please sign in to comment.