diff --git a/src/wiktextract/extractor/nl/tags.py b/src/wiktextract/extractor/nl/tags.py index 936912ef..c6c3436d 100644 --- a/src/wiktextract/extractor/nl/tags.py +++ b/src/wiktextract/extractor/nl/tags.py @@ -10,8 +10,6 @@ # https://nl.wiktionary.org/wiki/Categorie:WikiWoordenboek:Contextlabels GLOSS_TAGS = { "figuurlijk": "figuratively", - "verouderd": "obsolete", # Sjabloon:verouderd - "scheldwoord": "pejorative", "afkorting": "abbreviation", "causatief": "causative", # "chattaal": "", @@ -29,6 +27,34 @@ # "leesteken": "punctuation", "letterwoord": "acronym", "middeleeuwen": "Middle-Ages", + "vrouwelijke naam": ["feminine", "name"], + "mannelijke naam": ["masculine", "name"], + "mannelijke en vrouwelijke naam": ["masculine", "feminine", "name"], + "neologisme": "neologism", + "oudheid": "archaic", + # "palindroom": "palindrome", + "pejoratief": "pejorative", + "persoon": "person", + # "pregnant": "extra meaning", + "samenkoppeling": "compound", + # "sanitair": "", + "scheldwoord": "pejorative", + "schertsend": "humorous", + "spottend": "ironic", + "spreektaal": "vernacular", + "spreekwoord": "proverb", + # "stopwoord": "filled pause", + "straattaal": "slang", + "streektaal": "regiolectal", + # "taal": "language", + "toponiem": "toponymic", + "verkorting": "clipping", + "verouderd": "obsolete", + "Vroegnieuwnederlands": "Early-Modern-Dutch", + "vulgair": "vulgar", + "zegswijze": "idiomatic", + "zeldzaam": "rare", + "Latijns-Amerika": "Latin-America", } TABLE_TAGS = { @@ -219,6 +245,82 @@ "muziekinstrument": "music", "mycologie": "mycology", "mythologie": "mythology", + "natuurkunde": "physics", + "neurologie": "neurology", + "numismatiek": "numismatics", + "oenologie": "oenology", + "onderwijs": "education", + "oorlog": "war", + "optica": "optics", + "ordehandhaving": "law enforcement", + # "paardrijden": "horseriding", + # "planologie": "planology", + "plantkunde": "botany", + "politiek": "politics", + "post": "mail", + "psychologie": "psychology", + "regering": "government", + "religie": "religion", + # "ruimtevaart": "space travel", + "schaak": "chess", + "scheepvaart": "shipping", + "scheikunde": "chemistry", + # "schilderkunst": "painting", + # "schoeisel": "shoewear", + "scouting": "scouting", + "seismologie": "seismology", + "seksualiteit": "sexuality", + "sieraad": "jewellery", + # "slapen": "sleep", + # "snoepgoed": "candy", + "sociologie": "sociology", + # "specerij": "spice", + "speelgoed": "toys", + "spel": "games", + # "spellingsalfabet": "spelling alphabet", + "spoorwegen": "railways", + "sport": "sports", + "statistiek": "statistics", + # "sterrenbeeld": "constellation", + "valutanaam": "money", + "taalkunde": "linguistics", + "tandheelkunde": "dentistry", + "techniek": "technology", + # "teken- en schrijfmateriaal": "", + "tekstkritiek": "textual criticism", + "telecommunicatie": "telecommunications", + "tennis": "tennis", + "textiel": "textiles", + "textielindustrie": "textiles", + "thermodynamica": "thermodynamics", + # "tijdrekening": "timekeeping", + "toerisme": "tourism", + "toneel": "theater", + "transport": "transport", + "tuinbouw": "horticulture", + # "tuinieren": "gardening", + "typografie": "typography", + "valkerij": "falconry", + # "veeteelt": "husbandry", + "verkeer": "traffic", + "visserij": "fishing", + "voeding": "food", + "voetbal": "football", + "volleybal": "volleyball", + # "waterbeheer": "water management", + "wegenbouw": ["road", "construction"], + "werelddeel": "continents", + "werktuigbouwkunde": "mechanical-engineering", + "wetenschap": "sciences", + "wielrennen": "cycling", + # "Wikimedia": "Wikimedia", + # "wikitaal": "", + # "windstreek": "", + # "wintersport": "", + "wiskunde": "mathematics", + # "wonen": "", + "zoötomie": "zootomy", + "zwemmen": "swimming", }