diff --git a/usertools/de_language_data.py b/languages/get_de_data.py similarity index 100% rename from usertools/de_language_data.py rename to languages/get_de_data.py diff --git a/overrides/de.json b/overrides/de.json new file mode 100644 index 00000000..0ddfe81b --- /dev/null +++ b/overrides/de.json @@ -0,0 +1,202 @@ +{ + "Vorlage:Abkürzungen": { + "body": "==== Abkürzungen ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Alternative Schreibweisen": { + "body": "==== Alternative Schreibweisen ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Anmerkung": { + "body": "==== Anmerkung ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Aussprache": { + "body": "==== Aussprache ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Bedeutungen": { + "body": "==== Bedeutungen ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Beispiele": { + "body": "==== Beispiele ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Bekannte Namensträger": { + "body": "==== Bekannte Namensträger ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Charakteristische Wortkombinationen": { + "body": "==== Charakteristische Wortkombinationen ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Entlehnungen": { + "body": "==== Entlehnungen ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Gegenwörter": { + "body": "==== Gegenwörter ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Grammatische Merkmale": { + "body": "==== Grammatische Merkmale ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Herkunft": { + "body": "==== Herkunft ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Holonyme": { + "body": "==== Holonyme ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Koseformen": { + "body": "==== Koseformen ====", + "namespace_id": 10, + 
"need_pre_expand": true + }, + "Vorlage:Lesungen": { + "body": "==== Lesungen ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Männliche Wortformen": { + "body": "==== Männliche Wortformen ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Namensvarianten": { + "body": "==== Namensvarianten ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Nebenformen": { + "body": "==== Nebenformen ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Nicht mehr gültige Schreibweisen": { + "body": "==== Nicht mehr gültige Schreibweisen ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Oberbegriffe": { + "body": "==== Oberbegriffe ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Quellen": { + "body": "", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:erweitern": { + "body": "", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:QS Herkunft": { + "body": "", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:QS Bedeutungen": { + "body": "", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Redewendungen": { + "body": "==== Redewendungen ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Referenzen": { + "body": "==== Referenzen ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Sinnverwandte Wörter": { + "body": "==== Sinnverwandte Wörter ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Sprichwörter": { + "body": "==== Sprichwörter ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Symbole": { + "body": "==== Symbole ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Synonyme": { + "body": "==== Synonyme ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Umschrift": { + "body": "==== Umschrift ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Unterbegriffe": { + 
"body": "==== Unterbegriffe ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Verkleinerungsformen": { + "body": "==== Verkleinerungsformen ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Vokalisierung": { + "body": "==== Vokalisierung ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Weibliche Wortformen": { + "body": "==== Weibliche Wortformen ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Wortbildungen": { + "body": "==== Wortbildungen ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Wortfamilie": { + "body": "==== Wortfamilie ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Worttrennung": { + "body": "==== Worttrennung ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:in kyrillischer Schrift": { + "body": "==== in kyrillischer Schrift ====", + "namespace_id": 10, + "need_pre_expand": true + }, + "Vorlage:Übersetzungen": { + "body": "Übersetzungen", + "namespace_id": 10, + "need_pre_expand": true + } +} diff --git a/src/wiktextract/config.py b/src/wiktextract/config.py index 8d80276c..b2e3c426 100644 --- a/src/wiktextract/config.py +++ b/src/wiktextract/config.py @@ -120,7 +120,7 @@ def __init__( "FORM_OF_TEMPLATES", "form_of_templates.json" ) if dump_file_lang_code == "de": - self.set_attr_from_json("DE_FORM_TABLES", "form_templates.json") + self.set_attr_from_json("DE_FORM_TABLES", "form_tables.json") self.analyze_templates = True # find templates that need pre-expand self.extract_thesaurus_pages = True self.load_edition_settings() diff --git a/src/wiktextract/data/de/form_tables.json b/src/wiktextract/data/de/form_tables.json index 939a2674..6d4afc53 100644 --- a/src/wiktextract/data/de/form_tables.json +++ b/src/wiktextract/data/de/form_tables.json @@ -13,6 +13,7 @@ "Pronomina-Tabelle", "Afrikaans Substantiv Übersicht", "Albanisch Verb Übersicht", + "Altenglisch Substantiv Übersicht", "Altgriechisch 
Adjektiv Übersicht", "Altgriechisch Substantiv Übersicht", "Altirisch Substantiv Übersicht", @@ -40,6 +41,7 @@ "Französisch Substantiv Übersicht", "Französisch Verb Übersicht", "Galicisch Substantiv Übersicht", + "Georgisch Substantiv Übersicht", "Hausa Adjektiv Übersicht", "Hausa Possessiv Übersicht", "Hausa Substantiv Übersicht", @@ -48,11 +50,12 @@ "Irisch Adjektiv Übersicht", "Irisch Substantiv Übersicht", "Isländisch Name Übersicht", + "Isländisch Adjektiv Übersicht", "Isländisch Substantiv Übersicht", "Isländisch Verb Übersicht", "Italienisch Adjektiv Übersicht", "Italienisch Substantiv Übersicht", - "Italienisch Verb Übersicht", + "Italienisch Verb Übersicht", "Katalanisch Adjektiv Übersicht", "Katalanisch Substantiv Übersicht", "Katalanisch Verb Übersicht", @@ -64,12 +67,14 @@ "Latein Adjektiv Übersicht", "Latein Adverb Übersicht", "Latein Substantiv Übersicht", + "Lateinisch Substantiv Übersicht", "Lettisch Substantiv Übersicht", "Lettisch Verb Übersicht", "Mazedonisch Substantiv Übersicht", "Nahuatl Substantiv Übersicht", "Neugriechisch Substantiv Übersicht", "Niederdeutsch Adjektiv Übersicht", + "Niederdeutsch Substantiv Übersicht", "Niederländisch Adjektiv Übersicht", "Niederländisch Substantiv Übersicht", "Niedersorbisch Substantiv Übersicht", @@ -83,6 +88,7 @@ "Polnisch Grundzahl Übersicht", "Polnisch Substantiv Übersicht", "Portugiesisch Substantiv Übersicht", + "Prußisch Substantiv Übersicht", "Rumänisch Numerale Übersicht", "Rumänisch Personalpronomen Übersicht", "Rumänisch Substantiv Übersicht", diff --git a/src/wiktextract/data/de/languages.json b/src/wiktextract/data/de/languages.json index bc0151f2..06dc7b9e 100644 --- a/src/wiktextract/data/de/languages.json +++ b/src/wiktextract/data/de/languages.json @@ -1,5 +1,4 @@ { - "MHA": ["modernes Hocharabisch"], "aa": ["Afar"], "aae": ["Arbëresh"], "ab": ["Abchasisch"], @@ -46,6 +45,7 @@ "aua": ["Asumboa"], "aud": ["Anutisch"], "av": ["Awarisch"], + "avk": ["Kotava"], "ay": ["Aymara"], "az": 
["Aserbaidschanisch"], "azb": ["Südaserbaidschanisch"], @@ -97,10 +97,12 @@ "chr": ["Cherokee"], "chy": ["Cheyenne"], "ciw": ["Chippewa"], + "cjm": ["Östliches Cham"], "ckb": ["Sorani"], "ckt": ["Tschuktschisch"], "co": ["Korsisch"], "com": ["Comanche"], + "cop": ["Koptisch"], "cr": ["Cree"], "crh": ["Krimtatarisch"], "cri": ["Saotomensisches Kreol"], @@ -109,7 +111,7 @@ "cs": ["Tschechisch"], "csb": ["Kaschubisch"], "ctu": ["Tumbalá-Chol"], - "cu": ["Altkirchenslawisch"], + "cu": ["Altkirchenslawisch", "Kirchenslawisch"], "cv": ["Tschuwaschisch"], "cy": ["Walisisch"], "da": ["Dänisch"], @@ -129,9 +131,10 @@ "ee": ["Ewe"], "egl": ["Emilianisch"], "egy": ["Ägyptisch"], - "el": ["Griechisch (Neu-)"], + "el": ["Griechisch (Neu-)", "Neugriechisch"], "ems": ["Alutiiq"], "en": ["Englisch"], + "ENHG": ["Frühneuhochdeutsch"], "enm": ["Mittelenglisch"], "eo": ["Esperanto"], "es": ["Spanisch"], @@ -174,7 +177,7 @@ "gnc": ["Guanche"], "goh": ["Althochdeutsch"], "got": ["Gotisch"], - "grc": ["Altgriechisch"], + "grc": ["Altgriechisch", "Mittelgriechisch"], "gsw": ["Schweizerdeutsch"], "gu": ["Gujarati"], "gv": ["Manx"], @@ -203,12 +206,15 @@ "ik": ["Inupiaq"], "ikt": ["Inuinnaqtun"], "ilo": ["Ilokano"], + "ils": ["International"], + "ims": ["Marsisch"], "inh": ["Inguschisch"], "io": ["Ido"], "is": ["Isländisch"], "it": ["Italienisch"], "iu": ["Inuktitut"], "ja": ["Japanisch"], + "jam": ["Jamaika-Kreolisch"], "jbo": ["Lojban"], "jv": ["Javanisch"], "ka": ["Georgisch"], @@ -229,6 +235,8 @@ "kjj": ["Chinalugisch"], "kk": ["Kasachisch"], "kl": ["Grönländisch"], + "kla": ["Klamath"], + "klb": ["Kiliwa"], "km": ["Kambodschanisch"], "kmr": ["Kurmandschi"], "kn": ["Kannada"], @@ -236,6 +244,8 @@ "koi": ["Komi-Permjakisch"], "kok": ["Konkani"], "kos": ["Kosraeanisch"], + "kpg": ["Kapingamarangi"], + "kr": ["Kanuri"], "krc": ["Karatschai-Balkarisch"], "krl": ["Karelisch"], "ks": ["Kashmiri"], @@ -250,6 +260,7 @@ "la": ["Latein"], "lad": ["Ladino"], "lb": ["Luxemburgisch"], + "ldn": 
["Láadan"], "lep": ["Lepcha"], "lg": ["Luganda"], "li": ["Limburgisch"], @@ -261,15 +272,18 @@ "ln": ["Lingala"], "lo": ["Laotisch"], "lt": ["Litauisch"], + "ltg": ["Lettgallisch"], "lv": ["Lettisch"], "lzz": ["Lasisch"], "mad": ["Maduresisch"], "mak": ["Makassar"], "mas": ["Maa"], "mdf": ["Mokscha"], + "mfe": ["Morisien"], "mg": ["Madagassisch"], "mga": ["Mittelirisch"], "mh": ["Marshallesisch"], + "MHA": ["modernes Hocharabisch"], "mi": ["Maori"], "mia": ["Miami-Illinois"], "mic": ["Micmac"], @@ -281,30 +295,43 @@ "mnc": ["Mandschurisch"], "mns": ["Mansisch"], "moh": ["Mohawk"], + "mpm": ["Yosondúa-Mixtekisch"], "mr": ["Marathi"], + "mrv": ["Mangarevanisch"], "ms": ["Malaiisch"], "mt": ["Maltesisch"], "mus": ["Creek"], + "mwl": ["Mirandés"], "mxi": ["Mozarabisch"], "my": ["Birmanisch"], + "myn": ["Huastekisch"], "myv": ["Ersja"], "na": ["Nauruisch"], - "nah": ["Nahuatl"], + "nah": ["Nahuatl", "Zentral-Nahuatl"], "nan": ["Min Nan"], "nap": ["Neapolitanisch"], "naq": ["Nama"], "nb": ["Bokmål"], "nch": ["Huastekisches Zentral-Nahuatl"], - "nci": ["Klassisches Nahuatl"], + "nci": ["Klassisches Nahuatl", "Klassisches Nahuatl‎"], + "ncx": ["Zentrales Puebla-Nahuatl"], "nd": ["Nord-Ndebele"], + "ndo": ["Oshivambo"], "nds": ["Niederdeutsch"], "ne": ["Nepalesisch"], "new": ["Newari"], "ng": ["Ndonga"], "ngo": ["Ngoni"], + "ngu": ["Guerrero-Nahuatl"], + "nhe": ["Huastekisches Ost-Nahuatl"], + "nhg": ["Tetelcingo-Nahuatl"], + "nhv": ["Temascaltepec-Nahuatl"], + "nhw": ["Huastekisches West-Nahuatl"], "nic": ["Dogon"], + "niu": ["Niueanisch"], "nl": ["Niederländisch"], "nld": ["Flämisch"], + "nlv": ["Orizaba-Nahuatl"], "nmn": ["ǃXóõ"], "nn": ["Nynorsk"], "no": ["Norwegisch"], @@ -314,6 +341,7 @@ "nqo": ["N'Ko"], "nr": ["Süd-Ndebele"], "nrf": ["Altnormannisch"], + "nrn": ["Norn"], "nso": ["Nord-Sotho"], "nup": ["Nupe"], "nv": ["Navajo"], @@ -321,6 +349,7 @@ "obt": ["Altbretonisch"], "oc": ["Okzitanisch"], "oco": ["Altkornisch"], + "odt": ["Altniederländisch"], "ofs": 
["Altfriesisch"], "oge": ["Altgeorgisch"], "oj": ["Ojibwe"], @@ -330,8 +359,11 @@ "orv": ["Altostslawisch"], "os": ["Ossetisch"], "osa": ["Osage"], + "osc": ["Oskisch"], "osx": ["Altsächsisch"], + "ota": ["Osmanisches Türkisch"], "otk": ["Alttürkisch"], + "oto": ["Mezquital-Otomi"], "otw": ["Ottawa"], "owl": ["Altwalisisch"], "pa": ["Pandschabi"], @@ -339,14 +371,18 @@ "pap": ["Papiamentu"], "pau": ["Palauisch"], "paw": ["Pawnee"], + "pcd": ["Pikardisch"], "pdc": ["Pennsylvaniadeutsch"], "pdt": ["Plautdietsch"], "peo": ["Altpersisch"], + "pgn": ["Pälignisch"], "pi": ["Pali"], "pih": ["Pitkern"], "pis": ["Pijin"], + "pkp": ["Pukapuka"], "pl": ["Polnisch"], "pms": ["Piemontesisch"], + "pnb": ["West-Pandschabi"], "pov": ["Guineabissauisches Kreol"], "pox": ["Polabisch"], "pqm": ["Malecite-Passamaquoddy"], @@ -359,6 +395,7 @@ "qka": ["Erzgebirgisch"], "qu": ["Quechua"], "qua": ["Quapaw"], + "que": ["Argentinisches Quechua"], "quz": ["Cusco-Quechua"], "raj": ["Rajasthani"], "rap": ["Rapanui"], @@ -382,6 +419,7 @@ "sco": ["Scots"], "sd": ["Sindhi"], "se": ["Nordsamisch"], + "sei": ["Seri"], "sg": ["Sango"], "sga": ["Altirisch"], "sgs": ["Schemaitisch"], @@ -398,12 +436,14 @@ "smn": ["Inarisamisch"], "sn": ["Shona"], "so": ["Somalisch"], + "sog": ["Sogdisch"], + "spx": ["Südpikenisch"], "sq": ["Albanisch"], "sr": ["Serbisch"], "srn": ["Sranantongo"], "ss": ["Siswati"], "st": ["Sesotho"], - "stq": ["Saterfriesisch"], + "stq": ["Saterfriesisch", "Ostfriesisch"], "su": ["Sundanesisch"], "sux": ["Sumerisch"], "sv": ["Schwedisch"], @@ -422,6 +462,7 @@ "th": ["Thai"], "ti": ["Tigrinya"], "tk": ["Turkmenisch"], + "tkl": ["Tokelauisch"], "tl": ["Tagalog"], "tlh": ["Klingonisch"], "tli": ["Tlingit"], @@ -436,13 +477,19 @@ "trw": ["Torwali"], "ts": ["Xitsonga"], "tsi": ["Tsimshian"], + "tsz": ["Purépecha"], "tt": ["Tatarisch"], "tvl": ["Tuvaluisch"], + "twi": ["Twi"], "ty": ["Tahitianisch"], "tyv": ["Tuwinisch"], + "tzh": ["Tzeltal"], + "tzo": ["Tzotzil"], "udm": ["Udmurtisch"], 
"ug": ["Uigurisch"], + "uga": ["Ugaritisch"], "uk": ["Ukrainisch"], + "umc": ["Marrukinisch"], "umu": ["Munsee"], "unm": ["Unami"], "ur": ["Urdu"], @@ -461,6 +508,7 @@ "wlc": ["shiMwali"], "wni": ["shiNdzuani"], "wo": ["Wolof"], + "xae": ["Äquisch"], "xal": ["Kalmückisch"], "xcl": ["Altarmenisch"], "xfa": ["Faliskisch"], @@ -472,19 +520,25 @@ "xno": ["Anglonormannisch"], "xpq": ["Mohegan-Pequot"], "xtg": ["Gallisch"], + "xum": ["Umbrisch"], "xur": ["Urartäisch"], "xve": ["Venetisch"], + "xvo": ["Volskisch"], + "xvs": ["Vestinisch"], "yak": ["Yakima"], + "yaq": ["Yaqui"], "yi": ["Jiddisch"], "yo": ["Yoruba"], "yua": ["Mayathan"], "yue": ["Kantonesisch"], "za": ["Zhuang"], + "zai": ["Isthmus-Zapotekisch"], "zdj": ["shiNgazidja"], "zea": ["Seeländisch"], - "zh": ["Chinesisch"], "zh-cn": ["Chinesisch (vereinfacht)"], "zh-tw": ["Chinesisch (traditionell)"], + "zh": ["Chinesisch"], + "zlw-ocs": ["Alttschechisch"], "zu": ["isiZulu"], "zza": ["Zazaki"] } diff --git a/src/wiktextract/extractor/de/example.py b/src/wiktextract/extractor/de/example.py new file mode 100644 index 00000000..8a3a97f6 --- /dev/null +++ b/src/wiktextract/extractor/de/example.py @@ -0,0 +1,82 @@ +from collections import defaultdict +from typing import Dict, List + + +from wikitextprocessor import NodeKind, WikiNode +from wikitextprocessor.parser import LevelNode +from wiktextract.extractor.de.utils import find_and_remove_child, match_senseid + +from wiktextract.page import clean_node +from wiktextract.wxr_context import WiktextractContext + + +def extract_examples( + wxr: WiktextractContext, + page_data: List[Dict], + level_node: LevelNode, +) -> None: + for list_node in level_node.find_child(NodeKind.LIST): + for list_item_node in list_node.find_child(NodeKind.LIST_ITEM): + example_data = defaultdict(str) + + ref_nodes = find_and_remove_child( + list_item_node, + NodeKind.HTML, + lambda html_node: html_node.tag == "ref", + ) + for ref_node in ref_nodes: + extract_reference(wxr, example_data, 
ref_node) + + example_text = clean_node(wxr, {}, list_item_node.children) + + senseid, example_text = match_senseid(example_text) + + if example_text: + example_data["text"] = example_text + + if senseid: + for sense in page_data[-1]["senses"]: + if sense["senseid"] == senseid: + sense["examples"].append(example_data) + + else: + if example_data: + wxr.wtp.debug( + f"Found example data without senseid and text: {example_data}", + sortid="extractor/de/examples/extract_examples/28", + ) + for non_list_node in level_node.invert_find_child(NodeKind.LIST): + wxr.wtp.debug( + f"Found unexpected non-list node in example section: {non_list_node}", + sortid="extractor/de/examples/extract_examples/33", + ) + + +def extract_reference( + wxr: WiktextractContext, example_data: Dict[str, str], ref_node: WikiNode +): + reference_data = defaultdict() + + reference_data["raw_ref"] = clean_node(wxr, {}, ref_node.children) + + template_nodes = list(ref_node.find_child(NodeKind.TEMPLATE)) + + if len(template_nodes) > 1: + wxr.wtp.debug( + f"Found unexpected number of templates in example: {template_nodes}", + sortid="extractor/de/examples/extract_examples/64", + ) + elif len(template_nodes) == 1: + template_node = template_nodes[0] + + # Most reference templates follow the Literatur template and use named + # parameters. We extract them here. + # https://de.wiktionary.org/wiki/Vorlage:Literatur + for key, value in template_node.template_parameters.items(): + if isinstance(key, str): + reference_data[key.lower()] = clean_node(wxr, {}, value) + + # XXX: Treat other templates as well. + # E.g. 
https://de.wiktionary.org/wiki/Vorlage:Ref-OWID + + example_data["ref"] = reference_data diff --git a/src/wiktextract/extractor/de/gloss.py b/src/wiktextract/extractor/de/gloss.py index ffce2c82..ea2761c2 100644 --- a/src/wiktextract/extractor/de/gloss.py +++ b/src/wiktextract/extractor/de/gloss.py @@ -3,54 +3,100 @@ from typing import Dict, List from wikitextprocessor import NodeKind, WikiNode +from wikitextprocessor.parser import LevelNode +from wiktextract.extractor.de.utils import find_and_remove_child, match_senseid from wiktextract.page import clean_node from wiktextract.wxr_context import WiktextractContext def extract_glosses( + wxr: WiktextractContext, + page_data: List[Dict], + level_node: LevelNode, +) -> None: + for list_node in level_node.find_child(NodeKind.LIST): + process_gloss_list_item(wxr, page_data, list_node) + + for non_list_node in level_node.invert_find_child(NodeKind.LIST): + wxr.wtp.debug( + f"Found unexpected non-list node in pronunciation section: {non_list_node}", + sortid="extractor/de/pronunciation/extract_pronunciation/64", + ) + + +def process_gloss_list_item( wxr: WiktextractContext, page_data: List[Dict], list_node: WikiNode, + parent_senseid: str = "", + parent_gloss_data: defaultdict(list) = None, ) -> None: for list_item_node in list_node.find_child(NodeKind.LIST_ITEM): item_type = list_item_node.sarg if item_type == "*": - wxr.wtp.debug( - f"Skipped a sense modifier in gloss list: {list_item_node}", - sortid="extractor/de/glosses/extract_glosses/19", + handle_sense_modifier(wxr, list_item_node) + + elif item_type in [":", "::"]: + if any( + [ + template_node.template_name + in ["QS Herkunft", "QS Bedeutungen"] + for template_node in list_item_node.find_child_recursively( + NodeKind.TEMPLATE + ) + ] + ): + continue + + gloss_data = ( + defaultdict(list) + if parent_gloss_data is None + else parent_gloss_data.copy() ) - # XXX: We should extract the modifier. However, it seems to affect - # multiple glosses. Needs investigation. 
- pass - elif item_type == ":": - gloss_data = defaultdict(list) - for sub_list_node in list_item_node.find_child(NodeKind.LIST): - wxr.wtp.debug( - f"Skipped a sub-list in gloss list: {sub_list_node}", - sortid="extractor/de/glosses/extract_glosses/27", - ) - # XXX: We should extract the subglosses as subsenses. - pass + + # Extract sub-glosses for later processing + sub_glosses_list_nodes = list( + find_and_remove_child(list_item_node, NodeKind.LIST) + ) + + raw_gloss = clean_node(wxr, {}, list_item_node.children) + gloss_data["raw_glosses"] = [raw_gloss] + + process_K_template(wxr, gloss_data, list_item_node) gloss_text = clean_node(wxr, gloss_data, list_item_node.children) - match = re.match(r"\[(\d+[a-z]?)\]", gloss_text) - if match: - sense_number = match.group(1) - gloss_text = gloss_text[match.end() :].strip() - else: - sense_number = None + senseid, gloss_text = match_senseid(gloss_text) - if not sense_number: + if senseid: + senseid = ( + senseid + if senseid[0].isnumeric() + else parent_senseid + senseid + ) + gloss_data["senseid"] = senseid + else: wxr.wtp.debug( - f"Failed to extract sense number from gloss: {gloss_text}", + f"Failed to extract sense number from gloss node: {list_item_node}", sortid="extractor/de/glosses/extract_glosses/28", ) - gloss_data["glosses"] = [gloss_text] + # XXX: Extract tags from nodes instead using Italic and Template + gloss_text = extract_tags_from_gloss_text(gloss_data, gloss_text) - page_data[-1]["senses"].append(gloss_data) + if gloss_text or not sub_glosses_list_nodes: + gloss_data["glosses"] = [gloss_text] + page_data[-1]["senses"].append(gloss_data) + + for sub_list_node in sub_glosses_list_nodes: + process_gloss_list_item( + wxr, + page_data, + sub_list_node, + senseid, + gloss_data if not gloss_text else None, + ) else: wxr.wtp.debug( @@ -58,3 +104,62 @@ def extract_glosses( sortid="extractor/de/glosses/extract_glosses/29", ) continue + + +def handle_sense_modifier(wxr, list_item_node): + wxr.wtp.debug( + 
f"Skipped a sense modifier in gloss list: {list_item_node}", + sortid="extractor/de/glosses/extract_glosses/19", + ) + # XXX: We should extract the modifier. However, it seems to affect + # multiple glosses. Needs investigation. + pass + + +def process_K_template( + wxr: WiktextractContext, + gloss_data: defaultdict(list), + list_item_node: NodeKind.LIST_ITEM, +) -> None: + for template_node in list_item_node.find_child(NodeKind.TEMPLATE): + if template_node.template_name == "K": + text = clean_node(wxr, gloss_data, template_node).removesuffix(":") + tags = re.split(r";|,", text) + gloss_data["tags"] = [t.strip() for t in tags] + + # Prepositional and case information is sometimes only expanded to + # category links and not present in cleaned node. We still want it + # as a tag. + prep = template_node.template_parameters.get("Prä") + case = template_node.template_parameters.get("Kas") + category = (prep if prep else "") + (" + " + case if case else "") + if category: + gloss_data["tags"].append(category) + + # XXX: Investigate better ways to handle free text in K template + ft = template_node.template_parameters.get("ft") + if ft: + wxr.wtp.debug( + f"Found ft '{ft}' in K template which could be considered part of the gloss. 
Moved to tags for now.", + sortid="extractor/de/glosses/extract_glosses/63", + ) + + # Remove the template_node from the children of list_item_node + list_item_node.children = [ + c for c in list_item_node.children if c != template_node + ] + + +def extract_tags_from_gloss_text( + gloss_data: defaultdict(list), gloss_text: str +) -> None: + parts = gloss_text.split(":", 1) + if len(parts) > 1: + tags_part = parts[0].strip() + + categories = [c.strip() for c in re.split(",", tags_part)] + if all(c.isalnum() for c in categories): + gloss_data["tags"].extend(categories) + return parts[1].strip() + + return gloss_text diff --git a/src/wiktextract/extractor/de/page.py b/src/wiktextract/extractor/de/page.py index 07ba4d88..29b36d0f 100644 --- a/src/wiktextract/extractor/de/page.py +++ b/src/wiktextract/extractor/de/page.py @@ -7,9 +7,11 @@ from wikitextprocessor.parser import LevelNode from wiktextract.datautils import append_base_data +from wiktextract.extractor.de.pronunciation import extract_pronunciation from wiktextract.wxr_context import WiktextractContext from .gloss import extract_glosses +from .example import extract_examples # Templates that are used to form panels on pages and that should be ignored in # various positions @@ -27,108 +29,10 @@ # Templates that should not be pre-expanded DO_NOT_PRE_EXPAND_TEMPLATES = { "Ü-Tabelle", # Translation table - "Quellen", # Can be ignored since we have the tags in the tree + "Übersetzungen umleiten", # Translation table redirect } -def fix_level_hierarchy_of_subsections( - wxr: WiktextractContext, tree: List[WikiNode] -) -> List[WikiNode]: - """ - This function introduces level hierarchy to subsections and their content. - - The German Wiktionary does generally not use level 4 headings but instead - uses templates to define the subsections. These templates are usually - followed by a list of content that belongs to the subsection. Yet, in the - tree the content is on the same level as the subsection template. 
In Gernman - wiktionary, for cosmetic reasons, a level 4 heading is used to introduce the - translation subsection that then also contains other subsections not related - to translations. - - See: - https://de.wiktionary.org/wiki/Hilfe:Formatvorlage#Der_%E2%80%9EEndteil%E2%80%9C - """ - level_nodes: List[WikiNode] = [] - for node in tree: - if isinstance(node, WikiNode): - # A level 4 heading is used to introduce the translation - # section. - if node.kind == NodeKind.LEVEL4: - # Find the index of the first template after the Ü-Tabelle - # template - split_idx = len(node.children) - for idx, child in enumerate(node.children): - if split_idx < len(node.children): - if ( - isinstance(child, WikiNode) - and child.kind == NodeKind.TEMPLATE - ): - break - else: - split_idx = idx + 1 - if ( - isinstance(child, WikiNode) - and child.kind == NodeKind.TEMPLATE - and child.template_name == "Ü-Tabelle" - ): - split_idx = idx + 1 - - children_until_translation_table = node.children[:split_idx] - - children_after_translation_table = node.children[split_idx:] - - node.children = children_until_translation_table - level_nodes.append(node) - - level_nodes.extend( - fix_level_hierarchy_of_subsections( - wxr, children_after_translation_table - ) - ) - - elif node.kind == NodeKind.TEMPLATE: - level_node = LevelNode(NodeKind.LEVEL4, node.loc) - level_node.largs = [[node]] - level_nodes.append(level_node) - - elif node.kind == NodeKind.LIST: - if len(level_nodes) > 0: - level_nodes[-1].children.append(node) - else: - wxr.wtp.debug( - f"Unexpected list while introducing level hierarchy: {node}", - sortid="extractor/de/page/introduce_level_hierarchy/52", - ) - continue - - # Sometimes links are used outside of a section to link the whole - # entry to a category. We treat them here as level 4 headings, - # without any children. - elif node.kind == NodeKind.LINK: - level_node = LevelNode(NodeKind.LEVEL4, node.loc) - level_node.largs = [[node]] - level_nodes.append(level_node) - - # ignore
tags - elif node.kind == NodeKind.HTML and node.sarg == "br": - pass - else: - wxr.wtp.debug( - f"Unexpected WikiNode while introducing level hierarchy: {node}", - sortid="extractor/de/page/introduce_level_hierarchy/55", - ) - else: - if not len(level_nodes): - if not isinstance(node, str) or not node.strip() == "": - wxr.wtp.debug( - f"Unexpected string while introducing level hierarchy: {node}", - sortid="extractor/de/page/introduce_level_hierarchy/61", - ) - continue - level_nodes[-1].children.append(node) - return level_nodes - - def parse_section( wxr: WiktextractContext, page_data: List[Dict], @@ -161,15 +65,17 @@ def parse_section( ) return - # Level 4 headings were introduced by fix_level_hierarchy_of_subsections() - # for subsections that are introduced by templates. + # Level 4 headings were introduced by overriding the default templates. + # See overrides/de.json for details. elif level_node.kind == NodeKind.LEVEL4: - for template_node in level_node.find_content(NodeKind.TEMPLATE): - section_name = template_node.template_name - wxr.wtp.start_subsection(section_name) - if section_name == "Bedeutungen": - for list_node in level_node.find_child(NodeKind.LIST): - extract_glosses(wxr, page_data, list_node) + section_name = level_node.largs[0][0] + wxr.wtp.start_subsection(section_name) + if section_name == "Bedeutungen": + extract_glosses(wxr, page_data, level_node) + if section_name == "Aussprache": + extract_pronunciation(wxr, page_data, level_node) + if section_name == "Beispiele": + extract_examples(wxr, page_data, level_node) FORM_POS = { @@ -292,10 +198,22 @@ def process_pos_section( sortid="extractor/de/page/process_pos_section/31", ) - subsections = fix_level_hierarchy_of_subsections(wxr, level_node.children) + for level_4_node in level_node.find_child(NodeKind.LEVEL4): + parse_section(wxr, page_data, base_data, level_4_node) - for subsection in subsections: - parse_section(wxr, page_data, base_data, subsection) + for non_l4_node in 
level_node.invert_find_child(NodeKind.LEVEL4): + if ( + isinstance(non_l4_node, WikiNode) + and non_l4_node.kind == NodeKind.TEMPLATE + and "Übersicht" in non_l4_node.template_name + ): + # XXX: de: Extract form tables + pass + else: + wxr.wtp.debug( + f"Unexpected node in pos section: {non_l4_node}", + sortid="extractor/de/page/process_pos_section/41", + ) return @@ -310,13 +228,12 @@ def parse_page( # Parse the page, pre-expanding those templates that are likely to # influence parsing + DO_NOT_PRE_EXPAND_TEMPLATES.update(wxr.config.DE_FORM_TABLES) tree = wxr.wtp.parse( page_text, pre_expand=True, additional_expand=ADDITIONAL_EXPAND_TEMPLATES, - do_not_pre_expand=DO_NOT_PRE_EXPAND_TEMPLATES.update( - wxr.config.DE_FORM_TABLES - ), + do_not_pre_expand=DO_NOT_PRE_EXPAND_TEMPLATES, ) page_data = [] diff --git a/src/wiktextract/extractor/de/pronunciation.py b/src/wiktextract/extractor/de/pronunciation.py new file mode 100644 index 00000000..b5fb1d0d --- /dev/null +++ b/src/wiktextract/extractor/de/pronunciation.py @@ -0,0 +1,181 @@ +from collections import defaultdict +from typing import Dict, List, Union + +from wikitextprocessor import NodeKind, WikiNode +from wikitextprocessor.parser import LevelNode +from wiktextract.extractor.share import create_audio_url_dict + +from wiktextract.page import clean_node +from wiktextract.wxr_context import WiktextractContext + + +def extract_pronunciation( + wxr: WiktextractContext, + page_data: List[Dict], + level_node: LevelNode, +): + for list_node in level_node.find_child(NodeKind.LIST): + sound_data = [defaultdict(list)] + + for not_list_item_node in list_node.invert_find_child( + NodeKind.LIST_ITEM + ): + wxr.wtp.debug( + f"Found unexpected non-list-item node in pronunciation section: {not_list_item_node}", + sortid="extractor/de/pronunciation/extract_pronunciation/28", + ) + + for list_item_node in list_node.find_child(NodeKind.LIST_ITEM): + children = list(list_item_node.filter_empty_str_child()) + if len(children) == 0: + 
continue + + head_template, rest = children[0], children[1:] + if ( + not isinstance(head_template, WikiNode) + or head_template.kind != NodeKind.TEMPLATE + or not rest + ): + wxr.wtp.debug( + f"Found unexpected non-template node in pronunciation section: {head_template}", + sortid="extractor/de/pronunciation/extract_pronunciation/37", + ) + continue + if head_template.template_name == "IPA": + process_ipa(wxr, sound_data, rest) + elif head_template.template_name == "Hörbeispiele": + sound_data.append(defaultdict(list)) + process_hoerbeispiele(wxr, sound_data, rest) + elif head_template.template_name == "Reime": + process_rhymes(wxr, sound_data, rest) + else: + wxr.wtp.debug( + f"Found unexpected template in pronunciation section: {head_template} with content {rest}", + sortid="extractor/de/pronunciation/extract_pronunciation/45)", + ) + + # Remove empty entries + sound_data = [entry for entry in sound_data if entry != {}] + if len(sound_data) > 0: + page_data[-1]["sounds"].extend(sound_data) + + for non_list_node in level_node.invert_find_child(NodeKind.LIST): + wxr.wtp.debug( + f"Found unexpected non-list node in pronunciation section: {non_list_node}", + sortid="extractor/de/pronunciation/extract_pronunciation/64", + ) + + +def process_ipa( + wxr: WiktextractContext, + sound_data: List[Dict], + nodes: List[Union[WikiNode, str]], +): + for node in nodes: + if is_template_node_with_name(node, "Lautschrift"): + process_lautschrift_template(wxr, sound_data, node) + elif is_tag_node(node): + append_tag(wxr, sound_data, node) + elif is_new_sound_data_entry_sep(node): + sound_data.append(defaultdict(list)) + else: + wxr.wtp.debug( + f"Found unexpected non-Lautschrift node in IPA section: {node}", + sortid="extractor/de/pronunciation/process_ipa/57", + ) + + +def process_lautschrift_template( + wxr: WiktextractContext, sound_data: List[Dict], node +): + template_parameters = node.template_parameters + + ipa = template_parameters.get(1) + + lang_code = 
template_parameters.get("spr") + if lang_code: + language = wxr.wtp.LANGUAGES_BY_CODE[lang_code] + add_sound_data_without_appending_to_existing_properties( + sound_data, + { + "ipa": [ipa], + "lang_code": lang_code, + "language": language, + }, + ) + else: + sound_data[-1]["ipa"].append(ipa) + + +def process_hoerbeispiele( + wxr: WiktextractContext, sound_data: List[Dict], nodes: List[WikiNode] +): + for node in nodes: + if is_template_node_with_name(node, "Audio"): + process_audio_template(wxr, sound_data, node) + elif is_tag_node(node): + append_tag(wxr, sound_data, node) + elif is_new_sound_data_entry_sep(node): + sound_data.append(defaultdict(list)) + else: + wxr.wtp.debug( + f"Found unexpected node in Hoerbeispiele section: {node}", + sortid="extractor/de/pronunciation/process_hoerbeispiele/193", + ) + + +def process_audio_template( + wxr: WiktextractContext, sound_data: List[Dict], node +): + audio_file = node.template_parameters.get(1) + if audio_file: + add_sound_data_without_appending_to_existing_properties( + sound_data, create_audio_url_dict(audio_file) + ) + + +def process_rhymes( + wxr: WiktextractContext, sound_data: List[Dict], nodes: List[WikiNode] +): + # XXX: Extract rhymes from the referenced rhymes page + pass + + +def is_template_node_with_name(node: Union[WikiNode, str], template_name: str): + return ( + isinstance(node, WikiNode) + and node.kind == NodeKind.TEMPLATE + and node.template_name == template_name + ) + + +def add_sound_data_without_appending_to_existing_properties( + sound_data: List[Dict], + new_sound_data: Dict, +): + """Creates a new IPA data entry if properties exist in previous entry.""" + if any([key in sound_data[-1] for key in new_sound_data.keys()]): + sound_data.append(defaultdict(list)) + + for key, value in new_sound_data.items(): + if isinstance(value, str): + sound_data[-1][key] = value + else: + sound_data[-1][key].extend(value) + + +def is_tag_node(node: Union[WikiNode, str]): + return isinstance(node, WikiNode) and 
node.kind in [ + NodeKind.TEMPLATE, + NodeKind.ITALIC, + ] + + +def append_tag(wxr: WiktextractContext, sound_data: Dict, node: WikiNode): + tag = clean_node(wxr, {}, node).strip() + if tag: + sound_data[-1]["tags"].append(tag) + + +def is_new_sound_data_entry_sep(node: Union[WikiNode, str]): + return isinstance(node, str) and node.strip() in [",", ";"] diff --git a/src/wiktextract/extractor/de/utils.py b/src/wiktextract/extractor/de/utils.py new file mode 100644 index 00000000..73416645 --- /dev/null +++ b/src/wiktextract/extractor/de/utils.py @@ -0,0 +1,24 @@ +import re +from wikitextprocessor import NodeKind, WikiNode + + +def match_senseid(node_text: str): + match = re.match(r"\[(\d*[a-z]?)\]", node_text) + + if match: + senseid = match.group(1) + node_text = node_text[match.end() :].strip() + else: + senseid = None + + return senseid, node_text + + +def find_and_remove_child(node: WikiNode, kind: NodeKind, cb=None): + children = [] + for idx, child in reversed(list(node.find_child(kind, with_index=True))): + if cb and not cb(child): + continue + del node.children[idx] + children.append(child) + return reversed(children) diff --git a/tests/test_de_example.py b/tests/test_de_example.py new file mode 100644 index 00000000..980a0be6 --- /dev/null +++ b/tests/test_de_example.py @@ -0,0 +1,140 @@ +import unittest +from collections import defaultdict + +from wikitextprocessor import Wtp + +from wiktextract.config import WiktionaryConfig +from wiktextract.extractor.de.example import extract_examples, extract_reference + +from wiktextract.thesaurus import close_thesaurus_db +from wiktextract.wxr_context import WiktextractContext + + +class TestDEExample(unittest.TestCase): + maxDiff = None + + def setUp(self) -> None: + self.wxr = WiktextractContext( + Wtp(lang_code="de"), WiktionaryConfig(dump_file_lang_code="de") + ) + + def tearDown(self) -> None: + self.wxr.wtp.close_db_conn() + close_thesaurus_db( + self.wxr.thesaurus_db_path, self.wxr.thesaurus_db_conn + ) + + 
def test_de_extract_examples(self): + self.wxr.wtp.start_page("") + root = self.wxr.wtp.parse( + ":[1] example1A \n:[1] example1B\n:[2] example2\n:[3] example3" + ) + + page_data = [defaultdict(list)] + page_data[-1]["senses"] = [ + defaultdict(list, {"senseid": "1"}), + defaultdict(list, {"senseid": "2"}), + ] + + extract_examples(self.wxr, page_data, root) + + self.assertEqual( + page_data, + [ + { + "senses": [ + { + "examples": [ + {"text": "example1A"}, + {"text": "example1B"}, + ], + "senseid": "1", + }, + { + "examples": [{"text": "example2"}], + "senseid": "2", + }, + ] + } + ], + ) + + def test_de_extract_example_with_reference(self): + self.wxr.wtp.start_page("") + root = self.wxr.wtp.parse(":[1] example1 ref1A") + + page_data = [defaultdict(list)] + page_data[-1]["senses"] = [ + defaultdict(list, {"senseid": "1"}), + ] + + extract_examples(self.wxr, page_data, root) + + self.assertEqual( + page_data, + [ + { + "senses": [ + { + "examples": [ + { + "text": "example1", + "ref": {"raw_ref": "ref1A"}, + }, + ], + "senseid": "1", + }, + ] + } + ], + ) + + def test_de_extract_reference_from_literatur_template(self): + # https://de.wiktionary.org/wiki/Beispiel + self.wxr.wtp.start_page("Beispiel") + self.wxr.wtp.add_page("Vorlage:Literatur", 10, "Expanded template") + root = self.wxr.wtp.parse( + "{{Literatur|Autor=Steffen Möller|Titel=Viva Warszawa|TitelErg=Polen für Fortgeschrittene|Verlag=Piper|Ort=München/Berlin|Jahr=2015}}, Seite 273. ISBN 978-3-89029-459-9." + ) + + example_data = defaultdict(str) + + extract_reference(self.wxr, example_data, root.children[0]) + + self.assertEqual( + example_data, + { + "ref": { + "raw_ref": "Expanded template, Seite 273. 
ISBN 978-3-89029-459-9.", + "titel": "Viva Warszawa", + "autor": "Steffen Möller", + "titelerg": "Polen für Fortgeschrittene", + "verlag": "Piper", + "ort": "München/Berlin", + "jahr": "2015", + } + }, + ) + + def test_de_extract_reference_from_templates_without_named_args(self): + # https://de.wiktionary.org/wiki/Beispiel + # Reference templates not following the Literatur template pattern are + # currently not extracted field by field (e.g. Vorlage:Ref-OWID) + self.wxr.wtp.start_page("Beispiel") + self.wxr.wtp.add_page("Vorlage:Ref-OWID", 10, "Expanded template") + root = self.wxr.wtp.parse( + "{{Ref-OWID|Sprichwörter|401781|Schlechte Beispiele verderben gute Sitten.}}" + ) + + example_data = defaultdict(str) + + extract_reference(self.wxr, example_data, root.children[0]) + + self.assertEqual( + example_data, + { + "ref": { + "raw_ref": "Expanded template", + } + }, + ) diff --git a/tests/test_de_gloss.py b/tests/test_de_gloss.py index 3d19861b..02a41751 100644 --- a/tests/test_de_gloss.py +++ b/tests/test_de_gloss.py @@ -1,18 +1,26 @@ import unittest from collections import defaultdict +from unittest.mock import patch from wikitextprocessor import Wtp from wiktextract.config import WiktionaryConfig -from wiktextract.extractor.de.gloss import extract_glosses +from wiktextract.extractor.de.gloss import ( + extract_glosses, + extract_tags_from_gloss_text, + process_K_template, +) from wiktextract.thesaurus import close_thesaurus_db from wiktextract.wxr_context import WiktextractContext class TestGlossList(unittest.TestCase): + maxDiff = None + def setUp(self) -> None: self.wxr = WiktextractContext( - Wtp(lang_code="de"), WiktionaryConfig(dump_file_lang_code="de") + Wtp(lang_code="de"), + WiktionaryConfig(dump_file_lang_code="de"), ) def tearDown(self) -> None: @@ -27,7 +35,7 @@ def test_de_extract_glosses(self): page_data = [defaultdict(list)] - extract_glosses(self.wxr, page_data, root.children[0]) + extract_glosses(self.wxr, page_data, root) self.assertEqual( 
page_data, @@ -36,11 +44,243 @@ def test_de_extract_glosses(self): "senses": [ { "glosses": ["gloss1"], + "raw_glosses": ["[1] gloss1"], + "senseid": "1", }, { "glosses": ["gloss2"], + "raw_glosses": ["[2] gloss2"], + "senseid": "2", + }, + ] + } + ], + ) + + def test_de_extract_glosses_with_subglosses(self): + self.wxr.wtp.start_page("") + root = self.wxr.wtp.parse( + ":[1] gloss1\n::[a] subglossA\n::[b] subglossB" + ) + + page_data = [defaultdict(list)] + + extract_glosses(self.wxr, page_data, root) + + self.assertEqual( + page_data, + [ + { + "senses": [ + { + "glosses": ["gloss1"], + "raw_glosses": ["[1] gloss1"], + "senseid": "1", + }, + { + "glosses": ["subglossA"], + "raw_glosses": ["[a] subglossA"], + "senseid": "1a", + }, + { + "glosses": ["subglossB"], + "raw_glosses": ["[b] subglossB"], + "senseid": "1b", + }, + ] + } + ], + ) + + def test_de_extract_glosses_with_only_subglosses(self): + self.wxr.wtp.add_page("Vorlage:K", 10, "tag") + self.wxr.wtp.start_page("") + root = self.wxr.wtp.parse( + ":[1] {{K|tag}}\n::[a] subglossA\n::[1b] subglossB" + ) + + page_data = [defaultdict(list)] + + extract_glosses(self.wxr, page_data, root) + self.assertEqual( + page_data, + [ + { + "senses": [ + { + "tags": ["tag"], + "glosses": ["subglossA"], + "raw_glosses": ["[a] subglossA"], + "senseid": "1a", + }, + { + "tags": ["tag"], + "glosses": ["subglossB"], + "raw_glosses": ["[1b] subglossB"], + "senseid": "1b", }, ] } ], ) + + def test_process_K_template_removes_K_template_nodes(self): + self.wxr.wtp.add_page("Vorlage:K", 10, "tag1, tag2") + self.wxr.wtp.start_page("") + root = self.wxr.wtp.parse("{{K|tag1|tag2}} gloss1") + + gloss_data = defaultdict(list) + + self.assertEqual(len(root.children), 2) + + process_K_template(self.wxr, gloss_data, root) + + self.assertEqual( + gloss_data, + { + "tags": ["tag1", "tag2"], + }, + ) + + self.assertEqual(len(root.children), 1) + + def get_mock(self, mock_return_value: str): + def generic_mock(*args, **kwargs): + return 
mock_return_value + + return generic_mock + + def test_process_K_template(self): + # Test cases chosen from: + # https://de.wiktionary.org/wiki/Vorlage:K/Doku + test_cases = [ + # https://de.wiktionary.org/wiki/delektieren + # One tag + { + "input": "{{K|refl.}}", + "expected_tags": ["reflexiv"], + "mock_return": "reflexiv:", + }, + # https://de.wiktionary.org/wiki/abbreviare + # With ft and spr args + { + "input": "{{K|trans.|ft=etwas in seinem [[räumlich]]en oder [[zeitlich]]en [[Ausmaß]] verringern|spr=it}}", + "expected_tags": [ + "transitiv", + "etwas in seinem räumlichen oder zeitlichen Ausmaß verringern", + ], + "mock_return": "transitiv, etwas in seinem räumlichen oder zeitlichen Ausmaß verringern:", + }, + # https://de.wiktionary.org/wiki/abbreviare + # With multiple tags + { + "input": "{{K|trans.|Linguistik|Wortbildung|spr=it}}", + "expected_tags": [ + "transitiv", + "Linguistik", + "Wortbildung", + ], + "mock_return": "transitiv, Linguistik, Wortbildung:", + }, + # https://de.wiktionary.org/wiki/almen + # Ideally we would filter out "besonders" but there doesn't seem + # to be a general rule which tags are semmantially relevant + # With multiple tags and t1, t2 args + { + "input": "{{K|trans.|t1=;|besonders|t2=_|bayrisch|österr.}}", + "expected_tags": [ + "transitiv", + "besonders bayrisch", + "österreichisch", + ], + "mock_return": "transitiv, besonders bayrisch, österreichisch", + }, + # https://de.wiktionary.org/wiki/einlaufen + # With two tags and t7 arg + { + "input": "{{K|intrans.|Nautik|t7=_|ft=(von Schiffen)}}", + "expected_tags": ["intransitiv", "Nautik (von Schiffen)"], + "mock_return": "intransitiv, Nautik (von Schiffen):", + }, + # https://de.wiktionary.org/wiki/zählen + # With Prä and Kas args + { + "input": "{{K|intrans.|Prä=auf|Kas=Akk.|ft=(auf jemanden/etwas zählen)}}", + "expected_tags": [ + "intransitiv", + "(auf jemanden/etwas zählen)", + "auf + Akk.", + ], + "mock_return": "intransitiv, (auf jemanden/etwas zählen):", + }, + # 
https://de.wiktionary.org/wiki/bojovat + # With Prä and Kas args and redundant ft arg + { + "input": "{{K|intrans.|Prä=proti|Kas=Dativ||ft=bojovat [[proti]] + [[Dativ]]|spr=cs}}", + "expected_tags": [ + "intransitiv", + "bojovat proti + Dativ", + "proti + Dativ", + ], + "mock_return": "intransitiv, bojovat proti + Dativ:", + }, + ] + + for case in test_cases: + with self.subTest(case=case): + gloss_data = defaultdict(list) + + self.wxr.wtp.start_page("") + + root = self.wxr.wtp.parse(case["input"]) + + with patch( + "wiktextract.extractor.de.gloss.clean_node", + self.get_mock(case["mock_return"]), + ): + process_K_template(self.wxr, gloss_data, root) + self.assertEqual( + gloss_data["tags"], + case["expected_tags"], + ) + + def test_de_extract_tags_from_gloss_text(self): + test_cases = [ + # https://de.wiktionary.org/wiki/Hengst + { + "input": "Zoologie: männliches Tier aus der Familie der Einhufer und Kamele", + "expected_tags": ["Zoologie"], + "expected_gloss": "männliches Tier aus der Familie der Einhufer und Kamele", + }, + # https://de.wiktionary.org/wiki/ARD + { + "input": "umgangssprachlich, Kurzwort, Akronym: für das erste Fernsehprogramm der ARD", + "expected_tags": ["umgangssprachlich", "Kurzwort", "Akronym"], + "expected_gloss": "für das erste Fernsehprogramm der ARD", + }, + # https://de.wiktionary.org/wiki/Endspiel + { + "input": "Drama von Samuel Beckett: Menschliche Existenz in der Endphase des Verfalls und der vergeblichen Suche nach einem Ausweg", + "expected_tags": None, + "expected_gloss": "Drama von Samuel Beckett: Menschliche Existenz in der Endphase des Verfalls und der vergeblichen Suche nach einem Ausweg", + } + # Add more test cases as needed + ] + for case in test_cases: + with self.subTest(case=case): + gloss_data = defaultdict(list) + + gloss_text = extract_tags_from_gloss_text( + gloss_data, case["input"] + ) + + if case["expected_tags"] is None: + self.assertEqual(gloss_data, {}) + else: + self.assertEqual( + gloss_data, + { + "tags": 
case["expected_tags"], + }, + ) + self.assertEqual(gloss_text, case["expected_gloss"]) diff --git a/tests/test_de_page.py b/tests/test_de_page.py index 47a1aef7..0e2c14c0 100644 --- a/tests/test_de_page.py +++ b/tests/test_de_page.py @@ -7,7 +7,6 @@ from wiktextract.config import WiktionaryConfig from wiktextract.extractor.de.page import ( - fix_level_hierarchy_of_subsections, parse_page, parse_section, ) @@ -28,6 +27,7 @@ def setUp(self): # capture_examples=True, ) self.wxr = WiktextractContext(Wtp(lang_code="de"), conf1) + self.maxDiff = None def tearDown(self) -> None: self.wxr.wtp.close_db_conn() @@ -82,21 +82,11 @@ def test_de_parse_page_skipping_head_templates(self): # The way append_base_data() works requires the presence of a sense # dictionary before starting a new pos section. Therefore, we need to add # at least one sense data point to the test case. + def test_de_parse_section(self): self.wxr.wtp.add_page("Vorlage:Wortart", 10, "") self.wxr.wtp.add_page("Vorlage:Bedeutungen", 10, "") - page_text = """ -=== {{Wortart|Adjektiv|Englisch}}, {{Wortart|Adverb|Englisch}} === -{{Bedeutungen}} -:[1] gloss1 -=== {{Wortart|Verb|Englisch}} === -{{Bedeutungen}} -:[1] gloss2 -=== {{Wortart|Substantiv|Englisch}} === -{{Bedeutungen}} -:[1] gloss3 - -""" + page_text = "=== {{Wortart|Adjektiv|Englisch}}, {{Wortart|Adverb|Englisch}} ===\n====Bedeutungen====\n:[1] gloss1\n=== {{Wortart|Verb|Englisch}} ===\n====Bedeutungen====\n:[1] gloss2\n=== {{Wortart|Substantiv|Englisch}} ===\n====Bedeutungen====\n:[1] gloss3" self.wxr.wtp.start_page("") root = self.wxr.wtp.parse( page_text, @@ -116,6 +106,8 @@ def test_de_parse_section(self): "senses": [ { "glosses": ["gloss1"], + "senseid": "1", + "raw_glosses": ["[1] gloss1"], }, ], }, @@ -125,6 +117,8 @@ def test_de_parse_section(self): "senses": [ { "glosses": ["gloss1"], + "senseid": "1", + "raw_glosses": ["[1] gloss1"], }, ], }, @@ -134,6 +128,8 @@ def test_de_parse_section(self): "senses": [ { "glosses": ["gloss2"], + "senseid": 
"1", + "raw_glosses": ["[1] gloss2"], }, ], }, @@ -143,70 +139,10 @@ def test_de_parse_section(self): "senses": [ { "glosses": ["gloss3"], + "senseid": "1", + "raw_glosses": ["[1] gloss3"], }, ], }, ], ) - - def test_de_fix_level_hierarchy_of_subsections(self): - self.wxr.wtp.add_page("Vorlage:Englisch Substantiv Übersicht", 10, "") - self.wxr.wtp.add_page("Vorlage:Worttrennung", 10, "") - self.wxr.wtp.add_page("Vorlage:Aussprache", 10, "") - self.wxr.wtp.add_page("Vorlage:Übersetzungen", 10, "") - self.wxr.wtp.add_page("Vorlage:Ü-Tabelle", 10, "") - self.wxr.wtp.add_page("Vorlage:Referenzen", 10, "") - - page_text = """ -{{Englisch Substantiv Übersicht -|args=args}} - -{{Worttrennung}} -:item - -{{Aussprache}} -:item - -==== {{Übersetzungen}} ==== -{{Ü-Tabelle|1|G=arg|Ü-Liste= -:item -}} - -{{Referenzen}} -:item -""" - self.wxr.wtp.start_page("") - root = self.wxr.wtp.parse( - page_text, - pre_expand=True, - ) - - subsections = fix_level_hierarchy_of_subsections( - self.wxr, root.children - ) - - target_page_text = """==== {{Englisch Substantiv Übersicht\n|args=args}} ==== - -==== {{Worttrennung}} ==== -:item - -==== {{Aussprache}} ==== -:item - -==== {{Übersetzungen}} ==== -{{Ü-Tabelle|1|G=arg|Ü-Liste= -:item -}} - -==== {{Referenzen}} ==== -:item -""" - root = self.wxr.wtp.parse( - target_page_text, - pre_expand=True, - ) - - self.assertEqual( - [str(s) for s in subsections], - [str(t) for t in root.children], - ) diff --git a/tests/test_de_pronunciation.py b/tests/test_de_pronunciation.py new file mode 100644 index 00000000..6fae64eb --- /dev/null +++ b/tests/test_de_pronunciation.py @@ -0,0 +1,176 @@ +import unittest +from collections import defaultdict + +from wikitextprocessor import Wtp + +from wiktextract.config import WiktionaryConfig +from wiktextract.extractor.de.pronunciation import ( + process_ipa, + process_hoerbeispiele, +) +from wiktextract.thesaurus import close_thesaurus_db +from wiktextract.wxr_context import WiktextractContext + + +class 
TestDEPronunciation(unittest.TestCase): + maxDiff = None + + def setUp(self) -> None: + self.wxr = WiktextractContext( + Wtp(lang_code="de"), WiktionaryConfig(dump_file_lang_code="de") + ) + + def tearDown(self) -> None: + self.wxr.wtp.close_db_conn() + close_thesaurus_db( + self.wxr.thesaurus_db_path, self.wxr.thesaurus_db_conn + ) + + def test_de_process_ipa(self): + test_cases = [ + { + "input": "{{Lautschrift|ipa1}}", + "expected": [ + { + "ipa": ["ipa1"], + } + ], + }, + { + "input": "{{Lautschrift|ipa1|spr=de}}", + "expected": [ + {"ipa": ["ipa1"], "language": "Deutsch", "lang_code": "de"} + ], + }, + { + "input": "{{Lautschrift|ipa1}} {{Lautschrift|ipa2}}{{Lautschrift|ipa3|spr=de}}", + "expected": [ + {"ipa": ["ipa1", "ipa2"]}, + {"ipa": ["ipa3"], "language": "Deutsch", "lang_code": "de"}, + ], + }, + { + "input": "{{Lautschrift|ipa1}}, ''tag1'' {{Lautschrift|ipa2}}", + "expected": [ + {"ipa": ["ipa1"]}, + {"ipa": ["ipa2"], "tags": ["tag1"]}, + ], + }, + ] + + for case in test_cases: + with self.subTest(case=case): + self.wxr.wtp.start_page("") + self.wxr.wtp.add_page("Vorlage:IPA", 10, "") + self.wxr.wtp.add_page("Vorlage:Lautschrift", 10, "(Deutsch)") + + self.wxr.wtp.LANGUAGES_BY_CODE["de"] = "Deutsch" + + root = self.wxr.wtp.parse(case["input"]) + + sound_data = [defaultdict(list)] + + process_ipa( + self.wxr, sound_data, list(root.filter_empty_str_child()) + ) + + self.assertEqual(sound_data, case["expected"]) + + def test_de_process_hoerbeispiele(self): + # https://de.wiktionary.org/wiki/Beispiel + filename1 = "De-Beispiel.ogg" + # https://de.wiktionary.org/wiki/butineur + filename2 = "LL-Q150 (fra)-WikiLucas00-butineur.wav" + test_cases = [ + { + "input": "{{Audio|" + filename1 + "}}", + "expected": [ + { + "audio": filename1, + "mp3_url": None, # None indicates we don't care about the exact value + "ogg_url": None, + } + ], + }, + { + "input": "{{Audio|" + + filename1 + + "}} {{Audio|" + + filename2 + + "}}", + "expected": [ + { + "audio": filename1, 
+ "mp3_url": None, + "ogg_url": None, + }, + { + "audio": filename2, + "ogg_url": None, + "mp3_url": None, + "wav_url": None, + }, + ], + }, + { + "input": "{{Audio|" + + filename1 + + "}} ''tag1'', ''tag2'' {{Audio|" + + filename2 + + "}}", + "expected": [ + { + "audio": filename1, + "mp3_url": None, + "ogg_url": None, + "tags": ["tag1"], + }, + { + "audio": filename2, + "mp3_url": None, + "ogg_url": None, + "wav_url": None, + "tags": ["tag2"], + }, + ], + }, + ] + + for case in test_cases: + with self.subTest(case=case): + self.wxr.wtp.start_page("") + self.wxr.wtp.add_page("Vorlage:IPA", 10, "") + self.wxr.wtp.add_page("Vorlage:Audio", 10, "") + + self.wxr.wtp.LANGUAGES_BY_CODE["de"] = "Deutsch" + + root = self.wxr.wtp.parse(case["input"]) + + sound_data = [defaultdict(list)] + + process_hoerbeispiele( + self.wxr, sound_data, list(root.filter_empty_str_child()) + ) + + self.assertSoundDataMatchesExpected( + sound_data, case["expected"] + ) + + def assertSoundDataMatchesExpected(self, sound_data, expected): + self.assertEqual( + len(sound_data), + len(expected), + f"Mismatch in number of sound data entries{sound_data}", + ) + + for data, exp in zip(sound_data, expected): + for key, value in exp.items(): + if value is None: + self.assertIn(key, data) + else: + self.assertEqual(data[key], value) + + for key in data: + self.assertIn(key, exp) + if exp[key] is not None: + self.assertEqual(data[key], exp[key])