From 14b24fba18661ef199b9907ed55e2bab6058b1fb Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Tue, 17 Dec 2024 10:23:27 +0800
Subject: [PATCH 1/5] [it] extract plain text tags in linkage list

---
 src/wiktextract/extractor/it/linkage.py |  4 +++-
 tests/test_it_linkage.py                | 16 ++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/wiktextract/extractor/it/linkage.py b/src/wiktextract/extractor/it/linkage.py
index 75abd5d5..484a84df 100644
--- a/src/wiktextract/extractor/it/linkage.py
+++ b/src/wiktextract/extractor/it/linkage.py
@@ -43,7 +43,9 @@ def extract_linkage_list_item(
         elif isinstance(node, str):
             for word_str in node.split(","):
                 word_str = word_str.strip()
-                if word_str != "":
+                if word_str.startswith("(") and word_str.endswith(")"):
+                    raw_tags.append(word_str.strip("()"))
+                elif word_str != "":
                     linkages.append(Linkage(word=word_str, raw_tags=raw_tags))
                     raw_tags.clear()
 
diff --git a/tests/test_it_linkage.py b/tests/test_it_linkage.py
index 3aabea24..ea968969 100644
--- a/tests/test_it_linkage.py
+++ b/tests/test_it_linkage.py
@@ -42,3 +42,19 @@ def test_synonyms(self):
                 {"word": "intenso"},
             ],
         )
+
+    def test_text_tag(self):
+        self.wxr.wtp.add_page("Template:-it-", 10, "Italiano")
+        data = parse_page(
+            self.wxr,
+            "cane",
+            """== {{-it-}} ==
+===Sostantivo===
+# [[animale]]
+===Iperonimi===
+* (dominio) [[eucariote]]""",
+        )
+        self.assertEqual(
+            data[0]["hypernyms"],
+            [{"word": "eucariote", "raw_tags": ["dominio"]}],
+        )

From 22517f2aabb52ca4a48c3b0b53110ed3ef9124b4 Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Tue, 17 Dec 2024 10:46:25 +0800
Subject: [PATCH 2/5] [it] extract proverb section

---
 src/wiktextract/extractor/it/linkage.py | 23 ++++++++++++++++++++++-
 src/wiktextract/extractor/it/models.py  |  1 +
 tests/test_it_linkage.py                | 21 +++++++++++++++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/src/wiktextract/extractor/it/linkage.py b/src/wiktextract/extractor/it/linkage.py
index 484a84df..0314fd99 100644
--- a/src/wiktextract/extractor/it/linkage.py
+++ b/src/wiktextract/extractor/it/linkage.py
@@ -14,7 +14,11 @@ def extract_linkage_section(
     linkages = []
     for list_node in level_node.find_child(NodeKind.LIST):
         for list_item in list_node.find_child(NodeKind.LIST_ITEM):
-            linkages.extend(extract_linkage_list_item(wxr, list_item))
+            linkages.extend(
+                extract_proverb_list_item(wxr, list_item)
+                if linkage_type == "proverbs"
+                else extract_linkage_list_item(wxr, list_item)
+            )
 
     for data in page_data:
         if data.lang_code == page_data[-1].lang_code:
@@ -50,3 +54,20 @@ def extract_linkage_list_item(
                     raw_tags.clear()
 
     return linkages
+
+
+def extract_proverb_list_item(
+    wxr: WiktextractContext, list_item: WikiNode
+) -> list[Linkage]:
+    proverb = Linkage(word="")
+    for index, node in enumerate(list_item.children):
+        if isinstance(node, WikiNode) and node.kind == NodeKind.ITALIC:
+            proverb.word = clean_node(wxr, None, node)
+        elif isinstance(node, str) and ":" in node:
+            proverb.sense = clean_node(
+                wxr,
+                None,
+                [node[node.index(":") + 1 :]] + list_item.children[index + 1 :],
+            )
+            break
+    return [proverb] if proverb.word != "" else []
diff --git a/src/wiktextract/extractor/it/models.py b/src/wiktextract/extractor/it/models.py
index 7ba272a7..44cf2f7f 100644
--- a/src/wiktextract/extractor/it/models.py
+++ b/src/wiktextract/extractor/it/models.py
@@ -72,6 +72,7 @@ class Linkage(ItalianBaseModel):
     word: str
     tags: list[str] = []
     raw_tags: list[str] = []
+    sense: str = ""
 
 
 class WordEntry(ItalianBaseModel):
diff --git a/tests/test_it_linkage.py b/tests/test_it_linkage.py
index ea968969..f1cf5c23 100644
--- a/tests/test_it_linkage.py
+++ b/tests/test_it_linkage.py
@@ -58,3 +58,24 @@ def test_text_tag(self):
             data[0]["hypernyms"],
             [{"word": "eucariote", "raw_tags": ["dominio"]}],
         )
+
+    def test_proverbs(self):
+        self.wxr.wtp.add_page("Template:-it-", 10, "Italiano")
+        data = parse_page(
+            self.wxr,
+            "cane",
+            """== {{-it-}} ==
+===Sostantivo===
+# [[animale]]
+===Proverbi e modi di dire===
+* ''Menare il '''can''' per l'aia'': tergiversare, prendere tempo""",
+        )
+        self.assertEqual(
+            data[0]["proverbs"],
+            [
+                {
+                    "word": "Menare il can per l'aia",
+                    "sense": "tergiversare, prendere tempo",
+                }
+            ],
+        )

From 1645b6f12403a1422b40fe4c7bcddd8f7c06dd12 Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Tue, 17 Dec 2024 12:17:43 +0800
Subject: [PATCH 3/5] [it] override "Template:-verb-"

its contents are inside a "noinclude" tag
---
 src/wiktextract/data/overrides/it.json      | 5 +++++
 src/wiktextract/extractor/it/translation.py | 1 +
 tests/test_it_etymology.py                  | 2 +-
 tests/test_it_translation.py                | 2 +-
 4 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/wiktextract/data/overrides/it.json b/src/wiktextract/data/overrides/it.json
index e02978bd..42fd7447 100644
--- a/src/wiktextract/data/overrides/it.json
+++ b/src/wiktextract/data/overrides/it.json
@@ -8,5 +8,10 @@
     "body": "===Note / Riferimenti===\n",
     "namespace_id": 10,
     "need_pre_expand": true
+  },
+  "Template:-verb-": {
+    "body": "{{Sezione voce|Immagine=Open_book_01.svg|Dimensione=30px|Sezione=verbo|Sezione al plurale=verbi|Genere=m|Lingua={{{1|}}}}}{{#invoke:Categorizzazione verbi italiani|main|{{{1|}}}}}",
+    "namespace_id": 10,
+    "need_pre_expand": true
   }
 }
diff --git a/src/wiktextract/extractor/it/translation.py b/src/wiktextract/extractor/it/translation.py
index 8467177c..e1cde868 100644
--- a/src/wiktextract/extractor/it/translation.py
+++ b/src/wiktextract/extractor/it/translation.py
@@ -13,6 +13,7 @@ def extract_translation_section(
     page_data: list[WordEntry],
     level_node: LevelNode,
 ) -> None:
+    # https://it.wiktionary.org/wiki/Aiuto:Traduzioni
     sense = ""
     translations = []
     cats = {}
diff --git a/tests/test_it_etymology.py b/tests/test_it_etymology.py
index e228a363..110bc18a 100644
--- a/tests/test_it_etymology.py
+++ b/tests/test_it_etymology.py
@@ -7,7 +7,7 @@
 from wiktextract.wxr_context import WiktextractContext
 
 
-class TestItGloss(TestCase):
+class TestItEtymology(TestCase):
     maxDiff = None
 
     def setUp(self) -> None:
diff --git a/tests/test_it_translation.py b/tests/test_it_translation.py
index 5566da77..c91ee5e6 100644
--- a/tests/test_it_translation.py
+++ b/tests/test_it_translation.py
@@ -7,7 +7,7 @@
 from wiktextract.wxr_context import WiktextractContext
 
 
-class TestItGloss(TestCase):
+class TestItTranslation(TestCase):
     maxDiff = None
 
     def setUp(self) -> None:

From b7ab69f1ec2eded41ab5968c9420b348692f021d Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Tue, 17 Dec 2024 15:34:57 +0800
Subject: [PATCH 4/5] [it] handle pos subsection templates

---
 src/wiktextract/extractor/it/pos.py |  37 +++++++++-
 tests/test_it_gloss.py              | 104 ++++++++++++++++++++++++++++
 2 files changed, 140 insertions(+), 1 deletion(-)

diff --git a/src/wiktextract/extractor/it/pos.py b/src/wiktextract/extractor/it/pos.py
index f863ff48..c8b59530 100644
--- a/src/wiktextract/extractor/it/pos.py
+++ b/src/wiktextract/extractor/it/pos.py
@@ -7,8 +7,24 @@
 from .section_titles import POS_DATA
 from .tag_form_line import extract_tag_form_line_nodes
 
+# https://it.wiktionary.org/wiki/Categoria:Template_per_i_verbi
+POS_SUBSECTION_TEMPLATES = frozenset(
+    [
+        "-participio passato-",
+        "-participio presente-",
+        "Ausiliare",
+        "Deponente",
+        "Intransitivo",
+        "Medio",
+        "Passivo",
+        "Reciproco",
+        "Riflessivo",
+        "Transitivo",
+    ]
+)
 
-def extract_pos_section(
+
+def add_new_pos_data(
     wxr: WiktextractContext,
     page_data: list[WordEntry],
     base_data: WordEntry,
@@ -23,6 +39,15 @@ def extract_pos_section(
     for link_node in level_node.find_child(NodeKind.LINK):
         clean_node(wxr, page_data[-1], link_node)
 
+
+def extract_pos_section(
+    wxr: WiktextractContext,
+    page_data: list[WordEntry],
+    base_data: WordEntry,
+    level_node: LevelNode,
+    pos_title: str,
+) -> None:
+    add_new_pos_data(wxr, page_data, base_data, level_node, pos_title)
     first_gloss_list_index = len(level_node.children)
     for index, node in enumerate(level_node.children):
         if (
@@ -35,6 +60,16 @@ def extract_pos_section(
                 extract_gloss_list_item(wxr, page_data[-1], list_item)
             if index < first_gloss_list_index:
                 first_gloss_list_index = index
+        elif (
+            isinstance(node, TemplateNode)
+            and node.template_name in POS_SUBSECTION_TEMPLATES
+        ):
+            if len(page_data[-1].senses) > 0:
+                add_new_pos_data(
+                    wxr, page_data, base_data, level_node, pos_title
+                )
+            raw_tag = clean_node(wxr, page_data[-1], node).strip("= \n")
+            page_data[-1].raw_tags.append(raw_tag)
 
     extract_tag_form_line_nodes(
         wxr, page_data[-1], level_node.children[:first_gloss_list_index]
diff --git a/tests/test_it_gloss.py b/tests/test_it_gloss.py
index 2863cbfd..35774147 100644
--- a/tests/test_it_gloss.py
+++ b/tests/test_it_gloss.py
@@ -54,3 +54,107 @@ def test_gloss_list(self):
                 }
             ],
         )
+
+    def test_double_pos_subsection_templates(self):
+        self.wxr.wtp.add_page("Template:-la-", 10, "Latino")
+        self.wxr.wtp.add_page(
+            "Template:Intransitivo",
+            10,
+            """====[[intransitivo|Intransitivo]]====
+[[Categoria:Verbi intransitivi_in_latino]]""",
+        )
+        self.wxr.wtp.add_page(
+            "Template:Deponente",
+            10,
+            """====[[deponente|Deponente]]====
+[[Categoria:Verbi deponenti_in_latino]]""",
+        )
+        data = parse_page(
+            self.wxr,
+            "aboriscor",
+            """== {{-la-}} ==
+===[[Image:Open_book_01.svg|30px|]]''[[verbo|Verbo]]''===
+[[Categoria:Verbi in latino]]
+{{Intransitivo|la}}
+{{Deponente|la}}
+'''ăbŏriscor'''
+
+# [[venir]] [[meno]]""",
+        )
+        self.assertEqual(
+            data,
+            [
+                {
+                    "lang": "Latino",
+                    "lang_code": "la",
+                    "word": "aboriscor",
+                    "pos": "verb",
+                    "pos_title": "Verbo",
+                    "categories": [
+                        "Verbi in latino",
+                        "Verbi intransitivi_in_latino",
+                        "Verbi deponenti_in_latino",
+                    ],
+                    "senses": [{"glosses": ["venir meno"]}],
+                    "raw_tags": ["Intransitivo", "Deponente"],
+                }
+            ],
+        )
+
+    def test_subsecton_template_add_new_word_entry(self):
+        self.wxr.wtp.add_page("Template:-it-", 10, "Italiano")
+        self.wxr.wtp.add_page(
+            "Template:Ausiliare",
+            10,
+            """====[[ausiliare|Ausiliare]]====
+[[Categoria:Verbi ausiliari_in_italiano]]""",
+        )
+        self.wxr.wtp.add_page(
+            "Template:Intransitivo",
+            10,
+            """====[[intransitivo|Intransitivo]]====
+[[Categoria:Verbi intransitivi_in_latino]]""",
+        )
+        data = parse_page(
+            self.wxr,
+            "essere",
+            """== {{-it-}} ==
+===[[Image:Open_book_01.svg|30px|]]''[[verbo|Verbo]]''===
+[[Categoria:Verbi in italiano]]
+{{Ausiliare|it}}
+# serve per la coniugazione
+
+{{Intransitivo|it}}
+# Questo verbo serve per dire""",
+        )
+        self.assertEqual(
+            data,
+            [
+                {
+                    "lang": "Italiano",
+                    "lang_code": "it",
+                    "word": "essere",
+                    "pos": "verb",
+                    "pos_title": "Verbo",
+                    "categories": [
+                        "Verbi in italiano",
+                        "Verbi ausiliari_in_italiano",
+                    ],
+                    "senses": [{"glosses": ["serve per la coniugazione"]}],
+                    "raw_tags": ["Ausiliare"],
+                },
+                {
+                    "lang": "Italiano",
+                    "lang_code": "it",
+                    "word": "essere",
+                    "pos": "verb",
+                    "pos_title": "Verbo",
+                    "categories": [
+                        "Verbi in italiano",
+                        "Verbi intransitivi_in_latino",
+                    ],
+                    "senses": [{"glosses": ["Questo verbo serve per dire"]}],
+                    "raw_tags": ["Intransitivo"],
+                },
+            ],
+        )

From d533e569fdd61a8bad6bc6d26079aa738f097161 Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Tue, 17 Dec 2024 16:58:51 +0800
Subject: [PATCH 5/5] [it] handle example list layout with more than one
 italic node

---
 src/wiktextract/extractor/it/example.py | 44 ++++++++++++--
 src/wiktextract/extractor/it/pos.py     | 12 ++--
 tests/test_it_example.py                | 76 +++++++++++++++++++++++++
 3 files changed, 122 insertions(+), 10 deletions(-)

diff --git a/src/wiktextract/extractor/it/example.py b/src/wiktextract/extractor/it/example.py
index d8ce4ad6..10b56d58 100644
--- a/src/wiktextract/extractor/it/example.py
+++ b/src/wiktextract/extractor/it/example.py
@@ -14,12 +14,15 @@ def extract_example_list_item(
     text_nodes = []
     roman = ""
     translation = ""
+    ref = ""
+    has_zh_tradsem = False
     for index, node in enumerate(list_item.children):
         if (
             isinstance(node, TemplateNode)
             and node.template_name == "zh-tradsem"
         ):
             examples.extend(extract_zh_tradsem(wxr, node))
+            has_zh_tradsem = True
         elif isinstance(node, WikiNode):
             match node.kind:
                 case NodeKind.ITALIC:
@@ -39,17 +42,38 @@ def extract_example_list_item(
                 case _ if lang_code in ["zh", "ja"]:
                     if before_italic:
                         text_nodes.append(node)
-        elif (
-            isinstance(node, str) and lang_code in ["zh", "ja"] and "-" in node
-        ):
+        elif isinstance(node, str) and "-" in node:
+            for t_node in list_item.find_child(NodeKind.TEMPLATE):
+                if t_node.template_name == "Term":
+                    ref = clean_node(wxr, None, t_node).strip("()")
+                    break
             translation = clean_node(
                 wxr,
                 sense,
                 wxr.wtp.node_to_wikitext(
                     [node[node.index("-") + 1 :]]
-                    + list_item.children[index + 1 :]
+                    + [
+                        n
+                        for n in list_item.children[index + 1 :]
+                        if not (
+                            isinstance(n, TemplateNode)
+                            and n.template_name == "Term"
+                        )
+                    ]
                 ),
             )
+            if not has_zh_tradsem and len(examples) > 1:
+                examples.clear()
+                examples.append(
+                    Example(
+                        text=clean_node(
+                            wxr,
+                            None,
+                            list_item.children[:index]
+                            + [node[: node.index("-")]],
+                        )
+                    )
+                )
             break
         elif lang_code in ["zh", "ja"] and len(examples) == 0 and before_italic:
             text_nodes.append(node)
@@ -69,11 +93,23 @@ def extract_example_list_item(
         )
         examples.append(example)
 
+    if not has_zh_tradsem and len(examples) > 1:
+        examples.clear()
+        examples.append(
+            Example(
+                text=clean_node(
+                    wxr, None, list(list_item.invert_find_child(NodeKind.LIST))
+                )
+            )
+        )
+
     for example in examples:
         if roman != "":
             example.roman = roman
         if translation != "":
             example.translation = translation
+        if ref != "":
+            example.ref = ref
         if example.text != "":
             sense.examples.append(example)
 
diff --git a/src/wiktextract/extractor/it/pos.py b/src/wiktextract/extractor/it/pos.py
index c8b59530..bded7d5c 100644
--- a/src/wiktextract/extractor/it/pos.py
+++ b/src/wiktextract/extractor/it/pos.py
@@ -91,12 +91,7 @@ def extract_gloss_list_item(
             else:
                 gloss_nodes.append(t_str)
         elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
-            if node.sarg.endswith("*"):
-                for example_list_item in node.find_child(NodeKind.LIST_ITEM):
-                    extract_example_list_item(
-                        wxr, sense, example_list_item, word_entry.lang_code
-                    )
-            elif (
+            if (
                 node.sarg.endswith(":")
                 and len(sense.examples) > 0
                 and sense.examples[-1].translation == ""
@@ -105,6 +100,11 @@ def extract_gloss_list_item(
                     sense.examples[-1].translation = clean_node(
                         wxr, sense, tr_list_item.children
                     )
+            elif node.sarg.endswith(("*", ":")):
+                for example_list_item in node.find_child(NodeKind.LIST_ITEM):
+                    extract_example_list_item(
+                        wxr, sense, example_list_item, word_entry.lang_code
+                    )
         else:
             gloss_nodes.append(node)
     gloss_str = clean_node(wxr, sense, gloss_nodes)
diff --git a/tests/test_it_example.py b/tests/test_it_example.py
index 11b6747c..e8079358 100644
--- a/tests/test_it_example.py
+++ b/tests/test_it_example.py
@@ -139,3 +139,79 @@ def test_zh_tradsem(self):
                 }
             ],
         )
+
+    def test_double_italic_nodes_with_translation(self):
+        self.wxr.wtp.add_page("Template:-en-", 10, "Inglese")
+        data = parse_page(
+            self.wxr,
+            "water",
+            """== {{-en-}} ==
+===Sostantivo===
+# acqua
+#: ''May I have a glass of '''water'''?'' - ''Posso avere un bicchiere d''''acqua'''''?""",
+        )
+        self.assertEqual(
+            data[0]["senses"],
+            [
+                {
+                    "glosses": ["acqua"],
+                    "examples": [
+                        {
+                            "text": "May I have a glass of water?",
+                            "translation": "Posso avere un bicchiere d'acqua?",
+                        }
+                    ],
+                }
+            ],
+        )
+
+    def test_double_italic_nodes_no_translation(self):
+        self.wxr.wtp.add_page("Template:-it-", 10, "Italiano")
+        data = parse_page(
+            self.wxr,
+            "essere",
+            """== {{-it-}} ==
+===Sostantivo===
+#chi [[esiste]]
+#* ''gli '''esseri''' viventi''; ''gli '''esseri''' animati''""",
+        )
+        self.assertEqual(
+            data[0]["senses"],
+            [
+                {
+                    "glosses": ["chi esiste"],
+                    "examples": [
+                        {"text": "gli esseri viventi; gli esseri animati"}
+                    ],
+                }
+            ],
+        )
+
+    def test_term_ref_template(self):
+        self.wxr.wtp.add_page("Template:-la-", 10, "Latino")
+        self.wxr.wtp.add_page("Template:Term", 10, "({{{1}}})")
+        data = parse_page(
+            self.wxr,
+            "libero",
+            """== {{-la-}} ==
+===Verbo===
+# [[assolvere]], [[liberare]] dalle [[accuse]], [[giudicare]] [[innocente]]
+#* ''et eum omni [[ignominia]] '''liberat''''' - e lo [[assolve]] da ogni [[ignominia]] {{Term|[[:w:Marco Tullio Cicerone|Cicerone]], [[:w:Pro Cluentio|Pro Cluentio]], [[:s:la:Pro_Aulo_Cluentio_Habito|XLVII, 132]]}}""",
+        )
+        self.assertEqual(
+            data[0]["senses"],
+            [
+                {
+                    "glosses": [
+                        "assolvere, liberare dalle accuse, giudicare innocente"
+                    ],
+                    "examples": [
+                        {
+                            "text": "et eum omni ignominia liberat",
+                            "translation": "e lo assolve da ogni ignominia",
+                            "ref": "Cicerone, Pro Cluentio, XLVII, 132",
+                        }
+                    ],
+                }
+            ],
+        )