def extract_a_cmp_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Collect the forms listed by an expanded ``A cmp`` template.

    The template is expanded and its italic and bold children are visited
    in order.  The cleaned text of an italic node is remembered as the raw
    tag for the bold nodes that follow it; the cleaned text of each bold
    node is appended to ``word_entry.forms`` as a ``Form``, unless it is
    empty or equal to the current page title.
    """
    # https://it.wiktionary.org/wiki/Template:A_cmp
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    root = wxr.wtp.parse(template_wikitext, expand_all=True)

    pending_label = ""
    for child in root.find_child(NodeKind.ITALIC | NodeKind.BOLD):
        if child.kind == NodeKind.ITALIC:
            # An italic label applies to every bold form until the next label.
            pending_label = clean_node(wxr, None, child)
            continue
        if child.kind != NodeKind.BOLD:
            continue

        text = clean_node(wxr, None, child)
        if text in ("", wxr.wtp.title):
            # Nothing to record for an empty form or the headword itself.
            continue

        new_form = Form(form=text)
        if pending_label != "":
            new_form.raw_tags.append(pending_label)
            translate_raw_tags(new_form)
        word_entry.forms.append(new_form)
def test_a_cmp(self):
    """``A cmp`` on a form line yields comparative and superlative forms."""
    wtp = self.wxr.wtp
    wtp.add_page("Template:-en-", 10, "Inglese")

    # Stand-in expansion of the template: italic labels, bold forms.
    a_cmp_expansion = "(''comparativo'' '''[[direr]]''', '''more dire''', ''superlativo'' '''[[direst]]''', '''most dire''')"
    wtp.add_page("Template:A cmp", 10, a_cmp_expansion)

    page_text = """== {{-en-}} ==
===Aggettivo===
{{Pn}} {{A cmp|direr|c2=more dire|direst|s2=most dire}}
# [[sinistro]]"""
    data = parse_page(self.wxr, "dire", page_text)

    form_tag_pairs = (
        ("direr", "comparative"),
        ("more dire", "comparative"),
        ("direst", "superlative"),
        ("most dire", "superlative"),
    )
    expected_forms = [
        {"form": form, "tags": [tag]} for form, tag in form_tag_pairs
    ]
    self.assertEqual(data[0]["forms"], expected_forms)