Skip to content

Commit

Permalink
[it] extract "A cmp" adj forms template
Browse files Browse the repository at this point in the history
  • Loading branch information
xxyzz committed Dec 19, 2024
1 parent 58f2b7d commit 6ad657c
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 4 deletions.
27 changes: 26 additions & 1 deletion src/wiktextract/extractor/it/tag_form_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from ...wxr_context import WiktextractContext
from .inflection import extract_it_decl_agg_template, extract_tabs_template
from .models import Form, WordEntry
from .tags import translate_raw_tags


def extract_tag_form_line_nodes(
Expand All @@ -16,10 +17,12 @@ def extract_tag_form_line_nodes(
elif isinstance(node, TemplateNode):
if node.template_name.lower() == "tabs":
extract_tabs_template(wxr, word_entry, node)
elif node.template_name.lower() in FORM_LINK_TEMPLATES.keys():
elif node.template_name.lower() in FORM_LINK_TEMPLATES:
extract_form_link_template(wxr, word_entry, node)
elif node.template_name.lower().startswith("it-decl-agg"):
extract_it_decl_agg_template(wxr, word_entry, node)
elif node.template_name.lower() == "a cmp":
extract_a_cmp_template(wxr, word_entry, node)


ITALIC_TAGS = {
Expand Down Expand Up @@ -70,3 +73,25 @@ def extract_form_link_template(
if form != "":
word_entry.forms.append(Form(form=form, tags=["plural"]))
arg_name += 1


def extract_a_cmp_template(
    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
    """Collect the forms listed by an expanded "A cmp" template.

    The template call is turned back into wikitext, expanded and re-parsed.
    Its italic and bold children are then walked in order:

    * an italic child updates the label that applies to the bold children
      following it;
    * a bold child whose cleaned text is neither empty nor the page title
      is appended to ``word_entry.forms``; when a label is active it is
      stored as the form's raw tag and passed through
      ``translate_raw_tags``.

    Template page: https://it.wiktionary.org/wiki/Template:A_cmp
    """
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    root = wxr.wtp.parse(template_wikitext, expand_all=True)

    current_label = ""
    for child in root.find_child(NodeKind.ITALIC | NodeKind.BOLD):
        if child.kind == NodeKind.ITALIC:
            # Remember the label; it stays active until the next italic node.
            current_label = clean_node(wxr, None, child)
        elif child.kind == NodeKind.BOLD:
            text = clean_node(wxr, None, child)
            # Skip empty output and the headword itself.
            if text == "" or text == wxr.wtp.title:
                continue
            new_form = Form(form=text)
            if current_label != "":
                new_form.raw_tags.append(current_label)
                translate_raw_tags(new_form)
            word_entry.forms.append(new_form)
8 changes: 7 additions & 1 deletion src/wiktextract/extractor/it/tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,14 @@
"m e f": ["masculine", "feminine"],
}

# Italian labels found in form-line template output, mapped to English tags.
# Template page: https://it.wiktionary.org/wiki/Template:A_cmp
FORM_LINE_TEMPLATE_TAGS: dict[str, str] = {
    "comparativo": "comparative",
    "superlativo": "superlative",
}


# Single lookup table used when translating raw tags: TABLE_TAGS merged with
# FORM_LINE_TEMPLATE_TAGS. Built in one assignment (no intermediate value that
# is immediately overwritten); on duplicate keys the later mapping wins.
TAGS = {**TABLE_TAGS, **FORM_LINE_TEMPLATE_TAGS}


def translate_raw_tags(data: WordEntry) -> None:
Expand Down
33 changes: 31 additions & 2 deletions tests/test_it_forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,10 @@ def test_linkp_template(self):

def test_it_decl_agg(self):
self.wxr.wtp.add_page("Template:-it-", 10, "Italiano")
self.wxr.wtp.add_page("Template:It-decl-agg4", 10, """{|
self.wxr.wtp.add_page(
"Template:It-decl-agg4",
10,
"""{|
|- align="center"
|  
!bgcolor="#FFFFE0" color="#000"| ''[[singolare]]'' 
Expand All @@ -69,7 +72,8 @@ def test_it_decl_agg(self):
!bgcolor="#FFFFE0" color="#000"| ''[[maschile]]'' 
|  [[libero]]  
|  [[liberi]]  
|}""")
|}""",
)
data = parse_page(
self.wxr,
"libero",
Expand All @@ -83,3 +87,28 @@ def test_it_decl_agg(self):
data[0]["forms"],
[{"form": "liberi", "tags": ["positive", "masculine", "plural"]}],
)

def test_a_cmp(self):
    """Forms from ``{{A cmp}}`` on the form line carry the right tags."""
    wtp = self.wxr.wtp
    wtp.add_page("Template:-en-", 10, "Inglese")
    # Stub expansion of the template: italic labels followed by bold forms.
    a_cmp_output = "(''comparativo'' '''[[direr]]''', '''more dire''', ''superlativo'' '''[[direst]]''', '''most dire''')"
    wtp.add_page("Template:A cmp", 10, a_cmp_output)

    page_text = """== {{-en-}} ==
===Aggettivo===
{{Pn}} {{A cmp|direr|c2=more dire|direst|s2=most dire}}
# [[sinistro]]"""
    entries = parse_page(self.wxr, "dire", page_text)

    expected_forms = [
        {"form": form, "tags": [tag]}
        for form, tag in (
            ("direr", "comparative"),
            ("more dire", "comparative"),
            ("direst", "superlative"),
            ("most dire", "superlative"),
        )
    ]
    self.assertEqual(entries[0]["forms"], expected_forms)

0 comments on commit 6ad657c

Please sign in to comment.