Skip to content

Commit

Permalink
Divide French Wiktionary linkage list item words with slash
Browse files Browse the repository at this point in the history
  • Loading branch information
xxyzz committed Oct 30, 2023
1 parent f6a6412 commit 1fd8dd9
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/wiktextract/extractor/fr/linkage.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def process_linkage_list(
) and tag_text.strip().endswith(")"):
tag_text = pending_tag + tag_text
pending_tag = ""
- elif tag_text.strip() == ",":
+ elif tag_text.strip() in {",", "/"}:
# list item has more than one word
page_data[-1][linkage_type].append(linkage_data)
linkage_data = defaultdict(list)
Expand Down
19 changes: 19 additions & 0 deletions tests/test_fr_linkage.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,22 @@ def test_derives_autres_langues_section(self):
}
],
)

def test_words_divided_by_slash(self):
    # One list item holding two links separated by " / " is expected to
    # produce two separate entries under the "derived" key.
    wikitext = "* [[benoîte d’eau]] / [[benoite d’eau]]"
    expected = [
        {
            "derived": [
                {"word": "benoîte d’eau"},
                {"word": "benoite d’eau"},
            ]
        }
    ]

    collected = [defaultdict(list)]
    self.wxr.wtp.start_page("eau")
    parsed_root = self.wxr.wtp.parse(wikitext)
    extract_linkage(self.wxr, collected, parsed_root, "dérivés")

    self.assertEqual(collected, expected)

0 comments on commit 1fd8dd9

Please sign in to comment.