Skip to content

Commit

Permalink
Merge pull request #952 from xxyzz/pt
Browse files Browse the repository at this point in the history
[pt, it] add "source" field and translate some tags
  • Loading branch information
xxyzz authored Dec 20, 2024
2 parents 86243d8 + 355d415 commit d8cb2f3
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 4 deletions.
26 changes: 24 additions & 2 deletions src/wiktextract/extractor/it/tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
"chimica industriale": "chemistry",
"chirurgia": "surgery",
"cinematografia": "cinematography",
"colori": "color",
"colore": "color",
"commercio": "commerce",
# "composti organici": "",
# "composti inorganici": "",
Expand Down Expand Up @@ -191,8 +191,30 @@
"volgare": "vulgar",
}

# https://it.wiktionary.org/wiki/Categoria:Template_ambito
# Italian label -> English tag(s) for the gloss-list templates in the category
# linked above. The trailing comment on each entry names the it.wiktionary
# template concerned (presumably the key is the text that template expands
# to -- confirm against the template pages).
# A value is a single tag string, or a list of strings when one label maps to
# several tags.
# NOTE(review): "TEMPATE" looks like a typo for "TEMPLATE"; the same spelling
# is used where this dict is unpacked into TAGS, so rename both together.
GLOSS_LIST_TEMPATE_TAGS = {
"accrescitivo": "augmentative", # Template:Accr
"colloquiale": "colloquial", # Template:Coll
"diminutivo": "diminutive", # Template:Dim
"per estensione": "broadly", # Template:Est
"senso figurato": "figuratively", # Template:Fig
"letteralmente": "literally", # Template:Lett
"peggiorativo": "pejorative", # Template:Pegg
"riferito solo a persone": "person", # Template:Pers
"per sineddoche": "synecdoche", # Template:Sndc
# The only entry whose value is a list: one label yields two tags.
"specialmente al plurale": ["especially", "in-plural"], # Template:Spec pl
"spregiativo": "pejorative", # Template:Spreg
"vezzeggiativo": "endearing", # Template:Vezz
# "volgare" is also a key of the dict defined just before this one, with the
# same value, so merging the dicts does not change its translation.
"volgare": "vulgar", # Template:Vulg
}


TAGS = {**TABLE_TAGS, **FORM_LINE_TEMPLATE_TAGS, **TERM_TEMPLATE_TAGS}
TAGS = {
**TABLE_TAGS,
**FORM_LINE_TEMPLATE_TAGS,
**TERM_TEMPLATE_TAGS,
**GLOSS_LIST_TEMPATE_TAGS,
}


def translate_raw_tags(data: WordEntry) -> None:
Expand Down
18 changes: 16 additions & 2 deletions src/wiktextract/extractor/pt/linkage.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,21 @@ def extract_linkage_section(
linkage_type: str,
sense: str,
sense_index: int,
source: str,
) -> None:
for node in level_node.children:
if isinstance(node, TemplateNode) and node.template_name == "fraseini":
sense, sense_index = extract_fraseini_template(wxr, node)
elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
for list_item in node.find_child(NodeKind.LIST_ITEM):
extract_linkage_list_item(
wxr, word_entry, list_item, linkage_type, sense, sense_index
wxr,
word_entry,
list_item,
linkage_type,
sense,
sense_index,
source,
)


Expand All @@ -104,6 +111,7 @@ def extract_linkage_list_item(
linkage_type: str,
sense: str,
sense_index: int,
source: str,
) -> None:
linkage_words = []
raw_tags = []
Expand Down Expand Up @@ -161,6 +169,7 @@ def extract_linkage_list_item(
linkage_type,
sense,
sense_index,
source,
)
elif isinstance(node, str):
m = re.search(r"\((.+)\)", node)
Expand All @@ -169,7 +178,11 @@ def extract_linkage_list_item(

for word in linkage_words:
linkage = Linkage(
word=word, sense=sense, sense_index=sense_index, raw_tags=raw_tags
word=word,
sense=sense,
sense_index=sense_index,
raw_tags=raw_tags,
source=source,
)
translate_raw_tags(linkage)
getattr(word_entry, linkage_type).append(linkage)
Expand Down Expand Up @@ -206,4 +219,5 @@ def extract_wikisaurus_page(
linkage_type,
sense,
sense_index,
page_title,
)
1 change: 1 addition & 0 deletions src/wiktextract/extractor/pt/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class Linkage(PortugueseBaseModel):
sense_index: int = Field(
default=0, ge=0, description="Number of the definition, start from 1"
)
source: str = ""


class Sound(PortugueseBaseModel):
Expand Down
1 change: 1 addition & 0 deletions src/wiktextract/extractor/pt/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def parse_section(
LINKAGE_SECTIONS[title_text],
"",
0,
"",
)
elif title_text == "Etimologia":
extract_etymology_section(wxr, page_data, level_node)
Expand Down

0 comments on commit d8cb2f3

Please sign in to comment.