Skip to content

Commit

Permalink
Update typings and es.json for Spanish Wiktionary
Browse files Browse the repository at this point in the history
This work is a contribution to the EWOK project, which receives funding from LABEX ASLAN (ANR–10–LABX–0081) at the Université de Lyon, as part of the "Investissements d'Avenir" program initiated and overseen by the Agence Nationale de la Recherche (ANR) in France.
  • Loading branch information
empiriker committed Dec 1, 2023
1 parent cbc62fe commit 88a79b2
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 30 deletions.
19 changes: 19 additions & 0 deletions json_schema/es.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,19 @@
"title": "Glosses",
"type": "array"
},
"senseid": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"default": null,
"description": "Sense number used in Wiktionary",
"title": "Senseid"
},
"subsenses": {
"default": [],
"description": "List of subsenses",
Expand Down Expand Up @@ -80,6 +93,12 @@
"title": "Pos",
"type": "string"
},
"pos_title": {
"default": null,
"description": "Original POS title",
"title": "Pos Title",
"type": "string"
},
"senses": {
"anyOf": [
{
Expand Down
25 changes: 1 addition & 24 deletions src/wiktextract/extractor/es/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,7 @@
import logging
from typing import Optional

from pydantic import (
BaseModel,
ConfigDict,
Field,
ValidationError,
model_validator,
)
from pydantic import BaseModel, ConfigDict, Field, model_validator
from pydantic.json_schema import GenerateJsonSchema

from wiktextract.wxr_context import WiktextractContext
Expand All @@ -30,23 +24,6 @@ def debug(
class BaseModelWrap(BaseModel):
model_config = ConfigDict(validate_assignment=True)

# def update(self, data: dict):
# for k, v in data.items():
# setattr(self, k, v)
# return self

# def get(self, key: str, _=None):
# return getattr(self, key)

# def __getitem__(self, item):
# return getattr(self, item)

# def __setitem__(self, item, value):
# try:
# setattr(self, item, value)
# except ValidationError:
# pass


class LoggingExtraFieldsModel(BaseModelWrap):
@model_validator(mode="before")
Expand Down
12 changes: 6 additions & 6 deletions src/wiktextract/extractor/es/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@

def parse_section(
wxr: WiktextractContext,
page_data: List[Dict],
base_data: Dict,
page_data: List[WordEntry],
base_data: WordEntry,
level_node: WikiNode,
) -> None:
# Page Structure: https://es.wiktionary.org/wiki/Wikcionario:Estructura
Expand All @@ -53,13 +53,14 @@ def parse_section(

def process_pos_block(
wxr: WiktextractContext,
page_data: List[Dict],
base_data: Dict,
page_data: List[WordEntry],
base_data: WordEntry,
pos_level_node: WikiNode,
pos_template_name: str,
pos_title: str,
):
pos_type = wxr.config.POS_SUBTITLES[pos_template_name]["pos"]

page_data.append(copy.deepcopy(base_data))
page_data[-1].pos = pos_type
page_data[-1].pos_title = pos_title
Expand Down Expand Up @@ -91,7 +92,7 @@ def process_pos_block(

def parse_page(
wxr: WiktextractContext, page_title: str, page_text: str
) -> List[Dict[str, str]]:
) -> List[Dict[str, any]]:
if wxr.config.verbose:
logging.info(f"Parsing page: {page_title}")
# Pass the current WiktextractContext to pydantic for better logging
Expand Down Expand Up @@ -128,7 +129,6 @@ def parse_page(
lang_name=lang_name, lang_code=lang_code, word=wxr.wtp.title
)
base_data.categories.extend(categories["categories"])
# page_data.append(copy.deepcopy(base_data))
for level3_node in level2_node.find_child(NodeKind.LEVEL3):
parse_section(wxr, page_data, base_data, level3_node)

Expand Down

0 comments on commit 88a79b2

Please sign in to comment.