From 737a3d208a3f25a664817ccae82a594ca0046862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20J=C3=A4rventaus?= Date: Mon, 28 Oct 2024 07:56:54 +0200 Subject: [PATCH 1/2] [en] Split tag validation error into two --- src/wiktextract/wiktionary.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/wiktextract/wiktionary.py b/src/wiktextract/wiktionary.py index 1fdbdd66..3ad78cd2 100644 --- a/src/wiktextract/wiktionary.py +++ b/src/wiktextract/wiktionary.py @@ -248,14 +248,25 @@ def check_tags( from .tags import uppercase_tags, valid_tags if tag not in valid_tags and tag not in uppercase_tags: - check_error( - wxr, - dt, - word, - lang, - pos, - f"invalid tag {tag} not in valid_tags(or uppercase_tags)", - ) + if len(tag) > 0 and tag[0].isupper(): + check_error( + wxr, + dt, + word, + lang, + pos, + f"invalid uppercase tag {tag} not in or uppercase_tags", + ) + else: + check_error( + wxr, + dt, + word, + lang, + pos, + f"invalid tag {tag} not in valid_tags " + "or uppercase_tags", + ) def check_str_fields( From 945ca9e18f2271b392f0c5433bd301fb8a681598 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20J=C3=A4rventaus?= Date: Mon, 28 Oct 2024 08:31:41 +0200 Subject: [PATCH 2/2] [en] wiktionary.py: add suffixes to `called_from` in debug messages --- src/wiktextract/wiktionary.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/wiktextract/wiktionary.py b/src/wiktextract/wiktionary.py index 3ad78cd2..6ff7879c 100644 --- a/src/wiktextract/wiktionary.py +++ b/src/wiktextract/wiktionary.py @@ -178,8 +178,13 @@ def check_error( lang: str | None, pos: str | None, msg: str, + called_from: str | None = None, ) -> None: """Formats and outputs an error message about data format checks.""" + if called_from is None: + called_from = "wiktionary/179/20240425" + else: + called_from = "wiktionary/179/20240425" + called_from msg += ": " + json.dumps(dt, sort_keys=True, ensure_ascii=False) prefix = word or "" if lang: @@ -198,7 +203,7 @@ def check_error( "title": word, "section": lang, "subsection": pos, - "called_from": "wiktionary/179/20240425", + "called_from": called_from, "path": tuple(), } config.debugs.append(error_data) @@ -256,6 +261,7 @@ def check_tags( lang, pos, f"invalid uppercase tag {tag} not in or uppercase_tags", + called_from="uppercase_tags", ) else: check_error(