From 8e55f44e93ad395d331c7295d0396a868dc252c3 Mon Sep 17 00:00:00 2001 From: Joerg Herbel Date: Tue, 21 May 2024 15:46:58 +0200 Subject: [PATCH] Check for forbidden HTML tags CMK-17499 --- checkmk_weblate_syncer/html_tags.py | 18 ++++++ checkmk_weblate_syncer/portable_object.py | 12 ++++ checkmk_weblate_syncer/update_sources.py | 9 ++- checkmk_weblate_syncer/update_translations.py | 14 ++++- tests/test_html_tags.py | 59 ++++++++++++++++++ tests/test_portable_object.py | 62 +++++++++++++++++++ 6 files changed, 172 insertions(+), 2 deletions(-) create mode 100644 checkmk_weblate_syncer/html_tags.py create mode 100644 tests/test_html_tags.py diff --git a/checkmk_weblate_syncer/html_tags.py b/checkmk_weblate_syncer/html_tags.py new file mode 100644 index 0000000..3df6557 --- /dev/null +++ b/checkmk_weblate_syncer/html_tags.py @@ -0,0 +1,18 @@ +import re + +# keep in sync with tests/pylint/checker_localization.py:HTMLTagsChecker +_TAG_PATTERN = re.compile("<.*?>") +_ALLOWED_TAGS_PATTERN = re.compile( + r"" +) + + +def forbidden_tags(text: str) -> set[str]: + return { + tag + for tag in re.findall( + _TAG_PATTERN, + text, + ) + if not re.match(_ALLOWED_TAGS_PATTERN, tag) + } diff --git a/checkmk_weblate_syncer/portable_object.py b/checkmk_weblate_syncer/portable_object.py index de047ce..8291bb5 100644 --- a/checkmk_weblate_syncer/portable_object.py +++ b/checkmk_weblate_syncer/portable_object.py @@ -2,6 +2,18 @@ from pathlib import Path +def remove_header(portable_object_content: str) -> str: + lines = portable_object_content.splitlines() + index_first_source_string_location = 0 + for index, line in enumerate(lines): + if re.match(r"^#: .*?:\d+$", line): + index_first_source_string_location = index + break + return "\n".join(lines[index_first_source_string_location:]) + ( + "\n" if portable_object_content.endswith("\n") else "" + ) + + def make_soure_string_locations_relative( portable_object_content: str, relative_to: Path, diff --git a/checkmk_weblate_syncer/update_sources.py b/checkmk_weblate_syncer/update_sources.py index d92af2e..6aaf32b 100644 --- a/checkmk_weblate_syncer/update_sources.py +++ b/checkmk_weblate_syncer/update_sources.py @@ -3,8 +3,9 @@ from .config import UpdateSourcesConfig from .git import commit_and_push_files, repository_in_clean_state +from .html_tags import forbidden_tags from .logging import LOGGER -from .portable_object import make_soure_string_locations_relative +from .portable_object import make_soure_string_locations_relative, remove_header def run(config: UpdateSourcesConfig) -> int: @@ -30,6 +31,12 @@ def run(config: UpdateSourcesConfig) -> int: LOGGER.error("Generating pot file failed") raise e + LOGGER.info("Checking HTML tags") + if forbidden_html_tags := forbidden_tags(remove_header(pot_file_content)): + raise ValueError( + f"Found forbidden HTML tags: {', '.join(sorted(forbidden_html_tags))}" + ) + LOGGER.info("Making source string locations relative") pot_file_content = make_soure_string_locations_relative( portable_object_content=pot_file_content, diff --git a/checkmk_weblate_syncer/update_translations.py b/checkmk_weblate_syncer/update_translations.py index 0a9efe4..8c9f2d9 100644 --- a/checkmk_weblate_syncer/update_translations.py +++ b/checkmk_weblate_syncer/update_translations.py @@ -8,8 +8,13 @@ from .config import PoFilePair, RepositoryConfig, UpdateTranslationsConfig from .git import commit_and_push_files, repository_in_clean_state +from .html_tags import forbidden_tags from .logging import LOGGER -from .portable_object import remove_last_translator, remove_source_string_locations +from .portable_object import ( + remove_header, + remove_last_translator, + remove_source_string_locations, +) @dataclass(frozen=True) @@ -105,6 +110,13 @@ def _process_po_file_pair( path=locale_po_file, ) + LOGGER.info("Checking HTML tags") + if forbidden_html_tags := forbidden_tags(remove_header(po_file_content)): + return _Failure( + error_message=f"Found forbidden HTML tags: {', '.join(sorted(forbidden_html_tags))}", + path=locale_po_file, + ) + LOGGER.info("Stripping source string locations and Last-Translator") po_file_content = remove_source_string_locations(po_file_content) po_file_content = remove_last_translator(po_file_content) diff --git a/tests/test_html_tags.py b/tests/test_html_tags.py new file mode 100644 index 0000000..d4da7c5 --- /dev/null +++ b/tests/test_html_tags.py @@ -0,0 +1,59 @@ +import pytest + +from checkmk_weblate_syncer.html_tags import forbidden_tags + + +@pytest.mark.parametrize( + ["text", "expected_result"], + [ + pytest.param( + "abc123", + frozenset(), + ), + pytest.param( + "bold", + frozenset(), + ), + pytest.param( + '* ? %s', + frozenset(), + ), + pytest.param( + '© Checkmk GmbH', + frozenset(), + ), + pytest.param( + "123 ", + frozenset( + [""], + ), + ), + pytest.param( + # pylint: disable=line-too-long + """#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/wato/pages/host_rename.py:640 +#, python-format +msgid " (%d times)" +msgstr "" + +#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/visuals/_page_edit_visual.py:137 +msgid " (Copy)" +msgstr "" + +#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/nodevis/topology.py:1814 +msgid " (Data incomplete, maximum number of nodes reached)" +msgstr "" + +#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/backup/handler.py:969 +#, python-format +msgid " (Duration: %s)" +msgstr "" +""", + frozenset(), + ), + ], +) +def test_html_tags_checker( + text: str, + expected_result: frozenset[str], +) -> None: + assert forbidden_tags(text) == expected_result diff --git a/tests/test_portable_object.py b/tests/test_portable_object.py index 3e2863b..dc72ee2 100644 --- a/tests/test_portable_object.py +++ b/tests/test_portable_object.py @@ -2,11 +2,73 @@ from checkmk_weblate_syncer.portable_object import ( make_soure_string_locations_relative, + remove_header, remove_last_translator, remove_source_string_locations, ) +def test_remove_header() -> None: + assert ( + remove_header( + """# Copyright (C) 2019 Checkmk GmbH - License: GNU General Public License v2 +# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and +# conditions defined in the file COPYING, which is part of this source code package. + +msgid "" +msgstr "" +"Project-Id-Version: Checkmk user interface translation 0.1\n" +"Report-Msgid-Bugs-To: feedback@checkmk.com\n" +"POT-Creation-Date: 2011-05-13 09:42+0200\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"Language: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" + +#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/wato/pages/host_rename.py:640 +#, python-format +msgid " (%d times)" +msgstr "" + +#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/visuals/_page_edit_visual.py:137 +msgid " (Copy)" +msgstr "" + +#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/nodevis/topology.py:1814 +msgid " (Data incomplete, maximum number of nodes reached)" +msgstr "" + +#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/backup/handler.py:969 +#, python-format +msgid " (Duration: %s)" +msgstr "" +""" + ) + # pylint: disable=line-too-long + == """#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/wato/pages/host_rename.py:640 +#, python-format +msgid " (%d times)" +msgstr "" + +#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/visuals/_page_edit_visual.py:137 +msgid " (Copy)" +msgstr "" + +#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/nodevis/topology.py:1814 +msgid " (Data incomplete, maximum number of nodes reached)" +msgstr "" + +#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/backup/handler.py:969 +#, python-format +msgid " (Duration: %s)" +msgstr "" +""" + ) + + def test_make_soure_string_locations_relative() -> None: assert ( make_soure_string_locations_relative(