From 77f8a9312bcc8f8fb532772bfdf1ef41d9984902 Mon Sep 17 00:00:00 2001 From: Lioman Date: Wed, 12 Apr 2023 13:13:01 +0200 Subject: [PATCH] replace non-printable chars in titles with spaces #31 --- pelican/plugins/search/search.py | 7 +++++-- tests/test_search_settings_generator.py | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/pelican/plugins/search/search.py b/pelican/plugins/search/search.py index 276b6c0..81d8268 100644 --- a/pelican/plugins/search/search.py +++ b/pelican/plugins/search/search.py @@ -115,12 +115,15 @@ def get_input_files( page.save_as if self._index_output() else page.relative_source_path ) # Escape double-quotation marks in the title - title = striptags(page.title).replace('"', '\\"') + title = "".join( + c if c.isprintable() else " " + for c in striptags(page.title).replace('"', '\\"') + ) input_files.append( { "path": page_to_index, "url": f"/{page.url}", - "title": f"{title}", + "title": title, } ) diff --git a/tests/test_search_settings_generator.py b/tests/test_search_settings_generator.py index f2193b0..a1a9f52 100644 --- a/tests/test_search_settings_generator.py +++ b/tests/test_search_settings_generator.py @@ -361,3 +361,26 @@ def test_template_pages_collected( "title": "", }, ] + + @pytest.mark.parametrize("non_printable", ["\u00a0", "\u200B", "\uFEFF"]) + def test_unicode_non_printable_chars_stripped(self, non_printable): + """Non printable chars should be all converted to a standard space""" + generator = SearchSettingsGenerator( + context={ + "pages": [ + self.PageArticleMock(f"title with nbsp{non_printable}char") + ], + "articles": [], + }, + settings={"TEMPLATE_PAGES": []}, + path=None, + theme=None, + output_path="output", + ) + assert generator.get_input_files() == [ + { + "path": "save_as", + "url": "/url", + "title": "title with nbsp char", + } + ]