diff --git a/src/wiktextract/clean.py b/src/wiktextract/clean.py index 1556eec0..8b1e9fec 100644 --- a/src/wiktextract/clean.py +++ b/src/wiktextract/clean.py @@ -1376,11 +1376,11 @@ def repl_1_syntaxhighlight(m): # Remove with previewonly class (generated e.g. by {{taxlink|...}}) title = re.sub(r'(?si)]*?\bclass="[^"<>]*?' r'\bpreviewonly\b[^>]*?>' - r'((<[^<>]>[^<>]*]*>)|.)*?', + r'.+?', "", title) # Remove ... title = re.sub(r'(?si)]*?\bclass="[^"]*?\berror\b[^>]*?>' - r'((<.*?]>)|.)*?', + r'.+?', "", title) # Change
and
to newlines. Ditto for tr, li, table, dl, ul, ol title = re.sub(r"(?si)]*>", @@ -1438,7 +1438,7 @@ def repl_1_syntaxhighlight(m): ) title = re.sub(r"(?s)\[\[\s*:?([^]|#<>]+?)\s*(#[^][|<>]*?)?\]\]", repl_1, title) - title = re.sub(r"(?s)\[\[\s*(([a-zA-z0-9]+)\s*:)?\s*([^][#|<>]+?)" + title = re.sub(r"(?s)\[\[\s*(([a-zA-Z0-9]+)\s*:)?\s*([^][#|<>]+?)" r"\s*(#[^][|]*?)?\|?\]\]", repl_link, title) title = re.sub(r"(?s)\[\[\s*([^][|<>]+?)\s*\|"