Skip to content

Commit

Permalink
lint: Improve t-029
Browse files Browse the repository at this point in the history
  • Loading branch information
acabal committed Feb 18, 2025
1 parent 8e56bd6 commit 95600fc
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion se/se_epub_lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -2526,7 +2526,8 @@ def _lint_xhtml_typography_checks(filename: Path, dom: se.easy_xml.EasyXmlTree,
temp_xhtml = regex.sub(r"<title[^>]*?>.+?</title>", "", file_contents) # Remove <title> because it might contain something like <title>Chapter 2: The Antechamber of M. de Tréville</title>
temp_xhtml = regex.sub(r"<abbr[^>]*?>", "<abbr>", temp_xhtml) # Replace things like <abbr xml:lang="la">
temp_xhtml = regex.sub(r"<img[^>]*?>", "", temp_xhtml) # Remove <img> elements entirely, so that text in their attributes (like `alt`) isn't matched below
temp_xhtml = temp_xhtml.replace("A.B.C.", "X") # Remove A.B.C, which is not an abbreviations.
temp_xhtml = temp_xhtml.replace("A.B.C.", "X") # Replace `A.B.C.`, which is not an abbreviation, with a placeholder.
temp_xhtml = temp_xhtml.replace("X.Y.Z.", "X") # Remove X.Y.Z., which is usually used in the same sense as `A.B.C.` and is also not an abbreviation.
# Note the regex also excludes preceding numbers, so that we can have inline numbering like:
# "A number of questions: 1. regarding those who make heretics; 2. concerning those who were made heretics..."
matches = regex.findall(r"[^\s0-9]+\.\s+[\p{Lowercase_Letter}](?!’[\p{Uppercase_Letter}])[\p{Lowercase_Letter}]+", temp_xhtml)
Expand Down

0 comments on commit 95600fc

Please sign in to comment.