Skip to content

Commit

Permalink
ebook DE removed redundant fix_ellipsis()
Browse files: browse the repository at this point in the history
  • Loading branch information
entorb committed Dec 1, 2024
1 parent 9988698 commit 71580fd
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 33 deletions.
14 changes: 10 additions & 4 deletions scripts/ebook/step_6.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@
HTML modifications.
"""

import os
import re
import sys
from pathlib import Path

# Run from the repository root (three levels above this file) so the relative
# paths below resolve the same way no matter where the script is started.
# resolve() makes __file__ absolute first; with a relative __file__ the bare
# .parent chain would collapse to "." instead of walking up the tree.
os.chdir(Path(__file__).resolve().parent.parent.parent)
# Put the directory one level above this file's folder on the import path so
# that check_chapters_settings (imported next) can be found.
sys.path.append(str(Path(__file__).resolve().parent.parent))
from check_chapters_settings import settings

# Language code taken from the shared settings; fix_ellipsis() branches on it.
LANG = settings["lang"]

# Source and target HTML paths, relative to the working directory set above.
source_file = Path("tmp/hpmor-epub-5-html-unmod.html")
target_file = Path("hpmor.html")
Expand All @@ -20,6 +23,11 @@ def fix_ellipsis(s: str) -> str:
"""
Fix ellipsis spacing for ebooks.
"""
if LANG == "DE":
# this was redundant to the new DE rules in check_chapters.py
# before opening DE-quotes: add space
# s = re.sub(r"…(?=[„])", "… ", s)
return s
# 1. remove all spaces around ellipsis
s = re.sub(r" *… *", "…", s)
# 2. recreate some spaces
Expand All @@ -33,8 +41,6 @@ def fix_ellipsis(s: str) -> str:
s = re.sub(r"…(?=<em>)", "… ", s)
# before opening EN-quotes: add space
# s = re.sub(r"…(?=[“])", "… ", s)
# before opening DE-quotes: add space
s = re.sub(r"…(?=[„])", "… ", s)
return s


Expand Down
67 changes: 38 additions & 29 deletions scripts/ebook/step_6_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,36 +4,45 @@
"""Unit Tests."""
# ruff: noqa: S101

import sys
from pathlib import Path

import pytest
from step_6 import fix_ellipsis

# The shared settings module lives one directory above this file's folder;
# add that directory to the import path before importing from it.
_SCRIPTS_DIR = Path(__file__).resolve().parent.parent
sys.path.append(str(_SCRIPTS_DIR))
from check_chapters_settings import settings

# Language code from the shared settings; used to select which tests apply.
LANG = settings["lang"]

if LANG == "EN":

# Table-driven test for fix_ellipsis(): every tuple below is
# (input text, expected output), and the test asserts that
# fix_ellipsis(input) returns exactly the expected string.
@pytest.mark.parametrize(
("text", "expected"),
[
# ellipsis directly next to a closing/opening quotation mark: unchanged
("foo…”", "foo…”"),
("“…foo", "“…foo"),
# ellipsis directly next to an HTML paragraph tag: unchanged
("foo…</p>", "foo…</p>"),
("<p>…foo", "<p>…foo"),
# between two words: no space before the ellipsis, exactly one after it
("foo…bar", "foo… bar"),
("foo …bar", "foo… bar"),
("foo … bar", "foo… bar"),
("foo… bar", "foo… bar"),
# right after sentence-ending punctuation (. ! ?): the space goes
# before the ellipsis, none after it
("foo.…bar", "foo. …bar"),
("foo!…bar", "foo! …bar"),
("foo?…bar", "foo? …bar"),
# ellipsis followed by sentence-ending punctuation: unchanged
("foo…. bar", "foo…. bar"),
("foo…! bar", "foo…! bar"),
("foo…? bar", "foo…? bar"),
# next to <em>/</em> tags: a space is placed after the ellipsis
("foo</em>…bar", "foo</em>… bar"),
("foo…<em>bar", "foo… <em>bar"),
],
)
def test_fix_ellipsis(text: str, expected: str) -> None:
# One parametrized case: compare the function's output with the expectation.
assert fix_ellipsis(text) == expected
# Table-driven test for the ellipsis spacing produced by fix_ellipsis().
# Each tuple is (input text, expected output); the "_en" suffix matches the
# LANG == "EN" guard that appears earlier in this file.
@pytest.mark.parametrize(
("text", "expected"),
[
# ellipsis touching a closing or opening quotation mark stays as is
("foo…”", "foo…”"),
("“…foo", "“…foo"),
# ellipsis touching a <p> or </p> tag stays as is
("foo…</p>", "foo…</p>"),
("<p>…foo", "<p>…foo"),
# between two words: spaces before the ellipsis are dropped and a
# single space follows it
("foo…bar", "foo… bar"),
("foo …bar", "foo… bar"),
("foo … bar", "foo… bar"),
("foo… bar", "foo… bar"),
# following ".", "!" or "?": a space separates the punctuation from
# the ellipsis, and the ellipsis attaches to the next word
("foo.…bar", "foo. …bar"),
("foo!…bar", "foo! …bar"),
("foo?…bar", "foo? …bar"),
# ellipsis followed by ".", "!" or "?" stays as is
("foo…. bar", "foo…. bar"),
("foo…! bar", "foo…! bar"),
("foo…? bar", "foo…? bar"),
# adjacent to <em> or </em>: a space is inserted after the ellipsis
("foo</em>…bar", "foo</em>… bar"),
("foo…<em>bar", "foo… <em>bar"),
],
)
def test_fix_ellipsis_en(text: str, expected: str) -> None:
# Single parametrized case: the output must equal the expected string.
assert fix_ellipsis(text) == expected

0 comments on commit 71580fd

Please sign in to comment.