From f4bdc0aa6e7d97a7336f6040f28cb7779719a0d9 Mon Sep 17 00:00:00 2001 From: vr8hub Date: Sun, 12 Jan 2025 13:45:00 -0600 Subject: [PATCH] Add additional dialect, fix and improve comments on convert_british_to_american --- se/typography.py | 11 ++++++----- .../british2american/test-1/golden/b2a.xhtml | 6 +++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/se/typography.py b/se/typography.py index 27bf45c3..d4088398 100644 --- a/se/typography.py +++ b/se/typography.py @@ -525,7 +525,7 @@ def convert_british_to_american(xhtml: str) -> str: Attempt to convert a string of XHTML from British-style quotation to American-style quotation. The overall method is to: o Replace double quotes with ldq/rdq tags o Replace "known" improper left single quotes with ap tag - o Replace remaining left single quotes with ap tag + o Replace remaining left single quotes with lsq tag o Tag closing right single quotes as ap or rsq depending on context o Replace tags with appropriate (American) quote, e.g. ldq with left single quote @@ -535,12 +535,13 @@ def convert_british_to_american(xhtml: str) -> str: OUTPUTS The XHTML with British-style quotation converted to American style """ + # double-quotes are unambiguous, so tagged immediately xhtml = regex.sub(r"“", r"", xhtml) xhtml = regex.sub(r"”", r"", xhtml) - # mark as apostrophe reversed (opening rather than closed) quote on known dialect; - # at, is, and the decades are also words on their own, but are deemed more likely to - # be elisions than starting a quote - xhtml = regex.sub(r"([^A-Za-z])‘([Aa]lf|at|[Aa]ve|[Ee]|[Ee]ard|[Ee]lp(ed|s)?|[Ee]m|[Ee]re|[Ee]rself|[I]im|is|[Ii]sself|[Ii]story|[Oo]ller|[Oo]w|[Oo]wever|[Rr]e|(twen|thir|four|fif|six|seven|eigh|nine)ties)\b",r"\1\2", xhtml) + # mark as apostrophe reversed quote (opening rather than closed) on known dialect; + # at and the decades are also words on their own, but are deemed more likely to be elisions + # than starting a quote + xhtml = regex.sub(r"([^A-Za-z])‘([Aa]lf|at|[Aa]ve|[Cc]ording|[Ee]|[Ee]ard|[Ee]lp(ed|s)?|[Ee]m|[Ee]re|[Ee]rself|[I]im|[Ii]sself|[Ii]story|Merr?i[ck]{1,2}[aei](r|ns?)|[Oo]ller|oo?man|[Oo]w|[Oo]wever|[Rr]e|(twen|thir|four|fif|six|seven|eigh|nine)ties)\b",r"\1\2", xhtml) # treat any remaining opening single quotes as lsq xhtml = regex.sub(r"‘", r"", xhtml) # ’a’ is two apostrophes diff --git a/tests/draft_commands/british2american/test-1/golden/b2a.xhtml b/tests/draft_commands/british2american/test-1/golden/b2a.xhtml index 16df4ce7..5e89807e 100644 --- a/tests/draft_commands/british2american/test-1/golden/b2a.xhtml +++ b/tests/draft_commands/british2american/test-1/golden/b2a.xhtml @@ -63,11 +63,11 @@

“False pretences and impersonation, sir. There’s five charges against him in different parts of the country, mostly at hotels. He represents himself as a rich man, stays there for some time living like a lord, cashes a big cheque and then goes off. Calls ’isself Sir Solomon Philbrick. Funny thing is, I think he really believes his tale ’isself. I’ve come across several cases like that one time or another. There was a bloke in Somerset what thought ’e was Bishop of Bath and Wells and confirmed a whole lot of kids⁠—very reverent, too.”

-

“ ‘Did you try pulling out ’is teeth and sending them to his pa?’ I asks.

+

“ ‘Did you try pulling out “is teeth and sending them to his pa?’ I asks.

-

“This ’ere’s your pal,” he said; “this ’ere’s the path you’ve got to walk on. Neither of you is to touch the other or any part of ’is clothing. Nothing is to be passed from one to the other. You are to keep at a distance of one yard and talk of ’istory, philosophy or kindred subjects. When I rings the bell you stops talking, see? Your pace is to be neither quicker nor slower than average walking-pace. Them’s the Governor’s instructions, and Gawd ’elp yer if yer does anything wrong. Now walk.”

+

“This ’ere’s your pal,” he said; “this ’ere’s the path you’ve got to walk on. Neither of you is to touch the other or any part of “is clothing. Nothing is to be passed from one to the other. You are to keep at a distance of one yard and talk of ’istory, philosophy or kindred subjects. When I rings the bell you stops talking, see? Your pace is to be neither quicker nor slower than average walking-pace. Them’s the Governor’s instructions, and Gawd ’elp yer if yer does anything wrong. Now walk.”

-

“That,” said Dr. Fagan with some disgust, ’is my daughter.”

+

“That,” said Dr. Fagan with some disgust, “is my daughter.”