From c36fceb1353dd5d009d94cf90173f08944e7b6a4 Mon Sep 17 00:00:00 2001 From: linearcombination <4829djaskdfj@gmail.com> Date: Wed, 23 Oct 2024 10:30:45 -0700 Subject: [PATCH] Preserve text formatted as poetry in STET --- backend/document/stet/stet.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/backend/document/stet/stet.py b/backend/document/stet/stet.py index 9700a30f..5af4afac 100644 --- a/backend/document/stet/stet.py +++ b/backend/document/stet/stet.py @@ -71,9 +71,12 @@ def split_chapter_into_verses(chapter: USFMChapter) -> dict[str, str]: verse_number = re.search(r'(\d+)', verse_span) if verse_number: verse_number_ = verse_number.group(1) - # Remove all and
tags and their content from the verse text + # Remove all tags and their content from the verse text verse_text = re.sub(r".*?", "", verse_span) - verse_text = re.sub(r".*?
", "", verse_text) + # Remove
tags that do not have class matching "poetry-" + verse_text = re.sub( + r".*?
", "", verse_text + ) # Remove the remaining HTML tags and strip extra spaces verse_text = re.sub(r"<.*?>", "", verse_text).strip() logger.debug("verse_number: %s, verse_text: %s", verse_number_, verse_text)