Skip to content

Commit

Permalink
Fix issue with formatting of definitions
Browse files Browse the repository at this point in the history
This fixes, for instance, the formatting problem in the definition of the
word "Faith" that was raised in JIRA.
  • Loading branch information
linearcombination committed Dec 19, 2024
1 parent faa80ea commit d7cf697
Showing 1 changed file with 17 additions and 11 deletions.
28 changes: 17 additions & 11 deletions backend/document/stet/stet.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from document.stet.util import is_valid_int
from docx import Document # type: ignore
from docx.document import Document as DocxDocument # type: ignore
from docx.text.paragraph import Paragraph # type: ignore
from docx.text.paragraph import Paragraph # type: ignore
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_PARAGRAPH_ALIGNMENT # type: ignore
from docx.oxml import OxmlElement # type: ignore
from docx.oxml.ns import qn # type: ignore
Expand Down Expand Up @@ -97,20 +97,23 @@ def get_word_entry_dtos(
# Extract data from word field
match = re.match(r"(.*)(\n)?(.*)?", row.cells[0].text)
if not match:
raise ValueError(f"Couldn't parse word def: {row.cells[0].text}")
raise ValueError(f"Couldn't parse word: {row.cells[0].text}")
word = match.group(1)
word_entry_dto.word = word
raw_strongs = match.group(3)
word_entry_dto.strongs_numbers = raw_strongs.strip()
definition = ""
previous_paragraph_style_name = ""
for paragraph in row.cells[1].paragraphs:
text = paragraph.text.strip()
if previous_paragraph_style_name not in (paragraph.style.name, ""):
definition += "\n"
if paragraph.style.name == "List Paragraph":
definition += f"- {paragraph.text.strip()}\n"
if text:
definition += f"- {paragraph.text.strip()}\n"
else:
definition += f"{paragraph.text.strip()}\n"
if text:
definition += f"{paragraph.text.strip()}\n"
previous_paragraph_style_name = paragraph.style.name
word_entry_dto.definition = definition
# process verse list
Expand Down Expand Up @@ -359,9 +362,9 @@ def generate_docx_document(
lang0_resource_dir,
)
for chapter_num_, chapter_ in source_usfm_book.chapters.items():
source_usfm_book.chapters[
chapter_num_
].verses = split_chapter_into_verses(chapter_)
source_usfm_book.chapters[chapter_num_].verses = (
split_chapter_into_verses(chapter_)
)
source_usfm_books.append(source_usfm_book)
lang1_resource_lookup_dto_ = resource_lookup_dto(
lang1_code, lang1_usfm_resource_type, book_code
Expand All @@ -378,9 +381,9 @@ def generate_docx_document(
lang1_resource_dir,
)
for chapter_num_, chapter_ in target_usfm_book.chapters.items():
target_usfm_book.chapters[
chapter_num_
].verses = split_chapter_into_verses(chapter_)
target_usfm_book.chapters[chapter_num_].verses = (
split_chapter_into_verses(chapter_)
)
target_usfm_books.append(target_usfm_book)
current_task.update_state(state="Assembling content")
for word_entry_dto in word_entry_dtos:
Expand Down Expand Up @@ -514,7 +517,9 @@ def generate_docx(
# Process HTML content in source_text and highlight keyword
source_paragraph = row_cells[0].paragraphs[0]
source_paragraph.paragraph_format.line_spacing = 2.0 # Adjust line spacing
add_highlighted_html_to_docx(verse.source_text, source_paragraph, word_entry.word)
add_highlighted_html_to_docx(
verse.source_text, source_paragraph, word_entry.word
)
# Add target_text with wider line spacing
target_paragraph = row_cells[1].paragraphs[0]
target_paragraph.paragraph_format.line_spacing = 2.0 # Adjust line spacing
Expand Down Expand Up @@ -586,6 +591,7 @@ def add_plain_html_to_docx(html: str, paragraph: Paragraph) -> None:
for temp_paragraph in temp_doc.paragraphs:
paragraph.add_run(temp_paragraph.text)


def add_lined_page_at_end(doc: Document) -> Document:
"""
Adds a single page filled with ruled lines to the end of the document for note-taking.
Expand Down

0 comments on commit d7cf697

Please sign in to comment.