From 87da5d815be36e9d7d9a71228505b1783852f3a4 Mon Sep 17 00:00:00 2001 From: linearcombination <4829djaskdfj@gmail.com> Date: Fri, 13 Sep 2024 10:18:48 -0700 Subject: [PATCH 1/7] Fix docx book repeat issue There was a bug wherein, when two books were chosen, the resulting document would include only one of the books, repeated twice. This commit fixes that and adds an end-to-end test. --- ...ly_strategies_lang_then_book_by_chapter.py | 134 +++++++++--------- tests/e2e/test_api.py | 45 ++++++ 2 files changed, 111 insertions(+), 68 deletions(-) diff --git a/backend/document/domain/assembly_strategies_docx/assembly_strategies_lang_then_book_by_chapter.py b/backend/document/domain/assembly_strategies_docx/assembly_strategies_lang_then_book_by_chapter.py index 56483933..8813debe 100755 --- a/backend/document/domain/assembly_strategies_docx/assembly_strategies_lang_then_book_by_chapter.py +++ b/backend/document/domain/assembly_strategies_docx/assembly_strategies_lang_then_book_by_chapter.py @@ -51,33 +51,31 @@ def assemble_content_by_lang_then_book( chunk_size, to interleaving strategy to do the actual interleaving. 
""" - book_id_map = dict((id, pos) for pos, id in enumerate(BOOK_NAMES.keys())) composers: list[Composer] = [] - most_lang_codes = max( - [ - [usfm_book.lang_code for usfm_book in usfm_books], - [tn_book.lang_code for tn_book in tn_books], - [tq_book.lang_code for tq_book in tq_books], - [tw_book.lang_code for tw_book in tw_books], - [bc_book.lang_code for bc_book in bc_books], - ], - key=lambda x: len(x), + book_id_map = dict((id, pos) for pos, id in enumerate(BOOK_NAMES.keys())) + all_lang_codes = ( + {usfm_book.lang_code for usfm_book in usfm_books} + .union(tn_book.lang_code for tn_book in tn_books) + .union(tq_book.lang_code for tq_book in tq_books) + .union(tw_book.lang_code for tw_book in tw_books) + .union(bc_book.lang_code for bc_book in bc_books) ) - most_book_codes = max( - [ - [usfm_book.book_code for usfm_book in usfm_books], - [tn_book.book_code for tn_book in tn_books], - [tq_book.book_code for tq_book in tq_books], - [tw_book.book_code for tw_book in tw_books], - [bc_book.book_code for bc_book in bc_books], - ], - key=lambda x: len(x), + most_lang_codes = list(all_lang_codes) + # Collect and deduplicate book codes + all_book_codes = ( + {usfm_book.book_code for usfm_book in usfm_books} + .union(tn_book.book_code for tn_book in tn_books) + .union(tq_book.book_code for tq_book in tq_books) + .union(tw_book.book_code for tw_book in tw_books) + .union(bc_book.book_code for bc_book in bc_books) + ) + most_book_codes = list(all_book_codes) + # Cache book_id_map lookup + book_codes_sorted = sorted( + most_book_codes, key=lambda book_code: book_id_map[book_code] ) for lang_code in most_lang_codes: - for book_code in sorted( - most_book_codes, - key=lambda book_code: book_id_map[book_code], - ): + for book_code in book_codes_sorted: selected_usfm_books = [ usfm_book for usfm_book in usfm_books @@ -117,55 +115,55 @@ def assemble_content_by_lang_then_book( if bc_book.lang_code == lang_code and bc_book.book_code == book_code ] bc_book = selected_bc_books[0] if 
selected_bc_books else None - if usfm_book is not None: - composers.append( - assemble_usfm_by_book( - usfm_book, - tn_book, - tq_book, - tw_book, - usfm_book2, - bc_book, + if usfm_book is not None: + composers.append( + assemble_usfm_by_book( + usfm_book, + tn_book, + tq_book, + tw_book, + usfm_book2, + bc_book, + ) ) - ) - elif usfm_book is None and tn_book is not None: - composers.append( - assemble_tn_by_book( - usfm_book, - tn_book, - tq_book, - tw_book, - usfm_book2, - bc_book, + elif usfm_book is None and tn_book is not None: + composers.append( + assemble_tn_by_book( + usfm_book, + tn_book, + tq_book, + tw_book, + usfm_book2, + bc_book, + ) ) - ) - elif usfm_book is None and tn_book is None and tq_book is not None: - composers.append( - assemble_tq_by_book( - usfm_book, - tn_book, - tq_book, - tw_book, - usfm_book2, - bc_book, + elif usfm_book is None and tn_book is None and tq_book is not None: + composers.append( + assemble_tq_by_book( + usfm_book, + tn_book, + tq_book, + tw_book, + usfm_book2, + bc_book, + ) ) - ) - elif ( - usfm_book is None - and tn_book is None - and tq_book is None - and (tw_book is not None or bc_book is not None) - ): - composers.append( - assemble_tw_by_book( - usfm_book, - tn_book, - tq_book, - tw_book, - usfm_book2, - bc_book, + elif ( + usfm_book is None + and tn_book is None + and tq_book is None + and (tw_book is not None or bc_book is not None) + ): + composers.append( + assemble_tw_by_book( + usfm_book, + tn_book, + tq_book, + tw_book, + usfm_book2, + bc_book, + ) ) - ) first_composer = composers[0] for composer in composers[1:]: first_composer.append(composer.doc) diff --git a/tests/e2e/test_api.py b/tests/e2e/test_api.py index dc4913aa..1dd79e5c 100644 --- a/tests/e2e/test_api.py +++ b/tests/e2e/test_api.py @@ -2,6 +2,7 @@ import os import pathlib +import re import pytest import requests @@ -1650,3 +1651,47 @@ def test_en_bc_col_language_book_order_with_no_email_1c() -> None: }, ) 
check_finished_document_without_verses_success(response, suffix="pdf") + + +def test_en_ulb_1jn_en_ulb_3jn_language_book_order_with_no_email_1c() -> None: + with TestClient(app=app, base_url=settings.api_test_url()) as client: + response = client.post( + "/documents", + json={ + # "email_address": settings.TO_EMAIL_ADDRESS, + "assembly_strategy_kind": model.AssemblyStrategyEnum.LANGUAGE_BOOK_ORDER, + "assembly_layout_kind": model.AssemblyLayoutEnum.ONE_COLUMN, + "layout_for_print": False, + "chunk_size": model.ChunkSizeEnum.CHAPTER, + "generate_pdf": False, + "generate_epub": False, + "generate_docx": False, + "resource_requests": [ + { + "lang_code": "en", + "resource_type": "ulb", + "book_code": "1jn", + }, + { + "lang_code": "en", + "resource_type": "ulb", + "book_code": "3jn", + }, + ], + }, + ) + finished_document_request_key = check_result( + response, suffix="html", poll_duration=4 + ) + html_filepath = os.path.join( + settings.DOCUMENT_OUTPUT_DIR, + "{}.html".format(finished_document_request_key), + ) + with open(html_filepath, "r") as fin: + html = fin.read() + body_match = re.search(r"