def convert_to_offline_html(
    sentences: Sequence[Sequence[Token]],
    audio_file_name: Union[str, os.PathLike],
    language: Sequence[str] = ("und",),
) -> str:
    """Convert a list of sentences/paragraphs/pages of tokens, with corresponding audio,
    into a readalong Offline HTML

    Args:
        sentences: a list of sentences, each of which is a list of Token objects
            Paragraph breaks are marked by an empty sentence (i.e., an empty list)
            Page breaks are marked by two empty sentences in a row
        audio_file_name: the name of the audio file to be used in the offline HTML
        language: list of languages to declare at the top of the readalong
            (has no functional effect since g2p is not applied, it's only metadata)

    Returns:
        str: the readalong Offline HTML file contents, ready to print to a .html file
    """
    readalong_xml = convert_to_readalong(sentences, language)

    # Create the temporary file *before* entering the try block: if creation
    # itself fails there is nothing to clean up, and the finally clause must
    # not reference an unbound name (which would mask the original error).
    # delete=False because the file is handed over by name below and removed
    # explicitly once the HTML has been generated.
    readalong_file = tempfile.NamedTemporaryFile(
        "w", encoding="utf8", delete=False, suffix=".readalong"
    )
    try:
        # Leaving the with-block closes (and flushes) the file so that it can
        # be reopened by name; an open NamedTemporaryFile cannot be reopened
        # on Windows.
        with readalong_file:
            readalong_file.write(readalong_xml)
        return create_web_component_html(readalong_file.name, audio_file_name)
    finally:
        os.unlink(readalong_file.name)
def test_convert_to_offline_html(self):
    """Check that convert_to_offline_html() returns an HTML document for the shared sentences."""
    html = api.convert_to_offline_html(
        self.sentences_to_convert, str(self.data_dir / "noise.mp3")
    )
    # Keep a copy for manual inspection, but in the test's temp dir rather
    # than polluting the current working directory.
    with open(self.tempdir / "test.html", "w", encoding="utf8") as f:
        f.write(html)
    # assertIn("", html) is true for every string, so check for real markup.
    # NOTE(review): the exact tags are assumed from the Offline HTML being a
    # page that embeds the read-along web component -- confirm against the
    # output of create_web_component_html.
    self.assertIn("<html", html)
    self.assertIn("<read-along", html)