diff --git a/readalongs/api.py b/readalongs/api.py
index 8ca72b6d..4a7f2b30 100644
--- a/readalongs/api.py
+++ b/readalongs/api.py
@@ -34,37 +34,50 @@ class like pathlib.Path. Warning: don't just use "/some/path/config.json"
Additional API function:
-convert_to_readalong(sentences: Sequence[Sequence[Token]], language: Sequence[str]) -> str:
+convert_prealigned_text_to_readalong():
convert a list of sentences into a readalong XML string ready to print to file.
Just like align and make_xml, this function expects a black line (empty list) to
make a paragraph break, and two consecutive blank lines to make a page break.
Unlike the other functions here, this function is not a wrapper around the CLI and
- it just returns the string, non status.
+ it just returns the string, with no status.
+
+convert_prealigned_text_to_offline_html():
+ same as convert_prealigned_text_to_readalong, but also returns the Offline HTML file contents.
+
+See their respective docstrings for more details.
"""
import io
import logging
+import os
+import tempfile
from dataclasses import dataclass
-from pathlib import Path
from typing import Optional, Sequence, Tuple, Union
import click
+from lxml import etree
from readalongs import cli
from readalongs.align import create_ras_from_text
from readalongs.log import LOGGER
from readalongs.text.add_ids_to_xml import add_ids
+from readalongs.text.make_package import (
+ DEFAULT_HEADER,
+ DEFAULT_SUBHEADER,
+ DEFAULT_TITLE,
+ create_web_component_html,
+)
from readalongs.text.util import parse_xml
from readalongs.util import JoinerCallbackForClick, get_langs_deferred
def align(
- textfile: Union[str, Path],
- audiofile: Union[str, Path],
- output_base: Union[str, Path],
+ textfile: Union[str, os.PathLike],
+ audiofile: Union[str, os.PathLike],
+ output_base: Union[str, os.PathLike],
language: Sequence[str] = (),
output_formats: Sequence[str] = (),
- **kwargs
+ **kwargs,
) -> Tuple[int, Optional[Exception], str]:
"""Run the "readalongs align" command from within a Python script.
@@ -119,10 +132,10 @@ def align(
def make_xml(
- plaintextfile: Union[str, Path],
- xmlfile: Union[str, Path],
+ plaintextfile: Union[str, os.PathLike],
+ xmlfile: Union[str, os.PathLike],
language: Sequence[str],
- **kwargs
+ **kwargs,
) -> Tuple[int, Optional[Exception], str]:
"""Run the "readalongs make-xml" command from within a Python script.
@@ -144,7 +157,7 @@ def make_xml(
if isinstance(plaintextfile, click.utils.LazyFile)
else plaintextfile
)
- xmlfile = str(xmlfile) if isinstance(xmlfile, Path) else xmlfile
+ xmlfile = str(xmlfile) if isinstance(xmlfile, os.PathLike) else xmlfile
logging_stream = io.StringIO()
logging_handler = logging.StreamHandler(logging_stream)
try:
@@ -210,7 +223,7 @@ def __init__(
self.is_word = is_word if is_word is not None else bool(time is not None)
-def convert_to_readalong(
+def convert_prealigned_text_to_readalong(
sentences: Sequence[Sequence[Token]],
language: Sequence[str] = ("und",),
) -> str:
@@ -224,10 +237,8 @@ def convert_to_readalong(
(has no functional effect since g2p is not applied, it's only metadata)
Returns:
- str: the readalong XML string, ready to print to a .readalong file
+ str: the readalong XML file contents, ready to print to .readalong
"""
- from lxml import etree
-
xml_text = create_ras_from_text(
["".join(token.text for token in sentence) for sentence in sentences],
language,
@@ -259,3 +270,47 @@ def convert_to_readalong(
).decode("utf8")
return xml_text + "\n"
+
+
+def convert_prealigned_text_to_offline_html(
+    sentences: Sequence[Sequence[Token]],
+    audio_file_name: Union[str, os.PathLike],
+    language: Sequence[str] = ("und",),
+    title: str = DEFAULT_TITLE,
+    header: str = DEFAULT_HEADER,
+    subheader: str = DEFAULT_SUBHEADER,
+) -> Tuple[str, str]:
+    """Convert a list of sentences/paragraphs/pages of tokens, with corresponding audio,
+    into a readalong Offline HTML
+
+    Args:
+        sentences: a list of sentences, each of which is a list of Token objects
+            Paragraph breaks are marked by an empty sentence (i.e., an empty list)
+            Page breaks are marked by two empty sentences in a row
+        audio_file_name: the name of the audio file to be used in the offline HTML
+        language: list of languages to declare at the top of the readalong
+            (has no functional effect since g2p is not applied, it's only metadata)
+        title: optional title, will fill the HTML <title> tag
+        header: optional header, will fill the readalong header
+        subheader: optional subheader, will fill the readalong subheader
+
+    Returns:
+        (html_contents, readalong_contents):
+            - the readalong Offline HTML file contents, ready to print to .html
+            - the readalong XML file contents, ready to print to .readalong
+    """
+    readalong_xml = convert_prealigned_text_to_readalong(sentences, language)
+    # create_web_component_html() takes a path, so stage the XML in a temp file.
+    # Created outside the try so "finally" never sees an unbound readalong_file.
+    readalong_file = tempfile.NamedTemporaryFile(
+        "w", encoding="utf8", delete=False, suffix=".readalong"
+    )
+    try:
+        with readalong_file:  # closed (even if write fails) before reuse by name
+            readalong_file.write(readalong_xml)
+        offline_html = create_web_component_html(
+            readalong_file.name, audio_file_name, title, header, subheader
+        )
+        return offline_html, readalong_xml
+    finally:
+        os.unlink(readalong_file.name)
diff --git a/readalongs/text/make_package.py b/readalongs/text/make_package.py
index 8f821f31..3f657423 100644
--- a/readalongs/text/make_package.py
+++ b/readalongs/text/make_package.py
@@ -16,7 +16,7 @@
import os
from base64 import b64encode
from mimetypes import guess_type
-from typing import Any
+from typing import Any, Union
from lxml import etree
@@ -36,7 +36,6 @@
-
{title}
@@ -57,11 +56,11 @@
DEFAULT_SUBHEADER = "Your read-along subtitle goes here"
-def encode_from_path(path: str) -> str:
+def encode_from_path(path: Union[str, os.PathLike]) -> str:
"""Encode file from bytes to b64 string with data and mime signature
Args:
- path (str): path to file
+ path: path to file
Returns:
str: base64 string with data and mime signature
@@ -118,8 +117,8 @@ def encode_from_path(path: str) -> str:
def create_web_component_html(
- ras_path: str,
- audio_path: str,
+ ras_path: Union[str, os.PathLike],
+ audio_path: Union[str, os.PathLike],
title=DEFAULT_TITLE,
header=DEFAULT_HEADER,
subheader=DEFAULT_SUBHEADER,
diff --git a/readalongs/text/util.py b/readalongs/text/util.py
index 3c3c6780..874f6150 100644
--- a/readalongs/text/util.py
+++ b/readalongs/text/util.py
@@ -14,7 +14,6 @@
from collections import OrderedDict
from datetime import datetime
from io import TextIOWrapper
-from pathlib import Path
from typing import IO, Union
from lxml import etree
@@ -104,7 +103,7 @@ def is_do_not_align(element):
return dna in ("true", "True", "TRUE", "1")
-def load_xml(input_path: Union[str, Path, IO]) -> etree.ElementTree:
+def load_xml(input_path: Union[str, os.PathLike, IO]) -> etree.ElementTree:
"""Safely load an XML file with etree.parse to respect encoding
Return: the root of the XML etree
diff --git a/test/test_api.py b/test/test_api.py
index 7499de13..70a1bfdc 100755
--- a/test/test_api.py
+++ b/test/test_api.py
@@ -97,36 +97,38 @@ def test_deprecated_prepare(self):
api.prepare(self.data_dir / "ej-fra.txt", os.devnull, ("fra",))
self.assertIn("deprecated", "\n".join(cm.output))
+ sentences_to_convert = [  # class-level fixture shared by the convert_prealigned_text_* tests
+ [
+ api.Token("Bonjöûr,", 0.2, 1.0),
+ api.Token(" "),  # text only; presumably a non-word token (no timing given)
+ api.Token("hello", 1.0, 0.2),
+ api.Token("!"),
+ ],
+ [api.Token("Sentence2", 4.2, 0.2), api.Token("!")],
+ [],  # one empty sentence: paragraph break
+ [api.Token("Paragraph2", 4.2, 0.2), api.Token(".")],
+ [],  # two empty sentences in a row: page break
+ [],
+ [
+ api.Token("("),
+ api.Token('"'),
+ api.Token("Page2", 5.2, 0.2),
+ api.Token("."),
+ api.Token('"'),
+ api.Token(")"),
+ ],
+ ]
+
def test_convert_to_readalong(self):
- sentences = [
- [
- api.Token("Bonjöûr,", 0.2, 1.0),
- api.Token(" "),
- api.Token("hello", 1.0, 0.2),
- api.Token("!"),
- ],
- [api.Token("Sentence2", 4.2, 0.2), api.Token("!")],
- [],
- [api.Token("Paragraph2", 4.2, 0.2), api.Token(".")],
- [],
- [],
- [
- api.Token("("),
- api.Token('"'),
- api.Token("Page2", 5.2, 0.2),
- api.Token("."),
- api.Token('"'),
- api.Token(")"),
- ],
- ]
-
- readalong = api.convert_to_readalong(sentences)
+
+ readalong = api.convert_prealigned_text_to_readalong(self.sentences_to_convert)
# print(readalong)
# Make the reference by calling align with the same text and adjusting
# things we expect to be different.
sentences_as_text = "\n".join(
- "".join(token.text for token in sentence) for sentence in sentences
+ "".join(token.text for token in sentence)
+ for sentence in self.sentences_to_convert
)
with open(self.tempdir / "sentences.txt", "w", encoding="utf8") as f:
f.write(sentences_as_text)
@@ -152,6 +154,23 @@ def test_convert_to_readalong(self):
readalong = re.sub(r"dur=\".*?\"", 'dur="ddd"', readalong)
self.assertEqual(readalong, align_result)
+    def test_convert_to_offline_html(self):
+        """The offline HTML is a full HTML document carrying our custom subheader."""
+        html, readalong = api.convert_prealigned_text_to_offline_html(
+            self.sentences_to_convert,
+            str(self.data_dir / "noise.mp3"),
+            subheader="by Jove!",
+        )
+        self.assertIn("<!DOCTYPE html>", html)
+        self.assertIn("by Jove!", html)
+        # The second return value is the .readalong XML built from the same tokens.
+        self.assertIn("Paragraph2", readalong)
+
if __name__ == "__main__":
main()