Skip to content

Commit

Permalink
feat: structure align output into www and Offline-HTML dirs
Browse files Browse the repository at this point in the history
  • Loading branch information
joanise committed Jul 12, 2024
1 parent 29ec752 commit 1413271
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 45 deletions.
4 changes: 2 additions & 2 deletions docs/outputs.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ With that in mind, there are a variety of different output formats that can be c

## Simple web deployment

The default output generated by the CLI's `readalongs align` command and the `www` folder included in the Web Bundle download format from the [Web App](web-app.md) include everything you need to host your ReadAlong on your own server.
The `www` folder generated by the CLI's `readalongs align` command or included in the Web Bundle download format from the [Web App](web-app.md) includes everything you need to host your ReadAlong on your own server.

Copy the whole folder with all its contents to your web server, and that should work as is on most web servers. The `index.html` provided will generate a page that shows only your readalong.

Expand Down Expand Up @@ -47,7 +47,7 @@ Below is an example of a minimal implementation in a basic standalone html page.
The above assumes the following structure, which is the default provided in the Web Bundle generated from the web app:
```txt
web
www
├── assets
│ ├── sample.wav
│ └── sample.readalong
Expand Down
43 changes: 28 additions & 15 deletions readalongs/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,12 @@
from readalongs.text.convert_xml import convert_xml
from readalongs.text.make_dict import make_dict
from readalongs.text.make_fsg import make_fsg
from readalongs.text.make_package import create_web_component_html
from readalongs.text.make_package import (
DEFAULT_HEADER,
DEFAULT_SUBHEADER,
DEFAULT_TITLE,
create_web_component_html,
)
from readalongs.text.tokenize_xml import tokenize_xml
from readalongs.text.util import (
get_word_text,
Expand Down Expand Up @@ -941,7 +946,12 @@ def save_readalong(
output_formats=output_formats,
)

ras_path = output_base + ".readalong"
bundle_path = os.path.join(output_dir, "www")
if not os.path.exists(bundle_path):
os.mkdir(bundle_path)
bundle_base = os.path.join(bundle_path, output_basename)

ras_path = bundle_base + ".readalong"
save_xml(ras_path, align_results["tokenized"])

if "xhtml" in output_formats:
Expand All @@ -950,41 +960,44 @@ def save_readalong(
save_xml(tokenized_xhtml_path, align_results["tokenized"])

audio_path = save_audio(
audiofile=audiofile, output_base=output_base, audiosegment=audiosegment
audiofile=audiofile, output_base=bundle_base, audiosegment=audiosegment
)

if "html" in output_formats:
html_out_path = output_base + ".html"
offline_html_dir = os.path.join(output_dir, "Offline-HTML")
html_out_path = os.path.join(offline_html_dir, output_basename + ".html")
html_out = create_web_component_html(
ras_path,
audio_path,
config.get("title", "Title goes here"),
config.get("header", "Header goes here"),
config.get("subheader", ""),
config.get("title", DEFAULT_TITLE),
config.get("header", DEFAULT_HEADER),
config.get("subheader", DEFAULT_SUBHEADER),
config.get("theme", "light"),
)
if not os.path.exists(offline_html_dir):
os.mkdir(offline_html_dir)
with open(html_out_path, "w", encoding="utf-8") as f:
f.write(html_out)

save_minimal_index_html(
os.path.join(output_dir, "index.html"),
os.path.join(bundle_path, "index.html"),
os.path.basename(ras_path),
os.path.basename(audio_path),
config.get("title", "Title goes here"),
config.get("header", "Header goes here"),
config.get("subheader", ""),
config.get("title", DEFAULT_TITLE),
config.get("header", DEFAULT_HEADER),
config.get("subheader", DEFAULT_SUBHEADER),
config.get("theme", "light"),
)

# Copy the image files to the output's asset directory, if any are found
if "images" in config:
save_images(config=config, output_dir=output_dir)
save_images(config=config, output_dir=bundle_path)
save_readme_txt(
os.path.join(output_dir, "readme.txt"),
os.path.join(bundle_path, "readme.txt"),
os.path.basename(ras_path),
os.path.basename(audio_path),
config.get("header", "Header goes here"),
config.get("subheader", ""),
config.get("header", DEFAULT_HEADER),
config.get("subheader", DEFAULT_SUBHEADER),
config.get("theme", "light"),
)

Expand Down
11 changes: 8 additions & 3 deletions readalongs/text/make_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@
"""


DEFAULT_TITLE = "ReadAlong-Studio for Interactive Storytelling"
DEFAULT_HEADER = "Your read-along title goes here"
DEFAULT_SUBHEADER = "Your read-along subtitle goes here"


def encode_from_path(path: str) -> str:
"""Encode file from bytes to b64 string with data and mime signature
Expand Down Expand Up @@ -115,9 +120,9 @@ def encode_from_path(path: str) -> str:
def create_web_component_html(
ras_path: str,
audio_path: str,
title="Title goes here",
header="Header goes here",
subheader="Subheader goes here",
title=DEFAULT_TITLE,
header=DEFAULT_HEADER,
subheader=DEFAULT_SUBHEADER,
theme="light",
) -> str:
import requests # Defer expensive import
Expand Down
50 changes: 25 additions & 25 deletions test/test_align_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
"""

import os
import pathlib
import tempfile
from os.path import exists, join
from pathlib import Path
from unittest import main

from basic_test_case import BasicTestCase
Expand All @@ -30,7 +30,7 @@ class TestAlignCli(BasicTestCase):

def test_invoke_align(self):
"""Basic readalongs align invocation and some variants"""
output = join(self.tempdir, "output")
output = self.tempdir / "output"
with open("image-for-page1.jpg", "wb"):
pass
# Run align from plain text
Expand All @@ -50,44 +50,44 @@ def test_invoke_align(self):
join(self.data_dir, "sample-config.json"),
self.add_bom(join(self.data_dir, "ej-fra.txt")),
join(self.data_dir, "ej-fra.m4a"),
output,
str(output),
],
)
# print(results.output)
self.assertEqual(results.exit_code, 0)
expected_output_files = [
"output.readalong",
"output.m4a",
"index.html",
"www/output.readalong",
"www/output.m4a",
"www/index.html",
"output.TextGrid",
"output.eaf",
"output_sentences.srt",
"output_sentences.vtt",
"output_words.srt",
"output_words.vtt",
"readme.txt",
"www/readme.txt",
]
for f in expected_output_files:
self.assertTrue(
exists(join(output, f)), f"successful alignment should have created {f}"
(output / f).exists(), f"successful alignment should have created {f}"
)
with open(join(output, "index.html"), encoding="utf8") as f:
with open(output / "www/index.html", encoding="utf8") as f:
self.assertIn(
'<read-along href="output.readalong" audio="output.m4a"',
f.read(),
)
self.assertTrue(
exists(join(output, "tempfiles", "output.tokenized.readalong")),
(output / "tempfiles/output.tokenized.readalong").exists(),
"alignment with -s should have created tempfiles/output.tokenized.readalong",
)
with open(
join(output, "tempfiles", "output.tokenized.readalong"),
output / "tempfiles/output.tokenized.readalong",
"r",
encoding="utf-8",
) as f:
self.assertNotIn("\ufeff", f.read())
self.assertTrue(
exists(join(output, "assets", "image-for-page1.jpg")),
(output / "www/assets/image-for-page1.jpg").exists(),
"alignment with image files should have copied image-for-page1.jpg to assets",
)
self.assertIn("image-for-page2.jpg is accessible ", results.stdout)
Expand All @@ -99,9 +99,9 @@ def test_invoke_align(self):
# Move the alignment output to compare with further down
# We cannot just output to a different name because changing the output file name
# changes the contents of the output.
output1 = output + "1"
output1 = str(output) + "1"
os.rename(output, output1)
self.assertFalse(exists(output), "os.rename() should have moved dir")
self.assertFalse(output.exists(), "os.rename() should have moved dir")

# Run align again, but on an XML input file with various added DNA text
results_dna = self.runner.invoke(
Expand All @@ -116,22 +116,22 @@ def test_invoke_align(self):
join(self.data_dir, "sample-config.json"),
self.add_bom(join(self.data_dir, "ej-fra-dna.readalong")),
join(self.data_dir, "ej-fra.m4a"),
output,
str(output),
],
)
self.assertEqual(results_dna.exit_code, 0)
# print(results_dna.stdout)
self.assertTrue(
exists(join(output, "output.readalong")),
(output / "www/output.readalong").exists(),
"successful alignment with DNA should have created output.readalong",
)
self.assertTrue(
exists(join(output, "output.xhtml")),
(output / "output.xhtml").exists(),
"successful alignment with -o xhtml should have created output.xhtml",
)
self.assertIn("Please copy image-for-page1.jpg to ", results_dna.stdout)
self.assertFalse(
exists(join(output, "assets", "image-for-page1.jpg")),
(output / "www/assets/image-for-page1.jpg").exists(),
"image-for-page1.jpg was not on disk, cannot have been copied",
)
self.assertIn(
Expand All @@ -144,7 +144,7 @@ def test_invoke_align(self):
[
join(self.data_dir, "ej-fra-dna.readalong"),
join(self.data_dir, "ej-fra.m4a"),
output,
str(output),
],
)
self.assertNotEqual(results_output_exists.exit_code, 0)
Expand All @@ -158,7 +158,7 @@ def test_invoke_align(self):
[
join(self.data_dir, "ej-fra-dna.readalong"),
join(self.data_dir, "ej-fra.m4a"),
join(output, "output.readalong"),
str(output / "www/output.readalong"),
],
)
self.assertNotEqual(results_output_is_regular_file, 0)
Expand Down Expand Up @@ -187,11 +187,11 @@ def test_align_with_package(self):
# print(results_html.output)
self.assertEqual(results_html.exit_code, 0)
self.assertTrue(
exists(join(output, "html.html")),
"succesful html alignment should have created html/html.html",
exists(join(output, "Offline-HTML", "html.html")),
"succesful html alignment should have created html/Offline-HTML/html.html",
)

with open(join(output, "html.html"), "rb") as fhtml:
with open(join(output, "Offline-HTML", "html.html"), "rb") as fhtml:
path_bytes = fhtml.read()
htmldoc = fromstring(path_bytes)
b64_pattern = r"data:[\w\/\-\+]*;base64,\w*"
Expand All @@ -205,7 +205,7 @@ def test_align_with_package(self):
def not_test_permission_denied(self):
"""Non-portable test to make sure denied permission triggers an error -- disabled"""
# This test is not stable, just disable it.
# It apparently also does not work correctly on M1 Macs either, even in Docker.
# It apparently does not work correctly on M1 Macs either, even in Docker.

import platform

Expand Down Expand Up @@ -623,7 +623,7 @@ def slurp_text(filename, encoding):
self.assertNotEqual(slurp_bin(base_file), slurp_bin(bom_file))
self.assertEqual(b"\xef\xbb\xbf" + slurp_bin(base_file), slurp_bin(bom_file))

bom_file_pathlib = self.add_bom(pathlib.Path(base_file))
bom_file_pathlib = self.add_bom(Path(base_file))
self.assertEqual(
slurp_text(base_file, "utf-8"), slurp_text(bom_file_pathlib, "utf-8-sig")
)
Expand Down

0 comments on commit 1413271

Please sign in to comment.