Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: structure align output into www and Offline-HTML dirs #231

Merged
merged 3 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions docs/outputs.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ With that in mind, there are a variety of different output formats that can be c

## Simple web deployment

The default output generated by the CLI's `readalongs align` command and the `www` folder included in the Web Bundle download format from the [Web App](web-app.md) include everything you need to host your ReadAlong on your own server.
The `www` folder generated by the CLI's `readalongs align` command or included in the Web Bundle download format from the [Web App](web-app.md) includes everything you need to host your ReadAlong on your own server.

Copy the whole folder with all its contents to your web server; it should work as is on most web servers. The `index.html` provided will generate a page that shows only your readalong.

The elements of the `index.html` can also be copied into a larger page with other contents, or multiple readalongs -- see [Web Component details](#web-component-details) below.
The elements of the `index.html` can also be copied into a larger page with other contents or multiple readalongs -- see [Web Component details](#web-component-details) below.

## WordPress deployment

Expand Down Expand Up @@ -47,14 +47,15 @@ Below is an example of a minimal implementation in a basic standalone html page.
The above assumes the following structure, which is the default provided in the Web Bundle generated from the Web App:

```txt
web
www
joanise marked this conversation as resolved.
Show resolved Hide resolved
├── assets
│ ├── sample.wav
│ └── sample.readalong
└── index.html
├── index.html
└── readme.txt
```

Then you can host your site anywhere, or run it locally (`cd web && python3 -m http.server` for example)
Then you can host your site anywhere, or run it locally (for example, `cd www && python3 -m http.server`).

## Single-file HTML

Expand Down
43 changes: 28 additions & 15 deletions readalongs/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,12 @@
from readalongs.text.convert_xml import convert_xml
from readalongs.text.make_dict import make_dict
from readalongs.text.make_fsg import make_fsg
from readalongs.text.make_package import create_web_component_html
from readalongs.text.make_package import (
DEFAULT_HEADER,
DEFAULT_SUBHEADER,
DEFAULT_TITLE,
create_web_component_html,
)
from readalongs.text.tokenize_xml import tokenize_xml
from readalongs.text.util import (
get_word_text,
Expand Down Expand Up @@ -941,7 +946,12 @@ def save_readalong(
output_formats=output_formats,
)

ras_path = output_base + ".readalong"
bundle_path = os.path.join(output_dir, "www")
if not os.path.exists(bundle_path):
os.mkdir(bundle_path)
bundle_base = os.path.join(bundle_path, output_basename)

ras_path = bundle_base + ".readalong"
save_xml(ras_path, align_results["tokenized"])

if "xhtml" in output_formats:
Expand All @@ -950,41 +960,44 @@ def save_readalong(
save_xml(tokenized_xhtml_path, align_results["tokenized"])

audio_path = save_audio(
audiofile=audiofile, output_base=output_base, audiosegment=audiosegment
audiofile=audiofile, output_base=bundle_base, audiosegment=audiosegment
)

if "html" in output_formats:
html_out_path = output_base + ".html"
offline_html_dir = os.path.join(output_dir, "Offline-HTML")
html_out_path = os.path.join(offline_html_dir, output_basename + ".html")
joanise marked this conversation as resolved.
Show resolved Hide resolved
html_out = create_web_component_html(
ras_path,
audio_path,
config.get("title", "Title goes here"),
config.get("header", "Header goes here"),
config.get("subheader", ""),
config.get("title", DEFAULT_TITLE),
config.get("header", DEFAULT_HEADER),
config.get("subheader", DEFAULT_SUBHEADER),
config.get("theme", "light"),
)
if not os.path.exists(offline_html_dir):
os.mkdir(offline_html_dir)
with open(html_out_path, "w", encoding="utf-8") as f:
f.write(html_out)

save_minimal_index_html(
os.path.join(output_dir, "index.html"),
os.path.join(bundle_path, "index.html"),
os.path.basename(ras_path),
os.path.basename(audio_path),
config.get("title", "Title goes here"),
config.get("header", "Header goes here"),
config.get("subheader", ""),
config.get("title", DEFAULT_TITLE),
config.get("header", DEFAULT_HEADER),
config.get("subheader", DEFAULT_SUBHEADER),
config.get("theme", "light"),
)

# Copy the image files to the output's asset directory, if any are found
if "images" in config:
save_images(config=config, output_dir=output_dir)
save_images(config=config, output_dir=bundle_path)
save_readme_txt(
os.path.join(output_dir, "readme.txt"),
os.path.join(bundle_path, "readme.txt"),
os.path.basename(ras_path),
os.path.basename(audio_path),
config.get("header", "Header goes here"),
config.get("subheader", ""),
config.get("header", DEFAULT_HEADER),
config.get("subheader", DEFAULT_SUBHEADER),
config.get("theme", "light"),
)

Expand Down
11 changes: 8 additions & 3 deletions readalongs/text/make_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@
"""


DEFAULT_TITLE = "ReadAlong-Studio for Interactive Storytelling"
DEFAULT_HEADER = "Your read-along title goes here"
DEFAULT_SUBHEADER = "Your read-along subtitle goes here"


def encode_from_path(path: str) -> str:
"""Encode file from bytes to b64 string with data and mime signature

Expand Down Expand Up @@ -115,9 +120,9 @@ def encode_from_path(path: str) -> str:
def create_web_component_html(
ras_path: str,
audio_path: str,
title="Title goes here",
header="Header goes here",
subheader="Subheader goes here",
title=DEFAULT_TITLE,
header=DEFAULT_HEADER,
subheader=DEFAULT_SUBHEADER,
theme="light",
) -> str:
import requests # Defer expensive import
Expand Down
3 changes: 2 additions & 1 deletion readalongs/text/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from collections import OrderedDict
from datetime import datetime
from io import TextIOWrapper
from pathlib import Path
from typing import IO, Union
from unicodedata import normalize

Expand Down Expand Up @@ -104,7 +105,7 @@ def is_do_not_align(element):
return dna in ("true", "True", "TRUE", "1")


def load_xml(input_path: Union[str, IO]) -> etree.ElementTree:
def load_xml(input_path: Union[str, Path, IO]) -> etree.ElementTree:
"""Safely load an XML file with etree.parse to respect encoding

Return: the root of the XML etree
Expand Down
50 changes: 25 additions & 25 deletions test/test_align_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
"""

import os
import pathlib
import tempfile
from os.path import exists, join
from pathlib import Path
from unittest import main

from basic_test_case import BasicTestCase
Expand All @@ -30,7 +30,7 @@ class TestAlignCli(BasicTestCase):

def test_invoke_align(self):
"""Basic readalongs align invocation and some variants"""
output = join(self.tempdir, "output")
output = self.tempdir / "output"
with open("image-for-page1.jpg", "wb"):
pass
# Run align from plain text
Expand All @@ -50,44 +50,44 @@ def test_invoke_align(self):
join(self.data_dir, "sample-config.json"),
self.add_bom(join(self.data_dir, "ej-fra.txt")),
join(self.data_dir, "ej-fra.m4a"),
output,
str(output),
],
)
# print(results.output)
self.assertEqual(results.exit_code, 0)
expected_output_files = [
"output.readalong",
"output.m4a",
"index.html",
"www/output.readalong",
"www/output.m4a",
"www/index.html",
"output.TextGrid",
"output.eaf",
"output_sentences.srt",
"output_sentences.vtt",
"output_words.srt",
"output_words.vtt",
"readme.txt",
"www/readme.txt",
]
for f in expected_output_files:
self.assertTrue(
exists(join(output, f)), f"successful alignment should have created {f}"
(output / f).exists(), f"successful alignment should have created {f}"
)
with open(join(output, "index.html"), encoding="utf8") as f:
with open(output / "www/index.html", encoding="utf8") as f:
self.assertIn(
'<read-along href="output.readalong" audio="output.m4a"',
f.read(),
)
self.assertTrue(
exists(join(output, "tempfiles", "output.tokenized.readalong")),
(output / "tempfiles/output.tokenized.readalong").exists(),
"alignment with -s should have created tempfiles/output.tokenized.readalong",
)
with open(
join(output, "tempfiles", "output.tokenized.readalong"),
output / "tempfiles/output.tokenized.readalong",
"r",
encoding="utf-8",
) as f:
self.assertNotIn("\ufeff", f.read())
self.assertTrue(
exists(join(output, "assets", "image-for-page1.jpg")),
(output / "www/assets/image-for-page1.jpg").exists(),
"alignment with image files should have copied image-for-page1.jpg to assets",
)
self.assertIn("image-for-page2.jpg is accessible ", results.stdout)
Expand All @@ -99,9 +99,9 @@ def test_invoke_align(self):
# Move the alignment output to compare with further down
# We cannot just output to a different name because changing the output file name
# changes the contents of the output.
output1 = output + "1"
output1 = str(output) + "1"
os.rename(output, output1)
self.assertFalse(exists(output), "os.rename() should have moved dir")
self.assertFalse(output.exists(), "os.rename() should have moved dir")

# Run align again, but on an XML input file with various added DNA text
results_dna = self.runner.invoke(
Expand All @@ -116,22 +116,22 @@ def test_invoke_align(self):
join(self.data_dir, "sample-config.json"),
self.add_bom(join(self.data_dir, "ej-fra-dna.readalong")),
join(self.data_dir, "ej-fra.m4a"),
output,
str(output),
],
)
self.assertEqual(results_dna.exit_code, 0)
# print(results_dna.stdout)
self.assertTrue(
exists(join(output, "output.readalong")),
(output / "www/output.readalong").exists(),
"successful alignment with DNA should have created output.readalong",
)
self.assertTrue(
exists(join(output, "output.xhtml")),
(output / "output.xhtml").exists(),
"successful alignment with -o xhtml should have created output.xhtml",
)
self.assertIn("Please copy image-for-page1.jpg to ", results_dna.stdout)
self.assertFalse(
exists(join(output, "assets", "image-for-page1.jpg")),
(output / "www/assets/image-for-page1.jpg").exists(),
"image-for-page1.jpg was not on disk, cannot have been copied",
)
self.assertIn(
Expand All @@ -144,7 +144,7 @@ def test_invoke_align(self):
[
join(self.data_dir, "ej-fra-dna.readalong"),
join(self.data_dir, "ej-fra.m4a"),
output,
str(output),
],
)
self.assertNotEqual(results_output_exists.exit_code, 0)
Expand All @@ -158,7 +158,7 @@ def test_invoke_align(self):
[
join(self.data_dir, "ej-fra-dna.readalong"),
join(self.data_dir, "ej-fra.m4a"),
join(output, "output.readalong"),
str(output / "www/output.readalong"),
],
)
self.assertNotEqual(results_output_is_regular_file, 0)
Expand Down Expand Up @@ -187,11 +187,11 @@ def test_align_with_package(self):
# print(results_html.output)
self.assertEqual(results_html.exit_code, 0)
self.assertTrue(
exists(join(output, "html.html")),
"succesful html alignment should have created html/html.html",
exists(join(output, "Offline-HTML", "html.html")),
"successful html alignment should have created html/Offline-HTML/html.html",
)

with open(join(output, "html.html"), "rb") as fhtml:
with open(join(output, "Offline-HTML", "html.html"), "rb") as fhtml:
path_bytes = fhtml.read()
htmldoc = fromstring(path_bytes)
b64_pattern = r"data:[\w\/\-\+]*;base64,\w*"
Expand All @@ -205,7 +205,7 @@ def test_align_with_package(self):
def not_test_permission_denied(self):
"""Non-portable test to make sure denied permission triggers an error -- disabled"""
# This test is not stable, just disable it.
# It apparently also does not work correctly on M1 Macs either, even in Docker.
# It apparently does not work correctly on M1 Macs either, even in Docker.

import platform

Expand Down Expand Up @@ -623,7 +623,7 @@ def slurp_text(filename, encoding):
self.assertNotEqual(slurp_bin(base_file), slurp_bin(bom_file))
self.assertEqual(b"\xef\xbb\xbf" + slurp_bin(base_file), slurp_bin(bom_file))

bom_file_pathlib = self.add_bom(pathlib.Path(base_file))
bom_file_pathlib = self.add_bom(Path(base_file))
self.assertEqual(
slurp_text(base_file, "utf-8"), slurp_text(bom_file_pathlib, "utf-8-sig")
)
Expand Down
8 changes: 4 additions & 4 deletions test/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ def test_call_align(self):
self.assertTrue(exception is None)
self.assertIn("Words (<w>) not present; tokenizing", log)
expected_output_files = (
"output.readalong",
"output.m4a",
"www/output.readalong",
"www/output.m4a",
"output.TextGrid",
"output_sentences.srt",
"output_words.srt",
"index.html",
"output.html",
"www/index.html",
"Offline-HTML/output.html",
)
for f in expected_output_files:
self.assertTrue(
Expand Down
6 changes: 3 additions & 3 deletions test/test_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def test_align_sample(self):
if process.returncode != 0:
LOGGER.error("Subprocess readalongs align failed: %s", process.stderr)
# Check Result
raspath = Path(output_path)
raspath = Path(output_path) / "www"
ras_files = raspath.glob("*.readalong")
self.assertTrue(
next(ras_files, False),
Expand All @@ -108,7 +108,7 @@ def test_align_removed(self):
if process.returncode != 0:
LOGGER.error("Subprocess readalongs align failed: %s", process.stderr)
# Check Result
raspath = Path(output_path)
raspath = Path(output_path) / "www"
ras_files = raspath.glob("*.readalong")
self.assertTrue(
next(ras_files, False),
Expand All @@ -135,7 +135,7 @@ def test_align_muted(self):
if process.returncode != 0:
LOGGER.error("Subprocess readalongs align failed: %s", process.stderr)
# Check Result
raspath = Path(output_path)
raspath = Path(output_path) / "www"
ras_files = raspath.glob("*.readalong")
self.assertTrue(
next(ras_files, False),
Expand Down
Loading
Loading