ReadAlongs · joanise · Jul 15, 2024 · Jul 12, 2024 · Jul 12, 2024 · Jul 15, 2024
diff --git a/docs/outputs.md b/docs/outputs.md
@@ -5,11 +5,11 @@ With that in mind, there are a variety of different output formats that can be c
 
 ## Simple web deployment
 
-The default output generated by the CLI's `readalongs align` command and the `www` folder included in the Web Bundle download format from the [Web App](web-app.md) include everything you need to host your ReadAlong on your own server.
+The `www` folder generated by the CLI's `readalongs align` command or included in the Web Bundle download format from the [Web App](web-app.md) includes everything you need to host your ReadAlong on your own server.
 
 Copy the whole folder with all its contents to your web server, and that should work as is on most web servers. The `index.html` provided will generate a page that shows only your readalong.
 
-The elements of the `index.html` can also be copied into a larger page with other contents, or multiple readalongs -- see [Web Component details](#web-component-details) below.
+The elements of the `index.html` can also be copied into a larger page with other contents or multiple readalongs -- see [Web Component details](#web-component-details) below.
 
 ## WordPress deployment
 
@@ -47,14 +47,15 @@ Below is an example of a minimal implementation in a basic standalone html page.
 The above assumes the following structure, which is the default provided in the Web Bundle generated from the web app:
 
 ```txt
-web
+www
 ├── assets
 │   ├── sample.wav
 │   └── sample.readalong
-└── index.html
+├── index.html
+└── readme.txt
 ```
 
-Then you can host your site anywhere, or run it locally (`cd web && python3 -m http.server` for example)
+Then you can host your site anywhere, or run it locally (`cd www && python3 -m http.server`, for example).
 
 ## Single-file HTML
 

diff --git a/readalongs/align.py b/readalongs/align.py
@@ -40,7 +40,12 @@
 from readalongs.text.convert_xml import convert_xml
 from readalongs.text.make_dict import make_dict
 from readalongs.text.make_fsg import make_fsg
-from readalongs.text.make_package import create_web_component_html
+from readalongs.text.make_package import (
+    DEFAULT_HEADER,
+    DEFAULT_SUBHEADER,
+    DEFAULT_TITLE,
+    create_web_component_html,
+)
 from readalongs.text.tokenize_xml import tokenize_xml
 from readalongs.text.util import (
     get_word_text,
@@ -941,7 +946,12 @@ def save_readalong(
             output_formats=output_formats,
         )
 
-    ras_path = output_base + ".readalong"
+    bundle_path = os.path.join(output_dir, "www")
+    if not os.path.exists(bundle_path):
+        os.mkdir(bundle_path)
+    bundle_base = os.path.join(bundle_path, output_basename)
+
+    ras_path = bundle_base + ".readalong"
     save_xml(ras_path, align_results["tokenized"])
 
     if "xhtml" in output_formats:
@@ -950,41 +960,44 @@ def save_readalong(
         save_xml(tokenized_xhtml_path, align_results["tokenized"])
 
     audio_path = save_audio(
-        audiofile=audiofile, output_base=output_base, audiosegment=audiosegment
+        audiofile=audiofile, output_base=bundle_base, audiosegment=audiosegment
     )
 
     if "html" in output_formats:
-        html_out_path = output_base + ".html"
+        offline_html_dir = os.path.join(output_dir, "Offline-HTML")
+        html_out_path = os.path.join(offline_html_dir, output_basename + ".html")
         html_out = create_web_component_html(
             ras_path,
             audio_path,
-            config.get("title", "Title goes here"),
-            config.get("header", "Header goes here"),
-            config.get("subheader", ""),
+            config.get("title", DEFAULT_TITLE),
+            config.get("header", DEFAULT_HEADER),
+            config.get("subheader", DEFAULT_SUBHEADER),
             config.get("theme", "light"),
         )
+        if not os.path.exists(offline_html_dir):
+            os.mkdir(offline_html_dir)
         with open(html_out_path, "w", encoding="utf-8") as f:
             f.write(html_out)
 
     save_minimal_index_html(
-        os.path.join(output_dir, "index.html"),
+        os.path.join(bundle_path, "index.html"),
         os.path.basename(ras_path),
         os.path.basename(audio_path),
-        config.get("title", "Title goes here"),
-        config.get("header", "Header goes here"),
-        config.get("subheader", ""),
+        config.get("title", DEFAULT_TITLE),
+        config.get("header", DEFAULT_HEADER),
+        config.get("subheader", DEFAULT_SUBHEADER),
         config.get("theme", "light"),
     )
 
     # Copy the image files to the output's asset directory, if any are found
     if "images" in config:
-        save_images(config=config, output_dir=output_dir)
+        save_images(config=config, output_dir=bundle_path)
     save_readme_txt(
-        os.path.join(output_dir, "readme.txt"),
+        os.path.join(bundle_path, "readme.txt"),
         os.path.basename(ras_path),
         os.path.basename(audio_path),
-        config.get("header", "Header goes here"),
-        config.get("subheader", ""),
+        config.get("header", DEFAULT_HEADER),
+        config.get("subheader", DEFAULT_SUBHEADER),
         config.get("theme", "light"),
     )
 

diff --git a/readalongs/text/make_package.py b/readalongs/text/make_package.py
@@ -52,6 +52,11 @@
 """
 
 
+DEFAULT_TITLE = "ReadAlong-Studio for Interactive Storytelling"
+DEFAULT_HEADER = "Your read-along title goes here"
+DEFAULT_SUBHEADER = "Your read-along subtitle goes here"
+
+
 def encode_from_path(path: str) -> str:
     """Encode file from bytes to b64 string with data and mime signature
 
@@ -115,9 +120,9 @@ def encode_from_path(path: str) -> str:
 def create_web_component_html(
     ras_path: str,
     audio_path: str,
-    title="Title goes here",
-    header="Header goes here",
-    subheader="Subheader goes here",
+    title=DEFAULT_TITLE,
+    header=DEFAULT_HEADER,
+    subheader=DEFAULT_SUBHEADER,
     theme="light",
 ) -> str:
     import requests  # Defer expensive import

diff --git a/readalongs/text/util.py b/readalongs/text/util.py
@@ -14,6 +14,7 @@
 from collections import OrderedDict
 from datetime import datetime
 from io import TextIOWrapper
+from pathlib import Path
 from typing import IO, Union
 from unicodedata import normalize
 
@@ -104,7 +105,7 @@ def is_do_not_align(element):
     return dna in ("true", "True", "TRUE", "1")
 
 
-def load_xml(input_path: Union[str, IO]) -> etree.ElementTree:
+def load_xml(input_path: Union[str, Path, IO]) -> etree.ElementTree:
     """Safely load an XML file with etree.parse to respect encoding
 
     Return: the root of the XML etree

diff --git a/test/test_align_cli.py b/test/test_align_cli.py
@@ -5,9 +5,9 @@
 """
 
 import os
-import pathlib
 import tempfile
 from os.path import exists, join
+from pathlib import Path
 from unittest import main
 
 from basic_test_case import BasicTestCase
@@ -30,7 +30,7 @@ class TestAlignCli(BasicTestCase):
 
     def test_invoke_align(self):
         """Basic readalongs align invocation and some variants"""
-        output = join(self.tempdir, "output")
+        output = self.tempdir / "output"
         with open("image-for-page1.jpg", "wb"):
             pass
         # Run align from plain text
@@ -50,44 +50,44 @@ def test_invoke_align(self):
                 join(self.data_dir, "sample-config.json"),
                 self.add_bom(join(self.data_dir, "ej-fra.txt")),
                 join(self.data_dir, "ej-fra.m4a"),
-                output,
+                str(output),
             ],
         )
         # print(results.output)
         self.assertEqual(results.exit_code, 0)
         expected_output_files = [
-            "output.readalong",
-            "output.m4a",
-            "index.html",
+            "www/output.readalong",
+            "www/output.m4a",
+            "www/index.html",
             "output.TextGrid",
             "output.eaf",
             "output_sentences.srt",
             "output_sentences.vtt",
             "output_words.srt",
             "output_words.vtt",
-            "readme.txt",
+            "www/readme.txt",
         ]
         for f in expected_output_files:
             self.assertTrue(
-                exists(join(output, f)), f"successful alignment should have created {f}"
+                (output / f).exists(), f"successful alignment should have created {f}"
             )
-        with open(join(output, "index.html"), encoding="utf8") as f:
+        with open(output / "www/index.html", encoding="utf8") as f:
             self.assertIn(
                 '<read-along href="output.readalong" audio="output.m4a"',
                 f.read(),
             )
         self.assertTrue(
-            exists(join(output, "tempfiles", "output.tokenized.readalong")),
+            (output / "tempfiles/output.tokenized.readalong").exists(),
             "alignment with -s should have created tempfiles/output.tokenized.readalong",
         )
         with open(
-            join(output, "tempfiles", "output.tokenized.readalong"),
+            output / "tempfiles/output.tokenized.readalong",
             "r",
             encoding="utf-8",
         ) as f:
             self.assertNotIn("\ufeff", f.read())
         self.assertTrue(
-            exists(join(output, "assets", "image-for-page1.jpg")),
+            (output / "www/assets/image-for-page1.jpg").exists(),
             "alignment with image files should have copied image-for-page1.jpg to assets",
         )
         self.assertIn("image-for-page2.jpg is accessible ", results.stdout)
@@ -99,9 +99,9 @@ def test_invoke_align(self):
         # Move the alignment output to compare with further down
         # We cannot just output to a different name because changing the output file name
         # changes the contents of the output.
-        output1 = output + "1"
+        output1 = str(output) + "1"
         os.rename(output, output1)
-        self.assertFalse(exists(output), "os.rename() should have moved dir")
+        self.assertFalse(output.exists(), "os.rename() should have moved dir")
 
         # Run align again, but on an XML input file with various added DNA text
         results_dna = self.runner.invoke(
@@ -116,22 +116,22 @@ def test_invoke_align(self):
                 join(self.data_dir, "sample-config.json"),
                 self.add_bom(join(self.data_dir, "ej-fra-dna.readalong")),
                 join(self.data_dir, "ej-fra.m4a"),
-                output,
+                str(output),
             ],
         )
         self.assertEqual(results_dna.exit_code, 0)
         # print(results_dna.stdout)
         self.assertTrue(
-            exists(join(output, "output.readalong")),
+            (output / "www/output.readalong").exists(),
             "successful alignment with DNA should have created output.readalong",
         )
         self.assertTrue(
-            exists(join(output, "output.xhtml")),
+            (output / "output.xhtml").exists(),
             "successful alignment with -o xhtml should have created output.xhtml",
         )
         self.assertIn("Please copy image-for-page1.jpg to ", results_dna.stdout)
         self.assertFalse(
-            exists(join(output, "assets", "image-for-page1.jpg")),
+            (output / "www/assets/image-for-page1.jpg").exists(),
             "image-for-page1.jpg was not on disk, cannot have been copied",
         )
         self.assertIn(
@@ -144,7 +144,7 @@ def test_invoke_align(self):
             [
                 join(self.data_dir, "ej-fra-dna.readalong"),
                 join(self.data_dir, "ej-fra.m4a"),
-                output,
+                str(output),
             ],
         )
         self.assertNotEqual(results_output_exists.exit_code, 0)
@@ -158,7 +158,7 @@ def test_invoke_align(self):
             [
                 join(self.data_dir, "ej-fra-dna.readalong"),
                 join(self.data_dir, "ej-fra.m4a"),
-                join(output, "output.readalong"),
+                str(output / "www/output.readalong"),
             ],
         )
         self.assertNotEqual(results_output_is_regular_file, 0)
@@ -187,11 +187,11 @@ def test_align_with_package(self):
         # print(results_html.output)
         self.assertEqual(results_html.exit_code, 0)
         self.assertTrue(
-            exists(join(output, "html.html")),
-            "succesful html alignment should have created html/html.html",
+            exists(join(output, "Offline-HTML", "html.html")),
+            "successful html alignment should have created html/Offline-HTML/html.html",
         )
 
-        with open(join(output, "html.html"), "rb") as fhtml:
+        with open(join(output, "Offline-HTML", "html.html"), "rb") as fhtml:
             path_bytes = fhtml.read()
         htmldoc = fromstring(path_bytes)
         b64_pattern = r"data:[\w\/\-\+]*;base64,\w*"
@@ -205,7 +205,7 @@ def test_align_with_package(self):
     def not_test_permission_denied(self):
         """Non-portable test to make sure denied permission triggers an error -- disabled"""
         # This test is not stable, just disable it.
-        # It apparently also does not work correctly on M1 Macs either, even in Docker.
+        # It apparently does not work correctly on M1 Macs either, even in Docker.
 
         import platform
 
@@ -623,7 +623,7 @@ def slurp_text(filename, encoding):
         self.assertNotEqual(slurp_bin(base_file), slurp_bin(bom_file))
         self.assertEqual(b"\xef\xbb\xbf" + slurp_bin(base_file), slurp_bin(bom_file))
 
-        bom_file_pathlib = self.add_bom(pathlib.Path(base_file))
+        bom_file_pathlib = self.add_bom(Path(base_file))
         self.assertEqual(
             slurp_text(base_file, "utf-8"), slurp_text(bom_file_pathlib, "utf-8-sig")
         )

diff --git a/test/test_api.py b/test/test_api.py
@@ -34,13 +34,13 @@ def test_call_align(self):
         self.assertTrue(exception is None)
         self.assertIn("Words (<w>) not present; tokenizing", log)
         expected_output_files = (
-            "output.readalong",
-            "output.m4a",
+            "www/output.readalong",
+            "www/output.m4a",
             "output.TextGrid",
             "output_sentences.srt",
             "output_words.srt",
-            "index.html",
-            "output.html",
+            "www/index.html",
+            "Offline-HTML/output.html",
         )
         for f in expected_output_files:
             self.assertTrue(

diff --git a/test/test_audio.py b/test/test_audio.py
@@ -81,7 +81,7 @@ def test_align_sample(self):
         if process.returncode != 0:
             LOGGER.error("Subprocess readalongs align failed: %s", process.stderr)
         # Check Result
-        raspath = Path(output_path)
+        raspath = Path(output_path) / "www"
         ras_files = raspath.glob("*.readalong")
         self.assertTrue(
             next(ras_files, False),
@@ -108,7 +108,7 @@ def test_align_removed(self):
         if process.returncode != 0:
             LOGGER.error("Subprocess readalongs align failed: %s", process.stderr)
         # Check Result
-        raspath = Path(output_path)
+        raspath = Path(output_path) / "www"
         ras_files = raspath.glob("*.readalong")
         self.assertTrue(
             next(ras_files, False),
@@ -135,7 +135,7 @@ def test_align_muted(self):
         if process.returncode != 0:
             LOGGER.error("Subprocess readalongs align failed: %s", process.stderr)
         # Check Result
-        raspath = Path(output_path)
+        raspath = Path(output_path) / "www"
         ras_files = raspath.glob("*.readalong")
         self.assertTrue(
             next(ras_files, False),