From 42d886b7deddd8fdbbcfc94ed4444c9c071b24f5 Mon Sep 17 00:00:00 2001 From: Eric Joanis Date: Wed, 31 Jan 2024 15:27:13 -0500 Subject: [PATCH 1/6] test: make test/run.py DRYer, make dev the default suite, and optimize a bit While we do not need to really optimize test suites, test_web_api.py load time was annoying me while testing run.py, so I tucked its expensive imports away. --- test/run.py | 26 ++++++++------ test/test_web_api.py | 86 +++++++++++++++++++++++++++----------------- 2 files changed, 69 insertions(+), 43 deletions(-) diff --git a/test/run.py b/test/run.py index 456af8ef..51dcf71c 100755 --- a/test/run.py +++ b/test/run.py @@ -94,16 +94,23 @@ def describe_suite(suite: TestSuite): ) +SUITES = ["all", "dev", "e2e", "prod", "api", "other"] + + def run_tests(suite: str, describe: bool = False) -> bool: """Run the specified test suite. Args: - suite: one of "all", "dev", etc specifying which suite to run + suite: one of SUITES, "dev" if the empty string describe: if True, list all the test cases instead of running them. Returns: True iff success """ + if not suite: + LOGGER.info("No test suite specified, defaulting to dev.") + suite = "dev" + if suite == "e2e": test_suite = TestSuite(e2e_tests) elif suite == "api": @@ -116,8 +123,7 @@ def run_tests(suite: str, describe: bool = False) -> bool: test_suite = TestSuite(other_tests) else: LOGGER.error( - "Sorry, you need to select a Test Suite to run, one of: " - "dev, all (or prod), e2e, other" + "Sorry, you need to select a Test Suite to run, one of: " + " ".join(SUITES) ) return False @@ -126,7 +132,10 @@ def run_tests(suite: str, describe: bool = False) -> bool: return True else: runner = TextTestRunner(verbosity=3) - return runner.run(test_suite).wasSuccessful() + success = runner.run(test_suite).wasSuccessful() + if not success: + LOGGER.error("Some tests failed. Please see log above.") + return success if __name__ == "__main__": @@ -134,11 +143,6 @@ def run_tests(suite: str, describe: bool = False) -> bool: if describe: sys.argv.remove("--describe") - try: - result = run_tests(sys.argv[1], describe) - if not result: - LOGGER.error("Some tests failed. Please see log above.") - sys.exit(1) - except IndexError: - LOGGER.error("Please specify a test suite to run: i.e. 'dev' or 'all'") + result = run_tests("" if len(sys.argv) <= 1 else sys.argv[1], describe) + if not result: sys.exit(1) diff --git a/test/test_web_api.py b/test/test_web_api.py index 5115b378..b927709a 100755 --- a/test/test_web_api.py +++ b/test/test_web_api.py @@ -6,7 +6,6 @@ from unittest import main from basic_test_case import BasicTestCase -from fastapi.testclient import TestClient from lxml import etree from readalongs.log import LOGGER @@ -14,12 +13,21 @@ from readalongs.text.convert_xml import convert_xml from readalongs.text.tokenize_xml import tokenize_xml from readalongs.util import get_langs -from readalongs.web_api import OutputFormat, create_grammar, web_api_app - -API_CLIENT = TestClient(web_api_app) class TestWebApi(BasicTestCase): + _API_CLIENT = None + + @property + def API_CLIENT(self): + from fastapi.testclient import TestClient + + from readalongs.web_api import web_api_app + + if TestWebApi._API_CLIENT is None: + TestWebApi._API_CLIENT = TestClient(web_api_app) + return TestWebApi._API_CLIENT + def slurp_data_file(self, filename: str) -> str: """Convenience function to slurp a whole file in self.data_dir""" with open(os.path.join(self.data_dir, filename), encoding="utf8") as f: @@ -32,17 +40,17 @@ def test_assemble_from_plain_text(self): "type": "text/plain", "text_languages": ["fra"], } - response = API_CLIENT.post("/api/v1/assemble", json=request) + response = self.API_CLIENT.post("/api/v1/assemble", json=request) self.assertEqual(response.status_code, 200) def test_bad_path(self): # Test a request to a path that doesn't exist - response = API_CLIENT.get("/pathdoesntexist") + response = self.API_CLIENT.get("/pathdoesntexist") self.assertEqual(response.status_code, 404) def test_bad_method(self): # Test a request to a valid path with a bad method - response = API_CLIENT.get("/api/v1/assemble") + response = self.API_CLIENT.get("/api/v1/assemble") self.assertEqual(response.status_code, 405) def test_assemble_from_xml(self): @@ -53,7 +61,7 @@ def test_assemble_from_xml(self): "type": "application/readalong+xml", "text_languages": ["fra"], } - response = API_CLIENT.post("/api/v1/assemble", json=request) + response = self.API_CLIENT.post("/api/v1/assemble", json=request) self.assertEqual(response.status_code, 200) def test_illformed_xml(self): @@ -63,7 +71,7 @@ def test_illformed_xml(self): "type": "application/readalong+xml", "text_languages": ["fra"], } - response = API_CLIENT.post("/api/v1/assemble", json=request) + response = self.API_CLIENT.post("/api/v1/assemble", json=request) self.assertEqual(response.status_code, 422) def test_invalid_ras(self): @@ -73,7 +81,7 @@ def test_invalid_ras(self): "type": "application/readalong+xml", "text_languages": ["fra"], } - response = API_CLIENT.post("/api/v1/assemble", json=request) + response = self.API_CLIENT.post("/api/v1/assemble", json=request) self.assertEqual(response.status_code, 422) def test_create_grammar(self): @@ -84,6 +92,8 @@ def test_create_grammar(self): tokenized = tokenize_xml(parsed) ids_added = add_ids(tokenized) g2ped, valid = convert_xml(ids_added) + from readalongs.web_api import create_grammar + word_dict, text = create_grammar(g2ped) self.assertTrue(valid) self.assertEqual(len(word_dict), len(text.split())) @@ -96,7 +106,7 @@ def test_bad_g2p(self): "type": "text/plain", "text_languages": ["test"], } - response = API_CLIENT.post("/api/v1/assemble", json=request) + response = self.API_CLIENT.post("/api/v1/assemble", json=request) self.assertIn("No language called", response.json()["detail"]) self.assertEqual(response.status_code, 422) @@ -107,7 +117,7 @@ def test_g2p_faiture(self): "type": "text/plain", "text_languages": ["fra"], } - response = API_CLIENT.post("/api/v1/assemble", json=request) + response = self.API_CLIENT.post("/api/v1/assemble", json=request) self.assertEqual(response.status_code, 422) content = response.json() self.assertIn("No valid g2p conversion", content["detail"]) @@ -119,7 +129,7 @@ def test_no_words(self): "type": "text/plain", "text_languages": ["eng"], } - response = API_CLIENT.post("/api/v1/assemble", json=request) + response = self.API_CLIENT.post("/api/v1/assemble", json=request) self.assertEqual(response.status_code, 422) content = response.json() self.assertIn("Could not find any words", content["detail"]) @@ -132,7 +142,7 @@ def test_empty_g2p(self): "type": "text/plain", "text_languages": ["eng", "und"], } - response = API_CLIENT.post("/api/v1/assemble", json=request) + response = self.API_CLIENT.post("/api/v1/assemble", json=request) self.assertEqual(response.status_code, 422) content_log = response.json()["detail"] for message_part in ["The output of the g2p process", "24", "23", "is empty"]: @@ -140,7 +150,7 @@ def test_empty_g2p(self): def test_langs(self): # Test the langs endpoint - response = API_CLIENT.get("/api/v1/langs") + response = self.API_CLIENT.get("/api/v1/langs") codes = [x["code"] for x in response.json()] self.assertEqual(set(codes), set(get_langs()[0])) self.assertEqual(codes, list(sorted(codes))) @@ -156,7 +166,7 @@ def test_logs(self): "debug": True, "text_languages": ["fra", "und"], } - response = API_CLIENT.post("/api/v1/assemble", json=request) + response = self.API_CLIENT.post("/api/v1/assemble", json=request) content = response.json() # print("Content", content) self.assertIn('Could not g2p "ña" as French', content["log"]) @@ -169,7 +179,7 @@ def test_debug(self): "debug": True, "text_languages": ["fra"], } - response = API_CLIENT.post("/api/v1/assemble", json=request) + response = self.API_CLIENT.post("/api/v1/assemble", json=request) content = response.json() self.assertEqual(content["input"], request) self.assertGreater(len(content["tokenized"]), 10) @@ -182,7 +192,7 @@ def test_debug(self): "type": "text/plain", "text_languages": ["fra"], } - response = API_CLIENT.post("/api/v1/assemble", json=request) + response = self.API_CLIENT.post("/api/v1/assemble", json=request) content = response.json() self.assertIsNone(content["input"]) self.assertIsNone(content["tokenized"]) @@ -214,7 +224,9 @@ def test_convert_to_TextGrid(self): "dur": 83.1, "ras": self.hej_verden_xml, } - response = API_CLIENT.post("/api/v1/convert_alignment/textgrid", json=request) + response = self.API_CLIENT.post( + "/api/v1/convert_alignment/textgrid", json=request + ) self.assertEqual(response.status_code, 200) self.assertIn("aligned.TextGrid", response.headers["content-disposition"]) self.assertEqual( @@ -276,7 +288,9 @@ class = "IntervalTier" request = { "ras": self.hej_verden_xml, } - response = API_CLIENT.post("/api/v1/convert_alignment/textgrid", json=request) + response = self.API_CLIENT.post( + "/api/v1/convert_alignment/textgrid", json=request + ) self.assertEqual(response.status_code, 200) self.assertIn("aligned.TextGrid", response.headers["content-disposition"]) self.assertNotIn("xmax = 83.100000", response.text) @@ -286,7 +300,7 @@ def test_convert_to_eaf(self): "dur": 83.1, "ras": self.hej_verden_xml, } - response = API_CLIENT.post("/api/v1/convert_alignment/eaf", json=request) + response = self.API_CLIENT.post("/api/v1/convert_alignment/eaf", json=request) self.assertEqual(response.status_code, 200) self.assertIn(" Date: Thu, 1 Feb 2024 12:27:01 -0500 Subject: [PATCH 2/6] fix: bump fastapi to minimum Pydantic v2 compatible version --- requirements.min.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.min.txt b/requirements.min.txt index 56b67dca..f54b21d3 100644 --- a/requirements.min.txt +++ b/requirements.min.txt @@ -5,8 +5,7 @@ click>=8.0.4 coloredlogs>=10.0 # Need to install this specifically from git as the sdist is broken editdistance @ git+https://github.com/roy-ht/editdistance.git@v0.6.2 ; python_version == '3.12' -fastapi>=0.78.0 -# Note: when we move g2p to >=2, we will need fastapi>=0.100.0 which requires httpx +fastapi>=0.100.0 g2p>=1.1.20230822, <2.1 httpx>=0.24.1 lxml==4.9.4 From acf2c6664f19ed37410710259298c1fc7be290da Mon Sep 17 00:00:00 2001 From: Eric Joanis Date: Thu, 1 Feb 2024 13:25:27 -0500 Subject: [PATCH 3/6] ci: bump actions to node20 version since node16 is deprecated --- .github/workflows/matrix-tests.yml | 4 ++-- .github/workflows/tests.yml | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/matrix-tests.yml b/.github/workflows/matrix-tests.yml index 7e7160ec..c784b613 100644 --- a/.github/workflows/matrix-tests.yml +++ b/.github/workflows/matrix-tests.yml @@ -13,10 +13,10 @@ jobs: python-version: ["3.8", "3.9", "3.10", "3.11"] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: FedericoCarboni/setup-ffmpeg@v3 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install Python dependencies diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index dcdfba9c..13693ad3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -9,12 +9,12 @@ jobs: # #no-ci in the commit log flags commit we don't want CI-validated if: ${{ !contains(github.event.head_commit.message, '#no-ci') }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: FedericoCarboni/setup-ffmpeg@v3 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.7" cache: "pip" @@ -46,7 +46,7 @@ jobs: # Legal check: make sure we don't have or introduce GPL dependencies if pip-licenses | grep -v 'Artistic License' | grep -v LGPL | grep GNU; then echo 'Please avoid introducing *GPL dependencies'; false; fi - - uses: codecov/codecov-action@v3 + - uses: codecov/codecov-action@v4 with: directory: ./test token: ${{ secrets.CODECOV_TOKEN }} # optional but apparently makes upload more reliable @@ -82,12 +82,12 @@ jobs: runs-on: windows-latest if: ${{ !contains(github.event.head_commit.message, '#no-ci') }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: FedericoCarboni/setup-ffmpeg@v3 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.7" cache: "pip" From 2ecadb61c1d3bc008f9b542972b639a200d818c9 Mon Sep 17 00:00:00 2001 From: Eric Joanis Date: Thu, 1 Feb 2024 12:27:52 -0500 Subject: [PATCH 4/6] test: quiet the test suites by quenching pointless DEBUG logs audio_utils.py: we only ever want to see errors from pydub, no debug info, so set the pydub.converter logger to WARNING. This mostly has the effect of quietting previously very verbose test suites, but in general we just don't want to see DEBUG and INFO from pydub, WARNING and ERROR messages are plenty. basic_test_case.py: don't start by setting the log level to DEBUG, and in tearDown(), when it's DEBUG reset it to INFO, so an individual case can trigger DEBUG, e.g., with --debug, and test the effect, without making the rest of the suite verbose. remaining wish item I can't solve: test_force_align.py calls align_audio with debug_aligner=True once, for coverage, but I cannot figure out how to capture the logs from SoundSwallower, its own C stdout/stderr do not seem to be redirectable from Python. --- readalongs/audio_utils.py | 4 ++++ test/basic_test_case.py | 8 +++++++- test/test_force_align.py | 4 ++-- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/readalongs/audio_utils.py b/readalongs/audio_utils.py index 7ba85f7c..ade658a3 100644 --- a/readalongs/audio_utils.py +++ b/readalongs/audio_utils.py @@ -4,12 +4,16 @@ in millisecond slices and lets us manipulate them as if they were simple lists. """ +import logging from typing import Union from pydub import AudioSegment from readalongs.log import LOGGER +# quiet pydub's logging +logging.getLogger("pydub.converter").setLevel(logging.WARNING) + def join_section(audio: AudioSegment, audio_to_insert: AudioSegment, start: int): """Given two AudioSegments, insert the second into the first at start (ms)""" diff --git a/test/basic_test_case.py b/test/basic_test_case.py index 5f10c27d..719feccc 100644 --- a/test/basic_test_case.py +++ b/test/basic_test_case.py @@ -1,5 +1,6 @@ """Common base class for the ReadAlongs test suites""" +import logging import tempfile from pathlib import Path from unittest import TestCase @@ -20,7 +21,6 @@ class BasicTestCase(TestCase): text_file = self.data_dir / "ej-fra.txt" """ - LOGGER.setLevel("DEBUG") data_dir = Path(__file__).parent / "data" # Set this to True to keep the temp dirs after running, for manual inspection @@ -59,3 +59,9 @@ def tearDown(self): """ if not self.keep_temp_dir_after_running: self.tempdirobj.cleanup() + + if LOGGER.level == logging.DEBUG: + # LOGGER.error("Logging level is DEBUG") + # Some test cases can set the logging level to DEBUG when they pass + # --debug to a CLI command, but don't let that affect subsequent tests. + LOGGER.setLevel(logging.INFO) diff --git a/test/test_force_align.py b/test/test_force_align.py index 43032357..d27e2d4c 100755 --- a/test/test_force_align.py +++ b/test/test_force_align.py @@ -1,7 +1,7 @@ #!/usr/bin/env python """ -Test force-alignment with SoundsSwallower FSG search from Python API +Test force-alignment with SoundSwallower FSG search from Python API """ import os @@ -26,7 +26,7 @@ class TestForceAlignment(BasicTestCase): - """Unit testing suite for forced-alignment with SoundsSwallower""" + """Unit testing suite for forced-alignment with SoundSwallower""" def test_align(self): """Basic alignment test case with XML input""" From 0268efb21f21646c3a0daade2514251e253e2f46 Mon Sep 17 00:00:00 2001 From: Eric Joanis Date: Fri, 2 Feb 2024 12:07:58 -0500 Subject: [PATCH 5/6] ci: matrix test Python 3.12 too --- .github/workflows/matrix-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/matrix-tests.yml b/.github/workflows/matrix-tests.yml index c784b613..b5cdec1f 100644 --- a/.github/workflows/matrix-tests.yml +++ b/.github/workflows/matrix-tests.yml @@ -10,7 +10,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest] - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 From e36e762ea177b5db9f35e78e364ab6b863c221ef Mon Sep 17 00:00:00 2001 From: Eric Joanis Date: Fri, 2 Feb 2024 14:34:01 -0500 Subject: [PATCH 6/6] docs: document how our builds on Heroku are controlled --- readme-heroku.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 readme-heroku.md diff --git a/readme-heroku.md b/readme-heroku.md new file mode 100644 index 00000000..06f56443 --- /dev/null +++ b/readme-heroku.md @@ -0,0 +1,17 @@ +Our production Heroku deployment is controlled by the following files: + - `Procfile`: tells Heroku what command to launch in each Dyno; + - `runtime.txt`: tells Heroku which run-time engine to use (i.e., which version of Python); + + Heroku detects Python by default, but `runtime.txt` lets us specify/bump the version as needed; + - `requirements.txt`: tells Heroku what our production dependencies are. + +Updating `g2p`: + - By default, `g2p` only gets updated for `readalong-studio` on Heroku when: + - we make a new release of `g2p` on PyPI + - we update `requirements.min.txt` here to ask for that release + - Manual override: it is also possible to update g2p to the current commit on the `main` branch using this procedure: + - Change `requirements.min.txt` to specify `g2p @ git+https://github.com/roedoejet/g2p@main`. + - Commit that change to `main` in a sandbox connected to Heroku. + - `git push heroku main` + - This will force a build which will fetch the main branch of g2p from GitHub. + - Subsequent builds reuse the cached g2p, so they'll reuse this one. Exception: if `runtime.txt` is updated, a fresh build is done and g2p will be reverted to the latest published version.