diff --git a/src/extractor/cli.py b/src/extractor/cli.py index 548459d..306fe30 100644 --- a/src/extractor/cli.py +++ b/src/extractor/cli.py @@ -29,11 +29,11 @@ def main() -> None: parser.add_argument("out_dir", help="Output directory", type=empty_directory) parser.add_argument( "--scrape-root", - '-S', + "-S", help="Root directory of an HTML scrape", type=directory, required=False, - default=None + default=None, ) parser.add_argument( "--json-prefix", diff --git a/src/extractor/extractors/posts.py b/src/extractor/extractors/posts.py index 78f6aea..a6bce9b 100644 --- a/src/extractor/extractors/posts.py +++ b/src/extractor/extractors/posts.py @@ -108,7 +108,9 @@ def load_posts( lambda link: load_scrape(scrape_urls_files, link) ) posts_df[["language", "translations"]] = posts_df.apply( - lambda r: extract_translations(r["scrape_bs"], r["link"], translation_pickers), + lambda r: extract_translations( + r["scrape_bs"], r["link"], translation_pickers + ), axis=1, ) else: diff --git a/tests/extract/test_extract.py b/tests/extract/test_extract.py index 84cbf8c..9652f85 100644 --- a/tests/extract/test_extract.py +++ b/tests/extract/test_extract.py @@ -1,7 +1,6 @@ from pathlib import Path import pandas as pd - from extractor.extract import WPExtractor @@ -24,9 +23,7 @@ def _assert_output_valid(out_dir: Path): def test_extract_no_scrape(datadir): - extractor = WPExtractor( - json_root=datadir / "json", - ) + extractor = WPExtractor(json_root=datadir / "json") extractor.extract() _assert_extractor_valid(extractor) @@ -38,15 +35,12 @@ def test_extract_no_scrape(datadir): def test_extract_scrape(datadir): - extractor = WPExtractor( - json_root=datadir / "json", - scrape_root=datadir / "scrape" - ) + extractor = WPExtractor(json_root=datadir / "json", scrape_root=datadir / "scrape") extractor.extract() _assert_extractor_valid(extractor) - assert 'translations' in extractor.posts.columns + assert "translations" in extractor.posts.columns out_dir = datadir / "out_json" out_dir.mkdir()