Skip to content

Commit

Permalink
improve subparser documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
freddyheppell committed Jul 3, 2024
1 parent efe2404 commit 8c4af22
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 5 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -207,3 +207,9 @@ For each resolved link, translation, or media, a destination is set containing i
### 5. Export

For each type, a subset of its columns is selected and exported to its own JSON file.

## Acknowledgements and License

This software is made available under the terms of the [Apache License version 2.0](LICENSE).

A portion of this software (contained within the `extractor.dl` module) is adapted from [WPJsonScraper](https://github.com/MickaelWalter/wp-json-scraper) by Mickael Walter, made available under the terms of the [MIT license](src/extractor/dl/LICENSE.txt).
2 changes: 1 addition & 1 deletion src/extractor/cli/_dl.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

def register_dl_parser(subparsers):
"""Register the `dl` subcommand."""
parser_dl = subparsers.add_parser("dl")
parser_dl = subparsers.add_parser("dl", help="Download a site's content using the WordPress REST API.")
parser_dl.add_argument(
"target",
type=str,
Expand Down
2 changes: 1 addition & 1 deletion src/extractor/cli/_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

def register_extract_parser(subparsers):
"""Register the `extract` subcommand."""
parser_extract = subparsers.add_parser("extract")
parser_extract = subparsers.add_parser("extract", help="Convert the downloaded data files into a dataset.")

parser_extract.add_argument(
"json_root", help="JSON dump of the site", type=directory
Expand Down
10 changes: 7 additions & 3 deletions src/extractor/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from tqdm.auto import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm

from importlib.metadata import version
from extractor.cli._dl import do_dl, register_dl_parser
from extractor.cli._extract import do_extract, register_extract_parser

Expand All @@ -16,14 +16,18 @@ def _exec_command(args):
else:
raise ValueError("Unknown command")

def _get_version():
return version("wp-site-extractor")

def main() -> None:
"""Entrypoint for CLI."""
parser = argparse.ArgumentParser(
prog="wordpress-site-extractor",
description="Extracts posts from wordpress dump",
description="Create datasets from WordPress sites using the REST API",
)

parser.add_argument('--version', action='version', version="%(prog)s " + _get_version())

parser.add_argument(
"--log",
"-l",
Expand All @@ -40,7 +44,7 @@ def main() -> None:
)

subparsers = parser.add_subparsers(
dest="command", required=True, title="subcommands"
dest="command", required=True, title="commands",
)

register_extract_parser(subparsers)
Expand Down
1 change: 1 addition & 0 deletions src/extractor/dl/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def download_media(media, output_folder):
"""
files_number = 0
for m in tqdm(media, unit="media"):
# TODO: make this use the same session as the initial download
r = requests.get(m, stream=True)
if r.status_code == 200:
http_path = urlparse.urlparse(m).path.split("/")
Expand Down

0 comments on commit 8c4af22

Please sign in to comment.