diff --git a/docs/usage/download.md b/docs/usage/download.md index 5ebc676..23ae03b 100644 --- a/docs/usage/download.md +++ b/docs/usage/download.md @@ -14,6 +14,10 @@ $ wpextract dl target out_json `out_json` : Directory to output to +**optional arguments** +`--json-prefix JSON_PREFIX` +: Output files with a prefix, e.g. supplying _20240101-example_ will output posts to `out_json/20240101-example-posts.json` + **skip data** `--no-categories` `--no-media` `--no-pages` `--no-posts` `--no-tags` `--no-users` diff --git a/src/wpextract/cli/_dl.py b/src/wpextract/cli/_dl.py index 5faeb0a..c2b34fe 100644 --- a/src/wpextract/cli/_dl.py +++ b/src/wpextract/cli/_dl.py @@ -3,6 +3,7 @@ from wpextract.cli._shared import _register_shared from wpextract.dl.downloader import WPDownloader from wpextract.dl.requestsession import RequestSession +from wpextract.util.args import directory dl_types = ["categories", "media", "pages", "posts", "tags", "users"] @@ -19,7 +20,7 @@ def register_dl_parser(subparsers): ) parser_dl.add_argument( "out_json", - type=str, + type=directory, help="the path of the output JSON file", ) parser_dl.add_argument( @@ -28,6 +29,14 @@ def register_dl_parser(subparsers): default=None, help="Path to download media files, skipped if not supplied.", ) + parser_dl.add_argument( + "--json-prefix", + "-P", + help="Prefix to the JSON files", + type=str, + required=False, + default=None, + ) _register_shared(parser_dl) @@ -42,7 +51,7 @@ def register_dl_parser(subparsers): parser_dl.set_defaults(**{dl_type: True for dl_type in dl_types}) auth_group = parser_dl.add_argument_group("authentication") - auth_group.add_argument("--proxy", "-P", help="Define a proxy server to use") + auth_group.add_argument("--proxy", help="Define a proxy server to use") auth_group.add_argument( "--auth", help="Define HTTP Basic credentials in format username:password" ) @@ -129,6 +138,7 @@ def do_dl(parser, args: Namespace): out_path=args.out_json, data_types=types_to_dl, session=session, + 
json_prefix=args.json_prefix, ) downloader.download() diff --git a/src/wpextract/dl/downloader.py b/src/wpextract/dl/downloader.py index 206d6e5..76d66d7 100644 --- a/src/wpextract/dl/downloader.py +++ b/src/wpextract/dl/downloader.py @@ -17,6 +17,7 @@ def __init__( out_path: Path, data_types: List[str], session: Optional[RequestSession] = None, + json_prefix: Optional[str] = None, ): """Initializes the WPDownloader object. @@ -25,6 +26,7 @@ def __init__( out_path: the output path for the downloaded data data_types: set of data types to download session : request session. Will be created from default constructor if not provided. + json_prefix: prefix to prepend to JSON file names """ self.target = target self.out_path = out_path @@ -32,6 +34,7 @@ def __init__( self.session = session if session else RequestSession() self._test_session() self.scanner = WPApi(self.target, session=self.session) + self.json_prefix = json_prefix def _test_session(self): try: @@ -40,7 +43,6 @@ def _test_session(self): except Exception as e: logging.error("Failed to connect to the server") raise e - exit(0) def download(self): """Download and export the requested data lists.""" @@ -131,7 +133,8 @@ def _list_obj(self, obj_type, start=None, limit=None, cache=True): WPDownloader.export_decorator( export_func=prop["export_func"], export_str=prop["obj_name"].lower(), - json=self.out_path, + json_path=self.out_path, + json_prefix=self.json_prefix, values=obj_list, ) except WordPressApiNotV2: @@ -142,9 +145,13 @@ def _list_obj(self, obj_type, start=None, limit=None, cache=True): @staticmethod def export_decorator( # noqa: D102 - export_func, export_str, json, values, kwargs=None + export_func, export_str, json_path: Path, json_prefix: str, values, kwargs=None ): kwargs = kwargs or {} - if json is not None: - json_file = json + "-" + export_str - export_func(values, Exporter.JSON, json_file, **kwargs) + + filename = export_str + ".json" + if json_prefix is not None: + filename = json_prefix + "-" 
+ filename + + json_file = json_path / filename + export_func(values, Exporter.JSON, json_file, **kwargs) diff --git a/src/wpextract/dl/exporter.py b/src/wpextract/dl/exporter.py index 7a44dbf..79a0535 100644 --- a/src/wpextract/dl/exporter.py +++ b/src/wpextract/dl/exporter.py @@ -146,21 +146,6 @@ def setup_export(vlist, parameters_to_unescape): return exported_list - @staticmethod - def prepare_filename(filename, fmt): - """Returns a filename with the proper extension according to the given format - - Args: - filename: the filename to clean - fmt: the file format - - Returns: - the cleaned filename - """ - if filename[-5:] != ".json" and fmt == Exporter.JSON: - filename += ".json" - return filename - @staticmethod def write_file(filename, fmt, data): """Writes content to the given file using the given format. @@ -200,7 +185,6 @@ def export_posts( [["title", "rendered"], ["content", "rendered"], ["excerpt", "rendered"]], ) - filename = Exporter.prepare_filename(filename, fmt) Exporter.write_file(filename, fmt, exported_posts) return len(exported_posts) @@ -221,7 +205,6 @@ def export_categories(categories, fmt, filename): [], ) - filename = Exporter.prepare_filename(filename, fmt) Exporter.write_file(filename, fmt, exported_categories) return len(exported_categories) @@ -237,8 +220,6 @@ def export_tags(tags, fmt, filename): Returns: the length of the list written to the file """ - filename = Exporter.prepare_filename(filename, fmt) - exported_tags = tags # It seems that no modification will be done for this one, so no deepcopy Exporter.write_file(filename, fmt, exported_tags) return len(exported_tags) @@ -255,8 +236,6 @@ def export_users(users, fmt, filename): Returns: the length of the list written to the file """ - filename = Exporter.prepare_filename(filename, fmt) - exported_users = users # It seems that no modification will be done for this one, so no deepcopy Exporter.write_file(filename, fmt, exported_users) return len(exported_users) @@ -285,7 +264,6 @@ def 
export_pages(pages, fmt, filename, parent_pages=None, users=None): ], ) - filename = Exporter.prepare_filename(filename, fmt) Exporter.write_file(filename, fmt, exported_pages) return len(exported_pages) @@ -312,7 +290,6 @@ def export_media(media, fmt, filename, users=None): ], ) - filename = Exporter.prepare_filename(filename, fmt) Exporter.write_file(filename, fmt, exported_media) return len(exported_media) @@ -354,6 +331,5 @@ def export_comments_interactive( [["content", "rendered"]], ) - filename = Exporter.prepare_filename(filename, fmt) Exporter.write_file(filename, fmt, exported_comments) return len(exported_comments)