Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/prefix consistency #21

Merged
merged 3 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/usage/download.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ $ wpextract dl target out_json
`out_json`
: Directory to output to

**optional arguments**

`--json-prefix JSON_PREFIX`
: Output files with a prefix, e.g. supplying _20240101-example_ will output posts to `out_json/20240101-example-posts.json`

**skip data**

`--no-categories` `--no-media` `--no-pages` `--no-posts` `--no-tags` `--no-users`
Expand Down
14 changes: 12 additions & 2 deletions src/wpextract/cli/_dl.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from wpextract.cli._shared import _register_shared
from wpextract.dl.downloader import WPDownloader
from wpextract.dl.requestsession import RequestSession
from wpextract.util.args import directory

dl_types = ["categories", "media", "pages", "posts", "tags", "users"]

Expand All @@ -19,7 +20,7 @@ def register_dl_parser(subparsers):
)
parser_dl.add_argument(
"out_json",
type=str,
type=directory,
help="the path of the output JSON file",
)
parser_dl.add_argument(
Expand All @@ -28,6 +29,14 @@ def register_dl_parser(subparsers):
default=None,
help="Path to download media files, skipped if not supplied.",
)
parser_dl.add_argument(
"--json-prefix",
"-P",
help="Prefix to the JSON files",
type=str,
required=False,
default=None,
)

_register_shared(parser_dl)

Expand All @@ -42,7 +51,7 @@ def register_dl_parser(subparsers):
parser_dl.set_defaults(**{dl_type: True for dl_type in dl_types})

auth_group = parser_dl.add_argument_group("authentication")
auth_group.add_argument("--proxy", "-P", help="Define a proxy server to use")
auth_group.add_argument("--proxy", help="Define a proxy server to use")
auth_group.add_argument(
"--auth", help="Define HTTP Basic credentials in format username:password"
)
Expand Down Expand Up @@ -129,6 +138,7 @@ def do_dl(parser, args: Namespace):
out_path=args.out_json,
data_types=types_to_dl,
session=session,
json_prefix=args.json_prefix,
)

downloader.download()
Expand Down
19 changes: 13 additions & 6 deletions src/wpextract/dl/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def __init__(
out_path: Path,
data_types: List[str],
session: Optional[RequestSession] = None,
json_prefix: Optional[str] = None,
):
"""Initializes the WPDownloader object.

Expand All @@ -25,13 +26,15 @@ def __init__(
out_path: the output path for the downloaded data
data_types: set of data types to download
session : request session. Will be created from default constructor if not provided.
json_prefix: prefix to prepend to JSON file names
"""
self.target = target
self.out_path = out_path
self.data_types = data_types
self.session = session if session else RequestSession()
self._test_session()
self.scanner = WPApi(self.target, session=self.session)
self.json_prefix = json_prefix

def _test_session(self):
try:
Expand All @@ -40,7 +43,6 @@ def _test_session(self):
except Exception as e:
logging.error("Failed to connect to the server")
raise e
exit(0)

def download(self):
"""Download and export the requested data lists."""
Expand Down Expand Up @@ -131,7 +133,8 @@ def _list_obj(self, obj_type, start=None, limit=None, cache=True):
WPDownloader.export_decorator(
export_func=prop["export_func"],
export_str=prop["obj_name"].lower(),
json=self.out_path,
json_path=self.out_path,
json_prefix=self.json_prefix,
values=obj_list,
)
except WordPressApiNotV2:
Expand All @@ -142,9 +145,13 @@ def _list_obj(self, obj_type, start=None, limit=None, cache=True):

@staticmethod
def export_decorator( # noqa: D102
export_func, export_str, json, values, kwargs=None
export_func, export_str, json_path: Path, json_prefix: str, values, kwargs=None
):
kwargs = kwargs or {}
if json is not None:
json_file = json + "-" + export_str
export_func(values, Exporter.JSON, json_file, **kwargs)

filename = export_str + ".json"
if json_prefix is not None:
filename = json_prefix + "-" + filename

json_file = json_path / filename
export_func(values, Exporter.JSON, json_file, **kwargs)
24 changes: 0 additions & 24 deletions src/wpextract/dl/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,21 +146,6 @@ def setup_export(vlist, parameters_to_unescape):

return exported_list

@staticmethod
def prepare_filename(filename, fmt):
"""Returns a filename with the proper extension according to the given format

Args:
filename: the filename to clean
fmt: the file format

Returns:
the cleaned filename
"""
if filename[-5:] != ".json" and fmt == Exporter.JSON:
filename += ".json"
return filename

@staticmethod
def write_file(filename, fmt, data):
"""Writes content to the given file using the given format.
Expand Down Expand Up @@ -200,7 +185,6 @@ def export_posts(
[["title", "rendered"], ["content", "rendered"], ["excerpt", "rendered"]],
)

filename = Exporter.prepare_filename(filename, fmt)
Exporter.write_file(filename, fmt, exported_posts)
return len(exported_posts)

Expand All @@ -221,7 +205,6 @@ def export_categories(categories, fmt, filename):
[],
)

filename = Exporter.prepare_filename(filename, fmt)
Exporter.write_file(filename, fmt, exported_categories)
return len(exported_categories)

Expand All @@ -237,8 +220,6 @@ def export_tags(tags, fmt, filename):
Returns:
the length of the list written to the file
"""
filename = Exporter.prepare_filename(filename, fmt)

exported_tags = tags # It seems that no modification will be done for this one, so no deepcopy
Exporter.write_file(filename, fmt, exported_tags)
return len(exported_tags)
Expand All @@ -255,8 +236,6 @@ def export_users(users, fmt, filename):
Returns:
the length of the list written to the file
"""
filename = Exporter.prepare_filename(filename, fmt)

exported_users = users # It seems that no modification will be done for this one, so no deepcopy
Exporter.write_file(filename, fmt, exported_users)
return len(exported_users)
Expand Down Expand Up @@ -285,7 +264,6 @@ def export_pages(pages, fmt, filename, parent_pages=None, users=None):
],
)

filename = Exporter.prepare_filename(filename, fmt)
Exporter.write_file(filename, fmt, exported_pages)
return len(exported_pages)

Expand All @@ -312,7 +290,6 @@ def export_media(media, fmt, filename, users=None):
],
)

filename = Exporter.prepare_filename(filename, fmt)
Exporter.write_file(filename, fmt, exported_media)
return len(exported_media)

Expand Down Expand Up @@ -354,6 +331,5 @@ def export_comments_interactive(
[["content", "rendered"]],
)

filename = Exporter.prepare_filename(filename, fmt)
Exporter.write_file(filename, fmt, exported_comments)
return len(exported_comments)