-
Notifications
You must be signed in to change notification settings - Fork 42
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Restore-datadir #238
base: main
Are you sure you want to change the base?
Restore-datadir #238
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,20 +13,23 @@ | |
|
||
|
||
def backup_data_directory( | ||
timestamp: Optional[bool] = True, dir_backup: Optional[Union[str, Path]] = None | ||
dir_backup: Optional[Union[str, Path]] = None, | ||
timestamp: Optional[bool] = True, | ||
file_suffix: Optional[str] = None, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this is the right name. The initial understanding of this would be the last part of the name (e.g. |
||
): | ||
""" | ||
Backup the Brightway2 data directory to a `.tar.gz` (compressed tar archive) in a specified directory, or in the user's home directory by default. | ||
|
||
The file name is of the form "brightway2-data-backup.{timestamp}.tar.gz", unless `timestamp` is False, in which case the file name is "brightway2-data-backup.tar.gz". | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of extra filename parts, why not allow users to set the filename itself? This seems more reasonable than "brightway2-data-backup{file_suffix}{timestamp}.tar.gz", which is a mouthful. |
||
The file name is of the form "brightway2-data-backup{file_suffix}{timestamp}.tar.gz", unless `timestamp` is False, in which case the file name is "brightway2-data-backup{file_suffix}.tar.gz". | ||
|
||
Parameters | ||
---------- | ||
timestamp : bool, optional | ||
If True, append a timestamp to the backup file name. | ||
|
||
dir_backup : str, Path, optional | ||
Directory to backup. If None, use the user's home directory. | ||
timestamp : bool, optional | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Give a spec (e.g. ISO 8601) or example of the timestamp |
||
If True, append a timestamp to the backup file name. | ||
file_suffix : str, optional | ||
A suffix to append to the file name. | ||
|
||
Raises | ||
------ | ||
|
@@ -41,12 +44,16 @@ def backup_data_directory( | |
>>> bw2io.bw2setup() | ||
>>> bw2io.backup.backup_data_directory() | ||
Creating backup archive - this could take a few minutes... | ||
|
||
See Also | ||
-------- | ||
bw2io.backup.restore_data_directory: To restore a data directory from a backup. | ||
""" | ||
|
||
dir_backup = Path(dir_backup or Path.home()) | ||
|
||
# Check if the backup directory exists and is writable | ||
if not dir_backup.exists(): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice catch |
||
if not dir_backup.is_dir(): | ||
raise FileNotFoundError(f"The directory {dir_backup} does not exist.") | ||
if not os.access(dir_backup, os.W_OK): | ||
raise PermissionError(f"The directory {dir_backup} is not writable.") | ||
|
@@ -55,40 +62,155 @@ def backup_data_directory( | |
timestamp_str = ( | ||
f'.{datetime.datetime.now().strftime("%d-%B-%Y-%I-%M%p")}' if timestamp else "" | ||
) | ||
backup_filename = f"brightway2-data-backup{timestamp_str}.tar.gz" | ||
backup_filename = f"brightway2-data-backup{file_suffix}{timestamp_str}.tar.gz" | ||
fp = dir_backup / backup_filename | ||
|
||
# Create the backup archive | ||
print("Creating backup archive of data directory - this could take a few minutes...") | ||
print( | ||
"Creating backup archive of data directory - this could take a few minutes..." | ||
) | ||
with tarfile.open(fp, "w:gz") as tar: | ||
data_directory = Path(projects._base_data_dir) | ||
tar.add(data_directory, arcname=data_directory.name) | ||
|
||
print(f"Saved to: {fp}") | ||
|
||
return fp | ||
|
||
|
||
def restore_data_directory( | ||
fp: Union[str, Path], | ||
new_data_dir: Optional[Union[str, Path]] = None, | ||
): | ||
""" | ||
Restores a brightway data directory from a tar.gz file. If the data directory already exists, you must confirm that you want to delete it. | ||
If ``new_data_dir`` is specified, the data directory will be restored to that location. Otherwise, the data directory will be restored to the default location. e.g: ``~/.local/share/brightway3`` on Linux. | ||
|
||
Parameters | ||
---------- | ||
fp (Union[str, Path]): | ||
The file path of the tar.gz file. | ||
new_data_dir (Optional[Union[str, Path]]): | ||
The new data directory path. | ||
|
||
Raises | ||
------ | ||
ValueError: | ||
If the file doesn't exist | ||
PermissionError: | ||
If the data directory is not writable. | ||
Exception: | ||
If there's an attempted path traversal in the tar file. | ||
|
||
Returns | ||
------- | ||
data_dir (Path): | ||
The path of the restored data directory. | ||
|
||
See Also | ||
-------- | ||
bw2io.backup.backup_data_directory: To backup the data directory. | ||
""" | ||
|
||
# check if file exists | ||
fp = Path(fp) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we are doing this, then I don't see any reason to allow |
||
if not fp.is_file(): | ||
raise ValueError(f"Can't find file at path: {fp}") | ||
|
||
# if new_data_dir, set the environment variable (this will not be persistent, I only know how to do that on Linux or in a venv...) | ||
if new_data_dir: | ||
os.environ["BRIGHTWAY2_DIR"] = str(new_data_dir) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this will work. If I understand correctly, the idea is that after importing the data directory, you can use it from Instead, I think you should add an input argument to this function: |
||
data_dir = Path(new_data_dir) | ||
else: | ||
data_dir = Path(projects._base_data_dir) | ||
|
||
# Confirm that the user wants to overwrite the data directory, if it exists | ||
if data_dir.is_dir(): | ||
confirm_overwrite = input( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, no user input allowed. Make it an input argument with a scary name and default |
||
f"This will overwrite your existing brightway data directory '{data_dir}'.\nAre you really, really sure that you want to do that? (y/n) " | ||
) | ||
if confirm_overwrite.lower() == "y": | ||
shutil.rmtree(data_dir) | ||
else: | ||
print("Aborting...") | ||
return | ||
|
||
# Extract the tar file | ||
print( | ||
"Restoring brightway data directory from backup archive - this could take a few minutes..." | ||
) | ||
with tempfile.TemporaryDirectory() as td: | ||
with tarfile.open(fp, "r:gz") as tar: | ||
|
||
def is_within_directory(directory, target): | ||
abs_directory = os.path.abspath(directory) | ||
abs_target = os.path.abspath(target) | ||
|
||
prefix = os.path.commonprefix([abs_directory, abs_target]) | ||
|
||
return prefix == abs_directory | ||
|
||
def safe_extract(tar, path=".", members=None, *, numeric_owner=False): | ||
for member in tar.getmembers(): | ||
member_path = os.path.join(path, member.name) | ||
if not is_within_directory(path, member_path): | ||
raise Exception("Attempted Path Traversal in Tar File") | ||
|
||
tar.extractall(path, members, numeric_owner=numeric_owner) | ||
|
||
safe_extract(tar, td) | ||
|
||
# Find single extracted directory; don't know it ahead of time | ||
extracted_dir = [ | ||
(Path(td) / dirname) | ||
for dirname in Path(td).iterdir() | ||
if (Path(td) / dirname).is_dir() | ||
] | ||
extracted_path = extracted_dir[0] | ||
shutil.copytree(extracted_path, data_dir) | ||
|
||
print(f"Restored brightway data to directory: {data_dir}") | ||
|
||
# this block is maybe totally the wrong way to do it --> could be done better or removed | ||
if data_dir != Path(projects._base_data_dir): | ||
projects.change_base_directories(data_dir) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably better to give a reference to https://docs.brightway.dev/en/latest/content/faq/data_management.html#how-do-i-change-my-data-directory |
||
print( | ||
f""" | ||
Environmental variable 'BRIGHTWAY2_DIR' now set to {projects._base_data_dir} | ||
To use this data directory in future sessions, you must configure it. | ||
For example: | ||
\t * in a python script, use | ||
\t `os.environ['BRIGHTWAY2_DIR'] = <path to data directory> before importing bw2data, | ||
or `projects.change_base_directories(data_dir)`, after importing bw2data. | ||
\t * or in the terminal: `export BRIGHTWAY2_DIR=<path to data directory>` | ||
\t (add this to your venv activate script to make it persistent) | ||
""" | ||
) | ||
|
||
return data_dir | ||
|
||
|
||
def backup_project_directory( | ||
project: str, | ||
timestamp: Optional[bool] = True, | ||
dir_backup: Optional[Union[str, Path]] = None, | ||
timestamp: Optional[bool] = True, | ||
file_suffix: Optional[str] = None, | ||
): | ||
""" | ||
Backup project data directory to a ``.tar.gz`` (compressed tar archive) in the user's home directory, or a directory specified by ``dir_backup``. | ||
|
||
File name is of the form ``brightway2-project-{project}-backup{timestamp}.tar.gz``, unless ``timestamp`` is False, in which case the file name is ``brightway2-project-{project}-backup.tar.gz``. | ||
File name is of the form ``brightway2-project-{project}-backup{file_suffix}{timestamp}.tar.gz``, unless ``timestamp`` is False, in which case the file name is ``brightway2-project-{project}-backup{file_suffix}.tar.gz``. | ||
|
||
Parameters | ||
---------- | ||
project : str | ||
Name of the project to backup. | ||
|
||
timestamp : bool, optional | ||
If True, append a timestamp to the backup file name. | ||
|
||
dir_backup : str, Path, optional | ||
Directory to backup. If None, use the default (home)). | ||
file_suffix : str, optional | ||
A suffix to append to the file name. | ||
|
||
Returns | ||
------- | ||
|
@@ -98,7 +220,7 @@ def backup_project_directory( | |
Raises | ||
------ | ||
ValueError | ||
If the project does not exist. | ||
If the project does not exist. | ||
FileNotFoundError | ||
If the backup directory does not exist. | ||
PermissionError | ||
|
@@ -120,8 +242,12 @@ def backup_project_directory( | |
if not os.access(dir_backup, os.W_OK): | ||
raise PermissionError(f"The directory {dir_backup} is not writable.") | ||
|
||
timestamp_str = datetime.datetime.now().strftime("%d-%B-%Y-%I-%M%p") if timestamp else "" | ||
backup_filename = f"brightway2-project-{project}-backup{timestamp_str}.tar.gz" | ||
timestamp_str = ( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Second time - factor this out into a standalone function. |
||
datetime.datetime.now().strftime("%d-%B-%Y-%I-%M%p") if timestamp else "" | ||
) | ||
backup_filename = ( | ||
f"brightway2-project-{project}-backup{file_suffix}{timestamp_str}.tar.gz" | ||
) | ||
fp = dir_backup / backup_filename | ||
|
||
dir_path = Path(projects._base_data_dir) / safe_filename(project) | ||
|
@@ -171,8 +297,8 @@ def restore_project_directory( | |
-------- | ||
bw2io.backup.backup_project_directory: To backup a project directory. | ||
""" | ||
fp_path = Path(fp) | ||
if not fp_path.is_file(): | ||
fp = Path(fp) | ||
if not fp.is_file(): | ||
raise ValueError(f"Can't find file at path: {fp}") | ||
|
||
def get_project_name(fp): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't love changing argument order, but we haven't released with this yet and the new order is more logical, so OK.