Feat: describe enhancement or feature (Issue #41) (#106)
* Add dry run argument

* add dry run arg to config object

* Add dry run functionality

* lint

* lint

* lint
zoidy authored and HafeezOJ committed Oct 3, 2024
1 parent 823efc0 commit df8c07c
Showing 4 changed files with 54 additions and 17 deletions.
3 changes: 2 additions & 1 deletion README.md
@@ -33,7 +33,7 @@ ReBACH is run via the command line as outlined in the 'How to Run' section of th
- user - required: Your user email address on AP Trust
- token - required: Your user secret token on AP Trust
- items_per_page - Maximum number of objects to be returned per page by the API
- alt_identifier_starts_with - Prefix for alternate identifier in AP Trust
- alt_identifier_starts_with - Prefix for alternate identifier in AP Trust
- retries - required: Number of times the script should retry API or file system calls if it is unable to connect. Defaults to 3
- retries_wait - required: Number of seconds the script should wait between call retries if it is unable to connect. Defaults to 10
- preservation_storage_location - required: The file system location where the preservation folders/packages should be created
@@ -54,6 +54,7 @@ These parameters are only available on the command line.
|`--xfg` | The path to the configuration file to use.|
|`--ids` | A comma-separated list of article IDs to process. E.g., 12345,12356|
|`--continue-on-error`| If there is an error during the item processing stage for a given item, skip it and continue to the next item.|
|`--dry-run` | Runs all operations, excluding any that involve writing to any storage medium.|
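
For illustration, a minimal sketch of how a `store_true` flag such as `--dry-run` can flow from the command line into a config object and gate write operations. The `Config` class and `preserve_item` helper below are hypothetical stand-ins, not ReBACH's actual classes; the real wiring is shown in the app.py diff further down.

```python
import argparse

# Hypothetical minimal config holder; ReBACH's real Config class differs.
class Config:
    def __init__(self):
        self.settings = {}

    def add_setting(self, name, value):
        self.settings[name] = value

def preserve_item(item_id, config):
    # Writes are skipped entirely when dry-run is set.
    if config.settings.get('dry-run'):
        print(f"*Dry Run* Would preserve item {item_id}; no files written.")
        return
    print(f"Preserving item {item_id} to storage.")  # a real write would happen here

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dry-run', action='store_true',
                        help='Run all read-only steps; skip anything that writes to storage.')
    args = parser.parse_args()

    config = Config()
    config.add_setting(name='dry-run', value=args.dry_run)
    preserve_item(12345, config)

if __name__ == '__main__':
    main()
```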

## Execution notes
- ReBACH will attempt to fetch all items in the institutional instance. Items that are not published (curation_status != 'approved') will be ignored.
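
For context, a minimal sketch of the publication filter described above. The item dicts are illustrative only; `curation_status` is the Figshare metadata field named in the note, but ReBACH's actual fetch and filter code is not shown in this diff.

```python
# Hypothetical item list; only 'approved' items would be processed, per the note above.
all_items = [
    {"id": 12345, "curation_status": "approved"},
    {"id": 12346, "curation_status": "pending"},
]

# Items that are not published (curation_status != 'approved') are ignored.
published = [item for item in all_items if item.get("curation_status") == "approved"]
print(published)  # [{'id': 12345, 'curation_status': 'approved'}]
```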
3 changes: 3 additions & 0 deletions app.py
@@ -23,6 +23,8 @@ def get_args():
help='list of article and/or collection IDs to process. E.g., "2323,4353,5454"')
parser.add_argument('--continue-on-error', action='store_true',
help='If an item encounters an error during the processing stage, continue to the next item.')
parser.add_argument('--dry-run', action='store_true',
                        help='Fetch, match and verify items only. Do not download, delete, or upload any files to preservation.')
args = parser.parse_args()


@@ -72,6 +74,7 @@ def main():
config_obj = Config(env_file)

config_obj.add_setting(name='continue-on-error', value=args.continue_on_error)
config_obj.add_setting(name='dry-run', value=args.dry_run)

figshare_config = config_obj.figshare_config()
system_config = config_obj.system_config()
47 changes: 36 additions & 11 deletions figshare/Article.py
@@ -676,7 +676,10 @@ def __check_file_hash(self, files, version_data, folder_path):
# delete directory if validation failed.
if (delete_folder is True):
self.logs.write_log_in_file("error", f"Validation failed, deleting {preservation_storage_location + folder_path}.", True)
self.delete_folder(preservation_storage_location + folder_path)
if self.system_config['dry-run'] == 'False':
self.delete_folder(preservation_storage_location + folder_path)
else:
self.logs.write_log_in_file("info", "*Dry Run* Folder not deleted.", True)
process_article = True

return process_article
@@ -1024,8 +1027,14 @@ def process_articles(self, articles):

if (version_data["matched"] is True):
self.logs.write_log_in_file("info", f"------- Processing article {article} version {version_data['version']}.", True)

# call pre process script function for each matched item.
value_pre_process = self.pre_process_script_function()
if self.system_config['dry-run'] == 'False':
value_pre_process = self.pre_process_script_function()
else:
value_pre_process = 0
self.logs.write_log_in_file("info", "*Dry Run* Skipping pre processing.", True)

if (value_pre_process == 0):
self.logs.write_log_in_file("info", "Pre-processing script finished successfully.", True)
# check main folder exists in preservation storage.
@@ -1042,24 +1051,40 @@
else:
self.logs.write_log_in_file("info", "Exists and is empty", True)
check_files = False
# delete folder if validation fails
self.delete_folder(check_dir)
# call post process script function for each matched item. Code 5 corresponds to step 5 of S4.4 in the spec.
value_post_process = self.processor.post_process_script_function("Article", check_dir, value_pre_process, 5)
if (value_post_process != 0):
self.logs.write_log_in_file("error", f"{version_data['id']} version {version_data['version']} - "
+ "Post-processing script error found.", True)

if self.system_config['dry-run'] == 'False':
# delete folder if validation fails
self.delete_folder(check_dir)
# call post process script function for each matched item. Code 5 corresponds to step 5 of S4.4 in the spec.
value_post_process = self.processor.post_process_script_function("Article", check_dir, value_pre_process, 5)
if (value_post_process != 0):
self.logs.write_log_in_file("error", f"{version_data['id']} version {version_data['version']} - "
+ "Post-processing script error found.", True)
else:
self.logs.write_log_in_file("info", "*Dry Run* File download and post-processing with "
+ f"{self.system_config['post_process_script_command']} skipped.", True)

break
else:
self.logs.write_log_in_file("info", "Does not exist. Folder will be created", True)
value_post_process = 0
if self.system_config['dry-run'] == 'False':
self.logs.write_log_in_file("info", "Does not exist. Folder will be created", True)
else:
self.logs.write_log_in_file("info", "*Dru Run* Does not exist. Folder will not be created", True)

# end check main folder exists in preservation storage.
# check required files exist in curation UAL_RDM folder
self.logs.write_log_in_file("info", "Checking required files exist in associated curation "
+ f"folder {curation_storage_location}.", True)
copy_files = self.__can_copy_files(version_data)
if self.__final_process(check_files, copy_files, check_dir, version_data, folder_name, version_no, value_pre_process):

if self.system_config['dry-run'] == 'False':
if self.__final_process(check_files, copy_files, check_dir, version_data, folder_name, version_no, value_pre_process):
processed_count += 1
else:
processed_count += 1
self.logs.write_log_in_file("info", "*Dry Run* File download and post-processing with "
+ f"{self.system_config['post_process_script_command']} skipped.", True)
else:
self.logs.write_log_in_file("error", "Pre-processing script failed. Running post-processing script.", True)
# call post process script function for each matched item.
Expand Down
18 changes: 13 additions & 5 deletions figshare/Collection.py
@@ -302,13 +302,21 @@ def process_collections(self, collections):
version["license"] = json.loads('{"value": 2,"name": "CC0","url": "https://creativecommons.org/publicdomain/zero/1.0/"}')

self.logs.write_log_in_file("info", f"------- Processing collection {collection} version {version['version']}.", True)
self.__save_json_in_metadata(collection, version, folder_name)
collection_preservation_path = self.preservation_storage_location + os.path.basename(os.path.dirname(os.path.dirname(folder_name)))
value_post_process = self.processor.post_process_script_function("Collection", collection_preservation_path)
if (value_post_process != 0):
self.logs.write_log_in_file("error", f"collection {collection} - post-processing script failed.", True)

if self.system_config['dry-run'] == 'False':
self.__save_json_in_metadata(collection, version, folder_name)
collection_preservation_path = self.preservation_storage_location + \
os.path.basename(os.path.dirname(os.path.dirname(folder_name)))
value_post_process = self.processor.post_process_script_function("Collection", collection_preservation_path)
if (value_post_process != 0):
self.logs.write_log_in_file("error", f"collection {collection} - post-processing script failed.", True)
else:
processed_count += 1
else:
self.logs.write_log_in_file("info", "*Dry Run* File download and post-processing with "
+ f"{self.system_config['post_process_script_command']} skipped.", True)
processed_count += 1

return processed_count, self.already_preserved_counts_dict

"""
