From 44031c088a62d985170a9e603a4ea58c0309a2f3 Mon Sep 17 00:00:00 2001 From: Jan van Mansum Date: Wed, 14 Feb 2024 11:06:14 +0100 Subject: [PATCH 1/2] Modified dv-dataset-destroy-migration-placeholder so that it also recognizes easy-migration.zip as a placeholder file, and not only an easy-migration folder. --- src/datastation/dataverse/destroy_placeholder_dataset.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/datastation/dataverse/destroy_placeholder_dataset.py b/src/datastation/dataverse/destroy_placeholder_dataset.py index 94f3f6e..79e294c 100644 --- a/src/datastation/dataverse/destroy_placeholder_dataset.py +++ b/src/datastation/dataverse/destroy_placeholder_dataset.py @@ -12,9 +12,9 @@ def matches(description): return matches - -def has_directory_label_different_from(file_metadata, dir_label): - return 'directoryLabel' not in file_metadata or file_metadata['directoryLabel'] != dir_label +def is_migration_file(file_metadata): + return ('directoryLabel' in file_metadata and file_metadata['directoryLabel'] == 'easy-migration') or \ + ('directoryLabel' not in file_metadata and file_metadata['label'] != 'easy-migration.zip') def destroy_placeholder_dataset(dataset_api: DatasetApi, description_text_pattern, csv_report: CsvReport, @@ -45,7 +45,7 @@ def destroy_placeholder_dataset(dataset_api: DatasetApi, description_text_patter messages.append(f"Found {len(files)} files <= 4: OK") non_easy_migration_files = list( - filter(lambda m: has_directory_label_different_from(m, 'easy-migration'), files)) + filter(lambda m: not is_migration_file(m), files)) logging.debug(f"Non easy-migration files: {non_easy_migration_files}") if len(non_easy_migration_files) > 0: From 3f68389ed9b0a8fc53d867a246f6c7182f1b2c3b Mon Sep 17 00:00:00 2001 From: Jan van Mansum Date: Wed, 14 Feb 2024 12:05:13 +0100 Subject: [PATCH 2/2] - Implemented missing dry-run option for dv-dataset-destroy-migration-placeholder - Refactored filtering on datasets with only 
'easy-migration' files + also allowing easy-migration.zip as 'easy-migration' file (formerly, easy-migration was expected to be a directory containing up to 4 files). --- pyproject.toml | 2 +- .../dataverse/destroy_placeholder_dataset.py | 5 +++-- ...v_dataset_destroy_migration_placeholder.py | 3 ++- src/tests/test_destroy_placeholder_dataset.py | 22 +++++++++++++++++++ 4 files changed, 28 insertions(+), 4 deletions(-) create mode 100644 src/tests/test_destroy_placeholder_dataset.py diff --git a/pyproject.toml b/pyproject.toml index 89e54c0..c910aea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dans-datastation-tools" -version = "0.38.0" +version = "0.39.0" description = "Command line utilities for Data Station application management" authors = ["DANS-KNAW"] packages = [ diff --git a/src/datastation/dataverse/destroy_placeholder_dataset.py b/src/datastation/dataverse/destroy_placeholder_dataset.py index 79e294c..60d4174 100644 --- a/src/datastation/dataverse/destroy_placeholder_dataset.py +++ b/src/datastation/dataverse/destroy_placeholder_dataset.py @@ -12,9 +12,10 @@ def matches(description): return matches + def is_migration_file(file_metadata): return ('directoryLabel' in file_metadata and file_metadata['directoryLabel'] == 'easy-migration') or \ - ('directoryLabel' not in file_metadata and file_metadata['label'] != 'easy-migration.zip') + ('directoryLabel' not in file_metadata and file_metadata['label'] == 'easy-migration.zip') def destroy_placeholder_dataset(dataset_api: DatasetApi, description_text_pattern, csv_report: CsvReport, @@ -50,7 +51,7 @@ def destroy_placeholder_dataset(dataset_api: DatasetApi, description_text_patter if len(non_easy_migration_files) > 0: blocker = True - messages.append(f"Files other than 'easy-migration' found: {len(non_easy_migration_files)}: BLOCKER") + messages.append(f"Files other than 'easy-migration/*' or 'easy-migration.zip' found: {len(non_easy_migration_files)}: BLOCKER") else: 
messages.append("Only found easy-migration files: OK") diff --git a/src/datastation/dv_dataset_destroy_migration_placeholder.py b/src/datastation/dv_dataset_destroy_migration_placeholder.py index 1121e6b..3a7a76e 100644 --- a/src/datastation/dv_dataset_destroy_migration_placeholder.py +++ b/src/datastation/dv_dataset_destroy_migration_placeholder.py @@ -28,7 +28,8 @@ def main(): batch_processor.process_pids(pids, callback=lambda pid, csv_report: destroy_placeholder_dataset(dataverse.dataset(pid), description_text_pattern, - csv_report)) + csv_report, + dry_run=args.dry_run)) if __name__ == '__main__': diff --git a/src/tests/test_destroy_placeholder_dataset.py b/src/tests/test_destroy_placeholder_dataset.py new file mode 100644 index 0000000..d321d49 --- /dev/null +++ b/src/tests/test_destroy_placeholder_dataset.py @@ -0,0 +1,22 @@ +import datastation.dataverse.destroy_placeholder_dataset + + +def test_is_migration_file_returns_true_for_file_with_label_easy_migration_dot_zip(): + file_metadata = {'label': 'easy-migration.zip'} + assert datastation.dataverse.destroy_placeholder_dataset.is_migration_file(file_metadata) == True + + +def test_is_migration_file_returns_true_for_file_with_directory_label_easy_migration(): + file_metadata = {'directoryLabel': 'easy-migration'} + assert datastation.dataverse.destroy_placeholder_dataset.is_migration_file(file_metadata) == True + + +def test_is_migration_file_returns_false_for_file_with_label_not_easy_migration_dot_zip(): + file_metadata = {'label': 'not-easy-migration.zip'} + assert datastation.dataverse.destroy_placeholder_dataset.is_migration_file(file_metadata) == False + + +def test_is_migration_file_returns_false_for_file_with_directory_label_not_easy_migration(): + file_metadata = {'directoryLabel': 'not-easy-migration'} + assert datastation.dataverse.destroy_placeholder_dataset.is_migration_file(file_metadata) == False +