diff --git a/src/nautilus_librarian/domain/file_locator.py b/src/nautilus_librarian/domain/file_locator.py index 30d88df..692e49b 100644 --- a/src/nautilus_librarian/domain/file_locator.py +++ b/src/nautilus_librarian/domain/file_locator.py @@ -10,7 +10,13 @@ class FileNotFoundException(Exception): class BaseImageNotFoundError(FileNotFoundError): - """Raised when a base image does not exist""" + """Raised when a Base image does not exist""" + + pass + + +class ExpectedGoldImageError(Exception): + """Raised when a Gold image is expected""" pass @@ -23,7 +29,10 @@ def get_base_image_filename_from_gold_image(gold_image: Filename) -> Filename: return gold_image.generate_base_image_filename() -def get_base_image_absolute_path(git_repo_dir, gold_image): +def get_base_image_absolute_path_from_gold(git_repo_dir, gold_image: Filename): + if not gold_image.is_gold_image: + raise ExpectedGoldImageError(f"Expected Gold image: {gold_image}") + corresponding_base_image = gold_image.generate_base_image_filename() corresponding_base_image_relative_path = ( file_locator(corresponding_base_image) + "/" + str(corresponding_base_image) diff --git a/src/nautilus_librarian/mods/dvc/domain/dvc_diff_parser.py b/src/nautilus_librarian/mods/dvc/domain/dvc_diff_parser.py index ff05907..4de0af1 100644 --- a/src/nautilus_librarian/mods/dvc/domain/dvc_diff_parser.py +++ b/src/nautilus_librarian/mods/dvc/domain/dvc_diff_parser.py @@ -8,7 +8,7 @@ def __init__(self, dvc_diff: dict) -> None: self.added_list = None self.deleted_list = None self.modified_list = None - self.renamed_list = None + self.parse() @staticmethod @@ -68,24 +68,24 @@ def filter( exclude_renamed=False, only_basename=False, ): - files = [] + all_files = [] if not exclude_added: - files = files + self.added_list + all_files = all_files + self.added_list if not exclude_deleted: - files = files + self.deleted_list + all_files = all_files + self.deleted_list if not exclude_modified: - files = files + self.modified_list + all_files = all_files + self.modified_list if only_basename: - files = self.basenames_of(files) + all_files = self.basenames_of(all_files) if not exclude_renamed: if only_basename: - files = files + self.basenames_of_old_and_new(self.renamed_list) + all_files = all_files + self.basenames_of_old_and_new(self.renamed_list) else: - files = files + self.renamed_list + all_files = all_files + self.renamed_list - return files + return all_files diff --git a/src/nautilus_librarian/mods/dvc/domain/utils.py b/src/nautilus_librarian/mods/dvc/domain/utils.py index e430fad..b98ab47 100644 --- a/src/nautilus_librarian/mods/dvc/domain/utils.py +++ b/src/nautilus_librarian/mods/dvc/domain/utils.py @@ -1,7 +1,15 @@ from nautilus_librarian.mods.dvc.domain.dvc_diff_parser import DvcDiffParser +from nautilus_librarian.mods.namecodes.domain.filename_filters import ( + filter_media_library_files, +) +from nautilus_librarian.mods.namecodes.domain.validate_filenames import ( + is_a_library_file, +) +# TODO: move this file to app domain. DVC mod should be generic. -def extract_added_and_modified_and_renamed_files_from_dvc_diff( + +def extract_all_added_and_modified_and_renamed_files_from_dvc_diff( dvc_diff_json, only_basename=True ): """ @@ -16,7 +24,18 @@ def extract_added_and_modified_and_renamed_files_from_dvc_diff( Output: ['data/000001/32/000001-32.600.2.tif'] """ dvc_diff = DvcDiffParser.from_json(dvc_diff_json) - return dvc_diff.filter(exclude_deleted=True, only_basename=only_basename) + all_files = dvc_diff.filter(exclude_deleted=True, only_basename=only_basename) + return all_files + + +def extract_added_and_modified_and_renamed_files_from_dvc_diff( + dvc_diff_json, only_basename=True +): + all_files = extract_all_added_and_modified_and_renamed_files_from_dvc_diff( + dvc_diff_json, only_basename + ) + files = filter_media_library_files(all_files) + return files def extract_all_changed_files_from_dvc_diff(dvc_diff_json, only_basename=True): @@ -32,7 +51,9 @@ def extract_all_changed_files_from_dvc_diff(dvc_diff_json, only_basename=True): Output: ['data/000001/32/000001-32.600.2.tif'] """ dvc_diff = DvcDiffParser.from_json(dvc_diff_json) - return dvc_diff.filter(only_basename=only_basename) + all_files = dvc_diff.filter(only_basename=only_basename) + files = filter_media_library_files(all_files) + return files def extract_deleted_files_from_dvc_diff(dvc_diff_json, only_basename=True): @@ -48,13 +69,15 @@ def extract_deleted_files_from_dvc_diff(dvc_diff_json, only_basename=True): Output: ['data/000001/32/000001-32.600.2.tif'] """ dvc_diff = DvcDiffParser.from_json(dvc_diff_json) - return dvc_diff.filter( + all_files = dvc_diff.filter( exclude_added=True, exclude_modified=True, exclude_deleted=False, exclude_renamed=True, only_basename=only_basename, ) + files = filter_media_library_files(all_files) + return files def extract_added_and_modified_files_from_dvc_diff(dvc_diff_json, only_basename=True): @@ -70,9 +93,11 @@ def extract_added_and_modified_files_from_dvc_diff(dvc_diff_json, only_basename= Output: ['data/000001/32/000001-32.600.2.tif'] """ dvc_diff = DvcDiffParser.from_json(dvc_diff_json) - return dvc_diff.filter( + all_files = dvc_diff.filter( exclude_deleted=True, exclude_renamed=True, only_basename=only_basename ) + files = filter_media_library_files(all_files) + return files def extract_modified_files_from_dvc_diff(dvc_diff_json, only_basename=True): @@ -88,12 +113,14 @@ def extract_modified_files_from_dvc_diff(dvc_diff_json, only_basename=True): Output: ['data/000001/32/000001-32.600.2.tif'] """ dvc_diff = DvcDiffParser.from_json(dvc_diff_json) - return dvc_diff.filter( + all_files = dvc_diff.filter( exclude_added=True, exclude_deleted=True, exclude_renamed=True, only_basename=only_basename, ) + files = filter_media_library_files(all_files) + return files def extract_renamed_files_from_dvc_diff(dvc_diff_json, only_basename=True): @@ -125,7 +152,7 @@ def extract_renamed_files_from_dvc_diff(dvc_diff_json, only_basename=True): ] """ dvc_diff = DvcDiffParser.from_json(dvc_diff_json) - return dvc_diff.filter( + all_files = dvc_diff.filter( exclude_added=True, exclude_modified=True, exclude_deleted=True, @@ -133,6 +160,11 @@ def extract_renamed_files_from_dvc_diff(dvc_diff_json, only_basename=True): only_basename=only_basename, ) + media_files = list( + filter(lambda filename: is_a_library_file(filename["new"]), all_files) + ) + return media_files + def extract_list_of_media_file_changes_from_dvc_diff_output( dvc_diff, only_basename=True diff --git a/src/nautilus_librarian/typer/commands/workflows/actions/delete_base_images_action.py b/src/nautilus_librarian/typer/commands/workflows/actions/delete_base_images_action.py index 79fc954..de9b07c 100644 --- a/src/nautilus_librarian/typer/commands/workflows/actions/delete_base_images_action.py +++ b/src/nautilus_librarian/typer/commands/workflows/actions/delete_base_images_action.py @@ -11,7 +11,7 @@ ) -def get_base_image_absolute_path(git_repo_dir, gold_image): +def get_base_image_absolute_path_from_gold(git_repo_dir, gold_image): corresponding_base_image = gold_image.generate_base_image_filename() corresponding_base_image_relative_path = ( file_locator(corresponding_base_image) + "/" + str(corresponding_base_image) @@ -41,7 +41,9 @@ def delete_base_images(dvc_diff, git_repo_dir): for filename in filenames: gold_filename = Filename(filename) - base_filename = get_base_image_absolute_path(git_repo_dir, gold_filename) + base_filename = get_base_image_absolute_path_from_gold( + git_repo_dir, gold_filename + ) if path.exists(f"{base_filename}.dvc"): remove_base_pointer_and_file_if_exists(base_filename, dvc_services) messages.append( diff --git a/src/nautilus_librarian/typer/commands/workflows/actions/rename_base_images_action.py b/src/nautilus_librarian/typer/commands/workflows/actions/rename_base_images_action.py index 6060bf8..3d43c8d 100644 --- a/src/nautilus_librarian/typer/commands/workflows/actions/rename_base_images_action.py +++ b/src/nautilus_librarian/typer/commands/workflows/actions/rename_base_images_action.py @@ -2,7 +2,7 @@ from nautilus_librarian.domain.dvc_services_api import DvcServicesApi from nautilus_librarian.domain.file_locator import ( - get_base_image_absolute_path, + get_base_image_absolute_path_from_gold, guard_that_base_image_exists, ) from nautilus_librarian.mods.dvc.domain.utils import extract_renamed_files_from_dvc_diff @@ -23,23 +23,33 @@ def rename_base_images(dvc_diff, git_repo_dir): """ It renames previously generated base images when gold images are renamed """ - filenames = extract_renamed_files_from_dvc_diff(dvc_diff, only_basename=False) + all_renamed_files = extract_renamed_files_from_dvc_diff( + dvc_diff, only_basename=False + ) - if dvc_diff == "{}" or filenames == []: + # Filter Gold renamed images + gold_renamed_images = list( + filter( + lambda filename: Filename(filename["old"]).is_gold_image(), + all_renamed_files, + ) + ) + + if dvc_diff == "{}" or gold_renamed_images == []: return ActionResult( - ResultCode.CONTINUE, [Message("No Gold image renames found")] + ResultCode.CONTINUE, [Message("No Gold renamed images found")] ) messages = [] - for filename in filenames: + for filename in gold_renamed_images: try: gold_filename_old = Filename(filename["old"]) - gold_filename_new = Filename(filename["new"]) - base_filename_old = get_base_image_absolute_path( + base_filename_old = get_base_image_absolute_path_from_gold( git_repo_dir, gold_filename_old ) - base_filename_new = get_base_image_absolute_path( + gold_filename_new = Filename(filename["new"]) + base_filename_new = get_base_image_absolute_path_from_gold( git_repo_dir, gold_filename_new ) guard_that_base_image_exists(base_filename_old) diff --git a/src/nautilus_librarian/typer/commands/workflows/actions/validate_filenames.py b/src/nautilus_librarian/typer/commands/workflows/actions/validate_filenames.py index e4d94cd..c5d5425 100644 --- a/src/nautilus_librarian/typer/commands/workflows/actions/validate_filenames.py +++ b/src/nautilus_librarian/typer/commands/workflows/actions/validate_filenames.py @@ -1,5 +1,5 @@ from nautilus_librarian.mods.dvc.domain.utils import ( - extract_list_of_media_file_changes_from_dvc_diff_output, + extract_all_added_and_modified_and_renamed_files_from_dvc_diff, get_new_filepath_if_is_a_renaming_dict, ) from nautilus_librarian.mods.namecodes.domain.validate_filenames import ( @@ -15,12 +15,16 @@ def validate_filenames(dvc_diff): """ - It validates all the media file names in the dvc diff. + It validates all the filenames in the dvc diff. """ if dvc_diff == "{}": - return ActionResult(ResultCode.EXIT, [Message("No Gold image changes found")]) + return ActionResult( + ResultCode.EXIT, [Message("No filenames to validate, empty DVC diff")] + ) - filenames = extract_list_of_media_file_changes_from_dvc_diff_output(dvc_diff) + # TODO: we have to review this function if we add files to DVC which do not belong to a media library. + + filenames = extract_all_added_and_modified_and_renamed_files_from_dvc_diff(dvc_diff) messages = [] diff --git a/tests/test_nautilus_librarian/test_typer/test_commands/test_workflows/test_actions/test_check_images_changes.py b/tests/test_nautilus_librarian/test_typer/test_commands/test_workflows/test_actions/test_check_images_changes.py index 3473039..35e033d 100644 --- a/tests/test_nautilus_librarian/test_typer/test_commands/test_workflows/test_actions/test_check_images_changes.py +++ b/tests/test_nautilus_librarian/test_typer/test_commands/test_workflows/test_actions/test_check_images_changes.py @@ -34,7 +34,7 @@ def given_a_diff_structure_with_changes_it_should_return_an_continue_result_code dvc_diff_with_added_gold_image = { "added": [], "deleted": [ - {"path": "path/to/image"}, + {"path": "data/000001/52/000001-52.600.2.tif"}, ], "modified": [], "renamed": [], diff --git a/tests/test_nautilus_librarian/test_typer/test_commands/test_workflows/test_actions/test_rename_base_images_action.py b/tests/test_nautilus_librarian/test_typer/test_commands/test_workflows/test_actions/test_rename_base_images_action.py index 45ea923..f9089d8 100644 --- a/tests/test_nautilus_librarian/test_typer/test_commands/test_workflows/test_actions/test_rename_base_images_action.py +++ b/tests/test_nautilus_librarian/test_typer/test_commands/test_workflows/test_actions/test_rename_base_images_action.py @@ -21,7 +21,7 @@ def copy_base_image_to_destination(sample_base_image_absolute_path, destination_ ) -def given_a_diff_structure_with_renamed_gold_image_it_should_rename_base_images( +def given_a_diff_structure_with_a_renamed_gold_image_it_should_rename_the_corresponding_base_image( sample_gold_image_absolute_path, sample_base_image_absolute_path, temp_git_dir, @@ -69,3 +69,43 @@ def given_a_diff_structure_with_renamed_gold_image_it_should_rename_base_images( assert result.code == ResultCode.CONTINUE assert path.exists(f"{temp_git_dir}/data/000002/52/000002-52.600.2.tif") assert result.contains_text("successfully renamed to") + + +def given_a_diff_structure_with_a_renamed_not_gold_image_it_should_not_rename_any_base_images( + sample_gold_image_absolute_path, + sample_base_image_absolute_path, + temp_git_dir, + temp_dvc_local_remote_storage_dir, + temp_gpg_home_dir, + git_user, +): + + dvc_diff_with_renamed_base_image = { + "added": [], + "deleted": [], + "modified": [], + "renamed": [ + { + "path": { + "old": "data/000001/52/000001-52.600.2.tif", + "new": "data/000001/52/000002-52.600.2.tif", + } + }, + ], + } + + create_initial_state( + temp_git_dir, + temp_dvc_local_remote_storage_dir, + sample_gold_image_absolute_path, + temp_gpg_home_dir, + git_user, + ) + + result = rename_base_images( + compact_json(dvc_diff_with_renamed_base_image), temp_git_dir + ) + + assert result.code == ResultCode.CONTINUE + assert not path.exists(f"{temp_git_dir}/data/000002/52/000002-52.600.2.tif") + assert not result.contains_text("successfully renamed to") diff --git a/tests/test_nautilus_librarian/test_typer/test_commands/test_workflows/test_gold_images_processing.py b/tests/test_nautilus_librarian/test_typer/test_commands/test_workflows/test_gold_images_processing.py index d471a50..17e7e65 100644 --- a/tests/test_nautilus_librarian/test_typer/test_commands/test_workflows/test_gold_images_processing.py +++ b/tests/test_nautilus_librarian/test_typer/test_commands/test_workflows/test_gold_images_processing.py @@ -124,7 +124,7 @@ def it_should_show_a_message_if_there_is_not_any_change_in_gold_images( }, ) assert result.exit_code == 0 - assert "No Gold image changes found" in result.stdout + assert "No filenames to validate, empty DVC diff" in result.stdout def copy_media_file_to_its_folder(src_media_file_path, git_dir): @@ -188,7 +188,7 @@ def test_gold_images_processing_workflow_command( ✓ data/000001/32/000001-32.600.2.tif pulled from dvc storage ✓ Dimensions of data/000001/32/000001-32.600.2.tif are 1740 x 1160 ✓ Base image of data/000001/32/000001-32.600.2.tif successfully generated - No Gold image renames found + No Gold renamed images found No Gold image deletions found New Gold image found: 000001-32.600.2.tif -> Base image: data/000001/52/000001-52.600.2.tif ✓ """