Skip to content

Commit

Permalink
fix: [#96] rename Base image only when Gold is renamed
Browse files Browse the repository at this point in the history
I have also fixed other methods in the module:

src/nautilus_librarian/mods/dvc/domain/utils.py

Becuase functions were returning all changes in dvc diff not only
changes in the media libray files.
  • Loading branch information
josecelano committed Mar 1, 2022
1 parent 5631142 commit cf214fd
Show file tree
Hide file tree
Showing 9 changed files with 133 additions and 36 deletions.
13 changes: 11 additions & 2 deletions src/nautilus_librarian/domain/file_locator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,13 @@ class FileNotFoundException(Exception):


class BaseImageNotFoundError(FileNotFoundError):
"""Raised when a base image does not exist"""
"""Raised when a Base image does not exist"""

pass


class ExpectedGoldImageError(Exception):
"""Raised when a Gold image is expected"""

pass

Expand All @@ -23,7 +29,10 @@ def get_base_image_filename_from_gold_image(gold_image: Filename) -> Filename:
return gold_image.generate_base_image_filename()


def get_base_image_absolute_path(git_repo_dir, gold_image):
def get_base_image_absolute_path_from_gold(git_repo_dir, gold_image: Filename):
if not gold_image.is_gold_image:
raise ExpectedGoldImageError(f"Expected Gold image: {gold_image}")

corresponding_base_image = gold_image.generate_base_image_filename()
corresponding_base_image_relative_path = (
file_locator(corresponding_base_image) + "/" + str(corresponding_base_image)
Expand Down
18 changes: 9 additions & 9 deletions src/nautilus_librarian/mods/dvc/domain/dvc_diff_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def __init__(self, dvc_diff: dict) -> None:
self.added_list = None
self.deleted_list = None
self.modified_list = None
self.renamed_list = None

self.parse()

@staticmethod
Expand Down Expand Up @@ -68,24 +68,24 @@ def filter(
exclude_renamed=False,
only_basename=False,
):
files = []
all_files = []

if not exclude_added:
files = files + self.added_list
all_files = all_files + self.added_list

if not exclude_deleted:
files = files + self.deleted_list
all_files = all_files + self.deleted_list

if not exclude_modified:
files = files + self.modified_list
all_files = all_files + self.modified_list

if only_basename:
files = self.basenames_of(files)
all_files = self.basenames_of(all_files)

if not exclude_renamed:
if only_basename:
files = files + self.basenames_of_old_and_new(self.renamed_list)
all_files = all_files + self.basenames_of_old_and_new(self.renamed_list)
else:
files = files + self.renamed_list
all_files = all_files + self.renamed_list

return files
return all_files
46 changes: 39 additions & 7 deletions src/nautilus_librarian/mods/dvc/domain/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
from nautilus_librarian.mods.dvc.domain.dvc_diff_parser import DvcDiffParser
from nautilus_librarian.mods.namecodes.domain.filename_filters import (
filter_media_library_files,
)
from nautilus_librarian.mods.namecodes.domain.validate_filenames import (
is_a_library_file,
)

# TODO: move this file to app domain. DVC mod should be generic.

def extract_added_and_modified_and_renamed_files_from_dvc_diff(

def extract_all_added_and_modified_and_renamed_files_from_dvc_diff(
dvc_diff_json, only_basename=True
):
"""
Expand All @@ -16,7 +24,18 @@ def extract_added_and_modified_and_renamed_files_from_dvc_diff(
Output: ['data/000001/32/000001-32.600.2.tif']
"""
dvc_diff = DvcDiffParser.from_json(dvc_diff_json)
return dvc_diff.filter(exclude_deleted=True, only_basename=only_basename)
all_files = dvc_diff.filter(exclude_deleted=True, only_basename=only_basename)
return all_files


def extract_added_and_modified_and_renamed_files_from_dvc_diff(
dvc_diff_json, only_basename=True
):
all_files = extract_all_added_and_modified_and_renamed_files_from_dvc_diff(
dvc_diff_json, only_basename
)
files = filter_media_library_files(all_files)
return files


def extract_all_changed_files_from_dvc_diff(dvc_diff_json, only_basename=True):
Expand All @@ -32,7 +51,9 @@ def extract_all_changed_files_from_dvc_diff(dvc_diff_json, only_basename=True):
Output: ['data/000001/32/000001-32.600.2.tif']
"""
dvc_diff = DvcDiffParser.from_json(dvc_diff_json)
return dvc_diff.filter(only_basename=only_basename)
all_files = dvc_diff.filter(only_basename=only_basename)
files = filter_media_library_files(all_files)
return files


def extract_deleted_files_from_dvc_diff(dvc_diff_json, only_basename=True):
Expand All @@ -48,13 +69,15 @@ def extract_deleted_files_from_dvc_diff(dvc_diff_json, only_basename=True):
Output: ['data/000001/32/000001-32.600.2.tif']
"""
dvc_diff = DvcDiffParser.from_json(dvc_diff_json)
return dvc_diff.filter(
all_files = dvc_diff.filter(
exclude_added=True,
exclude_modified=True,
exclude_deleted=False,
exclude_renamed=True,
only_basename=only_basename,
)
files = filter_media_library_files(all_files)
return files


def extract_added_and_modified_files_from_dvc_diff(dvc_diff_json, only_basename=True):
Expand All @@ -70,9 +93,11 @@ def extract_added_and_modified_files_from_dvc_diff(dvc_diff_json, only_basename=
Output: ['data/000001/32/000001-32.600.2.tif']
"""
dvc_diff = DvcDiffParser.from_json(dvc_diff_json)
return dvc_diff.filter(
all_files = dvc_diff.filter(
exclude_deleted=True, exclude_renamed=True, only_basename=only_basename
)
files = filter_media_library_files(all_files)
return files


def extract_modified_files_from_dvc_diff(dvc_diff_json, only_basename=True):
Expand All @@ -88,12 +113,14 @@ def extract_modified_files_from_dvc_diff(dvc_diff_json, only_basename=True):
Output: ['data/000001/32/000001-32.600.2.tif']
"""
dvc_diff = DvcDiffParser.from_json(dvc_diff_json)
return dvc_diff.filter(
all_files = dvc_diff.filter(
exclude_added=True,
exclude_deleted=True,
exclude_renamed=True,
only_basename=only_basename,
)
files = filter_media_library_files(all_files)
return files


def extract_renamed_files_from_dvc_diff(dvc_diff_json, only_basename=True):
Expand Down Expand Up @@ -125,14 +152,19 @@ def extract_renamed_files_from_dvc_diff(dvc_diff_json, only_basename=True):
]
"""
dvc_diff = DvcDiffParser.from_json(dvc_diff_json)
return dvc_diff.filter(
all_files = dvc_diff.filter(
exclude_added=True,
exclude_modified=True,
exclude_deleted=True,
exclude_renamed=False,
only_basename=only_basename,
)

media_files = list(
filter(lambda filename: is_a_library_file(filename["new"]), all_files)
)
return media_files


def extract_list_of_media_file_changes_from_dvc_diff_output(
dvc_diff, only_basename=True
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
)


def get_base_image_absolute_path(git_repo_dir, gold_image):
def get_base_image_absolute_path_from_gold(git_repo_dir, gold_image):
corresponding_base_image = gold_image.generate_base_image_filename()
corresponding_base_image_relative_path = (
file_locator(corresponding_base_image) + "/" + str(corresponding_base_image)
Expand Down Expand Up @@ -41,7 +41,9 @@ def delete_base_images(dvc_diff, git_repo_dir):

for filename in filenames:
gold_filename = Filename(filename)
base_filename = get_base_image_absolute_path(git_repo_dir, gold_filename)
base_filename = get_base_image_absolute_path_from_gold(
git_repo_dir, gold_filename
)
if path.exists(f"{base_filename}.dvc"):
remove_base_pointer_and_file_if_exists(base_filename, dvc_services)
messages.append(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from nautilus_librarian.domain.dvc_services_api import DvcServicesApi
from nautilus_librarian.domain.file_locator import (
get_base_image_absolute_path,
get_base_image_absolute_path_from_gold,
guard_that_base_image_exists,
)
from nautilus_librarian.mods.dvc.domain.utils import extract_renamed_files_from_dvc_diff
Expand All @@ -23,23 +23,33 @@ def rename_base_images(dvc_diff, git_repo_dir):
"""
It renames previously generated base images when gold images are renamed
"""
filenames = extract_renamed_files_from_dvc_diff(dvc_diff, only_basename=False)
all_renamed_files = extract_renamed_files_from_dvc_diff(
dvc_diff, only_basename=False
)

if dvc_diff == "{}" or filenames == []:
# Filter Gold renamed images
gold_renamed_images = list(
filter(
lambda filename: Filename(filename["old"]).is_gold_image(),
all_renamed_files,
)
)

if dvc_diff == "{}" or gold_renamed_images == []:
return ActionResult(
ResultCode.CONTINUE, [Message("No Gold image renames found")]
ResultCode.CONTINUE, [Message("No Gold renamed images found")]
)

messages = []

for filename in filenames:
for filename in gold_renamed_images:
try:
gold_filename_old = Filename(filename["old"])
gold_filename_new = Filename(filename["new"])
base_filename_old = get_base_image_absolute_path(
base_filename_old = get_base_image_absolute_path_from_gold(
git_repo_dir, gold_filename_old
)
base_filename_new = get_base_image_absolute_path(
gold_filename_new = Filename(filename["new"])
base_filename_new = get_base_image_absolute_path_from_gold(
git_repo_dir, gold_filename_new
)
guard_that_base_image_exists(base_filename_old)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from nautilus_librarian.mods.dvc.domain.utils import (
extract_list_of_media_file_changes_from_dvc_diff_output,
extract_all_added_and_modified_and_renamed_files_from_dvc_diff,
get_new_filepath_if_is_a_renaming_dict,
)
from nautilus_librarian.mods.namecodes.domain.validate_filenames import (
Expand All @@ -15,12 +15,16 @@

def validate_filenames(dvc_diff):
"""
It validates all the media file names in the dvc diff.
It validates all the filenames in the dvc diff.
"""
if dvc_diff == "{}":
return ActionResult(ResultCode.EXIT, [Message("No Gold image changes found")])
return ActionResult(
ResultCode.EXIT, [Message("No filenames to validate, empty DVC diff")]
)

filenames = extract_list_of_media_file_changes_from_dvc_diff_output(dvc_diff)
# TODO: we have to review this function if we add files to DVC which do not belong to a media library.

filenames = extract_all_added_and_modified_and_renamed_files_from_dvc_diff(dvc_diff)

messages = []

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def given_a_diff_structure_with_changes_it_should_return_an_continue_result_code
dvc_diff_with_added_gold_image = {
"added": [],
"deleted": [
{"path": "path/to/image"},
{"path": "data/000001/52/000001-52.600.2.tif"},
],
"modified": [],
"renamed": [],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def copy_base_image_to_destination(sample_base_image_absolute_path, destination_
)


def given_a_diff_structure_with_renamed_gold_image_it_should_rename_base_images(
def given_a_diff_structure_with_a_renamed_gold_image_it_should_rename_the_corresponding_base_image(
sample_gold_image_absolute_path,
sample_base_image_absolute_path,
temp_git_dir,
Expand Down Expand Up @@ -69,3 +69,43 @@ def given_a_diff_structure_with_renamed_gold_image_it_should_rename_base_images(
assert result.code == ResultCode.CONTINUE
assert path.exists(f"{temp_git_dir}/data/000002/52/000002-52.600.2.tif")
assert result.contains_text("successfully renamed to")


def given_a_diff_structure_with_a_renamed_not_gold_image_it_should_not_rename_any_base_images(
sample_gold_image_absolute_path,
sample_base_image_absolute_path,
temp_git_dir,
temp_dvc_local_remote_storage_dir,
temp_gpg_home_dir,
git_user,
):

dvc_diff_with_renamed_base_image = {
"added": [],
"deleted": [],
"modified": [],
"renamed": [
{
"path": {
"old": "data/000001/52/000001-52.600.2.tif",
"new": "data/000001/52/000002-52.600.2.tif",
}
},
],
}

create_initial_state(
temp_git_dir,
temp_dvc_local_remote_storage_dir,
sample_gold_image_absolute_path,
temp_gpg_home_dir,
git_user,
)

result = rename_base_images(
compact_json(dvc_diff_with_renamed_base_image), temp_git_dir
)

assert result.code == ResultCode.CONTINUE
assert not path.exists(f"{temp_git_dir}/data/000002/52/000002-52.600.2.tif")
assert not result.contains_text("successfully renamed to")
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def it_should_show_a_message_if_there_is_not_any_change_in_gold_images(
},
)
assert result.exit_code == 0
assert "No Gold image changes found" in result.stdout
assert "No filenames to validate, empty DVC diff" in result.stdout


def copy_media_file_to_its_folder(src_media_file_path, git_dir):
Expand Down Expand Up @@ -188,7 +188,7 @@ def test_gold_images_processing_workflow_command(
✓ data/000001/32/000001-32.600.2.tif pulled from dvc storage
✓ Dimensions of data/000001/32/000001-32.600.2.tif are 1740 x 1160
✓ Base image of data/000001/32/000001-32.600.2.tif successfully generated
No Gold image renames found
No Gold renamed images found
No Gold image deletions found
New Gold image found: 000001-32.600.2.tif -> Base image: data/000001/52/000001-52.600.2.tif ✓
"""
Expand Down

0 comments on commit cf214fd

Please sign in to comment.