From 17ee9bc538c4dcb5479b99d34e7059fe24ad7faf Mon Sep 17 00:00:00 2001 From: Wolfgang Maier Date: Wed, 6 Mar 2024 10:47:15 +0100 Subject: [PATCH 01/15] Improve usability of Directory datatype --- .../config/sample/datatypes_conf.xml.sample | 9 +- .../converters/archive_to_directory.xml | 33 +++++ .../datatypes/converters/tar_to_directory.xml | 2 +- lib/galaxy/datatypes/data.py | 137 ++++++++++++++++++ 4 files changed, 176 insertions(+), 5 deletions(-) create mode 100644 lib/galaxy/datatypes/converters/archive_to_directory.xml diff --git a/lib/galaxy/config/sample/datatypes_conf.xml.sample b/lib/galaxy/config/sample/datatypes_conf.xml.sample index 0e026b5a7b0a..b62d930bb515 100644 --- a/lib/galaxy/config/sample/datatypes_conf.xml.sample +++ b/lib/galaxy/config/sample/datatypes_conf.xml.sample @@ -288,13 +288,14 @@ - + + + - - - + + diff --git a/lib/galaxy/datatypes/converters/archive_to_directory.xml b/lib/galaxy/datatypes/converters/archive_to_directory.xml new file mode 100644 index 000000000000..9ccdba28a30e --- /dev/null +++ b/lib/galaxy/datatypes/converters/archive_to_directory.xml @@ -0,0 +1,33 @@ + + + + galaxy-util + + + + + + + + + + + + + + + + + + + + diff --git a/lib/galaxy/datatypes/converters/tar_to_directory.xml b/lib/galaxy/datatypes/converters/tar_to_directory.xml index 59354b39b5fd..0160746283da 100644 --- a/lib/galaxy/datatypes/converters/tar_to_directory.xml +++ b/lib/galaxy/datatypes/converters/tar_to_directory.xml @@ -1,7 +1,7 @@ - galaxy-util + galaxy-util mkdir '$output1.files_path'; diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py index 0024adea6155..eb4d23c9926b 100644 --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -1212,6 +1212,143 @@ def regex_line_dataprovider( class Directory(Data): """Class representing a directory of files.""" + # The behavior of this class is intended to be similar to that of + # composite types, but with arbitrary structure of extra_files. + + # A prioritized list of files in the directory that, if present, can serve + # as a replacement for a composite type's primary dataset. + recognized_index_files = [] + + # Directories converted from archives, and possibly others, have a single + # root folder inside the directory. The root_folder attribute lets tools + # access that folder without first exploring the directory tree themselves. + # Will be set to the empty string for flat directories. + MetadataElement( + name="root_folder", + default=None, + desc="Name of the root folder of the directory", + readonly=True, + optional=False, + visible=False, + ) + + # Capture the actual index file obtained from the recognized_index_files + # list for a concrete directory. + MetadataElement( + name="index_file", + default=None, + desc="Name of the index file relative to the root folder", + readonly=True, + optional=False, + visible=False, + ) + + MetadataElement( + name="folder_size", + default=0, + desc="Total size of the folder in bytes", + readonly=True, + optional=False, + visible=False, + ) + + @classmethod + def _get_size(cls, root_path): + total_size = 0 + for dirpath, dirnames, filenames in os.walk(root_path): + for f in filenames: + fp = os.path.join(dirpath, f) + # skip if it is symbolic link + if not os.path.islink(fp): + total_size += os.path.getsize(fp) + return total_size + + def set_meta(self, dataset: DatasetProtocol, **kwd): + efp = dataset.extra_files_path + efp_items = os.listdir(efp) + root_folder_name = efp_items[0] + if len(efp_items) == 1 and os.path.isdir(os.path.join(efp, root_folder_name)): + dataset.metadata.root_folder = root_folder_name + else: + dataset.metadata.root_folder = "" + index_file = None + for f in self.recognized_index_files: + if os.path.isfile(os.path.join(efp, root_folder_name, f)): + index_file = f + break + dataset.metadata.index_file = index_file + dataset.metadata.folder_size = self._get_size(os.path.join(efp, dataset.metadata.root_folder)) + + def set_peek(self, dataset: DatasetProtocol, **kwd) -> None: + if not dataset.dataset.purged: + dataset.peek = f"{dataset.metadata.root_folder}" + dataset.blurb = nice_size(dataset.metadata.folder_size) + else: + dataset.peek = "file does not exist" + dataset.blurb = "file purged from disk" + + def _archive_main_file( + self, archive: ZipstreamWrapper, display_name: str, data_filename: str + ) -> Tuple[bool, str, str]: + """Overwrites the method to not do anything. + + No main file gets added to a directory archive. + """ + error, msg, messagetype = False, "", "" + return error, msg, messagetype + + def display_data( + self, + trans, + dataset: DatasetHasHidProtocol, + preview: bool = False, + filename: Optional[str] = None, + to_ext: Optional[str] = None, + **kwd, + ): + headers = kwd.get("headers", {}) + # Prevent IE8 from sniffing content type since we're explicit about it. This prevents intentionally text/plain + # content from being rendered in the browser + headers["X-Content-Type-Options"] = "nosniff" + if to_ext: + # Download the directory structure as an archive + trans.log_event(f"Download directory for dataset id: {str(dataset.id)}") + return self._archive_composite_dataset(trans, dataset, headers, do_action=kwd.get("do_action", "zip")) + if preview: + root_folder = dataset.metadata.root_folder + index_file = dataset.metadata.index_file + if not filename or filename == 'index': + if root_folder is not None and index_file: + # display the index file in lieu of the empty primary dataset + file_path = os.path.join(dataset.extra_files_path, root_folder, index_file) + self._clean_and_set_mime_type(trans, "text/plain", headers) # type: ignore[arg-type] + return self._yield_user_file_content( + trans, + dataset, + file_path, + headers + ), headers + elif root_folder: + # delegate to the parent method, which knows how to display + # a directory + filename = root_folder + else: + # No meaningful display available, show an info message instead + self._clean_and_set_mime_type(trans, "text/plain", headers) # type: ignore[arg-type] + if root_folder is None: + return util.smart_str("Cannot generate preview with incomplete metadata. Resetting metadata may help.") + elif self.recognized_index_files: + return util.smart_str("None of the known key files for the datatype present. Is the datatype format set correctly?") + else: + return util.smart_str("No preview available for this dataset."), headers + + return super().display_data( + trans, + dataset=dataset, + filename=filename, + **kwd, + ) + class GenericAsn1(Text): """Class for generic ASN.1 text format""" From 549700d6e2585e6ddc7325290452a602bde2221b Mon Sep 17 00:00:00 2001 From: Wolfgang Maier Date: Fri, 8 Mar 2024 22:51:30 +0100 Subject: [PATCH 02/15] Use dataset's total_size property instead of calculating it again --- lib/galaxy/datatypes/data.py | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py index eb4d23c9926b..11c676f141ca 100644 --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -1243,26 +1243,6 @@ class Directory(Data): visible=False, ) - MetadataElement( - name="folder_size", - default=0, - desc="Total size of the folder in bytes", - readonly=True, - optional=False, - visible=False, - ) - - @classmethod - def _get_size(cls, root_path): - total_size = 0 - for dirpath, dirnames, filenames in os.walk(root_path): - for f in filenames: - fp = os.path.join(dirpath, f) - # skip if it is symbolic link - if not os.path.islink(fp): - total_size += os.path.getsize(fp) - return total_size - def set_meta(self, dataset: DatasetProtocol, **kwd): efp = dataset.extra_files_path efp_items = os.listdir(efp) @@ -1277,12 +1257,11 @@ def set_meta(self, dataset: DatasetProtocol, **kwd): index_file = f break dataset.metadata.index_file = index_file - dataset.metadata.folder_size = self._get_size(os.path.join(efp, dataset.metadata.root_folder)) def set_peek(self, dataset: DatasetProtocol, **kwd) -> None: if not dataset.dataset.purged: dataset.peek = f"{dataset.metadata.root_folder}" - dataset.blurb = nice_size(dataset.metadata.folder_size) + dataset.blurb = nice_size(dataset.dataset.total_size) else: dataset.peek = "file does not exist" dataset.blurb = "file purged from disk" From 8d62497a00ca90122e528f67d004a15776c8b1f3 Mon Sep 17 00:00:00 2001 From: Wolfgang Maier Date: Fri, 8 Mar 2024 23:21:14 +0100 Subject: [PATCH 03/15] Add auto-converters for tar.gz and tar.bz2 as suggested by @bernt-matthias --- lib/galaxy/config/sample/datatypes_conf.xml.sample | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/config/sample/datatypes_conf.xml.sample b/lib/galaxy/config/sample/datatypes_conf.xml.sample index b62d930bb515..a8e798143f8b 100644 --- a/lib/galaxy/config/sample/datatypes_conf.xml.sample +++ b/lib/galaxy/config/sample/datatypes_conf.xml.sample @@ -292,7 +292,7 @@ - + From 9c0f86e250a72f9913290e649e4207a87af23c3b Mon Sep 17 00:00:00 2001 From: Wolfgang Maier Date: Fri, 8 Mar 2024 23:39:40 +0100 Subject: [PATCH 04/15] Make new converter testable --- lib/galaxy/datatypes/converters/archive_to_directory.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/galaxy/datatypes/converters/archive_to_directory.xml b/lib/galaxy/datatypes/converters/archive_to_directory.xml index 9ccdba28a30e..d0c56c154c09 100644 --- a/lib/galaxy/datatypes/converters/archive_to_directory.xml +++ b/lib/galaxy/datatypes/converters/archive_to_directory.xml @@ -18,6 +18,9 @@ + + + @@ -25,6 +28,7 @@ + From f40138ac1f4f3f1895d6c4a5d64579814bd658d3 Mon Sep 17 00:00:00 2001 From: Wolfgang Maier Date: Fri, 8 Mar 2024 23:46:57 +0100 Subject: [PATCH 05/15] Add file_ext and missing type annotation --- lib/galaxy/datatypes/data.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py index 11c676f141ca..7c9fa7f80194 100644 --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -1212,12 +1212,14 @@ def regex_line_dataprovider( class Directory(Data): """Class representing a directory of files.""" + file_ext = "directory" + # The behavior of this class is intended to be similar to that of # composite types, but with arbitrary structure of extra_files. # A prioritized list of files in the directory that, if present, can serve # as a replacement for a composite type's primary dataset. - recognized_index_files = [] + recognized_index_files: List[str] = [] # Directories converted from archives, and possibly others, have a single # root folder inside the directory. The root_folder attribute lets tools From 7cc7efcedaaf95f9e7e169dda148fd472449f99c Mon Sep 17 00:00:00 2001 From: Wolfgang Maier Date: Fri, 8 Mar 2024 23:54:19 +0100 Subject: [PATCH 06/15] Fix linting --- lib/galaxy/datatypes/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py index 7c9fa7f80194..eb282fcfaa46 100644 --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -1315,7 +1315,7 @@ def display_data( filename = root_folder else: # No meaningful display available, show an info message instead - self._clean_and_set_mime_type(trans, "text/plain", headers) # type: ignore[arg-type] + self._clean_and_set_mime_type(trans, "text/plain", headers) # type: ignore[arg-type] if root_folder is None: return util.smart_str("Cannot generate preview with incomplete metadata. Resetting metadata may help.") elif self.recognized_index_files: From e8e852cf2913971ec3aa1abd0ae2bfa279163e8b Mon Sep 17 00:00:00 2001 From: Wolfgang Maier Date: Sat, 9 Mar 2024 00:05:09 +0100 Subject: [PATCH 07/15] Fix code formatting --- lib/galaxy/datatypes/data.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py index eb282fcfaa46..3212ddea409d 100644 --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -1298,17 +1298,12 @@ def display_data( if preview: root_folder = dataset.metadata.root_folder index_file = dataset.metadata.index_file - if not filename or filename == 'index': + if not filename or filename == "index": if root_folder is not None and index_file: # display the index file in lieu of the empty primary dataset file_path = os.path.join(dataset.extra_files_path, root_folder, index_file) self._clean_and_set_mime_type(trans, "text/plain", headers) # type: ignore[arg-type] - return self._yield_user_file_content( - trans, - dataset, - file_path, - headers - ), headers + return self._yield_user_file_content(trans, dataset, file_path, headers), headers elif root_folder: # delegate to the parent method, which knows how to display # a directory @@ -1317,9 +1312,13 @@ def display_data( # No meaningful display available, show an info message instead self._clean_and_set_mime_type(trans, "text/plain", headers) # type: ignore[arg-type] if root_folder is None: - return util.smart_str("Cannot generate preview with incomplete metadata. Resetting metadata may help.") + return util.smart_str( + "Cannot generate preview with incomplete metadata. Resetting metadata may help." + ) elif self.recognized_index_files: - return util.smart_str("None of the known key files for the datatype present. Is the datatype format set correctly?") + return util.smart_str( + "None of the known key files for the datatype present. Is the datatype format set correctly?" + ) else: return util.smart_str("No preview available for this dataset."), headers From 400e048c8bb665008dc21efb7be0872a037030e7 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Wed, 9 Oct 2024 10:32:42 +0200 Subject: [PATCH 08/15] Drop index-related metadata from Directory datatype This will go into a sub-datatype when needed. --- lib/galaxy/datatypes/data.py | 72 ------------------------------------ 1 file changed, 72 deletions(-) diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py index 3212ddea409d..c074bbdc4b1a 100644 --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -1217,10 +1217,6 @@ class Directory(Data): # The behavior of this class is intended to be similar to that of # composite types, but with arbitrary structure of extra_files. - # A prioritized list of files in the directory that, if present, can serve - # as a replacement for a composite type's primary dataset. - recognized_index_files: List[str] = [] - # Directories converted from archives, and possibly others, have a single # root folder inside the directory. The root_folder attribute lets tools # access that folder without first exploring the directory tree themselves. @@ -1234,17 +1230,6 @@ class Directory(Data): visible=False, ) - # Capture the actual index file obtained from the recognized_index_files - # list for a concrete directory. - MetadataElement( - name="index_file", - default=None, - desc="Name of the index file relative to the root folder", - readonly=True, - optional=False, - visible=False, - ) - def set_meta(self, dataset: DatasetProtocol, **kwd): efp = dataset.extra_files_path efp_items = os.listdir(efp) @@ -1253,12 +1238,6 @@ def set_meta(self, dataset: DatasetProtocol, **kwd): dataset.metadata.root_folder = root_folder_name else: dataset.metadata.root_folder = "" - index_file = None - for f in self.recognized_index_files: - if os.path.isfile(os.path.join(efp, root_folder_name, f)): - index_file = f - break - dataset.metadata.index_file = index_file def set_peek(self, dataset: DatasetProtocol, **kwd) -> None: if not dataset.dataset.purged: @@ -1278,57 +1257,6 @@ def _archive_main_file( error, msg, messagetype = False, "", "" return error, msg, messagetype - def display_data( - self, - trans, - dataset: DatasetHasHidProtocol, - preview: bool = False, - filename: Optional[str] = None, - to_ext: Optional[str] = None, - **kwd, - ): - headers = kwd.get("headers", {}) - # Prevent IE8 from sniffing content type since we're explicit about it. This prevents intentionally text/plain - # content from being rendered in the browser - headers["X-Content-Type-Options"] = "nosniff" - if to_ext: - # Download the directory structure as an archive - trans.log_event(f"Download directory for dataset id: {str(dataset.id)}") - return self._archive_composite_dataset(trans, dataset, headers, do_action=kwd.get("do_action", "zip")) - if preview: - root_folder = dataset.metadata.root_folder - index_file = dataset.metadata.index_file - if not filename or filename == "index": - if root_folder is not None and index_file: - # display the index file in lieu of the empty primary dataset - file_path = os.path.join(dataset.extra_files_path, root_folder, index_file) - self._clean_and_set_mime_type(trans, "text/plain", headers) # type: ignore[arg-type] - return self._yield_user_file_content(trans, dataset, file_path, headers), headers - elif root_folder: - # delegate to the parent method, which knows how to display - # a directory - filename = root_folder - else: - # No meaningful display available, show an info message instead - self._clean_and_set_mime_type(trans, "text/plain", headers) # type: ignore[arg-type] - if root_folder is None: - return util.smart_str( - "Cannot generate preview with incomplete metadata. Resetting metadata may help." - ) - elif self.recognized_index_files: - return util.smart_str( - "None of the known key files for the datatype present. Is the datatype format set correctly?" - ) - else: - return util.smart_str("No preview available for this dataset."), headers - - return super().display_data( - trans, - dataset=dataset, - filename=filename, - **kwd, - ) - class GenericAsn1(Text): """Class for generic ASN.1 text format""" From 50aa181be7b4d559513f9a4582db94c82916d974 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Thu, 10 Oct 2024 13:10:22 +0200 Subject: [PATCH 09/15] Drop root_folder metadata from Directory datatype This should probably go in sub-classes that expect specific directory structures. --- lib/galaxy/datatypes/data.py | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py index c074bbdc4b1a..255f145144ce 100644 --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -1217,31 +1217,9 @@ class Directory(Data): # The behavior of this class is intended to be similar to that of # composite types, but with arbitrary structure of extra_files. - # Directories converted from archives, and possibly others, have a single - # root folder inside the directory. The root_folder attribute lets tools - # access that folder without first exploring the directory tree themselves. - # Will be set to the empty string for flat directories. - MetadataElement( - name="root_folder", - default=None, - desc="Name of the root folder of the directory", - readonly=True, - optional=False, - visible=False, - ) - - def set_meta(self, dataset: DatasetProtocol, **kwd): - efp = dataset.extra_files_path - efp_items = os.listdir(efp) - root_folder_name = efp_items[0] - if len(efp_items) == 1 and os.path.isdir(os.path.join(efp, root_folder_name)): - dataset.metadata.root_folder = root_folder_name - else: - dataset.metadata.root_folder = "" - def set_peek(self, dataset: DatasetProtocol, **kwd) -> None: if not dataset.dataset.purged: - dataset.peek = f"{dataset.metadata.root_folder}" + dataset.peek = f"{dataset.extra_files_path}" dataset.blurb = nice_size(dataset.dataset.total_size) else: dataset.peek = "file does not exist" From ff346b2a4fb3cd554dc716e21fd0d4dd573322bf Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Thu, 10 Oct 2024 13:19:02 +0200 Subject: [PATCH 10/15] Do not set peek for directories --- lib/galaxy/datatypes/data.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py index 255f145144ce..27ed78e3aa61 100644 --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -1219,7 +1219,6 @@ class Directory(Data): def set_peek(self, dataset: DatasetProtocol, **kwd) -> None: if not dataset.dataset.purged: - dataset.peek = f"{dataset.extra_files_path}" dataset.blurb = nice_size(dataset.dataset.total_size) else: dataset.peek = "file does not exist" From b1e4313f729b7a6bebcf1605b6d60dc1857057ca Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Fri, 11 Oct 2024 10:42:07 +0200 Subject: [PATCH 11/15] Set output format to directory --- lib/galaxy/datatypes/converters/archive_to_directory.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/datatypes/converters/archive_to_directory.xml b/lib/galaxy/datatypes/converters/archive_to_directory.xml index d0c56c154c09..45d4a18f4562 100644 --- a/lib/galaxy/datatypes/converters/archive_to_directory.xml +++ b/lib/galaxy/datatypes/converters/archive_to_directory.xml @@ -23,7 +23,7 @@ - + From 4de9896c45bf4d25fa0a11250641a2c8871a108c Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Fri, 11 Oct 2024 12:24:45 +0200 Subject: [PATCH 12/15] Copy metadata to galaxy.json in archive_to_directory converter --- lib/galaxy/datatypes/converters/archive_to_directory.xml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/datatypes/converters/archive_to_directory.xml b/lib/galaxy/datatypes/converters/archive_to_directory.xml index 45d4a18f4562..b346b4a49b62 100644 --- a/lib/galaxy/datatypes/converters/archive_to_directory.xml +++ b/lib/galaxy/datatypes/converters/archive_to_directory.xml @@ -4,6 +4,7 @@ galaxy-util directory - - + + From bb3ae45725eb072fb1d8756fa8a7f04467e28a37 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Fri, 11 Oct 2024 12:59:30 +0200 Subject: [PATCH 13/15] Revert back to use provided_metadata_file --- lib/galaxy/datatypes/converters/archive_to_directory.xml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/galaxy/datatypes/converters/archive_to_directory.xml b/lib/galaxy/datatypes/converters/archive_to_directory.xml index b346b4a49b62..d0c56c154c09 100644 --- a/lib/galaxy/datatypes/converters/archive_to_directory.xml +++ b/lib/galaxy/datatypes/converters/archive_to_directory.xml @@ -4,7 +4,6 @@ galaxy-util directory - + From 6f0713bf27ec7d229ac27af8347139da8d8bbe1a Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:33:25 +0200 Subject: [PATCH 14/15] Remove set_peek method from Directory datatype --- lib/galaxy/datatypes/data.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py index 27ed78e3aa61..66a6d9caa18e 100644 --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -1214,16 +1214,6 @@ class Directory(Data): file_ext = "directory" - # The behavior of this class is intended to be similar to that of - # composite types, but with arbitrary structure of extra_files. - - def set_peek(self, dataset: DatasetProtocol, **kwd) -> None: - if not dataset.dataset.purged: - dataset.blurb = nice_size(dataset.dataset.total_size) - else: - dataset.peek = "file does not exist" - dataset.blurb = "file purged from disk" - def _archive_main_file( self, archive: ZipstreamWrapper, display_name: str, data_filename: str ) -> Tuple[bool, str, str]: From 0666fcffdbe30e7555692c66193f0f256f62979b Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Tue, 15 Oct 2024 11:10:18 +0200 Subject: [PATCH 15/15] Add roundtrip test for downloading directories Compressed (Upload) -> Directory (Unpack) -> Compressed (Download) --- lib/galaxy/datatypes/data.py | 1 + lib/galaxy_test/api/test_tools_upload.py | 40 ++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py index 66a6d9caa18e..e5fea2612ace 100644 --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -465,6 +465,7 @@ def _serve_file_download(self, headers, data, trans, to_ext, file_size, **kwd): composite_extensions = trans.app.datatypes_registry.get_composite_extensions() composite_extensions.append("html") # for archiving composite datatypes composite_extensions.append("data_manager_json") # for downloading bundles if bundled. + composite_extensions.append("directory") # for downloading directories. if data.extension in composite_extensions: return self._archive_composite_dataset(trans, data, headers, do_action=kwd.get("do_action", "zip")) diff --git a/lib/galaxy_test/api/test_tools_upload.py b/lib/galaxy_test/api/test_tools_upload.py index ed32bc92caf0..57ffece984dc 100644 --- a/lib/galaxy_test/api/test_tools_upload.py +++ b/lib/galaxy_test/api/test_tools_upload.py @@ -2,12 +2,14 @@ import os import urllib.parse from base64 import b64encode +from typing import cast import pytest from tusclient import client from galaxy.tool_util.verify.test_data import TestDataResolver from galaxy.util import UNKNOWN +from galaxy.util.compression_utils import decompress_bytes_to_directory from galaxy.util.unittest_utils import ( skip_if_github_down, skip_if_site_down, @@ -639,6 +641,44 @@ def test_upload_composite_from_bad_tar(self, history_id): details = self.dataset_populator.get_history_dataset_details(history_id, dataset=dataset, assert_ok=False) assert details["state"] == "error" + def test_upload_zip_directory_roundtrip(self, history_id): + testdir = TestDataResolver().get_filename("testdir1.zip") + with open(testdir, "rb") as fh: + details = self._upload_and_get_details(fh, api="fetch", history_id=history_id, ext="zip", assert_ok=True) + assert details["file_ext"] == "zip" + + # Convert/unpack zip to directory. + payload = { + "src": "hda", + "id": details["id"], + "source_type": "zip", + "target_type": "directory", + "history_id": history_id, + } + create_response = self._post("tools/CONVERTER_archive_to_directory/convert", data=payload) + self.dataset_populator.wait_for_job(create_response.json()["jobs"][0]["id"], assert_ok=True) + create_response.raise_for_status() + directory_dataset = create_response.json()["outputs"][0] + + # Download (compressed) directory and verify structure. + content = self.dataset_populator.get_history_dataset_content( + history_id, dataset=directory_dataset, to_ext="directory", type="bytes" + ) + dir_path = decompress_bytes_to_directory(cast(bytes, content)) + expected_directory_structure = { + "file1": "File", + "file2": "File", + "dir1": "Directory", + "dir1/file3": "File", + } + assert dir_path.endswith("testdir1") + for path, entry_class in expected_directory_structure.items(): + path = os.path.join(dir_path, path) + if entry_class == "Directory": + assert os.path.isdir(path) + else: + assert os.path.isfile(path) + def test_upload_dbkey(self): with self.dataset_populator.test_history() as history_id: payload = self.dataset_populator.upload_payload(history_id, "Test123", dbkey="hg19")