Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend image metadata #18951

Open
wants to merge 25 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
a65b72f
Add tests for `axes` metadata
kostrykin Oct 8, 2024
8c9ba38
Reduce boilerplate code in tests
kostrykin Oct 8, 2024
29dc831
Add `dtype` metadata and tests
kostrykin Oct 8, 2024
d350cca
Add `num_unique_values` metadata and tests
kostrykin Oct 8, 2024
7fa6796
Add `width` and `height` metadata and tests
kostrykin Oct 8, 2024
2414039
Add `channels` metadata and tests
kostrykin Oct 8, 2024
ac29d17
Add `depth` and `frames` metadata and tests
kostrykin Oct 8, 2024
4386580
Fix mypy check
kostrykin Oct 8, 2024
e030464
Fix support for TIFF files with unsupported compression formats
kostrykin Oct 8, 2024
de7b37b
Fix black linting
kostrykin Oct 8, 2024
f9251ee
Add support for TIFF files with multiple series
kostrykin Oct 8, 2024
119dc78
Fix black linting
kostrykin Oct 8, 2024
669e3db
Add type hint for mypy
kostrykin Oct 8, 2024
0f7f64c
Fix tests
kostrykin Oct 9, 2024
48b1808
Rename `series` -> `page`
kostrykin Oct 9, 2024
6117398
Add test for empty TIFF file (no metadata available)
kostrykin Oct 9, 2024
685f653
Add test for corrupted TIFF file and fix metadata extraction for that…
kostrykin Oct 9, 2024
39a726b
Fix linting
kostrykin Oct 9, 2024
4510f27
Fix linting
kostrykin Oct 9, 2024
b199061
Fix linting
kostrykin Oct 9, 2024
f2c874f
Fix linting
kostrykin Oct 9, 2024
1e6701f
Reduce utilization of full image data
kostrykin Oct 9, 2024
6459225
`make format`
kostrykin Oct 9, 2024
f3e20d9
Fix for corrupted TIF images
kostrykin Oct 10, 2024
8ea2ef3
Merge remote-tracking branch 'upstream/dev' into image-metadata/dev
kostrykin Oct 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 152 additions & 1 deletion lib/galaxy/datatypes/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,23 @@
import base64
import json
import logging
from typing import Optional
from typing import (
Any,
Dict,
List,
Optional,
)

import mrcfile
import numpy as np
import tifffile

try:
import PIL
import PIL.Image
except ImportError:
PIL = None # type: ignore[assignment, unused-ignore]

from galaxy.datatypes.binary import Binary
from galaxy.datatypes.metadata import (
FileParameter,
Expand Down Expand Up @@ -50,6 +61,70 @@ class Image(data.Data):
edam_format = "format_3547"
file_ext = ""

# Descriptive image metadata. Every element below is declared read-only, visible and optional.
# The values are written by the `set_meta` implementations in this file; the TIFF implementation
# stores one value per series, joined by commas.

# Axes string of the image data, e.g. "YX" or "YXC".
MetadataElement(
name="axes",
desc="Axes of the image data",
readonly=True,
visible=True,
optional=True,
)

# Name of the pixel data type, e.g. "uint8".
MetadataElement(
name="dtype",
desc="Data type of the image pixels or voxels",
readonly=True,
visible=True,
optional=True,
)

# Count of distinct values found in the pixel data.
MetadataElement(
name="num_unique_values",
desc="Number of unique values in the image data (e.g., should be 2 for binary images)",
readonly=True,
visible=True,
optional=True,
)

# Size of the image along the X axis.
MetadataElement(
name="width",
desc="Width of the image (in pixels)",
readonly=True,
visible=True,
optional=True,
)

# Size of the image along the Y axis.
MetadataElement(
name="height",
desc="Height of the image (in pixels)",
readonly=True,
visible=True,
optional=True,
)

# Size of the channel axis; `set_meta` stores 0 when the image has no channel axis.
MetadataElement(
name="channels",
desc="Number of channels of the image",
readonly=True,
visible=True,
optional=True,
)

# Size of the Z axis; `set_meta` stores 0 when the image has no Z axis.
MetadataElement(
name="depth",
desc="Depth of the image (number of slices)",
readonly=True,
visible=True,
optional=True,
)

# Size of the T axis; `set_meta` stores 0 when the image has no T axis.
MetadataElement(
name="frames",
desc="Number of frames in the image sequence (number of time steps)",
readonly=True,
visible=True,
optional=True,
)

def __init__(self, **kwd):
super().__init__(**kwd)
self.image_formats = [self.file_ext.upper()]
Expand All @@ -73,6 +148,33 @@ def handle_dataset_as_image(self, hda: DatasetProtocol) -> str:
base64_image_data = base64.b64encode(f.read()).decode("utf-8")
return f"![{name}](data:image/{self.file_ext};base64,{base64_image_data})"

def set_meta(
    self, dataset: DatasetProtocol, overwrite: bool = True, metadata_tmp_files_dir: Optional[str] = None, **kwd
) -> None:
    """
    Try to populate the metadata of the image using a generic image loading library (pillow), if available.

    If an image has two axes, they are assumed to be ``YX``. If an image has three axes, they are assumed to be ``YXC``.

    All values are written to ``dataset.metadata`` as strings. ``depth`` and ``frames`` are always ``"0"``, and
    ``channels`` is ``"0"`` for images with two axes. Nothing is written if pillow is not installed or if it
    cannot identify the file.
    """
    if PIL is None:
        return

    try:
        with PIL.Image.open(dataset.get_file_name()) as im:
            # Decode the pixel data exactly once; `np.array` copies, so the array stays valid after the
            # image file is closed and every value below is derived from it.
            im_arr = np.array(im)
    except PIL.UnidentifiedImageError:
        # Not a file that pillow recognizes as an image: leave the metadata untouched.
        return

    dataset.metadata.dtype = str(im_arr.dtype)
    dataset.metadata.num_unique_values = str(len(np.unique(im_arr)))
    dataset.metadata.width = str(im_arr.shape[1])
    dataset.metadata.height = str(im_arr.shape[0])
    # A generic image is treated as a single 2-D plane, hence no Z and no T axis.
    dataset.metadata.depth = "0"
    dataset.metadata.frames = "0"
    if im_arr.ndim == 2:
        dataset.metadata.axes = "YX"
        dataset.metadata.channels = "0"
    elif im_arr.ndim == 3:
        dataset.metadata.axes = "YXC"
        dataset.metadata.channels = str(im_arr.shape[2])


class Jpg(Image):
edam_format = "format_3579"
Expand Down Expand Up @@ -104,6 +206,9 @@ class Tiff(Image):
def set_meta(
self, dataset: DatasetProtocol, overwrite: bool = True, metadata_tmp_files_dir: Optional[str] = None, **kwd
) -> None:
"""
Populate the metadata of the TIFF image using the tifffile library.
"""
spec_key = "offsets"
offsets_file = dataset.metadata.offsets
if not offsets_file:
Expand All @@ -112,10 +217,56 @@ def set_meta(
)
with tifffile.TiffFile(dataset.get_file_name()) as tif:
offsets = [page.offset for page in tif.pages]

# Aggregate a list of values for each metadata field (one value for each series of the TIFF file)
metadata: Dict[str, List[Any]] = {
key: []
for key in [
"axes",
"dtype",
"width",
"height",
"channels",
"depth",
"frames",
"num_unique_values",
]
}
for series in tif.series:

# Determine the metadata values that should be generally available
metadata["axes"].append(series.axes.upper())
metadata["dtype"].append(series.dtype)

# Determine the metadata values that require reading the image data
try:
im_arr = series.asarray()
except ValueError: # Occurs if the compression of the TIFF file is unsupported
im_arr = None
if im_arr is not None:
axes = metadata["axes"][-1].replace("S", "C")
metadata["width"].append(Tiff._get_axis_size(im_arr, axes, "X"))
metadata["height"].append(Tiff._get_axis_size(im_arr, axes, "Y"))
metadata["channels"].append(Tiff._get_axis_size(im_arr, axes, "C"))
metadata["depth"].append(Tiff._get_axis_size(im_arr, axes, "Z"))
metadata["frames"].append(Tiff._get_axis_size(im_arr, axes, "T"))
metadata["num_unique_values"].append(len(np.unique(im_arr)))

# Populate the metadata fields based on the values determined above
for key, values in metadata.items():
if len(values) > 0:
setattr(dataset.metadata, key, ",".join(str(value) for value in values))

# Populate the "offsets" file and metadata field
with open(offsets_file.get_file_name(), "w") as f:
json.dump(offsets, f)
dataset.metadata.offsets = offsets_file

@staticmethod
def _get_axis_size(im_arr: "np.typing.NDArray", axes: str, axis: str) -> int:
    """
    Return the extent of `im_arr` along the axis labelled `axis`.

    `axes` is the axes string of the array (one label per dimension, e.g. ``"ZYX"``). The position of
    `axis` within `axes` selects the entry of ``im_arr.shape`` that is returned. If `axis` does not
    occur in `axes`, the result is 0.
    """
    if axis not in axes:
        return 0
    position = axes.index(axis)
    return im_arr.shape[position]


class OMETiff(Tiff):
file_ext = "ome.tiff"
Expand Down
Binary file added test-data/im9_multiseries.tif
Binary file not shown.
1 change: 1 addition & 0 deletions test/functional/tools/sample_tool_conf.xml
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@
<tool file="top_level_data.xml" />
<tool file="validation_hdf5.xml" />
<tool file="validation_image.xml"/>
<tool file="validation_image_metadata.xml"/>
<tool file="validation_zip.xml" />
<tool file="validation_tar.xml" />
<tool file="validation_tar_gz.xml" />
Expand Down
154 changes: 154 additions & 0 deletions test/functional/tools/validation_image_metadata.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
<tool id="validation_image_metadata" name="validation_image_metadata" version="1.0">
<macros>
<!-- Tests are not executed if they have no output checks, thus define a dummy check -->
<xml name="test-output">
<output name="output">
<assert_contents>
<has_size value="0" delta="0" />
</assert_contents>
</output>
</xml>
</macros>
<command><![CDATA[
cat /dev/null > testfile
]]></command>
<inputs>
<!-- Checks for "axes" metadata -->
<param name="input_axes_yx" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="axes" value="YX" />
bgruening marked this conversation as resolved.
Show resolved Hide resolved
</param>
<param name="input_axes_yxc" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="axes" value="YXC" />
</param>
<param name="input_axes_zcyx" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="axes" value="ZCYX" />
</param>
<!-- Checks for "dtype" metadata -->
<param name="input_dtype_uint8" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="dtype" value="uint8" />
</param>
<param name="input_dtype_uint16" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="dtype" value="uint16" />
</param>
<param name="input_dtype_float64" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="dtype" value="float64" />
</param>
<!-- Checks for "num_unique_values" metadata -->
<param name="input_num_unique_values_1" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="num_unique_values" value="1" />
</param>
<param name="input_num_unique_values_2" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="num_unique_values" value="2" />
</param>
<param name="input_num_unique_values_618" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="num_unique_values" value="618" />
</param>
<!-- Checks for "width" metadata -->
<param name="input_width_16" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="width" value="16" />
</param>
<param name="input_width_32" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="width" value="32" />
</param>
<!-- Checks for "height" metadata -->
<param name="input_height_8" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="height" value="8" />
</param>
<param name="input_height_32" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="height" value="32" />
</param>
<!-- Checks for "channels" metadata -->
<param name="input_channels_0" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="channels" value="0" />
</param>
<param name="input_channels_2" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="channels" value="2" />
</param>
<param name="input_channels_3" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="channels" value="3" />
</param>
<!-- Checks for "depth" metadata -->
<param name="input_depth_0" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="depth" value="0" />
</param>
<param name="input_depth_25" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="depth" value="25" />
</param>
<!-- Checks for "frames" metadata -->
<param name="input_frames_0" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="frames" value="0" />
</param>
<param name="input_frames_5" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="frames" value="5" />
</param>
<!-- Checks for TIFF files with multiple series -->
<param name="input_multi_series_tiff" type="data" format="data" optional="true">
<validator type="dataset_metadata_equal" metadata_name="axes" value="YXS,YX" />
<validator type="dataset_metadata_equal" metadata_name="dtype" value="uint8,uint16" />
<validator type="dataset_metadata_equal" metadata_name="num_unique_values" value="2,255" />
<validator type="dataset_metadata_equal" metadata_name="width" value="32,256" />
<validator type="dataset_metadata_equal" metadata_name="height" value="32,256" />
<validator type="dataset_metadata_equal" metadata_name="channels" value="3,0" />
<validator type="dataset_metadata_equal" metadata_name="depth" value="0,0" />
<validator type="dataset_metadata_equal" metadata_name="frames" value="0,0" />
</param>
<!-- Checks for unsupported file formats -->
<param name="input_unsupported_tiff_compression" type="data" format="data" optional="true">
<!-- If the compression of a TIFF is unsupported, the fields "axes" and "dtype" should still be there -->
<validator type="dataset_metadata_equal" metadata_name="axes" value="YX" />
<validator type="dataset_metadata_equal" metadata_name="dtype" value="bool" />
<!-- The other fields should be missing -->
<validator type="dataset_metadata_equal" metadata_name="num_unique_values" value="" />
<validator type="dataset_metadata_equal" metadata_name="width" value="" />
<validator type="dataset_metadata_equal" metadata_name="height" value="" />
<validator type="dataset_metadata_equal" metadata_name="channels" value="" />
<validator type="dataset_metadata_equal" metadata_name="depth" value="" />
<validator type="dataset_metadata_equal" metadata_name="frames" value="" />
</param>
</inputs>
<outputs>
<data name="output" format="data" />
</outputs>
<tests>
<!-- Tests with TIFF files -->
<test>
<param name="input_axes_yx" value="im1_uint8.tif" />
<param name="input_axes_zcyx" value="im6_uint8.tif" />
<param name="input_dtype_uint8" value="im6_uint8.tif" />
<param name="input_dtype_uint16" value="im8_uint16.tif" />
<param name="input_dtype_float64" value="im4_float.tif" />
<param name="input_num_unique_values_2" value="im3_b.tif" />
<param name="input_num_unique_values_618" value="im4_float.tif" />
<param name="input_width_16" value="im7_uint8.tif" /><!-- axes: ZYX -->
<param name="input_width_32" value="im3_b.tif" /><!-- axes: YXS -->
<param name="input_height_8" value="im7_uint8.tif" /><!-- axes: ZYX -->
<param name="input_height_32" value="im3_b.tif" /><!-- axes: YXS -->
<param name="input_channels_0" value="im1_uint8.tif" />
<param name="input_channels_2" value="im5_uint8.tif" /><!-- axes: CYX -->
<param name="input_channels_3" value="im3_b.tif" /><!-- axes: YXS -->
<param name="input_depth_0" value="im1_uint8.tif" /><!-- axes: YX -->
<param name="input_depth_25" value="im7_uint8.tif" /><!-- axes: ZYX -->
<param name="input_frames_0" value="im1_uint8.tif" /><!-- axes: YX -->
<param name="input_frames_5" value="im8_uint16.tif" /><!-- axes: TYX -->
<param name="input_multi_series_tiff" value="im9_multiseries.tif" />
<param name="input_unsupported_tiff_compression" value="1.tiff" />
<expand macro="test-output" />
</test>
<!-- Tests with PNG files -->
<test>
<param name="input_axes_yx" value="im1_uint8.png" />
<param name="input_axes_yxc" value="im3_a.png" />
<param name="input_dtype_uint8" value="im1_uint8.png" />
<param name="input_num_unique_values_1" value="im2_a.png" />
<param name="input_num_unique_values_2" value="im2_b.png" />
<param name="input_width_32" value="im2_b.png" />
<param name="input_height_32" value="im2_b.png" />
<param name="input_channels_0" value="im1_uint8.png" />
<param name="input_channels_3" value="im3_a.png" />
<param name="input_depth_0" value="im1_uint8.png" />
<param name="input_frames_0" value="im1_uint8.png" />
<expand macro="test-output" />
</test>
<!-- End of tests -->
</tests>
</tool>
Loading