diff --git a/CHANGELOG.md b/CHANGELOG.md index 0563afae..15e8b658 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Fallback to EOT when overflowing BOT. + ### Fixed +- Missing empty BOT when writing EOT. - Fixed accessing settings for PillowEncoder. ## [0.14.0] - 2023-11-29 diff --git a/tests/test_wsidicom_file_target.py b/tests/test_wsidicom_file_target.py index 99bd5678..abfbf4f7 100644 --- a/tests/test_wsidicom_file_target.py +++ b/tests/test_wsidicom_file_target.py @@ -19,6 +19,7 @@ from pydicom.uid import generate_uid from tests.conftest import WsiTestDefinitions from wsidicom import WsiDicom +from wsidicom.file.wsidicom_file_base import OffsetTableType from wsidicom.file.wsidicom_file_target import WsiDicomFileTarget from wsidicom.series.levels import Levels @@ -34,7 +35,9 @@ def test_save_levels( expected_levels_count = len(wsi.levels) # Act - with WsiDicomFileTarget(tmp_path, generate_uid, 1, 16, "bot") as target: + with WsiDicomFileTarget( + tmp_path, generate_uid, 1, 16, OffsetTableType.BASIC, False + ) as target: target.save_levels(wsi.levels) # Assert @@ -57,7 +60,9 @@ def test_save_levels_add_missing( levels_missing_smallest_levels = Levels(levels_larger_than_tile_size) # Act - with WsiDicomFileTarget(tmp_path, generate_uid, 1, 16, "bot", True) as target: + with WsiDicomFileTarget( + tmp_path, generate_uid, 1, 16, OffsetTableType.BASIC, True + ) as target: target.save_levels(levels_missing_smallest_levels) # Assert diff --git a/tests/test_wsidicom_file_writer.py b/tests/test_wsidicom_file_writer.py index 7a9f17c2..979fb613 100644 --- a/tests/test_wsidicom_file_writer.py +++ b/tests/test_wsidicom_file_writer.py @@ -503,7 +503,7 @@ def test_write_pixel_end( # Act with WsiDicomFileWriter.open(filepath, JPEGBaseline8Bit) as write_file: - write_file._write_pixel_data_end() + write_file._write_pixel_data_end_tag() # Assert with 
WsiDicomTestFile( @@ -659,7 +659,8 @@ def test_create_child( generate_uid, 1, 100, - "bot", + OffsetTableType.BASIC, + False, ) as target: target._save_and_open_level(source_level, wsi.pixel_spacing, 2) diff --git a/wsidicom/errors.py b/wsidicom/errors.py index 63a37908..817d1b4e 100644 --- a/wsidicom/errors.py +++ b/wsidicom/errors.py @@ -96,3 +96,7 @@ class WsiDicomNoResolutionError(Exception): class WsiDicomNotSupportedError(Exception): """Raised if opened instance is not supported.""" + + +class WsiDicomBotOverflow(Exception): + """Raised if image data is too large to fit into a basic offset table.""" diff --git a/wsidicom/file/wsidicom_file.py b/wsidicom/file/wsidicom_file.py index d114b628..35edd0d4 100644 --- a/wsidicom/file/wsidicom_file.py +++ b/wsidicom/file/wsidicom_file.py @@ -493,7 +493,10 @@ def _parse_pixel_data(self) -> Tuple[List[Tuple[int, int]], OffsetTableType]: ) if table_type == OffsetTableType.BASIC: table = self._read_bot() - elif table_type == OffsetTableType.EMPTY: + elif ( + table_type == OffsetTableType.EMPTY + or table_type == OffsetTableType.EXTENDED + ): self._read_bot_length() if table_type == OffsetTableType.NONE: diff --git a/wsidicom/file/wsidicom_file_base.py b/wsidicom/file/wsidicom_file_base.py index 15caed1b..c8f97003 100644 --- a/wsidicom/file/wsidicom_file_base.py +++ b/wsidicom/file/wsidicom_file_base.py @@ -29,9 +29,9 @@ class OffsetTableType(Enum): EXTENDED = "EOT" @classmethod - def from_string(cls, offset_table: Optional[str]) -> "OffsetTableType": + def from_string(cls, offset_table: str) -> "OffsetTableType": """Return OffsetTableType parsed from string.""" - if offset_table is None: + if offset_table == "none": return OffsetTableType.NONE if offset_table.strip().lower() == "empty": return OffsetTableType.EMPTY @@ -60,7 +60,8 @@ def __init__( owned: bool = False If the stream should be closed by this instance. 
""" - self._file = DicomFileLike(stream) + self._stream = stream + self._file = DicomFileLike(self._stream) self._filepath = filepath self._owned = owned self.__enter__() diff --git a/wsidicom/file/wsidicom_file_target.py b/wsidicom/file/wsidicom_file_target.py index 661b0f40..a8571936 100644 --- a/wsidicom/file/wsidicom_file_target.py +++ b/wsidicom/file/wsidicom_file_target.py @@ -40,8 +40,8 @@ def __init__( uid_generator: Callable[..., UID], workers: int, chunk_size: int, - offset_table: Optional[str], - add_missing_levels: bool = False, + offset_table: OffsetTableType, + add_missing_levels: bool, ): """ Create a WsiDicomFileTarget. @@ -57,14 +57,13 @@ def __init__( chunk_size: int Chunk size (number of tiles) to process at a time. Actual chunk size also depends on minimun_chunk_size from image_data. - offset_table: Optional[str] - Offset table to use, 'bot' basic offset table, 'eot' extended - offset table, None - no offset table. - add_missing_levels: bool = False + offset_table: OffsetTableType + Offset table to use. + add_missing_levels: bool If to add missing dyadic levels up to the single tile level. 
""" self._output_path = output_path - self._offset_table = OffsetTableType.from_string(offset_table) + self._offset_table = offset_table self._filepaths: List[Path] = [] self._opened_files: List[WsiDicomFile] = [] super().__init__(uid_generator, workers, chunk_size, add_missing_levels) @@ -135,7 +134,7 @@ def _save_group(self, group: Group, scale: int = 1) -> List[Path]: filepaths: List[Path] = [] for instances in self._group_instances_to_file(group): uid = self._uid_generator() - filepath = Path(self._output_path).joinpath(uid + ".dcm") + filepath = self._output_path.joinpath(uid + ".dcm") transfer_syntax = instances[0].image_data.transfer_syntax image_data_list = self._list_image_data(instances) focal_planes, optical_paths, tiled_size = self._get_frame_information( diff --git a/wsidicom/file/wsidicom_file_writer.py b/wsidicom/file/wsidicom_file_writer.py index a3c15787..44faeff8 100644 --- a/wsidicom/file/wsidicom_file_writer.py +++ b/wsidicom/file/wsidicom_file_writer.py @@ -13,7 +13,9 @@ # limitations under the License. +import os from datetime import datetime +from io import BytesIO from pathlib import Path from struct import pack from typing import ( @@ -25,14 +27,17 @@ Optional, Sequence, Tuple, + Union, ) from pydicom.dataset import Dataset, FileMetaDataset, validate_file_meta from pydicom.encaps import itemize_frame +from pydicom.filebase import DicomFileLike from pydicom.filewriter import write_dataset, write_file_meta_info from pydicom.tag import ItemTag, SequenceDelimiterTag, Tag from pydicom.uid import UID, UncompressedTransferSyntaxes +from wsidicom.errors import WsiDicomBotOverflow from wsidicom.file.wsidicom_file_base import OffsetTableType, WsiDicomFileBase from wsidicom.geometry import Point, Region, Size from wsidicom.instance import ImageData @@ -88,10 +93,7 @@ def open(cls, file: Path, transfer_syntax: UID) -> "WsiDicomFileWriter": WsiDicomFileWriter WsiDicomFileWriter for file. 
""" - stream = open( - file, - "w+b", - ) + stream = open(file, "w+b") return cls( stream, transfer_syntax.is_little_endian, @@ -156,6 +158,65 @@ def write( data, dataset.NumberOfFrames, workers, chunk_size, offset_table, scale ) + def copy_with_table( + self, + copy_from: Union[BytesIO, BinaryIO], + offset_table: OffsetTableType, + dataset_end: int, + pixels_end: int, + frame_positions: List[int], + ) -> List[int]: + """Copy dataset and pixel data from other file to this. + + Parameters + ---------- + copy_from: DicomFileLike + File to copy from. + offset_table: OffsetTableType + Offset table to use in new file. + dataset_end: int + Position of EOT or PixelData tag in copy_from. + pixels_end: int + End of PixelData in copy_from. + frame_positions: List[int] + List of frame positions in copy_from, relative to start of file. + + Returns + ---------- + List[int] + List of frame position relative to start of new file. + """ + # Copy dataset until EOT or PixelData tag + copy_from.seek(0) + self._file.write(copy_from.read(dataset_end)) + # Write new pixel data start + ( + new_dataset_end, + new_table_start, + new_pixels_start, + ) = self._write_encapsulated_pixel_data_start( + offset_table, len(frame_positions) + ) + # Copy pixel data + first_frame_position = frame_positions[0] + copy_from.seek(first_frame_position) + self._file.write(copy_from.read(pixels_end - first_frame_position)) + + # Adjust frame positions + frame_position_change = new_pixels_start - first_frame_position + new_frame_positions = [ + position + frame_position_change for position in frame_positions + ] + + # Write pixel data end and EOT or BOT if used. 
+ return self._write_encapsulated_pixel_data_end( + offset_table, + new_table_start, + new_pixels_start, + new_dataset_end, + new_frame_positions, + ) + def _write_encapsulated_pixel_data( self, data: Dict[Tuple[str, float], ImageData], @@ -187,23 +248,51 @@ def _write_encapsulated_pixel_data( List[int] List of frame position relative to start of file. """ - table_start, pixels_start = self._write_pixel_data_start( - number_of_frames, offset_table - ) - frame_positions: List[int] = [] - for (path, z), image_data in sorted(data.items()): - frame_positions += self._write_pixel_data( + ( + dataset_end, + table_start, + pixels_start, + ) = self._write_encapsulated_pixel_data_start(offset_table, number_of_frames) + frame_positions = [ + position + for (path, z), image_data in sorted(data.items()) + for position in self._write_pixel_data( image_data, True, z, path, workers, chunk_size, scale ) - pixels_end = self._file.tell() - self._write_pixel_data_end() + ] + return self._write_encapsulated_pixel_data_end( + offset_table, table_start, pixels_start, dataset_end, frame_positions + ) + + def _write_encapsulated_pixel_data_end( + self, + offset_table: OffsetTableType, + table_start: Optional[int], + pixels_start: int, + dataset_end: int, + frame_positions: List[int], + ): + last_frame_end = self._write_pixel_data_end_tag() if offset_table is not OffsetTableType.EMPTY: if table_start is None: raise ValueError("Table start should not be None") elif offset_table == OffsetTableType.EXTENDED: - self._write_eot(table_start, pixels_start, frame_positions, pixels_end) + self._write_eot( + table_start, pixels_start, frame_positions, last_frame_end + ) elif offset_table == OffsetTableType.BASIC: - self._write_bot(table_start, pixels_start, frame_positions) + try: + self._write_bot(table_start, pixels_start, frame_positions) + except WsiDicomBotOverflow as exception: + if self._owned: + frame_positions = self._rewrite_as_table( + OffsetTableType.EXTENDED, + dataset_end, + 
+ last_frame_end, + frame_positions, + ) + else: + raise exception return frame_positions def _write_unencapsulated_pixel_data( @@ -345,24 +434,27 @@ def _reserve_bot(self, number_of_frames: int) -> int: self._file.write_leUL(0) return table_start - def _write_pixel_data_start( - self, number_of_frames: int, offset_table: OffsetTableType - ) -> Tuple[Optional[int], int]: + def _write_encapsulated_pixel_data_start( + self, + offset_table: OffsetTableType, + number_of_frames: int, + ) -> Tuple[int, Optional[int], int]: """Write tags starting pixel data and reserves space for BOT or EOT. Parameters ---------- + offset_table: OffsetTableType + Offset table to use. number_of_frames: int Number of frames to reserve space for in BOT or EOT. - offset_table: Optional[str] = 'bot' - Offset table to use, 'bot' basic offset table, 'eot' extended - offset table, None - no offset table. Returns ---------- Tuple[Optional[int], int] - Start of table (BOT or EOT) and start of pixel data (after BOT). + End of dataset (EOT or PixelData tag), start of table (BOT or EOT) and + start of pixel data (after BOT). 
""" + dataset_end = self._file.tell() table_start: Optional[int] = None if offset_table == OffsetTableType.EXTENDED: table_start = self._reserve_eot(number_of_frames) @@ -372,13 +464,16 @@ def _write_pixel_data_start( if offset_table == OffsetTableType.BASIC: table_start = self._reserve_bot(number_of_frames) - elif offset_table == OffsetTableType.EMPTY: + elif ( + offset_table == OffsetTableType.EMPTY + or offset_table == OffsetTableType.EXTENDED + ): self._file.write_tag(ItemTag) self._file.write_leUL(0) pixel_data_start = self._file.tell() - return table_start, pixel_data_start + return dataset_end, table_start, pixel_data_start def _write_bot( self, bot_start: int, pixel_data_start: int, frame_positions: Sequence[int] @@ -399,7 +494,7 @@ def _write_bot( # Check that last BOT entry is not over 2^32 - 1 last_entry = frame_positions[-1] - pixel_data_start if last_entry > 2**32 - 1: - raise NotImplementedError( + raise WsiDicomBotOverflow( "Image data exceeds 2^32 - 1 bytes " "An extended offset table should be used" ) @@ -596,7 +691,55 @@ def _chunk_tile_points( ) return chunked_tile_points - def _write_pixel_data_end(self) -> None: + def _write_pixel_data_end_tag(self) -> int: """Writes tags ending pixel data.""" + last_frame_end = self._file.tell() self._file.write_tag(SequenceDelimiterTag) self._file.write_leUL(0) + return last_frame_end + + def _rewrite_as_table( + self, + offset_table: OffsetTableType, + dataset_end: int, + pixels_end: int, + frame_positions: List[int], + ) -> List[int]: + """Rewrite file as encapsulated with EOT. Closes current file and replaces + it with a new file. + + Parameters + ---------- + offset_table: OffsetTableType + Offset table to use in new file. + dataset_end: int + Position of EOT or PixelData tag in current file. + pixels_end: int + End of PixelData in current file. + frame_positions: List[int] + List of frame positions in current file, relative to start of file. 
+ + Returns + ---------- + List[int] + List of frame position relative to start of new file. + """ + if self._filepath is not None: + temp_file_path = self._filepath.with_suffix(".tmp") + buffer = open(temp_file_path, "w+b") + else: + temp_file_path = None + buffer = BytesIO() + with WsiDicomFileWriter(buffer, True, False, None, True) as writer: + frame_positions = writer.copy_with_table( + self._stream, + offset_table, + dataset_end, + pixels_end, + frame_positions, + ) + self._file.close() + if temp_file_path is not None and self._filepath is not None: + os.replace(temp_file_path, self._filepath) + self._file = DicomFileLike(buffer) + return frame_positions diff --git a/wsidicom/wsidicom.py b/wsidicom/wsidicom.py index 70a0f014..820e163f 100644 --- a/wsidicom/wsidicom.py +++ b/wsidicom/wsidicom.py @@ -36,6 +36,7 @@ WsiDicomOutOfBoundsError, ) from wsidicom.file import WsiDicomFileSource, WsiDicomFileTarget +from wsidicom.file.wsidicom_file_base import OffsetTableType from wsidicom.geometry import Point, PointMm, Region, RegionMm, Size, SizeMm from wsidicom.graphical_annotations import AnnotationInstance from wsidicom.instance import WsiDataset, WsiInstance @@ -589,29 +590,32 @@ def save( uid_generator: Callable[..., UID] = generate_uid, workers: Optional[int] = None, chunk_size: Optional[int] = None, - offset_table: Optional[str] = "bot", + offset_table: Union["str", OffsetTableType] = OffsetTableType.BASIC, add_missing_levels: bool = False, ) -> List[Path]: """ Save wsi as DICOM-files in path. Instances for the same pyramid level will be combined when possible to one file (e.g. not split for optical paths or focal planes). If instances are sparse tiled they - will be converted to full tiled by inserting blank tiles. The PixelData - will contain a basic offset table. All instance uids will be changed. + will be converted to full tiled by inserting blank tiles. All instance uids will + be changed. 
Parameters ---------- output_path: Union[str, Path] + Output folder to write files to. Should preferably be a dedicated folder + for the wsi. uid_generator: Callable[..., UID] = pydicom.uid.generate_uid - Function that can generate unique identifiers. + Function that can generate unique identifiers. workers: Optional[int] = None Maximum number of thread workers to use. chunk_size: Optional[int] = None Chunk size (number of tiles) to process at a time. Actual chunk size also depends on minimun_chunk_size from image_data. - offset_table: Optional[str] = 'bot' + offset_table: Union[str, OffsetTableType] = OffsetTableType.BASIC Offset table to use, 'bot' basic offset table, 'eot' extended - offset table, None - no offset table. + offset table, 'empty' - no offset table. Only use 'none' for + non-encapsulated transfer syntaxes. add_missing_levels: bool = False If to add missing dyadic levels up to the single tile level. @@ -630,6 +634,9 @@ def save( chunk_size = 16 if isinstance(output_path, str): output_path = Path(output_path) + os.makedirs(output_path, exist_ok=True) + if not isinstance(offset_table, OffsetTableType): + offset_table = OffsetTableType.from_string(offset_table) with WsiDicomFileTarget( output_path, uid_generator,