From 2af9797af1330a2c30f6e1205ffa0d35aeb75ae0 Mon Sep 17 00:00:00 2001 From: Michal Charemza Date: Mon, 27 May 2024 07:05:27 +0100 Subject: [PATCH 1/4] feat: type annotations now allow strict type checking This changes the internal tuple type to make the uncompressed size and crc_32 non optional ints which makes it possible to pass type checking without any ifs or casts. This is still slightly odd maybe because it means we hard code them to 0 when they're not used. A "better" structure might be to have some sort of more dynamic structure to avoid this - maybe multiple tuples or a more complex class. However, suspect this is still a small step forward since it allows strict type checking. --- .github/workflows/test.yml | 2 +- stream_zip.py | 71 +++++++++++++++++++++++++++----------- 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6dcb711..15d8a4b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -50,7 +50,7 @@ jobs: pip install ".[ci]" - name: "Run type checking" run: | - mypy stream_zip.py + mypy stream_zip.py --strict - name: "Run tests" run: | pytest --cov diff --git a/stream_zip.py b/stream_zip.py index 4acf1aa..099a73a 100644 --- a/stream_zip.py +++ b/stream_zip.py @@ -35,8 +35,8 @@ object, # Sentinel of the methods above object, # Sentinel of auto upgrade central directory or not _CompressObjGetter, # Function to get the zlib Compress object for - Optional[int], # The uncompressed size of the file if known - Optional[int], # The CRC32 of the file if known + int, # The uncompressed size of the file for NO_COMPRESSION_STREAMED_* types + int, # The CRC32 of the file for NO_COMPRESSION_STREAMED_* types ] # A "Method" is an instance of a class that has a _get function that returns a _MethodTuple @@ -46,15 +46,15 @@ def _get(self, offset: int, default_get_compressobj: _CompressObjGetter) -> _Met class _ZIP_64_TYPE(Method): def _get(self, offset: int, default_get_compressobj: _CompressObjGetter) -> _MethodTuple: - return _ZIP_64, _NO_AUTO_UPGRADE_CENTRAL_DIRECTORY, default_get_compressobj, None, None + return _ZIP_64, _NO_AUTO_UPGRADE_CENTRAL_DIRECTORY, default_get_compressobj, 0, 0 class _ZIP_32_TYPE(Method): def _get(self, offset: int, default_get_compressobj: _CompressObjGetter) -> _MethodTuple: - return _ZIP_32, _NO_AUTO_UPGRADE_CENTRAL_DIRECTORY, default_get_compressobj, None, None + return _ZIP_32, _NO_AUTO_UPGRADE_CENTRAL_DIRECTORY, default_get_compressobj, 0, 0 class _NO_COMPRESSION_32_TYPE(Method): def _get(self, offset: int, default_get_compressobj: _CompressObjGetter) -> _MethodTuple: - return _NO_COMPRESSION_BUFFERED_32, _NO_AUTO_UPGRADE_CENTRAL_DIRECTORY, default_get_compressobj, None, None + return _NO_COMPRESSION_BUFFERED_32, _NO_AUTO_UPGRADE_CENTRAL_DIRECTORY, default_get_compressobj, 0, 0 def __call__(self, uncompressed_size: int, crc_32: int, *args: Any, **kwarg: Any) -> Method: class _NO_COMPRESSION_32_TYPE_STREAMED_TYPE(Method): @@ -65,7 +65,7 @@ def _get(self, offset: int, default_get_compressobj: _CompressObjGetter) -> _Met class _NO_COMPRESSION_64_TYPE(Method): def _get(self, offset: int, default_get_compressobj: _CompressObjGetter) -> _MethodTuple: - return _NO_COMPRESSION_BUFFERED_64, _NO_AUTO_UPGRADE_CENTRAL_DIRECTORY, default_get_compressobj, None, None + return _NO_COMPRESSION_BUFFERED_64, _NO_AUTO_UPGRADE_CENTRAL_DIRECTORY, default_get_compressobj, 0, 0 def __call__(self, uncompressed_size: int, crc_32: int) -> Method: class _NO_COMPRESSION_64_TYPE_STREAMED_TYPE(Method): @@ -91,7 +91,7 @@ def __call__(self, uncompressed_size: int, level: int=9) -> Method: class _ZIP_AUTO_TYPE_INNER(Method): def _get(self, offset: int, default_get_compressobj: _CompressObjGetter) -> _MethodTuple: method = _ZIP_64 if uncompressed_size > 4293656841 or offset > 0xffffffff else _ZIP_32 - return (method, _AUTO_UPGRADE_CENTRAL_DIRECTORY, lambda: zlib.compressobj(level=level, memLevel=8, wbits=-zlib.MAX_WBITS), None, None) + return (method, _AUTO_UPGRADE_CENTRAL_DIRECTORY, lambda: zlib.compressobj(level=level, memLevel=8, wbits=-zlib.MAX_WBITS), 0, 0) return _ZIP_AUTO_TYPE_INNER() @@ -246,7 +246,12 @@ def _encrypt_aes(chunks: Generator[bytes, None, Any]) -> Generator[bytes, None, return get_return_value() return _encrypt_aes - def _zip_64_local_header_and_data(compression, aes_size_increase, aes_flags, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, crc_32_mask, _get_compress_obj, encryption_func, chunks) -> Generator[bytes, None, Any]: + def _zip_64_local_header_and_data( + compression: int, aes_size_increase: int, aes_flags: int, name_encoded: bytes, mod_at_ms_dos: bytes, + mod_at_unix_extra: bytes, aes_extra: bytes, external_attr: int, uncompressed_size: int, crc_32: int, + crc_32_mask: int, _get_compress_obj: _CompressObjGetter, encryption_func: Callable[[Generator[bytes, None, Any]], Generator[bytes, None, Any]], + chunks: Iterable[bytes], + ) -> Generator[bytes, None, Any]: file_offset = offset _raise_if_beyond(file_offset, maximum=0xffffffffffffffff, exception_class=OffsetOverflowError) @@ -313,7 +318,12 @@ def _zip_64_local_header_and_data(compression, aes_size_increase, aes_flags, nam 0xffffffff, # Offset of local header - since zip64 ), name_encoded, extra - def _zip_32_local_header_and_data(compression, aes_size_increase, aes_flags, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, crc_32_mask, _get_compress_obj, encryption_func, chunks) -> Generator[bytes, None, Any]: + def _zip_32_local_header_and_data( + compression: int, aes_size_increase: int, aes_flags: int, name_encoded: bytes, mod_at_ms_dos: bytes, + mod_at_unix_extra: bytes, aes_extra: bytes, external_attr: int, uncompressed_size: int, crc_32: int, + crc_32_mask: int, _get_compress_obj: _CompressObjGetter, encryption_func: Callable[[Generator[bytes, None, Any]], Generator[bytes, None, Any]], + chunks: Iterable[bytes], + ) -> Generator[bytes, None, Any]: file_offset = offset _raise_if_beyond(file_offset, maximum=0xffffffff, exception_class=OffsetOverflowError) @@ -368,7 +378,8 @@ def _zip_32_local_header_and_data(compression, aes_size_increase, aes_flags, nam file_offset, ), name_encoded, extra - def _zip_data(chunks, _get_compress_obj, max_uncompressed_size, max_compressed_size) -> Generator[bytes, None, Any]: + def _zip_data(chunks: Iterable[bytes], _get_compress_obj: _CompressObjGetter, + max_uncompressed_size: int, max_compressed_size: int) -> Generator[bytes, None, Any]: uncompressed_size = 0 compressed_size = 0 crc_32 = zlib.crc32(b'') @@ -395,7 +406,12 @@ def _zip_data(chunks, _get_compress_obj, max_uncompressed_size, max_compressed_s return uncompressed_size, compressed_size, crc_32 - def _no_compression_64_local_header_and_data(compression, aes_size_increase, aes_flags, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, crc_32_mask, _get_compress_obj, encryption_func, chunks) -> Generator[bytes, None, Any]: + def _no_compression_64_local_header_and_data( + compression: int, aes_size_increase: int, aes_flags: int, name_encoded: bytes, mod_at_ms_dos: bytes, + mod_at_unix_extra: bytes, aes_extra: bytes, external_attr: int, uncompressed_size: int, crc_32: int, + crc_32_mask: int, _get_compress_obj: _CompressObjGetter, encryption_func: Callable[[Generator[bytes, None, Any]], Generator[bytes, None, Any]], + chunks: Iterable[bytes], + ) -> Generator[bytes, None, Any]: file_offset = offset _raise_if_beyond(file_offset, maximum=0xffffffffffffffff, exception_class=OffsetOverflowError) @@ -427,7 +443,7 @@ def _no_compression_64_local_header_and_data(compression, aes_size_increase, aes yield from _(name_encoded) yield from _(extra) - yield from encryption_func(chunks) + yield from encryption_func((chunk for chunk in chunks)) extra = zip_64_central_directory_extra_struct.pack( zip_64_extra_signature, @@ -457,7 +473,12 @@ def _no_compression_64_local_header_and_data(compression, aes_size_increase, aes ), name_encoded, extra - def _no_compression_32_local_header_and_data(compression, aes_size_increase, aes_flags, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, crc_32_mask, _get_compress_obj, encryption_func, chunks) -> Generator[bytes, None, Any]: + def _no_compression_32_local_header_and_data( + compression: int, aes_size_increase: int, aes_flags: int, name_encoded: bytes, mod_at_ms_dos: bytes, + mod_at_unix_extra: bytes, aes_extra: bytes, external_attr: int, uncompressed_size: int, crc_32: int, + crc_32_mask: int, _get_compress_obj: _CompressObjGetter, encryption_func: Callable[[Generator[bytes, None, Any]], Generator[bytes, None, Any]], + chunks: Iterable[bytes], + ) -> Generator[bytes, None, Any]: file_offset = offset _raise_if_beyond(file_offset, maximum=0xffffffff, exception_class=OffsetOverflowError) @@ -484,7 +505,7 @@ def _no_compression_32_local_header_and_data(compression, aes_size_increase, aes yield from _(name_encoded) yield from _(extra) - yield from encryption_func(chunks) + yield from encryption_func((chunk for chunk in chunks)) return central_directory_header_struct.pack( 20, # Version made by @@ -513,7 +534,7 @@ def _no_compression_buffered_data_size_crc_32(chunks: Iterable[bytes], maximum_s size = 0 crc_32 = zlib.crc32(b'') - def _chunks() -> Iterable[bytes]: + def _chunks() -> Generator[bytes, None, Any]: nonlocal size, crc_32 for chunk in chunks: size += len(chunk) @@ -521,11 +542,16 @@ def _chunks() -> Iterable[bytes]: crc_32 = zlib.crc32(chunk, crc_32) yield chunk - chunks = tuple(_chunks()) + __chunks = tuple(_chunks()) - return chunks, size, crc_32 + return __chunks, size, crc_32 - def _no_compression_streamed_64_local_header_and_data(compression, aes_size_increase, aes_flags, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, crc_32_mask, _get_compress_obj, encryption_func, chunks) -> Generator[bytes, None, Any]: + def _no_compression_streamed_64_local_header_and_data( + compression: int, aes_size_increase: int, aes_flags: int, name_encoded: bytes, mod_at_ms_dos: bytes, + mod_at_unix_extra: bytes, aes_extra: bytes, external_attr: int, uncompressed_size: int, crc_32: int, + crc_32_mask: int, _get_compress_obj: _CompressObjGetter, encryption_func: Callable[[Generator[bytes, None, Any]], Generator[bytes, None, Any]], + chunks: Iterable[bytes], + ) -> Generator[bytes, None, Any]: file_offset = offset _raise_if_beyond(file_offset, maximum=0xffffffffffffffff, exception_class=OffsetOverflowError) @@ -585,7 +611,12 @@ def _no_compression_streamed_64_local_header_and_data(compression, aes_size_incr ), name_encoded, extra - def _no_compression_streamed_32_local_header_and_data(compression, aes_size_increase, aes_flags, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, crc_32_mask, _get_compress_obj, encryption_func, chunks) -> Generator[bytes, None, Any]: + def _no_compression_streamed_32_local_header_and_data( + compression: int, aes_size_increase: int, aes_flags: int, name_encoded: bytes, mod_at_ms_dos: bytes, + mod_at_unix_extra: bytes, aes_extra: bytes, external_attr: int, uncompressed_size: int, crc_32: int, + crc_32_mask: int, _get_compress_obj: _CompressObjGetter, encryption_func: Callable[[Generator[bytes, None, Any]], Generator[bytes, None, Any]], + chunks: Iterable[bytes], + ) -> Generator[bytes, None, Any]: file_offset = offset _raise_if_beyond(file_offset, maximum=0xffffffff, exception_class=OffsetOverflowError) @@ -632,7 +663,7 @@ def _no_compression_streamed_32_local_header_and_data(compression, aes_size_incr file_offset, ), name_encoded, extra - def _no_compression_streamed_data(chunks, uncompressed_size, crc_32, maximum_size) -> Generator[bytes, None, Any]: + def _no_compression_streamed_data(chunks: Iterable[bytes], uncompressed_size: int, crc_32: int, maximum_size: int) -> Generator[bytes, None, Any]: actual_crc_32 = zlib.crc32(b'') size = 0 for chunk in chunks: From 6fd4a18399cdf3a6c44d15947575eafebde5f9a9 Mon Sep 17 00:00:00 2001 From: Michal Charemza Date: Mon, 27 May 2024 07:19:14 +0100 Subject: [PATCH 2/4] refactor: slightly tighter internal types - avoiding an Any --- stream_zip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stream_zip.py b/stream_zip.py index 099a73a..2da2d0f 100644 --- a/stream_zip.py +++ b/stream_zip.py @@ -379,7 +379,7 @@ def _zip_32_local_header_and_data( ), name_encoded, extra def _zip_data(chunks: Iterable[bytes], _get_compress_obj: _CompressObjGetter, - max_uncompressed_size: int, max_compressed_size: int) -> Generator[bytes, None, Any]: + max_uncompressed_size: int, max_compressed_size: int) -> Generator[bytes, None, Tuple[int, int, int]]: uncompressed_size = 0 compressed_size = 0 crc_32 = zlib.crc32(b'') From 86d6f65ac078da663d7d4657b41f2d3514066a8b Mon Sep 17 00:00:00 2001 From: Michal Charemza Date: Mon, 27 May 2024 07:20:04 +0100 Subject: [PATCH 3/4] feat: remove unused args/kwargs in a method Technically a change in the API, but it was a mistake to have these in - they served no purpose. --- stream_zip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stream_zip.py b/stream_zip.py index 2da2d0f..11bc4ab 100644 --- a/stream_zip.py +++ b/stream_zip.py @@ -56,7 +56,7 @@ class _NO_COMPRESSION_32_TYPE(Method): def _get(self, offset: int, default_get_compressobj: _CompressObjGetter) -> _MethodTuple: return _NO_COMPRESSION_BUFFERED_32, _NO_AUTO_UPGRADE_CENTRAL_DIRECTORY, default_get_compressobj, 0, 0 - def __call__(self, uncompressed_size: int, crc_32: int, *args: Any, **kwarg: Any) -> Method: + def __call__(self, uncompressed_size: int, crc_32: int) -> Method: class _NO_COMPRESSION_32_TYPE_STREAMED_TYPE(Method): def _get(self, offset: int, default_get_compressobj: _CompressObjGetter) -> _MethodTuple: return _NO_COMPRESSION_STREAMED_32, _NO_AUTO_UPGRADE_CENTRAL_DIRECTORY, default_get_compressobj, uncompressed_size, crc_32 From 55ae0e1c3f1bb75206beb6896a3dd70376d8199b Mon Sep 17 00:00:00 2001 From: Michal Charemza Date: Mon, 27 May 2024 07:24:48 +0100 Subject: [PATCH 4/4] refactor: slightly tighter internal types - avoiding Any --- stream_zip.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/stream_zip.py b/stream_zip.py index 11bc4ab..0e83497 100644 --- a/stream_zip.py +++ b/stream_zip.py @@ -251,7 +251,7 @@ def _zip_64_local_header_and_data( mod_at_unix_extra: bytes, aes_extra: bytes, external_attr: int, uncompressed_size: int, crc_32: int, crc_32_mask: int, _get_compress_obj: _CompressObjGetter, encryption_func: Callable[[Generator[bytes, None, Any]], Generator[bytes, None, Any]], chunks: Iterable[bytes], - ) -> Generator[bytes, None, Any]: + ) -> Generator[bytes, None, Tuple[bytes, bytes, bytes]]: file_offset = offset _raise_if_beyond(file_offset, maximum=0xffffffffffffffff, exception_class=OffsetOverflowError) @@ -323,7 +323,7 @@ def _zip_32_local_header_and_data( mod_at_unix_extra: bytes, aes_extra: bytes, external_attr: int, uncompressed_size: int, crc_32: int, crc_32_mask: int, _get_compress_obj: _CompressObjGetter, encryption_func: Callable[[Generator[bytes, None, Any]], Generator[bytes, None, Any]], chunks: Iterable[bytes], - ) -> Generator[bytes, None, Any]: + ) -> Generator[bytes, None, Tuple[bytes, bytes, bytes]]: file_offset = offset _raise_if_beyond(file_offset, maximum=0xffffffff, exception_class=OffsetOverflowError) @@ -411,7 +411,7 @@ def _no_compression_64_local_header_and_data( mod_at_unix_extra: bytes, aes_extra: bytes, external_attr: int, uncompressed_size: int, crc_32: int, crc_32_mask: int, _get_compress_obj: _CompressObjGetter, encryption_func: Callable[[Generator[bytes, None, Any]], Generator[bytes, None, Any]], chunks: Iterable[bytes], - ) -> Generator[bytes, None, Any]: + ) -> Generator[bytes, None, Tuple[bytes, bytes, bytes]]: file_offset = offset _raise_if_beyond(file_offset, maximum=0xffffffffffffffff, exception_class=OffsetOverflowError) @@ -478,7 +478,7 @@ def _no_compression_32_local_header_and_data( mod_at_unix_extra: bytes, aes_extra: bytes, external_attr: int, uncompressed_size: int, crc_32: int, crc_32_mask: int, _get_compress_obj: _CompressObjGetter, encryption_func: Callable[[Generator[bytes, None, Any]], Generator[bytes, None, Any]], chunks: Iterable[bytes], - ) -> Generator[bytes, None, Any]: + ) -> Generator[bytes, None, Tuple[bytes, bytes, bytes]]: file_offset = offset _raise_if_beyond(file_offset, maximum=0xffffffff, exception_class=OffsetOverflowError) @@ -551,7 +551,7 @@ def _no_compression_streamed_64_local_header_and_data( mod_at_unix_extra: bytes, aes_extra: bytes, external_attr: int, uncompressed_size: int, crc_32: int, crc_32_mask: int, _get_compress_obj: _CompressObjGetter, encryption_func: Callable[[Generator[bytes, None, Any]], Generator[bytes, None, Any]], chunks: Iterable[bytes], - ) -> Generator[bytes, None, Any]: + ) -> Generator[bytes, None, Tuple[bytes, bytes, bytes]]: file_offset = offset _raise_if_beyond(file_offset, maximum=0xffffffffffffffff, exception_class=OffsetOverflowError)