Skip to content

Commit

Permalink
support optional crc32 for uncompressed streaming zip32 and zip64:
Browse files Browse the repository at this point in the history
- if the crc32 value passed in is 0, treat it as unknown: skip the crc32 check and
  append a data descriptor record carrying the actual crc32 and the sizes
  • Loading branch information
ikreymer committed Jul 25, 2024
1 parent e60ec13 commit b4dd0f3
Showing 1 changed file with 35 additions and 5 deletions.
40 changes: 35 additions & 5 deletions stream_zip/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import asyncio
import secrets
import zlib
from typing import Any, Iterable, Generator, Tuple, Optional, Deque, Type, AsyncIterable, Callable
from typing import Any, Iterable, Generator, Tuple, Optional, Deque, Type, AsyncIterable, Callable, Iterator

from Crypto.Cipher import AES
from Crypto.Hash import HMAC, SHA1
Expand Down Expand Up @@ -97,6 +97,10 @@ def _get(self, offset: int, default_get_compressobj: _CompressObjGetter) -> _Met
return _ZIP_AUTO_TYPE_INNER()


class CRCActual:
    """Mutable holder for the CRC-32 computed while a member's data is streamed.

    An instance is handed to ``_no_compression_streamed_data`` as its
    ``actual`` argument. That generator assigns the checksum it computed to
    ``crc_32`` after the chunks have been consumed and its integrity checks
    have passed; until then the attribute keeps its initial value of 0.
    """

    def __init__(self) -> None:
        # 0 is the "nothing recorded yet" starting value; it is overwritten
        # by the streaming generator once it has finished.
        self.crc_32: int = 0

###############################
# Public sentinel objects/types

Expand Down Expand Up @@ -566,6 +570,9 @@ def _no_compression_streamed_64_local_header_and_data(
compressed_size,
) + mod_at_unix_extra + aes_extra
flags = aes_flags | utf8_flag
if crc_32 == 0:
flags |= data_descriptor_flag

masked_crc_32 = crc_32 & crc_32_mask

yield from _(local_header_signature)
Expand All @@ -583,7 +590,15 @@ def _no_compression_streamed_64_local_header_and_data(
yield from _(name_encoded)
yield from _(extra)

yield from encryption_func(_no_compression_streamed_data(chunks, uncompressed_size, crc_32, 0xffffffffffffffff))
actual = CRCActual()

yield from encryption_func(_no_compression_streamed_data(chunks, uncompressed_size, crc_32, 0xffffffffffffffff, actual))

if crc_32 == 0:
masked_crc_32 = actual.crc_32 & crc_32_mask

yield from _(data_descriptor_signature)
yield from _(data_descriptor_zip_64_struct.pack(masked_crc_32, compressed_size, uncompressed_size))

extra = zip_64_central_directory_extra_struct.pack(
zip_64_extra_signature,
Expand Down Expand Up @@ -626,6 +641,9 @@ def _no_compression_streamed_32_local_header_and_data(
compressed_size = uncompressed_size + aes_size_increase
extra = mod_at_unix_extra + aes_extra
flags = aes_flags | utf8_flag
if crc_32 == 0:
flags |= data_descriptor_flag

masked_crc_32 = crc_32 & crc_32_mask

yield from _(local_header_signature)
Expand All @@ -643,7 +661,16 @@ def _no_compression_streamed_32_local_header_and_data(
yield from _(name_encoded)
yield from _(extra)

yield from encryption_func(_no_compression_streamed_data(chunks, uncompressed_size, crc_32, 0xffffffff))
actual = CRCActual()

yield from encryption_func(_no_compression_streamed_data(chunks, uncompressed_size, crc_32, 0xffffffff, actual))

if crc_32 == 0:
masked_crc_32 = actual.crc_32 & crc_32_mask

yield from _(data_descriptor_signature)
yield from _(data_descriptor_zip_64_struct.pack(masked_crc_32, compressed_size, uncompressed_size))


return central_directory_header_struct.pack(
20, # Version made by
Expand All @@ -665,7 +692,7 @@ def _no_compression_streamed_32_local_header_and_data(
file_offset,
), name_encoded, extra

def _no_compression_streamed_data(chunks: Iterable[bytes], uncompressed_size: int, crc_32: int, maximum_size: int) -> Generator[bytes, None, Any]:
def _no_compression_streamed_data(chunks: Iterable[bytes], uncompressed_size: int, crc_32: int, maximum_size: int, actual: CRCActual) -> Generator[bytes, None, Any]:
actual_crc_32 = zlib.crc32(b'')
size = 0
for chunk in chunks:
Expand All @@ -674,12 +701,15 @@ def _no_compression_streamed_data(chunks: Iterable[bytes], uncompressed_size: in
_raise_if_beyond(size, maximum=maximum_size, exception_class=UncompressedSizeOverflowError)
yield chunk

if actual_crc_32 != crc_32:
# if crc_32 is 0, ignore and provide actual value
if actual_crc_32 != crc_32 and crc_32 != 0:
raise CRC32IntegrityError()

if size != uncompressed_size:
raise UncompressedSizeIntegrityError()

actual.crc_32 = actual_crc_32

for name, modified_at, mode, method, chunks in files:
_method, _auto_upgrade_central_directory, _get_compress_obj, uncompressed_size, crc_32 = method._get(offset, get_compressobj)

Expand Down

0 comments on commit b4dd0f3

Please sign in to comment.