Skip to content

Commit

Permalink
refactor: data func to accept compression
Browse files Browse the repository at this point in the history
  • Loading branch information
michalc committed Jan 3, 2024
1 parent d7710bb commit e5de07c
Showing 1 changed file with 61 additions and 26 deletions.
87 changes: 61 additions & 26 deletions stream_zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,42 @@ def _raise_if_beyond(offset, maximum, exception_class):
def _no_encryption(chunks):
return (yield from chunks)

def _zip_64_local_header_and_data(name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, encryption_func, chunks):
def _aes_encrypted(chunks):
    """Yield an AES-CTR encrypted, HMAC-SHA1 authenticated form of ``chunks``.

    The output stream is, in order:
      1. a fresh 16-byte random salt,
      2. the last 2 bytes of the PBKDF2-derived key material (the
         password verification value),
      3. each input chunk encrypted with AES in CTR mode,
      4. the first 10 bytes of the HMAC-SHA1 digest computed over the
         encrypted chunks.

    ``password`` is not a parameter; it is read from the surrounding scope.
    The generator's own return value is whatever ``chunks`` returned.
    """
    key_length = 32
    salt_length = 16
    password_verification_length = 2

    salt = secrets.token_bytes(salt_length)
    yield salt

    # One PBKDF2 call derives the AES key, the HMAC key and the verification bytes
    derived = PBKDF2(password, salt, 2 * key_length + password_verification_length, 1000)
    aes_key = derived[:key_length]
    mac_key = derived[key_length:key_length*2]
    yield derived[-password_verification_length:]

    cipher = AES.new(
        aes_key, AES.MODE_CTR,
        counter=Counter.new(nbits=128, little_endian=True),
    )
    mac = HMAC.new(mac_key, digestmod=SHA1)

    # Generators can have a "return value", and ours must be the one that chunks has.
    # Iterating chunks directly in a for loop would discard it, so a small wrapping
    # generator delegates with "yield from" and stashes the result in a nonlocal
    # while the chunks themselves are still handed to the loop below one at a time
    chunks_result = None
    def capturing_result():
        nonlocal chunks_result
        chunks_result = yield from chunks

    for plain in capturing_result():
        encrypted = cipher.encrypt(plain)
        # The MAC is fed the encrypted bytes, not the plain input
        mac.update(encrypted)
        yield encrypted

    # Only the first 10 bytes of the digest are emitted
    yield mac.digest()[:10]

    return chunks_result

def _zip_64_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, encryption_func, chunks):
file_offset = offset

_raise_if_beyond(file_offset, maximum=0xffffffffffffffff, exception_class=OffsetOverflowError)
Expand All @@ -160,7 +195,7 @@ def _zip_64_local_header_and_data(name_encoded, mod_at_ms_dos, mod_at_unix_extra
yield from _(local_header_struct.pack(
45, # Version
flags,
8, # Compression - deflate
compression,
mod_at_ms_dos,
0, # CRC32 - 0 since data descriptor
0xffffffff, # Compressed size - since zip64
Expand Down Expand Up @@ -194,7 +229,7 @@ def _zip_64_local_header_and_data(name_encoded, mod_at_ms_dos, mod_at_unix_extra
45, # Version required
0, # Reserved
flags,
8, # Compression - deflate
compression,
mod_at_ms_dos,
crc_32,
0xffffffff, # Compressed size - since zip64
Expand All @@ -208,7 +243,7 @@ def _zip_64_local_header_and_data(name_encoded, mod_at_ms_dos, mod_at_unix_extra
0xffffffff, # Offset of local header - since zip64
), name_encoded, extra

def _zip_32_local_header_and_data(name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, encryption_func, chunks):
def _zip_32_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, encryption_func, chunks):
file_offset = offset

_raise_if_beyond(file_offset, maximum=0xffffffff, exception_class=OffsetOverflowError)
Expand All @@ -224,7 +259,7 @@ def _zip_32_local_header_and_data(name_encoded, mod_at_ms_dos, mod_at_unix_extra
yield from _(local_header_struct.pack(
20, # Version
flags,
8, # Compression - deflate
compression,
mod_at_ms_dos,
0, # CRC32 - 0 since data descriptor
0, # Compressed size - 0 since data descriptor
Expand All @@ -251,7 +286,7 @@ def _zip_32_local_header_and_data(name_encoded, mod_at_ms_dos, mod_at_unix_extra
20, # Version required
0, # Reserved
flags,
8, # Compression - deflate
compression,
mod_at_ms_dos,
crc_32,
compressed_size,
Expand Down Expand Up @@ -292,7 +327,7 @@ def _zip_data(chunks, _get_compress_obj, max_uncompressed_size, max_compressed_s

return uncompressed_size, compressed_size, crc_32

def _no_compression_64_local_header_and_data(name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, encryption_func, chunks):
def _no_compression_64_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, encryption_func, chunks):
file_offset = offset

_raise_if_beyond(file_offset, maximum=0xffffffffffffffff, exception_class=OffsetOverflowError)
Expand All @@ -315,7 +350,7 @@ def _no_compression_64_local_header_and_data(name_encoded, mod_at_ms_dos, mod_at
yield from _(local_header_struct.pack(
45, # Version
flags,
0, # Compression - no compression
compression,
mod_at_ms_dos,
crc_32,
0xffffffff, # Compressed size - since zip64
Expand All @@ -342,7 +377,7 @@ def _no_compression_64_local_header_and_data(name_encoded, mod_at_ms_dos, mod_at
45, # Version required
0, # Reserved
flags,
0, # Compression - none
compression,
mod_at_ms_dos,
crc_32,
0xffffffff, # Compressed size - since zip64
Expand All @@ -357,7 +392,7 @@ def _no_compression_64_local_header_and_data(name_encoded, mod_at_ms_dos, mod_at
), name_encoded, extra


def _no_compression_32_local_header_and_data(name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, encryption_func, chunks):
def _no_compression_32_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, encryption_func, chunks):
file_offset = offset

_raise_if_beyond(file_offset, maximum=0xffffffff, exception_class=OffsetOverflowError)
Expand All @@ -374,7 +409,7 @@ def _no_compression_32_local_header_and_data(name_encoded, mod_at_ms_dos, mod_at
yield from _(local_header_struct.pack(
20, # Version
flags,
0, # Compression - no compression
compression,
mod_at_ms_dos,
crc_32,
size, # Compressed
Expand All @@ -394,7 +429,7 @@ def _no_compression_32_local_header_and_data(name_encoded, mod_at_ms_dos, mod_at
20, # Version required
0, # Reserved
flags,
0, # Compression - none
compression,
mod_at_ms_dos,
crc_32,
size, # Compressed
Expand Down Expand Up @@ -427,7 +462,7 @@ def _chunks():

return chunks, size, crc_32

def _no_compression_streamed_64_local_header_and_data(name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, encryption_func, chunks):
def _no_compression_streamed_64_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, encryption_func, chunks):
file_offset = offset

_raise_if_beyond(file_offset, maximum=0xffffffffffffffff, exception_class=OffsetOverflowError)
Expand All @@ -448,7 +483,7 @@ def _no_compression_streamed_64_local_header_and_data(name_encoded, mod_at_ms_do
yield from _(local_header_struct.pack(
45, # Version
flags,
0, # Compression - no compression
compression,
mod_at_ms_dos,
crc_32,
0xffffffff, # Compressed size - since zip64
Expand All @@ -474,7 +509,7 @@ def _no_compression_streamed_64_local_header_and_data(name_encoded, mod_at_ms_do
45, # Version required
0, # Reserved
flags,
0, # Compression - none
compression,
mod_at_ms_dos,
crc_32,
0xffffffff, # Compressed size - since zip64
Expand All @@ -489,7 +524,7 @@ def _no_compression_streamed_64_local_header_and_data(name_encoded, mod_at_ms_do
), name_encoded, extra


def _no_compression_streamed_32_local_header_and_data(name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, encryption_func, chunks):
def _no_compression_streamed_32_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, encryption_func, chunks):
file_offset = offset

_raise_if_beyond(file_offset, maximum=0xffffffff, exception_class=OffsetOverflowError)
Expand All @@ -505,7 +540,7 @@ def _no_compression_streamed_32_local_header_and_data(name_encoded, mod_at_ms_do
yield from _(local_header_struct.pack(
20, # Version
flags,
0, # Compression - no compression
compression,
mod_at_ms_dos,
crc_32,
uncompressed_size, # Compressed
Expand All @@ -524,7 +559,7 @@ def _no_compression_streamed_32_local_header_and_data(name_encoded, mod_at_ms_do
20, # Version required
0, # Reserved
flags,
0, # Compression - none
compression,
mod_at_ms_dos,
crc_32,
uncompressed_size, # Compressed
Expand Down Expand Up @@ -581,17 +616,17 @@ def _no_compression_streamed_data(chunks, uncompressed_size, crc_32, maximum_siz
(mode << 16) | \
(0x10 if name_encoded[-1:] == b'/' else 0x0) # MS-DOS directory

data_func = \
_zip_64_local_header_and_data if _method is _ZIP_64 else \
_zip_32_local_header_and_data if _method is _ZIP_32 else \
_no_compression_64_local_header_and_data if _method is _NO_COMPRESSION_BUFFERED_64 else \
_no_compression_32_local_header_and_data if _method is _NO_COMPRESSION_BUFFERED_32 else \
_no_compression_streamed_64_local_header_and_data if _method is _NO_COMPRESSION_STREAMED_64 else \
_no_compression_streamed_32_local_header_and_data
data_func, compression = \
(_zip_64_local_header_and_data, 8) if _method is _ZIP_64 else \
(_zip_32_local_header_and_data, 8) if _method is _ZIP_32 else \
(_no_compression_64_local_header_and_data, 0) if _method is _NO_COMPRESSION_BUFFERED_64 else \
(_no_compression_32_local_header_and_data, 0) if _method is _NO_COMPRESSION_BUFFERED_32 else \
(_no_compression_streamed_64_local_header_and_data, 0) if _method is _NO_COMPRESSION_STREAMED_64 else \
(_no_compression_streamed_32_local_header_and_data, 0)

aes_extra, encryption_func = (b'', _no_encryption)

central_directory_header_entry, name_encoded, extra = yield from data_func(name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, encryption_func, evenly_sized(chunks))
central_directory_header_entry, name_encoded, extra = yield from data_func(compression, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, encryption_func, evenly_sized(chunks))
central_directory_size += len(central_directory_header_signature) + len(central_directory_header_entry) + len(name_encoded) + len(extra)
central_directory.append((central_directory_header_entry, name_encoded, extra))

Expand Down

0 comments on commit e5de07c

Please sign in to comment.