From 47b4f340b388983401dafa63242ba7b2db6fa5f0 Mon Sep 17 00:00:00 2001 From: Michal Charemza Date: Wed, 3 Jan 2024 16:30:29 +0000 Subject: [PATCH] feat: AES-2 encryption This adds AES-2 encryption as requested/discussed in https://github.com/uktrade/stream-zip/issues/93 and defined at https://www.winzip.com/en/support/aes-encryption/ For now, AES-2 is used over AES-1 to prevent leakage of information via CRC-32 for small files, at the price of not having a checksum on the uncompressed plain text data (although there is an HMAC check on the encrypted compressed data as part of AES-2). In a later change, we should be able to make it AES-1 for larger files as recommended at https://www.winzip.com/en/support/aes-encryption/, but not doing this now to keep this change reasonably small. --- .github/workflows/test.yml | 11 ++ README.md | 2 + docs/features.md | 2 + pyproject.toml | 10 +- stream_zip.py | 209 +++++++++++++++++++++++++------------ test_stream_zip.py | 193 +++++++++++++++++++++++++++++++++- 6 files changed, 357 insertions(+), 70 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e0442ad..92bbc97 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -34,6 +34,17 @@ jobs: - name: "Install bsdcpio" run: | ./install-libarachive.sh + - name: "Install 7z" + run: | + mkdir bin + ( + cd ./bin + wget https://www.7-zip.org/a/7z2301-linux-x64.tar.xz + echo "23babcab045b78016e443f862363e4ab63c77d75bc715c0b3463f6134cbcf318 7z2301-linux-x64.tar.xz" | sha256sum --check + tar -xJf ./7z2301-linux-x64.tar.xz 7zz + rm 7z2301-linux-x64.tar.xz + echo "$PWD" >> $GITHUB_PATH + ) - name: "Install python dependencies" run: | pip install ".[ci]" diff --git a/README.md b/README.md index c95cf77..f8ee99f 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,8 @@ In addition to being memory efficient (with some [limitations](https://stream-zi - Can construct ZIP files that contain directories, including empty directories +- Can 
construct password protected/encrypted ZIP files adhering to the [WinZip AE-2 specification](https://www.winzip.com/en/support/aes-encryption/). + - Allows the specification of permissions on the member files and directories (although not all clients respect them) - By default stores modification time as an extended timestamp. An extended timestamp is a more accurate timestamp than the original ZIP format allows diff --git a/docs/features.md b/docs/features.md index b6da671..2b23030 100644 --- a/docs/features.md +++ b/docs/features.md @@ -14,6 +14,8 @@ In addition to being memory efficient (with some [limitations](/get-started/#lim - Can construct ZIP files that contain directories, including empty directories +- Can construct password protected/encrypted ZIP files adhering to the [WinZip AE-2 specification](https://www.winzip.com/en/support/aes-encryption/). + - Allows the specification of permissions on the member files and directories (although not all clients respect them) - By default stores modification time as an extended timestamp. 
An extended timestamp is a more accurate timestamp than the original ZIP format allows diff --git a/pyproject.toml b/pyproject.toml index 06695e9..754aff7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,19 +16,25 @@ classifiers = [ "License :: OSI Approved :: MIT License", "Topic :: System :: Archiving :: Compression", ] +dependencies = [ + "pycryptodome>=3.10.1", +] [project.optional-dependencies] dev = [ "coverage>=6.2", "pytest>=7.0.1", "pytest-cov>=3.0.0", - "stream-unzip>=0.0.86" + "stream-unzip>=0.0.86", + "pyzipper>=0.3.6", ] ci = [ + "pycryptodome==3.10.1", "coverage==6.2", "pytest==7.0.1", "pytest-cov==3.0.0", - "stream-unzip==0.0.86" + "stream-unzip==0.0.86", + "pyzipper==0.3.6", ] [project.urls] diff --git a/stream_zip.py b/stream_zip.py index 0d9c118..8ca9265 100644 --- a/stream_zip.py +++ b/stream_zip.py @@ -1,7 +1,13 @@ from collections import deque from struct import Struct +import secrets import zlib +from Crypto.Cipher import AES +from Crypto.Hash import HMAC, SHA1 +from Crypto.Util import Counter +from Crypto.Protocol.KDF import PBKDF2 + # Private methods _NO_COMPRESSION_BUFFERED_32 = object() @@ -63,7 +69,12 @@ def method_compressobj(offset, default_get_compressobj): return method_compressobj -def stream_zip(files, chunk_size=65536, get_compressobj=lambda: zlib.compressobj(wbits=-zlib.MAX_WBITS, level=9), extended_timestamps=True): +def stream_zip(files, chunk_size=65536, + get_compressobj=lambda: zlib.compressobj(wbits=-zlib.MAX_WBITS, level=9), + extended_timestamps=True, + password=None, + get_crypto_random=lambda num_bytes: secrets.token_bytes(num_bytes), +): def evenly_sized(chunks): chunk = b'' @@ -119,8 +130,12 @@ def get_zipped_chunks_uneven(): mod_at_unix_extra_signature = b'UT' mod_at_unix_extra_struct = Struct('<2sH1sl') + aes_extra_signature = b'\x01\x99' + aes_extra_struct = Struct('<2sHH2sBH') + modified_at_struct = Struct(' maximum: raise exception_class() - def _zip_64_local_header_and_data(compression, name_encoded, 
mod_at_ms_dos, mod_at_unix_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, chunks): + def _with_returned(gen): + # We leverage the not-often used "return value" of generators. Here, we want to iterate + # over chunks (to encrypt them), but still return the same "return value". So we use a + # bit of a trick to extract the return value but still have access to the chunks as + # we iterate over them + + return_value = None + def with_return_value(): + nonlocal return_value + return_value = yield from gen + + return ((lambda: return_value), with_return_value()) + + def _encrypt_dummy(chunks): + get_return_value, chunks_with_return = _with_returned(chunks) + for chunk in chunks_with_return: + yield from _(chunk) + return get_return_value() + + def _encrypt_aes(chunks): + key_length = 32 + salt_length = 16 + password_verification_length = 2 + + salt = get_crypto_random(salt_length) + yield from _(salt) + + keys = PBKDF2(password, salt, 2 * key_length + password_verification_length, 1000) + yield from _(keys[-password_verification_length:]) + + encrypter = AES.new( + keys[:key_length], AES.MODE_CTR, + counter=Counter.new(nbits=128, little_endian=True), + ) + hmac = HMAC.new(keys[key_length:key_length*2], digestmod=SHA1) + + get_return_value, chunks_with_return = _with_returned(chunks) + for chunk in chunks_with_return: + encrypted_chunk = encrypter.encrypt(chunk) + hmac.update(encrypted_chunk) + yield from _(encrypted_chunk) + + yield from _(hmac.digest()[:10]) + + return get_return_value() + + def _zip_64_local_header_and_data(compression, aes_size_increase, aes_flags, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, crc_32_mask, _get_compress_obj, encryption_func, chunks): file_offset = offset _raise_if_beyond(file_offset, maximum=0xffffffffffffffff, exception_class=OffsetOverflowError) @@ -149,8 +210,8 @@ def _zip_64_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_ 16, # Size 
of extra 0, # Uncompressed size - since data descriptor 0, # Compressed size - since data descriptor - ) + mod_at_unix_extra - flags = data_descriptor_flag | utf8_flag + ) + mod_at_unix_extra + aes_extra + flags = aes_flags | data_descriptor_flag | utf8_flag yield from _(local_header_signature) yield from _(local_header_struct.pack( @@ -167,15 +228,17 @@ def _zip_64_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_ yield from _(name_encoded) yield from _(extra) - uncompressed_size, compressed_size, crc_32 = yield from _zip_data( + uncompressed_size, raw_compressed_size, crc_32 = yield from encryption_func(_zip_data( chunks, _get_compress_obj, max_uncompressed_size=0xffffffffffffffff, max_compressed_size=0xffffffffffffffff, - ) + )) + compressed_size = raw_compressed_size + aes_size_increase + masked_crc_32 = crc_32 & crc_32_mask yield from _(data_descriptor_signature) - yield from _(data_descriptor_zip_64_struct.pack(crc_32, compressed_size, uncompressed_size)) + yield from _(data_descriptor_zip_64_struct.pack(masked_crc_32, compressed_size, uncompressed_size)) extra = zip_64_central_directory_extra_struct.pack( zip_64_extra_signature, @@ -183,7 +246,7 @@ def _zip_64_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_ uncompressed_size, compressed_size, file_offset, - ) + mod_at_unix_extra + ) + mod_at_unix_extra + aes_extra return central_directory_header_struct.pack( 45, # Version made by 3, # System made by (UNIX) @@ -192,7 +255,7 @@ def _zip_64_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_ flags, compression, mod_at_ms_dos, - crc_32, + masked_crc_32, 0xffffffff, # Compressed size - since zip64 0xffffffff, # Uncompressed size - since zip64 len(name_encoded), @@ -204,13 +267,13 @@ def _zip_64_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_ 0xffffffff, # Offset of local header - since zip64 ), name_encoded, extra - def _zip_32_local_header_and_data(compression, name_encoded, 
mod_at_ms_dos, mod_at_unix_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, chunks): + def _zip_32_local_header_and_data(compression, aes_size_increase, aes_flags, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, crc_32_mask, _get_compress_obj, encryption_func, chunks): file_offset = offset _raise_if_beyond(file_offset, maximum=0xffffffff, exception_class=OffsetOverflowError) - extra = mod_at_unix_extra - flags = data_descriptor_flag | utf8_flag + extra = mod_at_unix_extra + aes_extra + flags = aes_flags | data_descriptor_flag | utf8_flag yield from _(local_header_signature) yield from _(local_header_struct.pack( @@ -227,15 +290,17 @@ def _zip_32_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_ yield from _(name_encoded) yield from _(extra) - uncompressed_size, compressed_size, crc_32 = yield from _zip_data( + uncompressed_size, raw_compressed_size, crc_32 = yield from encryption_func(_zip_data( chunks, _get_compress_obj, max_uncompressed_size=0xffffffff, max_compressed_size=0xffffffff, - ) + )) + compressed_size = raw_compressed_size + aes_size_increase + masked_crc_32 = crc_32 & crc_32_mask yield from _(data_descriptor_signature) - yield from _(data_descriptor_zip_32_struct.pack(crc_32, compressed_size, uncompressed_size)) + yield from _(data_descriptor_zip_32_struct.pack(masked_crc_32, compressed_size, uncompressed_size)) return central_directory_header_struct.pack( 20, # Version made by @@ -245,7 +310,7 @@ def _zip_32_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_ flags, compression, mod_at_ms_dos, - crc_32, + masked_crc_32, compressed_size, uncompressed_size, len(name_encoded), @@ -273,31 +338,33 @@ def _zip_data(chunks, _get_compress_obj, max_uncompressed_size, max_compressed_s _raise_if_beyond(compressed_size, maximum=max_compressed_size, exception_class=CompressedSizeOverflowError) - yield from _(compressed_chunk) + yield compressed_chunk 
compressed_chunk = compress_obj.flush() compressed_size += len(compressed_chunk) _raise_if_beyond(compressed_size, maximum=max_compressed_size, exception_class=CompressedSizeOverflowError) - yield from _(compressed_chunk) + yield compressed_chunk return uncompressed_size, compressed_size, crc_32 - def _no_compression_64_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_at_unix_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, chunks): + def _no_compression_64_local_header_and_data(compression, aes_size_increase, aes_flags, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, crc_32_mask, _get_compress_obj, encryption_func, chunks): file_offset = offset _raise_if_beyond(file_offset, maximum=0xffffffffffffffff, exception_class=OffsetOverflowError) - chunks, size, crc_32 = _no_compression_buffered_data_size_crc_32(chunks, maximum_size=0xffffffffffffffff) + chunks, uncompressed_size, crc_32 = _no_compression_buffered_data_size_crc_32(chunks, maximum_size=0xffffffffffffffff) + compressed_size = uncompressed_size + aes_size_increase extra = zip_64_local_extra_struct.pack( zip_64_extra_signature, 16, # Size of extra - size, # Uncompressed - size, # Compressed - ) + mod_at_unix_extra - flags = utf8_flag + uncompressed_size, + compressed_size, + ) + mod_at_unix_extra + aes_extra + flags = aes_flags | utf8_flag + masked_crc_32 = crc_32 & crc_32_mask yield from _(local_header_signature) yield from _(local_header_struct.pack( @@ -305,7 +372,7 @@ def _no_compression_64_local_header_and_data(compression, name_encoded, mod_at_m flags, compression, mod_at_ms_dos, - crc_32, + masked_crc_32, 0xffffffff, # Compressed size - since zip64 0xffffffff, # Uncompressed size - since zip64 len(name_encoded), @@ -314,16 +381,15 @@ def _no_compression_64_local_header_and_data(compression, name_encoded, mod_at_m yield from _(name_encoded) yield from _(extra) - for chunk in chunks: - yield from _(chunk) + yield 
from encryption_func(chunks) extra = zip_64_central_directory_extra_struct.pack( zip_64_extra_signature, 24, # Size of extra - size, # Uncompressed - size, # Compressed + uncompressed_size, + compressed_size, file_offset, - ) + mod_at_unix_extra + ) + mod_at_unix_extra + aes_extra return central_directory_header_struct.pack( 45, # Version made by 3, # System made by (UNIX) @@ -332,7 +398,7 @@ def _no_compression_64_local_header_and_data(compression, name_encoded, mod_at_m flags, compression, mod_at_ms_dos, - crc_32, + masked_crc_32, 0xffffffff, # Compressed size - since zip64 0xffffffff, # Uncompressed size - since zip64 len(name_encoded), @@ -345,15 +411,17 @@ def _no_compression_64_local_header_and_data(compression, name_encoded, mod_at_m ), name_encoded, extra - def _no_compression_32_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_at_unix_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, chunks): + def _no_compression_32_local_header_and_data(compression, aes_size_increase, aes_flags, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, crc_32_mask, _get_compress_obj, encryption_func, chunks): file_offset = offset _raise_if_beyond(file_offset, maximum=0xffffffff, exception_class=OffsetOverflowError) - chunks, size, crc_32 = _no_compression_buffered_data_size_crc_32(chunks, maximum_size=0xffffffff) + chunks, uncompressed_size, crc_32 = _no_compression_buffered_data_size_crc_32(chunks, maximum_size=0xffffffff) - extra = mod_at_unix_extra - flags = utf8_flag + compressed_size = uncompressed_size + aes_size_increase + extra = mod_at_unix_extra + aes_extra + flags = aes_flags | utf8_flag + masked_crc_32 = crc_32 & crc_32_mask yield from _(local_header_signature) yield from _(local_header_struct.pack( @@ -361,17 +429,16 @@ def _no_compression_32_local_header_and_data(compression, name_encoded, mod_at_m flags, compression, mod_at_ms_dos, - crc_32, - size, # Compressed - size, # 
Uncompressed + masked_crc_32, + compressed_size, + uncompressed_size, len(name_encoded), len(extra), )) yield from _(name_encoded) yield from _(extra) - for chunk in chunks: - yield from _(chunk) + yield from encryption_func(chunks) return central_directory_header_struct.pack( 20, # Version made by @@ -381,9 +448,9 @@ def _no_compression_32_local_header_and_data(compression, name_encoded, mod_at_m flags, compression, mod_at_ms_dos, - crc_32, - size, # Compressed - size, # Uncompressed + masked_crc_32, + compressed_size, + uncompressed_size, len(name_encoded), len(extra), 0, # File comment length @@ -412,18 +479,20 @@ def _chunks(): return chunks, size, crc_32 - def _no_compression_streamed_64_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_at_unix_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, chunks): + def _no_compression_streamed_64_local_header_and_data(compression, aes_size_increase, aes_flags, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, crc_32_mask, _get_compress_obj, encryption_func, chunks): file_offset = offset _raise_if_beyond(file_offset, maximum=0xffffffffffffffff, exception_class=OffsetOverflowError) + compressed_size = uncompressed_size + aes_size_increase extra = zip_64_local_extra_struct.pack( zip_64_extra_signature, 16, # Size of extra - uncompressed_size, # Uncompressed - uncompressed_size, # Compressed - ) + mod_at_unix_extra - flags = utf8_flag + uncompressed_size, + compressed_size, + ) + mod_at_unix_extra + aes_extra + flags = aes_flags | utf8_flag + masked_crc_32 = crc_32 & crc_32_mask yield from _(local_header_signature) yield from _(local_header_struct.pack( @@ -431,7 +500,7 @@ def _no_compression_streamed_64_local_header_and_data(compression, name_encoded, flags, compression, mod_at_ms_dos, - crc_32, + masked_crc_32, 0xffffffff, # Compressed size - since zip64 0xffffffff, # Uncompressed size - since zip64 len(name_encoded), @@ -440,15 +509,15 
@@ def _no_compression_streamed_64_local_header_and_data(compression, name_encoded, yield from _(name_encoded) yield from _(extra) - yield from _no_compression_streamed_data(chunks, uncompressed_size, crc_32, 0xffffffffffffffff) + yield from encryption_func(_no_compression_streamed_data(chunks, uncompressed_size, crc_32, 0xffffffffffffffff)) extra = zip_64_central_directory_extra_struct.pack( zip_64_extra_signature, 24, # Size of extra - uncompressed_size, # Uncompressed - uncompressed_size, # Compressed + uncompressed_size, + compressed_size, file_offset, - ) + mod_at_unix_extra + ) + mod_at_unix_extra + aes_extra return central_directory_header_struct.pack( 45, # Version made by 3, # System made by (UNIX) @@ -457,7 +526,7 @@ def _no_compression_streamed_64_local_header_and_data(compression, name_encoded, flags, compression, mod_at_ms_dos, - crc_32, + masked_crc_32, 0xffffffff, # Compressed size - since zip64 0xffffffff, # Uncompressed size - since zip64 len(name_encoded), @@ -470,13 +539,15 @@ def _no_compression_streamed_64_local_header_and_data(compression, name_encoded, ), name_encoded, extra - def _no_compression_streamed_32_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_at_unix_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, chunks): + def _no_compression_streamed_32_local_header_and_data(compression, aes_size_increase, aes_flags, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, crc_32_mask, _get_compress_obj, encryption_func, chunks): file_offset = offset _raise_if_beyond(file_offset, maximum=0xffffffff, exception_class=OffsetOverflowError) - extra = mod_at_unix_extra - flags = utf8_flag + compressed_size = uncompressed_size + aes_size_increase + extra = mod_at_unix_extra + aes_extra + flags = aes_flags | utf8_flag + masked_crc_32 = crc_32 & crc_32_mask yield from _(local_header_signature) yield from _(local_header_struct.pack( @@ -484,16 +555,16 @@ def 
_no_compression_streamed_32_local_header_and_data(compression, name_encoded, flags, compression, mod_at_ms_dos, - crc_32, - uncompressed_size, # Compressed - uncompressed_size, # Uncompressed + masked_crc_32, + compressed_size, + uncompressed_size, len(name_encoded), len(extra), )) yield from _(name_encoded) yield from _(extra) - yield from _no_compression_streamed_data(chunks, uncompressed_size, crc_32, 0xffffffff) + yield from encryption_func(_no_compression_streamed_data(chunks, uncompressed_size, crc_32, 0xffffffff)) return central_directory_header_struct.pack( 20, # Version made by @@ -503,9 +574,9 @@ def _no_compression_streamed_32_local_header_and_data(compression, name_encoded, flags, compression, mod_at_ms_dos, - crc_32, - uncompressed_size, # Compressed - uncompressed_size, # Uncompressed + masked_crc_32, + compressed_size, + uncompressed_size, len(name_encoded), len(extra), 0, # File comment length @@ -522,7 +593,7 @@ def _no_compression_streamed_data(chunks, uncompressed_size, crc_32, maximum_siz actual_crc_32 = zlib.crc32(chunk, actual_crc_32) size += len(chunk) _raise_if_beyond(size, maximum=maximum_size, exception_class=UncompressedSizeOverflowError) - yield from _(chunk) + yield chunk if actual_crc_32 != crc_32: raise CRC32IntegrityError() @@ -558,7 +629,7 @@ def _no_compression_streamed_data(chunks, uncompressed_size, crc_32, maximum_siz (mode << 16) | \ (0x10 if name_encoded[-1:] == b'/' else 0x0) # MS-DOS directory - data_func, compression = \ + data_func, raw_compression = \ (_zip_64_local_header_and_data, 8) if _method is _ZIP_64 else \ (_zip_32_local_header_and_data, 8) if _method is _ZIP_32 else \ (_no_compression_64_local_header_and_data, 0) if _method is _NO_COMPRESSION_BUFFERED_64 else \ @@ -566,7 +637,11 @@ def _no_compression_streamed_data(chunks, uncompressed_size, crc_32, maximum_siz (_no_compression_streamed_64_local_header_and_data, 0) if _method is _NO_COMPRESSION_STREAMED_64 else \ 
(_no_compression_streamed_32_local_header_and_data, 0) - central_directory_header_entry, name_encoded, extra = yield from data_func(compression, name_encoded, mod_at_ms_dos, mod_at_unix_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, evenly_sized(chunks)) + compression, aes_size_increase, aes_flags, aes_extra, crc_32_mask, encryption_func = \ + (99, 28, aes_flag, aes_extra_struct.pack(aes_extra_signature, 7, 2, b'AE', 3, raw_compression), 0, _encrypt_aes) if password is not None else \ + (raw_compression, 0, 0, b'', 0xffffffff, _encrypt_dummy) + + central_directory_header_entry, name_encoded, extra = yield from data_func(compression, aes_size_increase, aes_flags, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, crc_32_mask, _get_compress_obj, encryption_func, evenly_sized(chunks)) central_directory_size += len(central_directory_header_signature) + len(central_directory_header_entry) + len(name_encoded) + len(extra) central_directory.append((central_directory_header_entry, name_encoded, extra)) diff --git a/test_stream_zip.py b/test_stream_zip.py index 623197e..17dafbb 100644 --- a/test_stream_zip.py +++ b/test_stream_zip.py @@ -6,10 +6,12 @@ import subprocess import zlib from tempfile import TemporaryDirectory +from struct import Struct from zipfile import ZipFile import pytest -from stream_unzip import UnsupportedZip64Error, stream_unzip +import pyzipper +from stream_unzip import IncorrectAESPasswordError, UnsupportedZip64Error, stream_unzip from stream_zip import ( stream_zip, @@ -1082,3 +1084,192 @@ def test_unzip_modification_time_extended_timestamps_disabled(method, timezone, subprocess.run(['unzip', f'{d}/test.zip', '-d', d], env={'TZ': timezone}) assert os.path.getmtime('my_file') == expected_modified_at.timestamp() + + +@pytest.mark.parametrize( + "method", + [ + ZIP_32, + ZIP_64, + NO_COMPRESSION_64, + NO_COMPRESSION_64(18, 1571107898), + NO_COMPRESSION_32, + NO_COMPRESSION_32(18, 
1571107898), + ], +) +def test_password_unzips_with_stream_unzip(method): + now = datetime.strptime('2021-01-01 21:01:12', '%Y-%m-%d %H:%M:%S') + mode = stat.S_IFREG | 0o600 + password = 'my-pass' + + files = ( + ('file-1', now, mode, method, (b'a' * 9, b'b' * 9)), + ) + + assert b''.join( + chunk + for _, _, chunks in stream_unzip(stream_zip(files, password=password), password=password) + for chunk in chunks + ) == b'a' * 9 + b'b' * 9 + + +@pytest.mark.parametrize( + "method", + [ + ZIP_32, + ZIP_64, + NO_COMPRESSION_64, + NO_COMPRESSION_64(18, 1571107898), + NO_COMPRESSION_32, + NO_COMPRESSION_32(18, 1571107898), + ], +) +def test_bad_password_not_unzips_with_stream_unzip(method): + now = datetime.strptime('2021-01-01 21:01:12', '%Y-%m-%d %H:%M:%S') + mode = stat.S_IFREG | 0o600 + password = 'my-pass' + + files = ( + ('file-1', now, mode, method, (b'a' * 9, b'b' * 9)), + ) + + with pytest.raises(IncorrectAESPasswordError): + list(stream_unzip(stream_zip(files, password=password), password='not')) + + +@pytest.mark.parametrize( + "method", + [ + ZIP_32, + ZIP_64, + NO_COMPRESSION_64, + NO_COMPRESSION_64(18, 1571107898), + NO_COMPRESSION_32, + NO_COMPRESSION_32(18, 1571107898), + ], +) +def test_password_unzips_with_7z(method): + now = datetime.strptime('2021-01-01 21:01:12', '%Y-%m-%d %H:%M:%S') + mode = stat.S_IFREG | 0o600 + password = 'my-pass' + + files = ( + ('file-1', now, mode, method, (b'a' * 9, b'b' * 9)), + ) + + with \ + TemporaryDirectory() as d, \ + cwd(d): \ + + with open('test.zip', 'wb') as fp: + for zipped_chunk in stream_zip(files, password=password): + fp.write(zipped_chunk) + + r = subprocess.run(['7zz', '-pmy-pass', 'e', 'test.zip']) + assert r.returncode == 0 + + for file in files: + with open(file[0], 'rb') as f: + assert f.read() == (b'a' * 9 ) + (b'b' * 9) + + +@pytest.mark.parametrize( + "method", + [ + ZIP_32, + ZIP_64, + NO_COMPRESSION_64, + NO_COMPRESSION_64(18, 1571107898), + NO_COMPRESSION_32, + NO_COMPRESSION_32(18, 1571107898), + 
], +) +def test_password_unzips_with_pyzipper(method): + now = datetime.strptime('2021-01-01 21:01:12', '%Y-%m-%d %H:%M:%S') + mode = stat.S_IFREG | 0o600 + password = 'my-pass' + + files = ( + ('file-1', now, mode, method, (b'a' * 9, b'b' * 9)), + ) + + with \ + TemporaryDirectory() as d, \ + cwd(d): \ + + with open('test.zip', 'wb') as fp: + for zipped_chunk in stream_zip(files, password=password): + fp.write(zipped_chunk) + + with pyzipper.AESZipFile('test.zip') as zf: + zf.setpassword(password.encode()) + zf.testzip() + assert zf.read('file-1') == (b'a' * 9 ) + (b'b' * 9) + + +@pytest.mark.parametrize( + "method", + [ + ZIP_32, + ZIP_64, + NO_COMPRESSION_64, + NO_COMPRESSION_64(18, 1571107898), + NO_COMPRESSION_32, + NO_COMPRESSION_32(18, 1571107898), + ], +) +def test_password_bytes_not_deterministic(method): + now = datetime.strptime('2021-01-01 21:01:12', '%Y-%m-%d %H:%M:%S') + mode = stat.S_IFREG | 0o600 + password = 'my-pass' + + files = ( + ('file-1', now, mode, method, (b'a' * 9, b'b' * 9)), + ) + + assert b''.join(stream_zip(files, password=password)) != b''.join(stream_zip(files, password=password)) + + +@pytest.mark.parametrize( + "method", + [ + ZIP_32, + ZIP_64, + NO_COMPRESSION_64, + NO_COMPRESSION_64(18, 1571107898), + NO_COMPRESSION_32, + NO_COMPRESSION_32(18, 1571107898), + ], +) +def test_crc_32_not_in_file(method): + # AE-2 should not have the CRC_32, so we check that the CRC_32 isn't anywhere in the file. This + # is "too strong" as a check, because it could just happen to appear in the cipher text, which + # would be fine. The cipher text is by default non-deterministic due to its random salt, and + # so this could be a flaky test and fail randomly. 
To make the test not flaky, we make the + # bytes of the file completely deterministic, by forcing the random numbers used to generate + # the salt to be non-random + + now = datetime.strptime('2021-01-01 21:01:12', '%Y-%m-%d %H:%M:%S') + mode = stat.S_IFREG | 0o600 + password = 'my-pass' + + files = ( + ('file-1', now, mode, method, (b'a' * 9, b'b' * 9)), + ) + crc_32 = Struct('