diff --git a/pyproject.toml b/pyproject.toml index 1cf5107..42d6b6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,9 @@ classifiers = [ "License :: OSI Approved :: MIT License", "Topic :: System :: Archiving :: Compression", ] +dependencies = [ + "pycryptodome>=3.10.1", +] [project.optional-dependencies] dev = [ @@ -25,6 +28,7 @@ dev = [ "stream-unzip>=0.0.86" ] ci = [ + "pycryptodome==3.10.1", "coverage==6.2", "pytest==6.2.5", "pytest-cov==3.0.0", diff --git a/stream_zip.py b/stream_zip.py index 525fcbb..d2eaea9 100644 --- a/stream_zip.py +++ b/stream_zip.py @@ -1,7 +1,13 @@ from collections import deque from struct import Struct +import secrets import zlib +from Crypto.Cipher import AES +from Crypto.Hash import HMAC, SHA1 +from Crypto.Util import Counter +from Crypto.Protocol.KDF import PBKDF2 + # Private methods _NO_COMPRESSION_BUFFERED_32 = object() @@ -63,7 +69,7 @@ def method_compressobj(offset, default_get_compressobj): return method_compressobj -def stream_zip(files, chunk_size=65536, get_compressobj=lambda: zlib.compressobj(wbits=-zlib.MAX_WBITS, level=9), extended_timestamps=True): +def stream_zip(files, chunk_size=65536, get_compressobj=lambda: zlib.compressobj(wbits=-zlib.MAX_WBITS, level=9), extended_timestamps=True, password=None): def evenly_sized(chunks): chunk = b'' @@ -612,7 +618,9 @@ def _no_compression_streamed_data(chunks, uncompressed_size, crc_32, maximum_siz b'\x01', # Only modification time (as opposed to also other times) int(modified_at.timestamp()), ) if extended_timestamps else b'' - aes_extra, encryption_func = (b'', _no_encryption) + aes_extra, encryption_func = \ + (b'', _aes_encrypted) if password is not None else \ + (b'', _no_encryption) external_attr = \ (mode << 16) | \ (0x10 if name_encoded[-1:] == b'/' else 0x0) # MS-DOS directory diff --git a/test_stream_zip.py b/test_stream_zip.py index 623197e..466d8b7 100644 --- a/test_stream_zip.py +++ b/test_stream_zip.py @@ -1082,3 +1082,53 @@ def 
test_unzip_modification_time_extended_timestamps_disabled(method, timezone,
         subprocess.run(['unzip', f'{d}/test.zip', '-d', d], env={'TZ': timezone})
 
         assert os.path.getmtime('my_file') == expected_modified_at.timestamp()
+
+
+@pytest.mark.parametrize(
+    "method",
+    [
+        ZIP_32,
+        ZIP_64,
+        NO_COMPRESSION_64,
+        NO_COMPRESSION_32,
+    ],
+)
+def test_password_unzips_with_stream_unzip(method):
+    now = datetime.strptime('2021-01-01 21:01:12', '%Y-%m-%d %H:%M:%S')
+    mode = stat.S_IFREG | 0o600
+    password = 'my-pass'
+    # Every entry uses the parametrized method so the four cases actually differ
+    files = (
+        ('file-1', now, mode, method, (b'a' * 10000, b'b' * 10000)),
+        ('file-2', now, mode, method, (b'c', b'd')),
+    )
+    # Size is not compared: what stream_unzip reports may depend on the method
+    assert [
+        (b'file-1', b'a' * 10000 + b'b' * 10000),
+        (b'file-2', b'cd'),
+    ] == [
+        (name, b''.join(chunks))
+        for name, _, chunks in stream_unzip(stream_zip(files, password=password), password=password)
+    ]
+
+
+@pytest.mark.parametrize(
+    "method",
+    [
+        ZIP_32,
+        ZIP_64,
+        NO_COMPRESSION_64,
+        NO_COMPRESSION_32,
+    ],
+)
+def test_password_bytes_not_deterministic(method):
+    now = datetime.strptime('2021-01-01 21:01:12', '%Y-%m-%d %H:%M:%S')
+    mode = stat.S_IFREG | 0o600
+    password = 'my-pass'
+    # Every entry uses the parametrized method so the four cases actually differ
+    files = (
+        ('file-1', now, mode, method, (b'a' * 10000, b'b' * 10000)),
+        ('file-2', now, mode, method, (b'c', b'd')),
+    )
+    # Two encryptions of identical input with the same password must not match
+    assert b''.join(stream_zip(files, password=password)) != b''.join(stream_zip(files, password=password))