Skip to content

Commit

Permalink
feat: AES encryption (WinZip AE-2 format)
Browse files Browse the repository at this point in the history
  • Loading branch information
michalc committed Jan 3, 2024
1 parent e5de07c commit 4810da2
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 30 deletions.
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ classifiers = [
"License :: OSI Approved :: MIT License",
"Topic :: System :: Archiving :: Compression",
]
dependencies = [
"pycryptodome>=3.10.1",
]

[project.optional-dependencies]
dev = [
Expand All @@ -25,6 +28,7 @@ dev = [
"stream-unzip>=0.0.86"
]
ci = [
"pycryptodome==3.10.1",
"coverage==6.2",
"pytest==6.2.5",
"pytest-cov==3.0.0",
Expand Down
91 changes: 61 additions & 30 deletions stream_zip.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
from collections import deque
from struct import Struct
import secrets
import zlib

from Crypto.Cipher import AES
from Crypto.Hash import HMAC, SHA1
from Crypto.Util import Counter
from Crypto.Protocol.KDF import PBKDF2

# Private methods

_NO_COMPRESSION_BUFFERED_32 = object()
Expand Down Expand Up @@ -63,7 +69,7 @@ def method_compressobj(offset, default_get_compressobj):
return method_compressobj


def stream_zip(files, chunk_size=65536, get_compressobj=lambda: zlib.compressobj(wbits=-zlib.MAX_WBITS, level=9), extended_timestamps=True):
def stream_zip(files, chunk_size=65536, get_compressobj=lambda: zlib.compressobj(wbits=-zlib.MAX_WBITS, level=9), extended_timestamps=True, password=None):

def evenly_sized(chunks):
chunk = b''
Expand Down Expand Up @@ -119,6 +125,9 @@ def get_zipped_chunks_uneven():
mod_at_unix_extra_signature = b'UT'
mod_at_unix_extra_struct = Struct('<2sH1sl')

aes_extra_signature = b'\x01\x99'
aes_extra_struct = Struct('<2sHH2sBH')

modified_at_struct = Struct('<HH')

central_directory = deque()
Expand Down Expand Up @@ -188,7 +197,7 @@ def _zip_64_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_

# (encryption,) data descriptor and utf-8 file names
flags = \
b'\x88\x08' if aes_extra else \
b'\x09\x08' if aes_extra else \
b'\x08\x08'

yield from _(local_header_signature)
Expand All @@ -206,12 +215,15 @@ def _zip_64_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_
yield from _(name_encoded)
yield from _(extra)

uncompressed_size, compressed_size, crc_32 = yield from _zip_data(
uncompressed_size, raw_compressed_size, crc_32 = yield from encryption_func(_zip_data(
chunks,
_get_compress_obj,
max_uncompressed_size=0xffffffffffffffff,
max_compressed_size=0xffffffffffffffff,
)
))
compressed_size = \
raw_compressed_size + 28 if aes_extra else \
raw_compressed_size

yield from _(data_descriptor_signature)
yield from _(data_descriptor_zip_64_struct.pack(crc_32, compressed_size, uncompressed_size))
Expand Down Expand Up @@ -252,7 +264,7 @@ def _zip_32_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_

# (encryption,) data descriptor and utf-8 file names
flags = \
b'\x88\x08' if aes_extra else \
b'\x09\x08' if aes_extra else \
b'\x08\x08'

yield from _(local_header_signature)
Expand All @@ -270,12 +282,15 @@ def _zip_32_local_header_and_data(compression, name_encoded, mod_at_ms_dos, mod_
yield from _(name_encoded)
yield from _(extra)

uncompressed_size, compressed_size, crc_32 = yield from encryption_func(_zip_data(
uncompressed_size, raw_compressed_size, crc_32 = yield from encryption_func(_zip_data(
chunks,
_get_compress_obj,
max_uncompressed_size=0xffffffff,
max_compressed_size=0xffffffff,
))
compressed_size = \
raw_compressed_size + 28 if aes_extra else \
raw_compressed_size

yield from _(data_descriptor_signature)
yield from _(data_descriptor_zip_32_struct.pack(crc_32, compressed_size, uncompressed_size))
Expand Down Expand Up @@ -332,18 +347,21 @@ def _no_compression_64_local_header_and_data(compression, name_encoded, mod_at_m

_raise_if_beyond(file_offset, maximum=0xffffffffffffffff, exception_class=OffsetOverflowError)

chunks, size, crc_32 = _no_compression_buffered_data_size_crc_32(chunks, maximum_size=0xffffffffffffffff)
chunks, uncompressed_size, crc_32 = _no_compression_buffered_data_size_crc_32(chunks, maximum_size=0xffffffffffffffff)
compressed_size = \
uncompressed_size + 28 if aes_extra else \
uncompressed_size

extra = zip_64_local_extra_struct.pack(
zip_64_extra_signature,
16, # Size of extra
size, # Uncompressed
size, # Compressed
uncompressed_size,
compressed_size,
) + mod_at_unix_extra + aes_extra

# (encryption and) utf-8 file names
flags = \
b'\x80\x08' if aes_extra else \
b'\x01\x08' if aes_extra else \
b'\x00\x08'

yield from _(local_header_signature)
Expand All @@ -367,8 +385,8 @@ def _no_compression_64_local_header_and_data(compression, name_encoded, mod_at_m
extra = zip_64_central_directory_extra_struct.pack(
zip_64_extra_signature,
24, # Size of extra
size, # Uncompressed
size, # Compressed
uncompressed_size,
compressed_size,
file_offset,
) + mod_at_unix_extra + aes_extra
return central_directory_header_struct.pack(
Expand Down Expand Up @@ -397,11 +415,14 @@ def _no_compression_32_local_header_and_data(compression, name_encoded, mod_at_m

_raise_if_beyond(file_offset, maximum=0xffffffff, exception_class=OffsetOverflowError)

chunks, size, crc_32 = _no_compression_buffered_data_size_crc_32(chunks, maximum_size=0xffffffff)
chunks, uncompressed_size, crc_32 = _no_compression_buffered_data_size_crc_32(chunks, maximum_size=0xffffffff)
compressed_size = \
uncompressed_size + 28 if aes_extra else \
uncompressed_size

# (encryption and) utf-8 file names
flags = \
b'\x80\x08' if aes_extra else \
b'\x01\x08' if aes_extra else \
b'\x00\x08'

extra = mod_at_unix_extra + aes_extra
Expand All @@ -412,8 +433,8 @@ def _no_compression_32_local_header_and_data(compression, name_encoded, mod_at_m
compression,
mod_at_ms_dos,
crc_32,
size, # Compressed
size, # Uncompressed
compressed_size,
uncompressed_size,
len(name_encoded),
len(extra),
))
Expand All @@ -432,8 +453,8 @@ def _no_compression_32_local_header_and_data(compression, name_encoded, mod_at_m
compression,
mod_at_ms_dos,
crc_32,
size, # Compressed
size, # Uncompressed
compressed_size,
uncompressed_size,
len(name_encoded),
len(extra),
0, # File comment length
Expand Down Expand Up @@ -467,16 +488,20 @@ def _no_compression_streamed_64_local_header_and_data(compression, name_encoded,

_raise_if_beyond(file_offset, maximum=0xffffffffffffffff, exception_class=OffsetOverflowError)

compressed_size = \
uncompressed_size + 28 if aes_extra else \
uncompressed_size

extra = zip_64_local_extra_struct.pack(
zip_64_extra_signature,
16, # Size of extra
uncompressed_size, # Uncompressed
uncompressed_size, # Compressed
uncompressed_size,
compressed_size,
) + mod_at_unix_extra + aes_extra

# (encryption and) utf-8 file names
flags = \
b'\x80\x08' if aes_extra else \
b'\x01\x08' if aes_extra else \
b'\x00\x08'

yield from _(local_header_signature)
Expand All @@ -499,8 +524,8 @@ def _no_compression_streamed_64_local_header_and_data(compression, name_encoded,
extra = zip_64_central_directory_extra_struct.pack(
zip_64_extra_signature,
24, # Size of extra
uncompressed_size, # Uncompressed
uncompressed_size, # Compressed
uncompressed_size,
compressed_size,
file_offset,
) + mod_at_unix_extra
return central_directory_header_struct.pack(
Expand Down Expand Up @@ -531,9 +556,13 @@ def _no_compression_streamed_32_local_header_and_data(compression, name_encoded,

extra = mod_at_unix_extra + aes_extra

compressed_size = \
uncompressed_size + 28 if aes_extra else \
uncompressed_size

# (encryption and) utf-8 file names
flags = \
b'\x80\x08' if aes_extra else \
b'\x01\x08' if aes_extra else \
b'\x00\x08'

yield from _(local_header_signature)
Expand All @@ -543,8 +572,8 @@ def _no_compression_streamed_32_local_header_and_data(compression, name_encoded,
compression,
mod_at_ms_dos,
crc_32,
uncompressed_size, # Compressed
uncompressed_size, # Uncompressed
compressed_size,
uncompressed_size,
len(name_encoded),
len(extra),
))
Expand All @@ -562,8 +591,8 @@ def _no_compression_streamed_32_local_header_and_data(compression, name_encoded,
compression,
mod_at_ms_dos,
crc_32,
uncompressed_size, # Compressed
uncompressed_size, # Uncompressed
compressed_size,
uncompressed_size,
len(name_encoded),
len(extra),
0, # File comment length
Expand Down Expand Up @@ -616,15 +645,17 @@ def _no_compression_streamed_data(chunks, uncompressed_size, crc_32, maximum_siz
(mode << 16) | \
(0x10 if name_encoded[-1:] == b'/' else 0x0) # MS-DOS directory

data_func, compression = \
data_func, raw_compression = \
(_zip_64_local_header_and_data, 8) if _method is _ZIP_64 else \
(_zip_32_local_header_and_data, 8) if _method is _ZIP_32 else \
(_no_compression_64_local_header_and_data, 0) if _method is _NO_COMPRESSION_BUFFERED_64 else \
(_no_compression_32_local_header_and_data, 0) if _method is _NO_COMPRESSION_BUFFERED_32 else \
(_no_compression_streamed_64_local_header_and_data, 0) if _method is _NO_COMPRESSION_STREAMED_64 else \
(_no_compression_streamed_32_local_header_and_data, 0)

aes_extra, encryption_func = (b'', _no_encryption)
compression, aes_extra, encryption_func = \
(99, aes_extra_struct.pack(aes_extra_signature, 7, 2, b'AE', 3, raw_compression), _aes_encrypted) if password is not None else \
(raw_compression, b'', _no_encryption)

central_directory_header_entry, name_encoded, extra = yield from data_func(compression, name_encoded, mod_at_ms_dos, mod_at_unix_extra, aes_extra, external_attr, uncompressed_size, crc_32, _get_compress_obj, encryption_func, evenly_sized(chunks))
central_directory_size += len(central_directory_header_signature) + len(central_directory_header_entry) + len(name_encoded) + len(extra)
Expand Down
44 changes: 44 additions & 0 deletions test_stream_zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -1082,3 +1082,47 @@ def test_unzip_modification_time_extended_timestamps_disabled(method, timezone,
subprocess.run(['unzip', f'{d}/test.zip', '-d', d], env={'TZ': timezone})

assert os.path.getmtime('my_file') == expected_modified_at.timestamp()


def test_password_unzips_with_stream_unzip():
    # Round-trip check: zip six members (one per method variant) with a
    # password, then unzip the stream with the same password and compare the
    # member names, reported sizes and contents.
    modified_at = datetime.strptime('2021-01-01 21:01:12', '%Y-%m-%d %H:%M:%S')
    permissions = stat.S_IFREG | 0o600
    password = 'my-pass'
    chunks_in = (b'a' * 10000, b'b' * 10000)
    joined = b'a' * 10000 + b'b' * 10000

    # (member name, method, size that the unzipped member is asserted to report)
    cases = (
        ('file-1', ZIP_32, None),
        ('file-2', ZIP_64, None),
        ('file-3', NO_COMPRESSION_64, 20000),
        ('file-4', NO_COMPRESSION_64(20000, 2664091433), 20000),
        ('file-5', NO_COMPRESSION_32, 20000),
        ('file-6', NO_COMPRESSION_32(20000, 2664091433), 20000),
    )

    files = tuple(
        (member_name, modified_at, permissions, method, chunks_in)
        for member_name, method, _ in cases
    )
    expected = [
        (member_name.encode(), reported_size, joined)
        for member_name, _, reported_size in cases
    ]

    encrypted_stream = stream_zip(files, password=password)
    actual = [
        (name, size, b''.join(chunks))
        for name, size, chunks in stream_unzip(encrypted_stream, password=password)
    ]

    assert expected == actual


def test_password_bytes_not_deterministic():
    # Zipping identical input twice with the same password must yield
    # different bytes.
    modified_at = datetime.strptime('2021-01-01 21:01:12', '%Y-%m-%d %H:%M:%S')
    permissions = stat.S_IFREG | 0o600
    password = 'my-pass'
    chunks_in = (b'a' * 10000, b'b' * 10000)

    named_methods = (
        ('file-1', ZIP_32),
        ('file-2', ZIP_64),
        ('file-3', NO_COMPRESSION_64),
        ('file-4', NO_COMPRESSION_64(20000, 2664091433)),
        ('file-5', NO_COMPRESSION_32),
        ('file-6', NO_COMPRESSION_32(20000, 2664091433)),
    )
    files = tuple(
        (member_name, modified_at, permissions, method, chunks_in)
        for member_name, method in named_methods
    )

    def zipped_bytes():
        # Consume one full encrypted archive into a single bytes object.
        return b''.join(stream_zip(files, password=password))

    first_archive = zipped_bytes()
    second_archive = zipped_bytes()

    assert first_archive != second_archive

0 comments on commit 4810da2

Please sign in to comment.