Skip to content

Commit

Permalink
feat: allow disable of extended timestamps
Browse files Browse the repository at this point in the history
  • Loading branch information
michalc committed Jul 15, 2023
1 parent 6f80700 commit af58338
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 2 deletions.
12 changes: 12 additions & 0 deletions docs/advanced-usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,15 @@ for zipped_chunk in stream_zip(unzipped_files(), chunk_size=65536):
This one size is used both for input - splitting or gathering any uncompressed data into `chunk_size` bytes before attempting to compress it, and in output - splitting or gathering any compressed data into `chunk_size` bytes before returning it to client code.

There may be performance differences between different `chunk_size` values. The default `chunk_size` may not be optimal for your use case.


## Without extended timestamps

By default, so-called extended timestamps are included in the ZIP. These store the modification time of member files more accurately than the original ZIP format allows. To omit the extended timestamps, you can pass `extended_timestamps=False` to `stream_zip`.

```python
for zipped_chunk in stream_zip(unzipped_files(), extended_timestamps=False):
    print(zipped_chunk)
```

Omitting extended timestamps is useful for keeping the total number of bytes as low as possible. It is also useful when creating Open Document files using `stream_zip`: Open Document files cannot have extended timestamps in their member files if they are to pass validation.
4 changes: 2 additions & 2 deletions stream_zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def method_compressobj(offset, default_get_compressobj):
return method_compressobj


def stream_zip(files, chunk_size=65536, get_compressobj=lambda: zlib.compressobj(wbits=-zlib.MAX_WBITS, level=9)):
def stream_zip(files, chunk_size=65536, get_compressobj=lambda: zlib.compressobj(wbits=-zlib.MAX_WBITS, level=9), extended_timestamps=True):

def evenly_sized(chunks):
chunk = b''
Expand Down Expand Up @@ -398,7 +398,7 @@ def _chunks():
5, # Size of extra
b'\x01', # Only modification time (as opposed to also other times)
int(modified_at.timestamp()),
)
) if extended_timestamps else b''
external_attr = \
(mode << 16) | \
(0x10 if name_encoded[-1:] == b'/' else 0x0) # MS-DOS directory
Expand Down
34 changes: 34 additions & 0 deletions test_stream_zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -984,3 +984,37 @@ def test_unzip_modification_time(method, timezone, modified_at):
subprocess.run(['unzip', f'{d}/test.zip', '-d', d], env={'TZ': timezone})

assert os.path.getmtime('my_file') == int(modified_at.timestamp())


@pytest.mark.parametrize(
    "method",
    [ZIP_32, ZIP_64, NO_COMPRESSION_64, NO_COMPRESSION_32],
)
@pytest.mark.parametrize(
    "timezone,modified_at,expected_modified_at",
    [
        (
            'UTC+1',
            datetime(2011, 1, 1, 1, 2, 3, 123),
            datetime(2011, 1, 1, 2, 2, 2, 0),
        ),
    ],
)
def test_unzip_modification_time_extended_timestamps_disabled(method, timezone, modified_at, expected_modified_at):
    """Check the extracted mtime when extended timestamps are switched off.

    A single-member archive is produced with ``extended_timestamps=False``,
    written to disk, and extracted with the ``unzip`` command run under the
    parametrized ``TZ``. The modification time of the extracted file must
    equal ``expected_modified_at``.

    NOTE(review): the expected value differs from ``modified_at`` presumably
    because, without the extended field, only the coarser legacy ZIP
    timestamp is available to ``unzip`` -- confirm against the ZIP format.
    """
    # One empty regular file (mode 0o600) using the parametrized method.
    files = (
        ('my_file', modified_at, stat.S_IFREG | 0o600, method, ()),
    )
    archive = stream_zip(files, extended_timestamps=False)

    # Work inside a throwaway directory so the relative paths below
    # ('test.zip', 'my_file') resolve there.
    with TemporaryDirectory() as d, cwd(d):
        with open('test.zip', 'wb') as fp:
            fp.writelines(archive)

        # Only TZ is passed to unzip; no other environment is inherited.
        subprocess.run(['unzip', f'{d}/test.zip', '-d', d], env={'TZ': timezone})

        assert os.path.getmtime('my_file') == expected_modified_at.timestamp()

0 comments on commit af58338

Please sign in to comment.