diff --git a/build.zig b/build.zig index 5ede94e..96264b6 100644 --- a/build.zig +++ b/build.zig @@ -69,6 +69,16 @@ pub fn build(b: *std.Build) !void { const install = b.addInstallArtifact(unzip, .{}); b.step("unzip", "Build/install the unzip cmdline tool").dependOn(&install.step); } + { + const zip = b.addExecutable(.{ + .name = "zip", + .root_source_file = b.path("zip.zig"), + .target = target, + .optimize = optimize, + }); + const install = b.addInstallArtifact(zip, .{}); + b.step("zip", "Build/install the zip cmdline tool").dependOn(&install.step); + } } fn addTest( diff --git a/lib/zip.zig b/lib/zip.zig index 2bfc1fe..f18abd8 100644 --- a/lib/zip.zig +++ b/lib/zip.zig @@ -29,7 +29,9 @@ pub const ExtraHeader = enum(u16) { const GeneralPurposeFlags = packed struct(u16) { encrypted: bool, - _: u15, + _0: u2 = 0, + local_header_zero_fields: bool, + _1: u12 = 0, }; pub const LocalFileHeader = extern struct { @@ -192,7 +194,7 @@ pub fn decompress( return hash.final(); } -fn isBadFilename(filename: []const u8) bool { +pub fn isBadFilename(filename: []const u8) bool { if (filename.len == 0 or filename[0] == '/') return true; diff --git a/unzip.zig b/unzip.zig index 31622f5..264827c 100644 --- a/unzip.zig +++ b/unzip.zig @@ -10,7 +10,7 @@ fn fatal(comptime fmt: []const u8, args: anytype) noreturn { } fn usage() noreturn { - std.log.err("Usage: unzip [-d DIR] ZIP_FILE", .{}); + std.io.getStdErr().writer().print("Usage: unzip [-d DIR] ZIP_FILE\n", .{}) catch |e| @panic(@errorName(e)); std.process.exit(1); } @@ -78,7 +78,7 @@ pub fn main() !void { const zip_file = std.fs.cwd().openFile(zip_file_arg, .{}) catch |err| fatal("open '{s}' failed: {s}", .{zip_file_arg, @errorName(err)}); defer zip_file.close(); - try @import("zip.zig").extract(out_dir, zip_file.seekableStream(), .{ + try @import("lib/zip.zig").extract(out_dir, zip_file.seekableStream(), .{ .allow_backslashes = true, }); } diff --git a/zip.zig b/zip.zig new file mode 100644 index 0000000..1ba13f9 --- /dev/null 
+++ b/zip.zig
@@ -0,0 +1,423 @@
+const builtin = @import("builtin");
+const std = @import("std");
+const zip = @import("lib/zip.zig");
+
+/// Bytes that introduce a data descriptor record ("PK\x07\x08").
+const data_descriptor_sig = [4]u8{ 'P', 'K', 7, 8 };
+
+/// Panics with the error name; for call sites that cannot recover from OOM.
+fn oom(e: error{OutOfMemory}) noreturn {
+    @panic(@errorName(e));
+}
+
+/// Logs a formatted error message and exits with status 0xff.
+fn fatal(comptime fmt: []const u8, args: anytype) noreturn {
+    std.log.err(fmt, args);
+    std.process.exit(0xff);
+}
+
+/// Writes the usage line to stderr and exits with status 1.
+fn usage() noreturn {
+    std.io.getStdErr().writer().writeAll(
+        "Usage: zip [-options] ZIP_FILE FILES/DIRS..\n",
+    ) catch |e| @panic(@errorName(e));
+    std.process.exit(1);
+}
+
+/// Narrows `value` to a classic (non-zip64) zip header field of type `T`.
+/// Returns null when the value does not fit. The all-ones value is rejected too
+/// because the zip format reserves it to mean "stored in a zip64 record",
+/// which this tool does not write.
+fn zipField(comptime T: type, value: u64) ?T {
+    if (value >= std.math.maxInt(T)) return null;
+    return @as(T, @intCast(value));
+}
+
+/// Returns the "version needed to extract" header value for `compression`:
+/// 20 (zip 2.0) for deflate, 10 (zip 1.0, the default) for everything else.
+fn versionNeededToExtract(compression: zip.CompressionMethod) u16 {
+    return switch (compression) {
+        .deflate => 20,
+        else => 10,
+    };
+}
+
+// Owns the argument copies made by cmdlineArgs on Windows; it is never freed
+// because the arguments are used for the lifetime of the process.
+var windows_args_arena = if (builtin.os.tag == .windows)
+    std.heap.ArenaAllocator.init(std.heap.page_allocator)
+else
+    struct {}{};
+
+/// Returns the command-line arguments without the program name.
+/// An empty argument vector yields an empty slice.
+pub fn cmdlineArgs() [][*:0]u8 {
+    if (builtin.os.tag == .windows) {
+        const slices = std.process.argsAlloc(windows_args_arena.allocator()) catch |err| switch (err) {
+            error.OutOfMemory => oom(error.OutOfMemory),
+            //error.InvalidCmdLine => @panic("InvalidCmdLine"),
+            error.Overflow => @panic("Overflow while parsing command line"),
+        };
+        // Skip the program name, but only if there is one.
+        const rest = slices[@min(1, slices.len)..];
+        const args = windows_args_arena.allocator().alloc([*:0]u8, rest.len) catch |e| oom(e);
+        for (rest, args) |slice, *arg| {
+            arg.* = slice.ptr;
+        }
+        return args;
+    }
+    return std.os.argv[@min(1, std.os.argv.len)..];
+}
+
+/// Entry point: creates ZIP_FILE from the regular files named on the command line.
+pub fn main() !void {
+    var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena_instance.deinit();
+    const arena = arena_instance.allocator();
+
+    const cmd_args = blk: {
+        const all_args = cmdlineArgs();
+        // No options are implemented yet, so anything starting with '-' is
+        // rejected; the remaining arguments are compacted to the front.
+        var non_option_len: usize = 0;
+        for (all_args) |arg_ptr| {
+            const arg = std.mem.span(arg_ptr);
+            if (std.mem.startsWith(u8, arg, "-"))
+                fatal("unknown cmdline option '{s}'", .{arg});
+            all_args[non_option_len] = arg_ptr;
+            non_option_len += 1;
+        }
+        break :blk all_args[0..non_option_len];
+    };
+
+    if (cmd_args.len < 2) usage();
+    const zip_file_arg = std.mem.span(cmd_args[0]);
+    const paths_to_include = cmd_args[1..];
+
+    // expand cmdline arguments to a list of files
+    var files: std.ArrayListUnmanaged([]const u8) = .{};
+    for (paths_to_include) |path_ptr| {
+        const path = std.mem.span(path_ptr);
+        const stat = std.fs.cwd().statFile(path) catch |err| switch (err) {
+            error.FileNotFound => fatal("path '{s}' is not found", .{path}),
+            else => |e| return e,
+        };
+        switch (stat.kind) {
+            .directory => @panic("todo: directories"),
+            .file => {
+                if (zip.isBadFilename(path))
+                    fatal("filename '{s}' is invalid for zip files", .{path});
+                if (path.len > std.math.maxInt(u16))
+                    fatal("filename '{s}' is too long for zip files", .{path});
+                // Checked again after reading, but failing here avoids
+                // creating an archive that would have to be abandoned.
+                if (zipField(u32, stat.size) == null)
+                    fatal("file '{s}' is too large, zip64 is not supported", .{path});
+                try files.append(arena, path);
+            },
+            .sym_link => fatal("todo: symlinks", .{}),
+            .block_device,
+            .character_device,
+            .named_pipe,
+            .unix_domain_socket,
+            .whiteout,
+            .door,
+            .event_port,
+            .unknown,
+            => fatal("file '{s}' is an unsupported type {s}", .{ path, @tagName(stat.kind) }),
+        }
+    }
+
+    const zip_file = std.fs.cwd().createFile(zip_file_arg, .{}) catch |err|
+        fatal("open '{s}' failed: {s}", .{ zip_file_arg, @errorName(err) });
+    defer zip_file.close();
+
+    // Allocated from the arena, so no explicit free is needed.
+    const store = try arena.alloc(FileStore, files.items.len);
+
+    var zipper = initZipper(zip_file.writer());
+    for (files.items, store) |path, *entry| {
+        const file_offset = zipper.counting_writer.bytes_written;
+        const compression: zip.CompressionMethod = .deflate;
+
+        try zipper.writeFileHeader(path, compression);
+
+        const file = try std.fs.cwd().openFile(path, .{});
+        defer file.close();
+
+        const data_offset = zipper.counting_writer.bytes_written;
+        var crc32: u32 = undefined;
+        // Counted while reading rather than taken from stat, so the records
+        // match the data even if the file changed size in the meantime.
+        var uncompressed_size: u64 = undefined;
+        switch (compression) {
+            .store => {
+                var hash = std.hash.Crc32.init();
+                var buf: [std.mem.page_size]u8 = undefined;
+                var total: u64 = 0;
+                while (true) {
+                    // read may return fewer bytes than requested; 0 means EOF.
+                    const len = try file.reader().read(&buf);
+                    if (len == 0) break;
+                    hash.update(buf[0..len]);
+                    try zipper.counting_writer.writer().writeAll(buf[0..len]);
+                    total += len;
+                }
+                crc32 = hash.final();
+                uncompressed_size = total;
+            },
+            .deflate => {
+                var br = std.io.bufferedReader(file.reader());
+                var cr = Crc32Reader(@TypeOf(br.reader())){ .underlying_reader = br.reader() };
+
+                try std.compress.flate.deflate.compress(
+                    .raw,
+                    cr.reader(),
+                    zipper.counting_writer.writer(),
+                    .{},
+                );
+                if (br.end != br.start) fatal("deflate compressor didn't read all data", .{});
+                crc32 = cr.crc32.final();
+                uncompressed_size = cr.bytes_read;
+            },
+            else => @panic("codebug"),
+        }
+        const compressed_size = zipper.counting_writer.bytes_written - data_offset;
+
+        entry.* = .{
+            .file_offset = file_offset,
+            .compression = compression,
+            .uncompressed_size = zipField(u32, uncompressed_size) orelse
+                fatal("file '{s}' is too large, zip64 is not supported", .{path}),
+            .crc32 = crc32,
+            .compressed_size = zipField(u32, compressed_size) orelse
+                fatal("file '{s}' is too large, zip64 is not supported", .{path}),
+        };
+        // The local header set flag bit 3, so the real values must follow the data.
+        try zipper.writeDataDescriptor(entry.*);
+    }
+    for (files.items, store) |path, entry| {
+        try zipper.writeCentralRecord(entry, .{
+            .name = path,
+            .version_needed_to_extract = versionNeededToExtract(entry.compression),
+        });
+    }
+    try zipper.writeEndRecord();
+}
+
+/// Wraps a reader and, for every byte that passes through it, updates a
+/// running CRC-32 and a byte count.
+pub fn Crc32Reader(comptime ReaderType: type) type {
+    return struct {
+        underlying_reader: ReaderType,
+        crc32: std.hash.Crc32 = std.hash.Crc32.init(),
+        /// Total number of bytes returned by `read` so far.
+        bytes_read: u64 = 0,
+
+        pub const Error = ReaderType.Error;
+        pub const Reader = std.io.Reader(*Self, Error, read);
+
+        const Self = @This();
+
+        pub fn read(self: *Self, dest: []u8) Error!usize {
+            const len = try self.underlying_reader.read(dest);
+            self.crc32.update(dest[0..len]);
+            self.bytes_read += len;
+            return len;
+        }
+
+        pub fn reader(self: *Self) Reader {
+            return .{ .context = self };
+        }
+    };
+}
+
+/// Data gathered while writing a file to the zip archive that is needed again
+/// when writing the corresponding central directory record.
+pub const FileStore = struct {
+    file_offset: u64,
+    compression: zip.CompressionMethod,
+    uncompressed_size: u32,
+    crc32: u32,
+    compressed_size: u32,
+};
+
+/// Creates a Zipper that writes to `writer` and counts the bytes written.
+pub fn initZipper(writer: anytype) Zipper(@TypeOf(writer)) {
+    return .{ .counting_writer = std.io.countingWriter(writer) };
+}
+
+/// Writes the records of a zip archive to a `Writer`. Expected call order:
+/// for each file writeFileHeader, the file data, writeDataDescriptor; then
+/// writeCentralRecord for each file; finally writeEndRecord.
+fn Zipper(comptime Writer: type) type {
+    return struct {
+        counting_writer: std.io.CountingWriter(Writer),
+        central_count: u64 = 0,
+        first_central_offset: ?u64 = null,
+        last_central_limit: ?u64 = null,
+
+        const Self = @This();
+
+        /// Writes the local file header for `name`. The crc and sizes are
+        /// written as zero with flag bit 3 set, so a data descriptor must
+        /// follow the file data (see writeDataDescriptor).
+        pub fn writeFileHeader(
+            self: *Self,
+            name: []const u8,
+            compression: zip.CompressionMethod,
+        ) !void {
+            const writer = self.counting_writer.writer();
+            const hdr: zip.LocalFileHeader = .{
+                .signature = zip.local_file_header_sig,
+                .version_needed_to_extract = versionNeededToExtract(compression),
+                .flags = .{ .encrypted = false, .local_header_zero_fields = true },
+                .compression_method = compression,
+                .last_modification_time = 0,
+                .last_modification_date = 0,
+                .crc32 = 0,
+                .compressed_size = 0,
+                .uncompressed_size = 0,
+                .filename_len = std.math.cast(u16, name.len) orelse return error.ZipFilenameTooLong,
+                .extra_len = 0,
+            };
+            try writeStructEndian(writer, hdr, .little);
+            try writer.writeAll(name);
+        }
+
+        /// Writes the data descriptor that carries the crc and sizes which
+        /// writeFileHeader left as zero. Must be called right after the
+        /// file's data has been written.
+        pub fn writeDataDescriptor(self: *Self, store: FileStore) !void {
+            const writer = self.counting_writer.writer();
+            try writer.writeAll(&data_descriptor_sig);
+            try writer.writeInt(u32, store.crc32, .little);
+            try writer.writeInt(u32, store.compressed_size, .little);
+            try writer.writeInt(u32, store.uncompressed_size, .little);
+        }
+
+        /// Writes the central directory record for a file that was written
+        /// earlier. `opt.version_needed_to_extract` should match the value
+        /// in the file's local header.
+        pub fn writeCentralRecord(
+            self: *Self,
+            store: FileStore,
+            opt: struct {
+                name: []const u8,
+                version_needed_to_extract: u16 = 10,
+            },
+        ) !void {
+            // Validate before touching any state so a failed call changes nothing.
+            const filename_len = std.math.cast(u16, opt.name.len) orelse
+                return error.ZipFilenameTooLong;
+            const header_offset = zipField(u32, store.file_offset) orelse
+                return error.ZipOffsetTooLarge;
+
+            if (self.first_central_offset == null) {
+                self.first_central_offset = self.counting_writer.bytes_written;
+            }
+            self.central_count += 1;
+
+            const hdr: zip.CentralDirectoryFileHeader = .{
+                .signature = zip.central_file_header_sig,
+                .version_made_by = 0,
+                .version_needed_to_extract = opt.version_needed_to_extract,
+                .flags = .{ .encrypted = false, .local_header_zero_fields = true },
+                .compression_method = store.compression,
+                .last_modification_time = 0,
+                .last_modification_date = 0,
+                .crc32 = store.crc32,
+                .compressed_size = store.compressed_size,
+                .uncompressed_size = @intCast(store.uncompressed_size),
+                .filename_len = filename_len,
+                .extra_len = 0,
+                .comment_len = 0,
+                .disk_number = 0,
+                .internal_file_attributes = 0,
+                .external_file_attributes = 0,
+                .local_file_header_offset = header_offset,
+            };
+            const writer = self.counting_writer.writer();
+            try writeStructEndian(writer, hdr, .little);
+            try writer.writeAll(opt.name);
+            self.last_central_limit = self.counting_writer.bytes_written;
+        }
+
+        /// Writes the end-of-central-directory record; call after the last
+        /// writeCentralRecord.
+        pub fn writeEndRecord(self: *Self) !void {
+            const cd_offset = self.first_central_offset orelse 0;
+            const cd_end = self.last_central_limit orelse 0;
+            const record_count = zipField(u16, self.central_count) orelse
+                return error.ZipTooManyFiles;
+            const hdr: zip.EndRecord = .{
+                .signature = zip.end_record_sig,
+                .disk_number = 0,
+                .central_directory_disk_number = 0,
+                .record_count_disk = record_count,
+                .record_count_total = record_count,
+                .central_directory_size = zipField(u32, cd_end - cd_offset) orelse
+                    return error.ZipCentralDirectoryTooLarge,
+                .central_directory_offset = zipField(u32, cd_offset) orelse
+                    return error.ZipOffsetTooLarge,
+                .comment_len = 0,
+            };
+            try writeStructEndian(self.counting_writer.writer(), hdr, .little);
+        }
+    };
+}
+
+const native_endian = builtin.target.cpu.arch.endian();
+
+/// Writes the struct `value` to `writer` with every field in `endian` byte
+/// order, byte-swapping a copy first when that differs from the native order.
+fn writeStructEndian(writer: anytype, value: anytype, endian: std.builtin.Endian) @TypeOf(writer).Error!void {
+    // TODO: make sure this value is not a reference type
+    if (native_endian == endian) {
+        return writer.writeStruct(value);
+    }
+    // Swap a local copy so the caller's value is left untouched.
+    var swapped = value;
+    byteSwapAllFields(@TypeOf(value), &swapped);
+    return writer.writeStruct(swapped);
+}
+
+/// Reverses the byte order of every field of the struct or array `ptr`
+/// points to, recursing into nested structs and arrays. Structs with a
+/// backing integer are swapped as that integer.
+pub fn byteSwapAllFields(comptime S: type, ptr: *S) void {
+    switch (@typeInfo(S)) {
+        .Struct => {
+            inline for (std.meta.fields(S)) |f| {
+                switch (@typeInfo(f.type)) {
+                    .Struct => |struct_info| if (struct_info.backing_integer) |Int| {
+                        @field(ptr, f.name) = @bitCast(@byteSwap(@as(Int, @bitCast(@field(ptr, f.name)))));
+                    } else {
+                        byteSwapAllFields(f.type, &@field(ptr, f.name));
+                    },
+                    .Array => byteSwapAllFields(f.type, &@field(ptr, f.name)),
+                    .Enum => {
+                        @field(ptr, f.name) = @enumFromInt(@byteSwap(@intFromEnum(@field(ptr, f.name))));
+                    },
+                    else => {
+                        @field(ptr, f.name) = @byteSwap(@field(ptr, f.name));
+                    },
+                }
+            }
+        },
+        .Array => {
+            for (ptr) |*item| {
+                switch (@typeInfo(@TypeOf(item.*))) {
+                    .Struct, .Array => byteSwapAllFields(@TypeOf(item.*), item),
+                    .Enum => {
+                        item.* = @enumFromInt(@byteSwap(@intFromEnum(item.*)));
+                    },
+                    else => {
+                        item.* = @byteSwap(item.*);
+                    },
+                }
+            }
+        },
+        else => @compileError("byteSwapAllFields expects a struct or array as the first argument"),
+    }
+}