From a440bee9074985a963475cc3f1139510ab84c057 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Sat, 29 Jul 2023 16:36:16 +0800 Subject: [PATCH 01/34] Fix ddoc of generateRandomData --- test/util.d | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/util.d b/test/util.d index cc22cd0..898c218 100644 --- a/test/util.d +++ b/test/util.d @@ -287,8 +287,7 @@ unittest assert(getSize(dm.path) == len); } -/// Generate potentially large but repetitive data constituted of the same phrase repeated -/// over and over until byteSize is written out. +/// Generate potentially very large amount of binary random data until byteSize is written out auto generateRandomData(size_t byteSize, uint seed = unpredictableSeed(), size_t chunkSize = 8192) { auto eng = Random(seed); From d614d52468556b529a16c08cc810834978938142 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Sat, 29 Jul 2023 16:36:37 +0800 Subject: [PATCH 02/34] DontDeleteMe --- test/util.d | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/util.d b/test/util.d index 898c218..ca2cb41 100644 --- a/test/util.d +++ b/test/util.d @@ -83,7 +83,7 @@ struct DeleteMe } // used in place of DeleteMe if needed to inspect the file after the test -struct Path +struct DontDeleteMe { this(string basename, string ext) { From e521c9d42b4518178190c585a5a9f0ae0d85ff90 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Sat, 29 Jul 2023 16:37:37 +0800 Subject: [PATCH 03/34] test.util.findProgram --- test/util.d | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/test/util.d b/test/util.d index ca2cb41..909bda1 100644 --- a/test/util.d +++ b/test/util.d @@ -50,6 +50,51 @@ string testPath(Args...)(Args args) return buildNormalizedPath(__FILE_FULL_PATH__.dirName(), args); } +/// Find a program executable name in the system PATH and return its full path +string findProgram(in string name) +{ + import std.process : environment; + + version (Windows) + { + import std.algorithm : endsWith; + + const efn = name.endsWith(".exe") ? name : name ~ ".exe"; + } + else + { + const efn = name; + } + + return searchInEnvPath(environment["PATH"], efn); +} + +/// environment variable path separator +version (Posix) + enum envPathSep = ':'; +else version (Windows) + enum envPathSep = ';'; +else + static assert(false); + +/// Search for filename in the envPath variable content which can +/// contain multiple paths separated with sep depending on platform. +/// Returns: null if the file can't be found. +string searchInEnvPath(in string envPath, in string filename, in char sep = envPathSep) +{ + import std.algorithm : splitter; + import std.file : exists; + import std.path : buildPath; + + foreach (dir; splitter(envPath, sep)) + { + const filePath = buildPath(dir, filename); + if (exists(filePath)) + return filePath; + } + return null; +} + /// Defines a path in a temporary location /// and delete the file or directory (recursively) at that path when going out of scope. struct DeleteMe From dc4091c0a1cf5727656b8431e726f8301e66a098 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Sat, 29 Jul 2023 16:38:07 +0800 Subject: [PATCH 04/34] increase test timeout (for windows) --- meson.build | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 180c3c4..5f0167f 100644 --- a/meson.build +++ b/meson.build @@ -80,5 +80,7 @@ if get_option('enable_test') d_module_versions: squiz_ver, ) - test('unit tests', squiz_test_exe) + test('unit tests', squiz_test_exe, + timeout: 120, + ) endif \ No newline at end of file From fa929cd19bb157a3b9ba1b49abf5706dadeeaae4 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Sat, 29 Jul 2023 16:40:45 +0800 Subject: [PATCH 05/34] skipping assertions if program not found --- test/archive.d | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/test/archive.d b/test/archive.d index 9616684..c59b46d 100644 --- a/test/archive.d +++ b/test/archive.d @@ -3,9 +3,10 @@ module test.archive; import test.util; import squiz_box; -import std.typecons; import std.digest; import std.digest.sha; +import std.stdio; +import std.typecons; string[] filesForArchive() { @@ -24,6 +25,12 @@ void testTarArchiveContent(string archivePath, Flag!"testModes" testModes, Flag! import std.regex : matchFirst; import std.string : splitLines; + if (!findProgram("tar")) + { + stderr.writeln("tar not found: skipping assertions"); + return; + } + if (testModes) { const line1 = `^-rw-r--r-- .+ 7 .+ file1.txt$`; @@ -70,6 +77,12 @@ void testZipArchiveContent(string archivePath) import std.regex : matchFirst; import std.string : splitLines; + if (!findProgram("unzip")) + { + stderr.writeln("unzip not found: skipping assertions"); + return; + } + const line1 = `^\s*7\s.+file1.txt$`; const line2 = `^\s*3521\s.+file 2.txt$`; const line3 = `^\s*26\s.+folder/chmod 666.txt$`; From 4d8d035b5677830f7b14d0d6ddc2129ee1dd9732 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Sat, 29 Jul 2023 16:41:11 +0800 Subject: [PATCH 06/34] no need of sha1sum and non portable shell in tests --- test/archive.d | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/test/archive.d b/test/archive.d index c59b46d..f7c89c5 100644 --- a/test/archive.d +++ b/test/archive.d @@ -95,25 +95,19 @@ void testZipArchiveContent(string archivePath) assert(matchFirst(lines[4], line2)); assert(matchFirst(lines[5], line3)); - const archiveShell = escapeShellFileName(archivePath); - auto sha1sumFile(string filename) { - const fileShell = escapeShellFileName(filename); - return executeShell("unzip -p " ~ archiveShell ~ " " ~ fileShell ~ " | sha1sum"); + return sha1sumProcessStdout(["unzip", "-p", archivePath, filename]); } - res = sha1sumFile("file1.txt"); - assert(res.status == 0); - assert(res.output.canFind("38505a984f71c07843a5f3e394ada2bf4c7b6abc")); + auto sha1 = sha1sumFile("file1.txt"); + assert(sha1 == "38505A984F71C07843A5F3E394ADA2BF4C7B6ABC"); - res = sha1sumFile("file 2.txt"); - assert(res.status == 0); - assert(res.output.canFind("01fa4c5c29a58449eef1665658c48c0d7829c45f")); + sha1 = sha1sumFile("file 2.txt"); + assert(sha1 == "01FA4C5C29A58449EEF1665658C48C0D7829C45F"); - res = sha1sumFile("folder/chmod 666.txt"); - assert(res.status == 0); - assert(res.output.canFind("3e31b8e6b2bbba1edfcfdca886e246c9e120bbe3")); + sha1 = sha1sumFile("folder/chmod 666.txt"); + assert(sha1 == "3E31B8E6B2BBBA1EDFCFDCA886E246C9E120BBE3"); } void testExtractedFiles(DM)(auto ref DM dm, Flag!"mode666" mode666) From 7518998b46bac457a718617f6d9528a0da299ea9 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Sun, 30 Jul 2023 01:50:54 +0800 Subject: [PATCH 07/34] dfmt --- src/squiz_box/box/tar.d | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/squiz_box/box/tar.d b/src/squiz_box/box/tar.d index d338472..6c0fd82 100644 --- a/src/squiz_box/box/tar.d +++ b/src/squiz_box/box/tar.d @@ -20,7 +20,8 @@ struct TarAlgo return TarBox!I(entries, chunkSize); } - auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) if (isByteRange!I) + auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) + if (isByteRange!I) { auto dataInput = new ByteRangeCursor!I(input); return TarUnbox(dataInput, removePrefix); @@ -39,7 +40,8 @@ struct TarGzAlgo return TarBox!I(entries, chunkSize).deflateGz(chunkSize); } - auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) if (isByteRange!I) + auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) + if (isByteRange!I) { auto ii = input.inflateGz(); alias II = typeof(ii); @@ -62,7 +64,8 @@ version (HaveSquizBzip2) return TarBox!I(entries, chunkSize).compressBzip2(chunkSize); } - auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) if (isByteRange!I) + auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) + if (isByteRange!I) { auto ii = input.decompressBzip2(); alias II = typeof(ii); @@ -86,7 +89,8 @@ version (HaveSquizLzma) return TarBox!I(entries, chunkSize).compressXz(chunkSize); } - auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) if (isByteRange!I) + auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) + if (isByteRange!I) { auto ii = input.decompressXz(); alias II = typeof(ii); @@ -133,7 +137,8 @@ version (HaveSquizLzma) } /// Returns a range of entries from a `.tar`, `.tar.gz`, `.tar.bz2` or `.tar.xz` formatted byte range -auto unboxTar(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) if (isByteRange!I) +auto unboxTar(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) + if (isByteRange!I) { auto dataInput = new ByteRangeCursor!I(input); return TarUnbox(dataInput, removePrefix); From f86dbbcb0cbbc95458dfe2e52c3f527151df8444 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Sun, 30 Jul 2023 02:34:57 +0800 Subject: [PATCH 08/34] allow recursive TAR header processing prepare ground for #17 --- src/squiz_box/box/tar.d | 137 ++++++++++++++++++++++++---------------- 1 file changed, 84 insertions(+), 53 deletions(-) diff --git a/src/squiz_box/box/tar.d b/src/squiz_box/box/tar.d index 6c0fd82..8e37bb1 100644 --- a/src/squiz_box/box/tar.d +++ b/src/squiz_box/box/tar.d @@ -305,9 +305,8 @@ private struct TarUnbox _removePrefix = removePrefix; _block = new ubyte[512]; - // file with zero bytes is a valid tar file if (!_input.eoi) - readHeaderBlock(); + popFront(); } @property bool empty() @@ -324,6 +323,8 @@ private struct TarUnbox { assert(_input.pos <= _next); + _entry = null; + if (_input.pos < _next) { // the current entry was not fully read, we move the stream forward @@ -331,15 +332,46 @@ private struct TarUnbox const dist = _next - _input.pos; _input.ffw(dist); } - readHeaderBlock(); + + auto info = readHeaderBlock(); + + if (info.isNull) + { + while (!_input.eoi) + _input.ffw(512); + return; + } + + if (_removePrefix) + { + import std.algorithm : min; + + const pref = enforce(entryPrefix(info.path, info.type), format!`"%s": no prefix to be removed`( + info.path)); + + if (!_prefix) + _prefix = pref; + + enforce(_prefix == pref, format!`"%s": path prefix mismatch with "%s"`(info.path, _prefix)); + + const len = min(info.path.length, _prefix.length); + info.path = info.path[len .. $]; + + // skipping empty directory + if (!info.path.length && info.type == EntryType.directory) + { + _next = next512(_input.pos + info.size); + info = readHeaderBlock(); + } + } + + _entry = new TarUnboxEntry(_input, info); + _next = next512(_input.pos + info.size); } - private void readHeaderBlock() + private TarEntryInfo readHeaderBlock() { - import std.conv : to; - enforce(_input.read(_block).length == 512, "Unexpected end of input"); - TarHeader* th = cast(TarHeader*) _block.ptr; const computed = th.unsignedChecksum(); @@ -350,38 +382,57 @@ private struct TarUnbox // this is an empty header (only zeros) // indicates end of archive - while (!_input.eoi) - { - _input.ffw(512); - } - return; + // dfmt off + TarEntryInfo info = { + isNull: true, + }; + // dfmt on + return info; } enforce( checksum == computed, - "Invalid TAR checksum at 0x" ~ ( - _input.pos - 512 + th.chksum.offsetof) - .to!string(16) ~ - "\nExpected " ~ computed.to!string ~ " but found " ~ checksum.to!string, + format!"Invalid TAR checksum at 0x%08X\nExpected 0x%08x but found 0x%08x"( + _input.pos - 512 + th.chksum.offsetof, + computed, checksum) ); - if (th.typeflag == Typeflag.posixExtended || th.typeflag == Typeflag.extended) + switch (th.typeflag) { - // skipping extended Tar headers - const sz = next512(parseOctalString!size_t(th.size)); - _input.ffw(sz); - readHeaderBlock(); - return; + case Typeflag.normalNul: + case Typeflag.normal: + case Typeflag.hardLink: + case Typeflag.symLink: + case Typeflag.charSpecial: + case Typeflag.blockSpecial: + case Typeflag.directory: + case Typeflag.fifo: + case Typeflag.contiguousFile: + case Typeflag.posixExtended: + case Typeflag.extended: + return processHeader(th); + default: + const prefix = parseString(th.prefix).idup; + const name = parseString(th.name).idup; + const msg = format!"Unknown TAR typeflag: '%s'\nWhen extracting \"%s\"."( + cast(char)th.typeflag, prefix ~ name + ); + throw new Exception(msg); } + } - TarEntryInfo info; - info.path = (parseString(th.prefix) ~ parseString(th.name)).idup; - info.type = toEntryType(th.typeflag); - info.linkname = parseString(th.linkname).idup; - info.size = parseOctalString!size_t(th.size); + private TarEntryInfo processHeader(scope TarHeader* th) + { + TarEntryInfo info = { + path: (parseString(th.prefix) ~ parseString(th.name)).idup, + type: toEntryType(th.typeflag), + linkname: parseString(th.linkname).idup, + size: parseOctalString!size_t(th.size), + timeLastModified: SysTime(unixTimeToStdTime(parseOctalString!ulong(th.mtime))), + }; info.entrySize = 512 + next512(info.size); - info.timeLastModified = SysTime(unixTimeToStdTime(parseOctalString!ulong(th.mtime))); - version (Posix) + + version(Posix) { // tar mode contains stat.st_mode & 07777. // we have to add the missing flags corresponding to file type @@ -395,30 +446,7 @@ private struct TarUnbox version (Windows) info.path = info.path.replace('\\', '/'); - if (_removePrefix) - { - import std.algorithm : min; - - const pref = enforce(entryPrefix(info.path, info.type), format!`"%s": no prefix to be removed`(info.path)); - - if (!_prefix) - _prefix = pref; - - enforce (_prefix == pref, format!`"%s": path prefix mismatch with "%s"`(info.path, _prefix)); - - const len = min(info.path.length, _prefix.length); - info.path = info.path[len .. $]; - - // skipping empty directory - if (!info.path.length && info.type == EntryType.directory) - { - _next = next512(_input.pos + info.size); - readHeaderBlock(); - } - } - - _entry = new TarUnboxEntry(_input, info); - _next = next512(_input.pos + info.size); + return info; } } @@ -439,6 +467,9 @@ struct TarEntryInfo int ownerId; int groupId; } + + // marker for null header + bool isNull; } private class TarUnboxEntry : UnboxEntry From ae5e6eef92e7c14c1d208d8ad1165d61f5a5241d Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Sun, 30 Jul 2023 12:24:50 +0800 Subject: [PATCH 09/34] fix Cursor.read template version --- src/squiz_box/priv.d | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/squiz_box/priv.d b/src/squiz_box/priv.d index f9cbe73..85df6e6 100644 --- a/src/squiz_box/priv.d +++ b/src/squiz_box/priv.d @@ -75,7 +75,7 @@ interface Cursor T[] read(T)(T[] buffer) { - auto ptr = cast(ubyte)&buffer[0]; + auto ptr = cast(ubyte*)&buffer[0]; auto arr = ptr[0 .. buffer.length * T.sizeof]; auto res = read(arr); enforce(res.length % T.sizeof == 0, "Could not read aligned bytes for " ~ T.stringof); From 35634430e0234b560b45b71b2047f22a00941f0e Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Sun, 30 Jul 2023 12:25:43 +0800 Subject: [PATCH 10/34] handle tar gnulong name fix #17 --- src/squiz_box/box/tar.d | 105 ++++++++++++++++++++++++++++------------ 1 file changed, 75 insertions(+), 30 deletions(-) diff --git a/src/squiz_box/box/tar.d b/src/squiz_box/box/tar.d index 8e37bb1..d04e65b 100644 --- a/src/squiz_box/box/tar.d +++ b/src/squiz_box/box/tar.d @@ -294,7 +294,6 @@ private struct TarUnbox // current header data private size_t _next; - private ubyte[] _block; private UnboxEntry _entry; private Flag!"removePrefix" _removePrefix; private string _prefix; @@ -303,7 +302,6 @@ private struct TarUnbox { _input = input; _removePrefix = removePrefix; - _block = new ubyte[512]; if (!_input.eoi) popFront(); @@ -344,24 +342,13 @@ private struct TarUnbox if (_removePrefix) { - import std.algorithm : min; - - const pref = enforce(entryPrefix(info.path, info.type), format!`"%s": no prefix to be removed`( - info.path)); - - if (!_prefix) - _prefix = pref; - - enforce(_prefix == pref, format!`"%s": path prefix mismatch with "%s"`(info.path, _prefix)); - - const len = min(info.path.length, _prefix.length); - info.path = info.path[len .. $]; + info.name = removePrefix(info.name, info.type); // skipping empty directory - if (!info.path.length && info.type == EntryType.directory) + while (!info.name.length && info.type == EntryType.directory) { - _next = next512(_input.pos + info.size); info = readHeaderBlock(); + info.name = removePrefix(info.name, info.type); } } @@ -369,10 +356,28 @@ private struct TarUnbox _next = next512(_input.pos + info.size); } + private string removePrefix(string name, EntryType type) + { + import std.algorithm : min; + + const pref = enforce(entryPrefix(name, type), format!`"%s": no prefix to be removed`( + name)); + + if (!_prefix) + _prefix = pref; + + enforce(_prefix == pref, format!`"%s": path prefix mismatch with "%s"`(name, _prefix)); + + const len = min(name.length, _prefix.length); + name = name[len .. $]; + + return name; + } + private TarEntryInfo readHeaderBlock() { - enforce(_input.read(_block).length == 512, "Unexpected end of input"); - TarHeader* th = cast(TarHeader*) _block.ptr; + TarHeader th; + _input.readValue(&th); const computed = th.unsignedChecksum(); const checksum = parseOctalString(th.chksum); @@ -410,12 +415,15 @@ private struct TarUnbox case Typeflag.contiguousFile: case Typeflag.posixExtended: case Typeflag.extended: - return processHeader(th); + return processHeader(&th); + case Typeflag.gnuLongname: + case Typeflag.gnuLonglink: + return processGnuLongHeader(&th); default: const prefix = parseString(th.prefix).idup; const name = parseString(th.name).idup; const msg = format!"Unknown TAR typeflag: '%s'\nWhen extracting \"%s\"."( - cast(char)th.typeflag, prefix ~ name + cast(char) th.typeflag, prefix ~ "/" ~ name ); throw new Exception(msg); } @@ -424,7 +432,7 @@ private struct TarUnbox private TarEntryInfo processHeader(scope TarHeader* th) { TarEntryInfo info = { - path: (parseString(th.prefix) ~ parseString(th.name)).idup, + name: parseString(th.name).idup, type: toEntryType(th.typeflag), linkname: parseString(th.linkname).idup, size: parseOctalString!size_t(th.size), @@ -432,7 +440,7 @@ private struct TarUnbox }; info.entrySize = 512 + next512(info.size); - version(Posix) + version (Posix) { // tar mode contains stat.st_mode & 07777. // we have to add the missing flags corresponding to file type @@ -443,18 +451,53 @@ private struct TarUnbox info.groupId = parseOctalString(th.gid); } + if (th.prefix[0] != '\0') + { + const prefix = parseString(th.prefix).idup; + info.name = prefix ~ "/" ~ info.name; + } + version (Windows) - info.path = info.path.replace('\\', '/'); + { + info.name = info.name.replace('\\', '/'); + } return info; } + + private TarEntryInfo processGnuLongHeader(scope TarHeader* th) + { + const size = parseOctalString(th.size); + auto data = new char[next512(size)]; + enforce(_input.read(data).length == data.length, "Unexpected end of input"); + const name = parseString(assumeUnique(data)); + + auto next = readHeaderBlock(); + + switch (th.typeflag) + { + case Typeflag.gnuLongname: + next.name = name; + break; + case Typeflag.gnuLonglink: + next.linkname = name; + break; + default: + assert(false); + } + + if (next.type == EntryType.directory && !next.name.empty && next.name[$ - 1] == '/') + next.name = next.name[0 .. $ - 1]; + + return next; + } } static assert(isUnboxEntryRange!TarUnbox); struct TarEntryInfo { - string path; + string name; string linkname; EntryType type; ulong size; @@ -496,7 +539,7 @@ private class TarUnboxEntry : UnboxEntry @property string path() { - return _info.path; + return _info.name; } @property EntryType type() @@ -699,6 +742,8 @@ private enum Typeflag : ubyte contiguousFile = '7', posixExtended = 'g', extended = 'x', + gnuLongname = 'L', + gnuLonglink = 'K', } Typeflag toTypeflag(EntryType type) @@ -795,12 +840,12 @@ private T parseOctalString(T = uint)(const(char)[] octal) return parse!(T)(src, 8); } -private char[] parseString(char[] chars) +private inout(char)[] parseString(inout(char)[] chars) { - import core.stdc.string : strlen; - - const len = strlen(chars.ptr); - return chars[0 .. len]; + size_t count; + while (count < chars.length && chars[count] != '\0') + count++; + return chars[0 .. count]; } private size_t next512(size_t off) From 15e3eb91a27415a03a7b42824cb155694f88e6af Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Tue, 1 Aug 2023 12:11:51 +0800 Subject: [PATCH 11/34] enhance test suite with unit-threaded assertions and stupid driver --- meson.build | 21 ++- tools/stupid.d.in | 331 +++++++++++++++++++++++++++++++++++++++++++++ tools/stupid_gen.d | 99 ++++++++++++++ 3 files changed, 449 insertions(+), 2 deletions(-) create mode 100644 tools/stupid.d.in create mode 100644 tools/stupid_gen.d diff --git a/meson.build b/meson.build index 5f0167f..7b60b6e 100644 --- a/meson.build +++ b/meson.build @@ -65,6 +65,11 @@ squiz_dep = declare_dependency( ) if get_option('enable_test') + ut_assertions_dep = dependency('unit-threaded:assertions', + method: 'dub', + version: '2.0.3', + ) + squiz_test_src = squiz_src + files([ 'test/archive.d', 'test/compress.d', @@ -72,11 +77,23 @@ if get_option('enable_test') 'test/util.d', ]) - squiz_test_exe = executable('squiz-test', squiz_test_src, + stupid_gen_exe = executable('stupid_gen', 'tools/stupid_gen.d', + d_import_dirs: include_directories('tools'), + ) + squiz_stupid = custom_target('dop_stupid', + capture: true, + output: 'stupid.d', + input: squiz_test_src, + command: [ + stupid_gen_exe, '@INPUT@', + ], + ) + + squiz_test_exe = executable('squiz-test', squiz_stupid, squiz_test_src, d_unittest: true, install: false, include_directories: squiz_inc, - dependencies: squiz_deps, + dependencies: squiz_deps + [ut_assertions_dep], d_module_versions: squiz_ver, ) diff --git a/tools/stupid.d.in b/tools/stupid.d.in new file mode 100644 index 0000000..bdc389e --- /dev/null +++ b/tools/stupid.d.in @@ -0,0 +1,331 @@ +// dfmt off +/* + This is a modified version of silly for Dopamine that allows integration out of DUB. + This file actually is a template used by stupid_gen +*/ +/* + * Silly is a test runner for the D programming language + * + * Report bugs and propose new features in project's repository: https://gitlab.com/AntonMeep/silly + */ + +/* SPDX-License-Identifier: ISC */ +/* Copyright (c) 2018-2019, Anton Fediushin */ + +module stupid; + +version(unittest): + +// static if(!__traits(compiles, () {static import dub_test_root;})) { +// static assert(false, "Couldn't find 'dub_test_root'. Make sure you are running tests with `dub test`"); +// } else { +// static import dub_test_root; +// } + +import core.time : Duration, MonoTime; +import std.ascii : newline; +import std.meta : AliasSeq; +import std.stdio : stdout; + +// TESTED MODULES HERE + +version (NoStupidMain) +{ +} +else +{ + void main() {} +} + +shared static this() { + import core.runtime : Runtime, UnitTestResult; + import std.getopt : getopt; + import std.parallelism : TaskPool, totalCPUs; + + Runtime.extendedModuleUnitTester = () { + bool verbose; + shared ulong passed, failed; + uint threads; + string include, exclude; + + auto args = Runtime.args; + auto getoptResult = args.getopt( + "no-colours", + "Disable colours", + &noColours, + "t|threads", + "Number of worker threads. 0 to auto-detect (default)", + &threads, + "i|include", + "Run tests if their name matches specified regular expression", + &include, + "e|exclude", + "Skip tests if their name matches specified regular expression", + &exclude, + "v|verbose", + "Show verbose output (full stack traces, location and durations)", + &verbose, + ); + + if(getoptResult.helpWanted) { + import std.string : leftJustifier; + + stdout.writefln("Usage:%1$s\tdub test -- %1$s%1$sOptions:", newline); + + foreach(option; getoptResult.options) + stdout.writefln(" %s\t%s\t%s", option.optShort, option.optLong.leftJustifier(20), option.help); + + return UnitTestResult(0, 0, false, false); + } + + if(!threads) + threads = totalCPUs; + + Console.init; + + Test[] tests; + + // Test discovery + foreach(m; allModules) { + import std.traits : fullyQualifiedName; + static if(__traits(isModule, m)) { + alias module_ = m; + } else { + import std.meta : Alias; + // For cases when module contains member of the same name + alias module_ = Alias!(__traits(parent, m)); + } + + // Unittests in the module + foreach(test; __traits(getUnitTests, module_)) + tests ~= Test(fullyQualifiedName!test, getTestName!test, getTestLocation!test, &test); + + // Unittests in structs and classes + foreach(member; __traits(derivedMembers, module_)) + static if(__traits(compiles, __traits(getMember, module_, member)) && + __traits(compiles, __traits(isTemplate, __traits(getMember, module_, member))) && + !__traits(isTemplate, __traits(getMember, module_, member)) && + __traits(compiles, __traits(parent, __traits(getMember, module_, member))) && + __traits(isSame, __traits(parent, __traits(getMember, module_, member)), module_) && + __traits(compiles, __traits(getUnitTests, __traits(getMember, module_, member)))) + foreach(test; __traits(getUnitTests, __traits(getMember, module_, member))) + tests ~= Test(fullyQualifiedName!test, getTestName!test, getTestLocation!test, &test); + } + + auto started = MonoTime.currTime; + + with(new TaskPool(threads-1)) { + import core.atomic : atomicOp; + import std.regex : matchFirst; + + foreach(test; parallel(tests)) { + if((!include && !exclude) || + (include && !(test.fullName ~ " " ~ test.testName).matchFirst(include).empty) || + (exclude && (test.fullName ~ " " ~ test.testName).matchFirst(exclude).empty)) { + auto result = test.executeTest; + result.writeResult(verbose); + + atomicOp!"+="(result.succeed ? passed : failed, 1UL); + } + } + + finish(true); + } + + stdout.writeln; + stdout.writefln("%s: %s passed, %s failed in %d ms", + Console.emphasis("Summary"), + Console.colour(passed, Colour.ok), + Console.colour(failed, failed ? Colour.achtung : Colour.none), + (MonoTime.currTime - started).total!"msecs", + ); + + return UnitTestResult(passed + failed, passed, false, false); + }; +} + +void writeResult(TestResult result, in bool verbose) { + import std.format : formattedWrite; + import std.algorithm : canFind; + import std.range : drop; + import std.string : lastIndexOf, lineSplitter; + + auto writer = stdout.lockingTextWriter; + + writer.formattedWrite(" %s %s %s", + result.succeed + ? Console.colour("✓", Colour.ok) + : Console.colour("✗", Colour.achtung), + Console.emphasis(result.test.fullName[0..result.test.fullName.lastIndexOf('.')].truncateName(verbose)), + result.test.testName, + ); + + if(verbose) { + writer.formattedWrite(" (%.3f ms)", (cast(real) result.duration.total!"usecs") / 10.0f ^^ 3); + + if(result.test.location != TestLocation.init) { + writer.formattedWrite(" [%s:%d:%d]", + result.test.location.file, + result.test.location.line, + result.test.location.column); + } + } + + writer.put(newline); + + foreach(th; result.thrown) { + writer.formattedWrite(" %s thrown from %s on line %d: %s%s", + th.type, + th.file, + th.line, + th.message.lineSplitter.front, + newline, + ); + foreach(line; th.message.lineSplitter.drop(1)) + writer.formattedWrite(" %s%s", line, newline); + + writer.formattedWrite(" --- Stack trace ---%s", newline); + if(verbose) { + foreach(line; th.info) + writer.formattedWrite(" %s%s", line, newline); + } else { + for(size_t i = 0; i < th.info.length && !th.info[i].canFind(__FILE__); ++i) + writer.formattedWrite(" %s%s", th.info[i], newline); + } + } +} + +TestResult executeTest(Test test) { + import core.exception : AssertError, OutOfMemoryError; + auto ret = TestResult(test); + auto started = MonoTime.currTime; + + try { + scope(exit) ret.duration = MonoTime.currTime - started; + test.ptr(); + ret.succeed = true; + } catch(Throwable t) { + if(!(cast(Exception) t || cast(AssertError) t)) + throw t; + + foreach(th; t) { + immutable(string)[] trace; + try { + foreach(i; th.info) + trace ~= i.idup; + } catch(OutOfMemoryError) { // TODO: Actually fix a bug instead of this workaround + trace ~= " Failed to get stack trace, see https://gitlab.com/AntonMeep/silly/issues/31"; + } + + ret.thrown ~= Thrown(typeid(th).name, th.message.idup, th.file, th.line, trace); + } + } + + return ret; +} + +struct TestLocation { + string file; + size_t line, column; +} + +struct Test { + string fullName, + testName; + + TestLocation location; + + void function() ptr; +} + +struct TestResult { + Test test; + bool succeed; + Duration duration; + + immutable(Thrown)[] thrown; +} + +struct Thrown { + string type, + message, + file; + size_t line; + immutable(string)[] info; +} + +__gshared bool noColours; + +enum Colour { + none, + ok = 32, + achtung = 31, +} + +static struct Console { + static void init() { + if(noColours) { + return; + } else { + version(Posix) { + import core.sys.posix.unistd; + noColours = isatty(STDOUT_FILENO) == 0; + } else version(Windows) { + import core.sys.windows.winbase : GetStdHandle, STD_OUTPUT_HANDLE, INVALID_HANDLE_VALUE; + import core.sys.windows.wincon : SetConsoleOutputCP, GetConsoleMode, SetConsoleMode; + import core.sys.windows.windef : DWORD; + import core.sys.windows.winnls : CP_UTF8; + + SetConsoleOutputCP(CP_UTF8); + + auto hOut = GetStdHandle(STD_OUTPUT_HANDLE); + DWORD originalMode; + + // TODO: 4 stands for ENABLE_VIRTUAL_TERMINAL_PROCESSING which should be + // in druntime v2.082.0 + noColours = hOut == INVALID_HANDLE_VALUE || + !GetConsoleMode(hOut, &originalMode) || + !SetConsoleMode(hOut, originalMode | 4); + } + } + } + + static string colour(T)(T t, Colour c = Colour.none) { + import std.conv : text; + + return noColours ? text(t) : text("\033[", cast(int) c, "m", t, "\033[m"); + } + + static string emphasis(string s) { + return noColours ? s : "\033[1m" ~ s ~ "\033[m"; + } +} + +string getTestName(alias test)() { + string name = __traits(identifier, test); + + foreach(attribute; __traits(getAttributes, test)) { + static if(is(typeof(attribute) : string)) { + name = attribute; + break; + } + } + + return name; +} + +string truncateName(string s, bool verbose = false) { + import std.algorithm : max; + import std.string : indexOf; + return s.length > 30 && !verbose + ? s[max(s.indexOf('.', s.length - 30), s.length - 30) .. $] + : s; +} + +TestLocation getTestLocation(alias test)() { + // test if compiler is new enough for getLocation (since 2.088.0) + static if(is(typeof(__traits(getLocation, test)))) + return TestLocation(__traits(getLocation, test)); + else + return TestLocation.init; +} diff --git a/tools/stupid_gen.d b/tools/stupid_gen.d new file mode 100644 index 0000000..b8cb467 --- /dev/null +++ b/tools/stupid_gen.d @@ -0,0 +1,99 @@ +/// Discover unittests and generate stupid test driver +module tools.stupid_gen; + +import std.algorithm; +import std.array; +import std.getopt; +import std.file; +import std.path; +import std.stdio; +import std.string; + +/// return module name of the D file at filename +/// only if it contains "unittest" +string getUnittestMod(string filename) +{ + string mod; + auto file = File(filename, "r"); + foreach (l; file.byLine.map!(l => l.strip)) + { + // reasonable assumption about how module is defined + if (!mod && l.startsWith("module ") && l.endsWith(";")) + { + mod = l["module ".length .. $ - 1].strip().idup; + continue; + } + if (mod && l.canFind("unittest")) + { + return mod; + } + } + return null; +} + +int main(string[] args) +{ + string root = "."; + string[] exclusions; + + auto helpInfo = getopt(args, "root", &root, "exclude", &exclusions); + if (helpInfo.helpWanted) + { + defaultGetoptPrinter("Generate stupid test driver.", helpInfo.options); + return 0; + } + + string[] mods; + + string[] dFiles = args[1 .. $]; + if (args.length == 0) + { + dFiles = dirEntries(root, SpanMode.depth).filter!(f => f.name.endsWith(".d")) + .map!(e => e.name) + .array; + } + + outer: foreach (f; dFiles) + { + foreach (ex; exclusions) + { + if (f.canFind(ex)) + continue outer; + } + + const m = getUnittestMod(f); + if (m) + { + mods ~= m; + } + } + + mods = mods.sort().uniq().array; + + const tmplate = import("stupid.d.in"); + + foreach (inl; lineSplitter(tmplate)) + { + if (!inl.startsWith("// TESTED MODULES HERE")) + { + writeln(inl); + continue; + } + + foreach (m; mods) + { + writefln("import %s;", m); + } + writefln(""); + writefln("alias allModules = AliasSeq!("); + foreach (m; mods) + { + writefln(" %s,", m); + } + writefln(");"); + + } + writefln(""); + + return 0; +} From 8c38054f26c99bf970b0d90c25149b6ae5a0a631 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Tue, 1 Aug 2023 12:12:28 +0800 Subject: [PATCH 12/34] test tar.splitPosixPrefixName --- src/squiz_box/box/tar.d | 55 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/src/squiz_box/box/tar.d b/src/squiz_box/box/tar.d index d04e65b..b8598f7 100644 --- a/src/squiz_box/box/tar.d +++ b/src/squiz_box/box/tar.d @@ -168,6 +168,60 @@ version (HaveSquizLzma) } } +/// Splits long name into prefix and shorter name if it the name exceeds +/// the length of the tar header name field. +/// If the name is longer than prefix + name fields length, name is returned +/// unchanged. +/// On Windows, the path must be converted to Posix path (with '/' separator) +/// Returns: [prefix, name] +private string[2] splitPosixPrefixName(string name) +{ + if (name.length < TarHeader.name.sizeof) + return [null, name]; + if (name.length > TarHeader.name.sizeof + TarHeader.prefix.sizeof) + return [null, name]; + + foreach (i; 0 .. name.length) + { + if (name[i] == '/') + { + const p = name[0 .. i + 1]; + const n = name[i + 1 .. $]; + if (p.length <= TarHeader.prefix.sizeof && n.length <= TarHeader.name.sizeof) + return [p, n]; + } + } + + return [null, name]; +} + +@("tar.splitPosixPrefixName") +unittest +{ + import unit_threaded.assertions; + + enum shortPath = "some/short/path"; + enum veryLongPath = "some/very/long/long/long/long/long/long/long/long/long/long/long" + ~ "/long/long/long/long/long/long/long/long/long/long/long/long/long/long/long/long" + ~ "/long/long/long/long/long/long/long/long/long/long/long/long/long/long/long/long" + ~ "/long/long/long/long/long/long/long/long/long/long/long/long/long/long/long/long" + ~ "/long/long/long/long/long/long/long/long/long/path"; + + enum longPath = "some/long/long/long/long/long/long/long/long/long/long/long" + ~ "/long/long/long/long/long/long/long/long/long/long/long/long/long/long/path"; + enum longPrefix = "some/long/long/long/long/long/long/"; + enum longName = "long/long/long/long/long/long/long/long/long/long/long/long/long/long/long" + ~ "/long/long/long/long/path"; + + static assert(veryLongPath.length > 255); + static assert(longPath.length > 100); + static assert(longPath.length < 155); + + splitPosixPrefixName(shortPath).should == [null, shortPath]; + splitPosixPrefixName(veryLongPath).should == [null, veryLongPath]; + splitPosixPrefixName(longPath).should == [longPrefix, longName]; +} + private struct TarBox(I) { // init data @@ -842,6 +896,7 @@ private T parseOctalString(T = uint)(const(char)[] octal) private inout(char)[] parseString(inout(char)[] chars) { + // function similar to strnlen, but operate on slices. size_t count; while (count < chars.length && chars[count] != '\0') count++; From a45a5010a27ccfc6f754b6d22898b9f74390648f Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Tue, 1 Aug 2023 12:13:08 +0800 Subject: [PATCH 13/34] fix locale issue on windows test with libarchive tar --- test/archive.d | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/test/archive.d b/test/archive.d index f7c89c5..d6eac7f 100644 --- a/test/archive.d +++ b/test/archive.d @@ -39,8 +39,20 @@ void testTarArchiveContent(string archivePath, Flag!"testModes" testModes, Flag! `^-rw-rw-rw- .+ 26 .+ folder.+chmod 666.txt$` : `^-rw-r--r-- .+ 26 .+ folder.+chmod 666.txt$`; - auto res = execute(["tar", "-tvf", archivePath]); + auto res = execute(["tar", "-tvf", archivePath], ["MM_CHARSET":"UTF-8"]); assert(res.status == 0); + + version (Windows) + { + import std.encoding : transcode; + + // some tar versions of windows use Latin1 encoding + dchar[] buf; + foreach (char c; res.output) + buf ~= cast(dchar)c; + transcode(buf, res.output); + } + const lines = res.output.splitLines(); assert(lines.length == 3); assert(matchFirst(lines[0], line1)); @@ -413,6 +425,29 @@ version (HaveSquizLzma) testExtractedFiles(dm, Yes.mode666); } + + // @("unbox gnulong tar #17") + // unittest + // { + // import std.net.curl : byChunkAsync; + // import std.algorithm : each; + // import std.file : mkdir; + // import std.range : inputRangeObject; + + // //const url = "https://ziglang.org/builds/zig-linux-x86_64-0.11.0-dev.4187+1ae839cd2.tar.xz"; + + // //const archiveBytes = byChunkAsync(url); + // const filename = testPath("data/zig-linux-x86_64-0.11.0-dev.4187+1ae839cd2.tar.xz"); + // const dm = DontDeleteMe("extraction_site", null); + // mkdir(dm.path); + + // auto algo = boxAlgo(filename); + + // auto entries = readBinaryFile(filename) + // .unbox(algo, Yes.removePrefix); + + // entries.each!((e) { stdout.writeln(e.path); e.extractTo(dm.path); }); + // } } @("Extract squiz-box.zip") From 5d139630f9238fb534c19e8a1d6665748ef00bb6 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Sun, 6 Aug 2023 13:49:35 +0200 Subject: [PATCH 14/34] simplify tar octal util --- src/squiz_box/box/tar.d | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/squiz_box/box/tar.d b/src/squiz_box/box/tar.d index b8598f7..4c723f3 100644 --- a/src/squiz_box/box/tar.d +++ b/src/squiz_box/box/tar.d @@ -714,9 +714,9 @@ private struct TarHeader const uid = file.ownerId; const gid = file.groupId; - toOctalString(file.attributes & octal!7777, th.mode[0 .. $ - 1]); - toOctalString(uid, th.uid[0 .. $ - 1]); - toOctalString(gid, th.gid[0 .. $ - 1]); + toOctalString(file.attributes & octal!7777, th.mode[]); + toOctalString(uid, th.uid[]); + toOctalString(gid, th.gid[]); if (uid != 0) { @@ -744,16 +744,16 @@ private struct TarHeader // TODO: https://docs.microsoft.com/fr-fr/windows/win32/secauthz/finding-the-owner-of-a-file-object-in-c-- } - toOctalString(file.size, th.size[0 .. $ - 1]); + toOctalString(file.size, th.size[]); const mtime = file.timeLastModified().toUnixTime!long(); - toOctalString(mtime, th.mtime[0 .. $ - 1]); + toOctalString(mtime, th.mtime[]); th.magic = "ustar\0"; th.version_ = "00"; const chksum = th.unsignedChecksum(); - toOctalString(chksum, th.chksum[0 .. $ - 1]); + toOctalString(chksum, th.chksum[]); return block[512 .. $]; } @@ -875,7 +875,8 @@ private void toOctalString(T)(T val, char[] buf) { import std.format : sformat; - sformat(buf, "%0*o", buf.length, val); + sformat(buf[0 .. $ - 1], "%0*o", buf.length - 1, val); + buf[$ - 1] = '\0'; } private T parseOctalString(T = uint)(const(char)[] octal) From 95589d60edca8d26be93e3bde2a694f9fba59f71 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Sun, 6 Aug 2023 13:50:28 +0200 Subject: [PATCH 15/34] TarHeader2 and TarBox2 --- src/squiz_box/box/tar.d | 441 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 420 insertions(+), 21 deletions(-) diff --git a/src/squiz_box/box/tar.d b/src/squiz_box/box/tar.d index 4c723f3..9b17cac 100644 --- a/src/squiz_box/box/tar.d +++ b/src/squiz_box/box/tar.d @@ -15,9 +15,8 @@ struct TarAlgo { auto box(I)(I entries, size_t chunkSize = defaultChunkSize) if (isBoxEntryRange!I) - in (chunkSize >= 512 && chunkSize % 512 == 0) { - return TarBox!I(entries, chunkSize); + return TarBox2!I(entries, chunkSize); } auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) @@ -35,9 +34,8 @@ struct TarGzAlgo { auto box(I)(I entries, size_t chunkSize = defaultChunkSize) if (isBoxEntryRange!I) - in (chunkSize >= 512 && chunkSize % 512 == 0) { - return TarBox!I(entries, chunkSize).deflateGz(chunkSize); + return TarBox2!I(entries, chunkSize).deflateGz(chunkSize); } auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) @@ -59,9 +57,8 @@ version (HaveSquizBzip2) { auto box(I)(I entries, size_t chunkSize = defaultChunkSize) if (isBoxEntryRange!I) - in (chunkSize >= 512 && chunkSize % 512 == 0) { - return TarBox!I(entries, chunkSize).compressBzip2(chunkSize); + return TarBox2!I(entries, chunkSize).compressBzip2(chunkSize); } auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) @@ -84,9 +81,8 @@ version (HaveSquizLzma) { auto box(I)(I entries, size_t chunkSize = defaultChunkSize) if (isBoxEntryRange!I) - in (chunkSize >= 512 && chunkSize % 512 == 0) { - return TarBox!I(entries, chunkSize).compressXz(chunkSize); + return TarBox2!I(entries, chunkSize).compressXz(chunkSize); } auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) @@ -104,12 +100,10 @@ version (HaveSquizLzma) /// Returns a `.tar`, `.tar.gz`, `.tar.bz2` or `.tar.xz` archive as a byte range /// corresponding to the entries in input. -/// chunkSize must be a multiple of 512. auto boxTar(I)(I entries, size_t chunkSize = defaultChunkSize) if (isBoxEntryRange!I) -in (chunkSize >= 512 && chunkSize % 512 == 0) { - return TarBox!I(entries, chunkSize); + return TarBox2!I(entries, chunkSize); } /// ditto @@ -168,26 +162,222 @@ version (HaveSquizLzma) } } +private: + +enum blockLen = 512; +enum nameLen = 100; +enum prefixLen = 155; + +enum char[8] posixMagic = "ustar\x0000"; +enum char[8] gnuMagic = "ustar \x00"; + +struct BlockInfo +{ + static struct Block + { + // dfmt off + char [nameLen] name; // 0 0 + char [8] mode; // 100 64 + char [8] uid; // 108 6C + char [8] gid; // 116 74 + char [12] size; // 124 7C + char [12] mtime; // 136 88 + char [8] chksum; // 148 94 + Typeflag typeflag; // 156 9C + char [nameLen] linkname; // 157 9D + char [8] magic; // 257 101 + char [32] uname; // 265 109 + char [32] gname; // 297 129 + char [8] devmajor; // 329 149 + char [8] devminor; // 337 151 + char [prefixLen] prefix; // 345 159 + char [12] padding; // 500 1F4 + //dfmt on + + private uint checksum() + { + uint sum = 0; + sum += unsignedSum(name); + sum += unsignedSum(mode); + sum += unsignedSum(uid); + sum += unsignedSum(gid); + sum += unsignedSum(size); + sum += unsignedSum(mtime); + sum += 32 * 8; + sum += cast(uint) typeflag; + sum += unsignedSum(linkname); + sum += unsignedSum(magic); + sum += unsignedSum(uname); + sum += unsignedSum(gname); + sum += unsignedSum(devmajor); + sum += unsignedSum(devminor); + sum += unsignedSum(prefix); + return sum; + } + } + + static assert(Block.sizeof == blockLen); + + string name; + uint mode; + int uid; + int gid; + size_t size; + long mtime; + Typeflag typeflag; + string linkname; + char[8] magic; + string uname; + string gname; + int devmajor; + int devminor; + string prefix; + + size_t encode(scope ubyte[] buffer) const + in (buffer.length >= Block.sizeof) + in (name.length <= Block.name.sizeof) + in (linkname.length <= Block.linkname.sizeof) + in (uname.length <= Block.uname.sizeof) + in (gname.length <= Block.gname.sizeof) + in (prefix.length <= Block.prefix.sizeof) + { + buffer[0 .. Block.sizeof] = 0; + Block* blk = cast(Block*)&buffer[0]; + + blk.name[0 .. name.length] = name; + toOctalString(mode, blk.mode[]); + toOctalString(uid, blk.uid[]); + toOctalString(size, blk.size[]); + toOctalString(mtime, blk.mtime[]); + blk.typeflag = typeflag; + blk.linkname[0 .. linkname.length] = linkname; + blk.magic = magic; + blk.uname[0 .. uname.length] = uname; + blk.gname[0 .. gname.length] = gname; + toOctalString(devmajor, blk.devmajor[]); + toOctalString(devminor, blk.devminor[]); + blk.prefix[0 .. prefix.length] = prefix; + + const checksum = blk.checksum(); + toOctalString(checksum, blk.chksum[]); + + return blockLen; + } +} + +void ensureLen(ref ubyte[] buffer, size_t len) +{ + if (buffer.length < len) + buffer.length = len; +} + +size_t encodeLongGnu(ref ubyte[] buffer, size_t offset, string name, Typeflag typeflag) +{ + const l512 = next512(name.length); + buffer.ensureLen(offset + blockLen + l512); + + BlockInfo gnu = { + name: "././@LongLink", + size: name.length, + typeflag: Typeflag.gnuLongname, + magic: gnuMagic, + }; + gnu.encode(buffer[offset .. $]); + + buffer[blockLen .. blockLen + name.length] = name.representation; + buffer[blockLen + name.length .. blockLen + l512] = 0; + + return blockLen + l512; +} + +struct TarHeader2 +{ + string name; + uint attributes; + int uid; + int gid; + size_t size; + SysTime mtime; + string linkname; + EntryType type; + string uname; + string gname; + int devmajor; + int devminor; + + // encode this header in buffer, starting at offset + // buffer can potentially be grown if too small + size_t encode(ref ubyte[] buffer, size_t offset) + { + import std.string : representation; + + size_t encoded; + + string nm = name; + string lk = linkname; + string px; + if (nm.length > nameLen) + { + const pn = splitLongName(nm); + if (pn[0] != null) + { + px = pn[0]; + nm = pn[1]; + } + else + { + encoded += buffer.encodeLongGnu(offset + encoded, nm, Typeflag.gnuLongname); + nm = null; + } + } + if (lk.length > nameLen) + { + encoded += buffer.encodeLongGnu(offset + encoded, lk, Typeflag.gnuLonglink); + lk = null; + } + + buffer.ensureLen(offset + encoded + blockLen); + + BlockInfo blk = { + name: nm, + mode: attributes, + uid: uid, + gid: gid, + size: size, + mtime: mtime.toUnixTime(), + linkname: lk, + typeflag: toTypeflag(type), + uname: uname, + gname: gname, + devmajor: devmajor, + devminor: devminor, + }; + + return encoded + blk.encode(buffer[offset + encoded .. $]); + } + +} + /// Splits long name into prefix and shorter name if it the name exceeds /// the length of the tar header name field. /// If the name is longer than prefix + name fields length, name is returned /// unchanged. /// On Windows, the path must be converted to Posix path (with '/' separator) /// Returns: [prefix, name] -private string[2] splitPosixPrefixName(string name) +string[2] splitLongName(string name) { - if (name.length < TarHeader.name.sizeof) + if (name.length < nameLen) return [null, name]; - if (name.length > TarHeader.name.sizeof + TarHeader.prefix.sizeof) + if (name.length > nameLen + prefixLen) return [null, name]; foreach (i; 0 .. name.length) { if (name[i] == '/') { - const p = name[0 .. i + 1]; + const p = name[0 .. i]; const n = name[i + 1 .. $]; - if (p.length <= TarHeader.prefix.sizeof && n.length <= TarHeader.name.sizeof) + if (p.length <= prefixLen && n.length <= nameLen) return [p, n]; } } @@ -195,7 +385,7 @@ private string[2] splitPosixPrefixName(string name) return [null, name]; } -@("tar.splitPosixPrefixName") +@("tar.splitPrefixName") unittest { import unit_threaded.assertions; @@ -209,7 +399,7 @@ unittest enum longPath = "some/long/long/long/long/long/long/long/long/long/long/long" ~ "/long/long/long/long/long/long/long/long/long/long/long/long/long/long/path"; - enum longPrefix = "some/long/long/long/long/long/long/"; + enum longPrefix = "some/long/long/long/long/long/long"; enum longName = "long/long/long/long/long/long/long/long/long/long/long/long/long/long/long" ~ "/long/long/long/long/path"; @@ -217,11 +407,220 @@ unittest static assert(longPath.length > 100); static assert(longPath.length < 155); - splitPosixPrefixName(shortPath).should == [null, shortPath]; - splitPosixPrefixName(veryLongPath).should == [null, veryLongPath]; - splitPosixPrefixName(longPath).should == [longPrefix, longName]; + splitLongName(shortPath).should == [null, shortPath]; + splitLongName(veryLongPath).should == [null, veryLongPath]; + splitLongName(longPath).should == [longPrefix, longName]; +} + +struct TarBox2(I) +{ + I entries; + size_t chunkSize; + + size_t written; + ubyte[] buffer; + const(ubyte)[] chunk; + + // current entry being processed + ubyte[] remainHeader; + BoxEntry entry; + ByteRange entryChunks; + const(ubyte)[] entryChunk; + size_t padSize; + + // footer + size_t footerSize; + + this(I entries, size_t chunkSize) + { + import std.algorithm : max; + + this.entries = entries; + this.chunkSize = chunkSize; + this.buffer = new ubyte[max(512, chunkSize)]; + this.footerSize = blockLen * 2; + + popFront(); + } + + @property bool empty() + { + return chunk.length == 0; + } + + @property ByteChunk front() + { + return chunk; + } + + void popFront() + { + chunk = null; + scope(success) + { + written += chunk.length; + } + + while (!remainHeader.empty || padSize != 0 || hasEntryChunks || !entries.empty) + { + if (nextRemainHeader()) + return; + + if (fillPad()) + return; + + if (nextEntryChunk()) + return; + + if (nextHeader()) + return; + } + + footerSize -= fillZeros(footerSize); + } + + private size_t pos() + { + return written + chunk.length; + } + + private bool hasEntryChunks() + { + return (entryChunks && !entryChunks.empty) || !entryChunk.empty; + } + + private bool nextHeader() + { + import std.algorithm : min; + + assert(chunk.length < chunkSize); + assert(remainHeader.empty); + assert(!hasEntryChunks); + + if (entries.empty) + return false; + + entry = entries.front; + entries.popFront(); + entryChunks = entry.byChunk(chunkSize); + if (!entryChunks.empty) + entryChunk = entryChunks.front; + + TarHeader2 header = { + name: entry.path, + attributes: entry.attributes, + size: entry.size, + mtime: entry.timeLastModified, + type: entry.type, + linkname: entry.linkname, + }; + + version (Posix) + { + header.uid = entry.ownerId; + header.gid = entry.groupId; + } + + const len = header.encode(buffer, chunk.length); + assert(buffer.length >= chunk.length + len); + + const chunkTo = min(chunk.length + len, chunkSize); + if (chunk.length + len > chunkSize) + remainHeader = buffer[chunkSize .. chunk.length + len]; + + chunk = buffer[0 .. chunkTo]; + return chunkTo == chunkSize; + } + + // fill chunk with what remains of previous header (if any) + private bool nextRemainHeader() + { + import std.algorithm : min; + + if (remainHeader.empty) + return false; + + if (chunk.empty && remainHeader.length > chunkSize) + { + chunk = remainHeader[0 .. chunkSize]; + remainHeader = remainHeader[chunkSize .. $]; + return true; + } + + const len = min(chunkSize - chunk.length, remainHeader.length); + buffer[chunk.length .. chunk.length + len] = remainHeader[0 .. len]; + remainHeader = remainHeader[len .. $]; + chunk = buffer[0 .. chunk.length + len]; + + return chunk.length == chunkSize; + } + + // fill chunk with next chunk of current entry + private bool nextEntryChunk() + { + import std.algorithm : min; + + assert(chunk.length < chunkSize); + assert(padSize == 0); + assert(remainHeader.empty); + + while (hasEntryChunks() && chunk.length < chunkSize) + { + if (entryChunk.empty) + { + entryChunks.popFront(); + if (!entryChunks.empty) + entryChunk = entryChunks.front; + + if (entryChunk.empty) + break; + } + + if (chunk.empty && entryChunk.length >= chunkSize) + { + // can directly use entryChunk without copying + chunk = entryChunk[0 .. chunkSize]; + entryChunk = entryChunk[chunkSize .. $]; + break; + } + + // copy slice into buffer + const len = min(chunkSize - chunk.length, entryChunk.length); + buffer[chunk.length .. chunk.length + len] = entryChunk[0 .. len]; + chunk = buffer[0 .. chunk.length + len]; + entryChunk = entryChunk[len .. $]; + } + + if (!hasEntryChunks()) + { + padSize = next512(pos) - pos; + padSize -= fillZeros(padSize); + } + + return chunk.length == chunkSize; + } + + size_t fillZeros(size_t zeros) + { + import std.algorithm : min; + + const len = min (chunkSize - chunk.length, zeros); + buffer[chunk.length .. chunk.length + len] = 0; + chunk = buffer[0 .. chunk.length + len]; + return len; + } + + bool fillPad() + { + if (padSize != 0) + padSize -= fillZeros(padSize); + + return chunk.length == chunkSize; + } } +static assert(isByteRange!(TarBox2!(BoxEntry[]))); + private struct TarBox(I) { // init data From 03a9697c214412c45a48267911bf5a91977d19bb Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Sun, 6 Aug 2023 22:26:36 +0200 Subject: [PATCH 16/34] Cursor.readValue takes scope pointer --- src/squiz_box/priv.d | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/squiz_box/priv.d b/src/squiz_box/priv.d index 85df6e6..52ec2e4 100644 --- a/src/squiz_box/priv.d +++ b/src/squiz_box/priv.d @@ -63,7 +63,7 @@ interface Cursor /// Read T.sizeof data and returns it as a T. /// Similar to getValue!T but the value is passed as pointer to be filled in. /// Prefer this form for greater values (e.g. dozens of bytes) - void readValue(T)(T* val) if (!isDynamicArray!T) + void readValue(T)(scope T* val) if (!isDynamicArray!T) { import std.exception : enforce; From de02ddb0e79be9d0db5eebe48e0ddbd6f4cbae31 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Sun, 6 Aug 2023 22:28:39 +0200 Subject: [PATCH 17/34] TarHeader2 is used for encoding and decoding --- src/squiz_box/box/tar.d | 353 +++++++++++++++++++++++++--------------- 1 file changed, 219 insertions(+), 134 deletions(-) diff --git a/src/squiz_box/box/tar.d b/src/squiz_box/box/tar.d index 9b17cac..4c1b510 100644 --- a/src/squiz_box/box/tar.d +++ b/src/squiz_box/box/tar.d @@ -166,6 +166,7 @@ private: enum blockLen = 512; enum nameLen = 100; +enum unameLen = 32; enum prefixLen = 155; enum char[8] posixMagic = "ustar\x0000"; @@ -186,8 +187,8 @@ struct BlockInfo Typeflag typeflag; // 156 9C char [nameLen] linkname; // 157 9D char [8] magic; // 257 101 - char [32] uname; // 265 109 - char [32] gname; // 297 129 + char [unameLen] uname; // 265 109 + char [unameLen] gname; // 297 129 char [8] devmajor; // 329 149 char [8] devminor; // 337 151 char [prefixLen] prefix; // 345 159 @@ -233,6 +234,8 @@ struct BlockInfo int devminor; string prefix; + bool isNull; + size_t encode(scope ubyte[] buffer) const in (buffer.length >= Block.sizeof) in (name.length <= Block.name.sizeof) @@ -241,12 +244,16 @@ struct BlockInfo in (gname.length <= Block.gname.sizeof) in (prefix.length <= Block.prefix.sizeof) { - buffer[0 .. Block.sizeof] = 0; + buffer[0 .. blockLen] = 0; + if (isNull) + return blockLen; + Block* blk = cast(Block*)&buffer[0]; blk.name[0 .. name.length] = name; toOctalString(mode, blk.mode[]); toOctalString(uid, blk.uid[]); + toOctalString(gid, blk.gid[]); toOctalString(size, blk.size[]); toOctalString(mtime, blk.mtime[]); blk.typeflag = typeflag; @@ -263,6 +270,53 @@ struct BlockInfo return blockLen; } + + static BlockInfo decode(Cursor cursor) + { + Block blk = void; + cursor.readValue(&blk); + + const computed = blk.checksum(); + const checksum = parseOctalString!uint(blk.chksum); + if (computed == 256 && checksum == 0) + { + // this is an empty header (only zeros) + // indicates end of archive + + // dfmt off + BlockInfo info = { + isNull: true, + }; + // dfmt on + return info; + } + + enforce( + checksum == computed, + format!"Invalid TAR checksum at 0x%08X\nExpected 0x%08x but found 0x%08x"( + cursor.pos - blockLen + blk.chksum.offsetof, + computed, checksum) + ); + + BlockInfo info = { + name: parseString(blk.name).idup, + mode: parseOctalString!uint(blk.mode), + uid: parseOctalString!uint(blk.uid), + gid: parseOctalString!uint(blk.uid), + size: parseOctalString!size_t(blk.size), + mtime: parseOctalString!long(blk.mtime), + typeflag: blk.typeflag, + linkname: parseString(blk.linkname).idup, + magic: blk.magic, + uname: parseString(blk.uname).idup, + gname: parseString(blk.gname).idup, + devmajor: parseOctalString!int(blk.devmajor), + devminor: parseOctalString!int(blk.devminor), + prefix: parseString(blk.prefix).idup, + }; + + return info; + } } void ensureLen(ref ubyte[] buffer, size_t len) @@ -293,7 +347,7 @@ size_t encodeLongGnu(ref ubyte[] buffer, size_t offset, string name, Typeflag ty struct TarHeader2 { string name; - uint attributes; + uint mode; int uid; int gid; size_t size; @@ -305,12 +359,22 @@ struct TarHeader2 int devmajor; int devminor; + size_t entrySize; + bool isNull; + // encode this header in buffer, starting at offset // buffer can potentially be grown if too small size_t encode(ref ubyte[] buffer, size_t offset) { import std.string : representation; + if (isNull) + { + buffer.ensureLen(offset + blockLen); + buffer[offset .. offset + blockLen] = 0; + return blockLen; + } + size_t encoded; string nm = name; @@ -340,13 +404,14 @@ struct TarHeader2 BlockInfo blk = { name: nm, - mode: attributes, + mode: mode, uid: uid, gid: gid, size: size, mtime: mtime.toUnixTime(), linkname: lk, typeflag: toTypeflag(type), + magic: posixMagic, uname: uname, gname: gname, devmajor: devmajor, @@ -356,6 +421,109 @@ struct TarHeader2 return encoded + blk.encode(buffer[offset + encoded .. $]); } + static TarHeader2 decode(Cursor cursor) + { + auto blk = BlockInfo.decode(cursor); + if (blk.isNull) + { + TarHeader2 info = {entrySize: blockLen, + isNull: true,}; + return info; + } + + switch (blk.typeflag) + { + case Typeflag.normalNul: + case Typeflag.normal: + case Typeflag.hardLink: + case Typeflag.symLink: + case Typeflag.charSpecial: + case Typeflag.blockSpecial: + case Typeflag.directory: + case Typeflag.fifo: + case Typeflag.contiguousFile: + case Typeflag.posixExtended: + case Typeflag.extended: + return decodeHeader(blk); + case Typeflag.gnuLongname: + case Typeflag.gnuLonglink: + return decodeGnuLongHeader(cursor, blk); + default: + const msg = format!"Unknown TAR typeflag: '%s'\nWhen extracting \"%s\"."( + cast(char) blk.typeflag, blk.prefix ~ "/" ~ blk.name + ); + throw new Exception(msg); + } + } + + private static TarHeader2 decodeHeader(scope ref BlockInfo blk) + { + TarHeader2 info = { + name: blk.name, + mode: blk.mode, + uid: blk.uid, + gid: blk.gid, + size: blk.size, + mtime: SysTime(unixTimeToStdTime(blk.mtime)), + type: toEntryType(blk.typeflag), + linkname: blk.linkname, + uname: blk.uname, + gname: blk.gname, + devmajor: blk.devmajor, + devminor: blk.devminor, + + entrySize: blockLen + next512(blk.size), + isNull: false, + }; + + if (blk.prefix.length != 0) + { + info.name = blk.prefix ~ "/" ~ blk.name; + } + + version (Posix) + { + // tar mode contains stat.st_mode & 07777. + // we have to add the missing flags corresponding to file type + // (and by no way tar mode is meaningful on Windows) + const filetype = posixModeFileType(blk.typeflag); + info.mode |= filetype; + } + else version (Windows) + { + info.name = info.name.replace('\\', '/'); + info.linkname = info.linkname.replace('\\', '/'); + } + + return info; + } + + private static TarHeader2 decodeGnuLongHeader(Cursor cursor, scope ref BlockInfo blk) + { + auto data = new char[next512(blk.size)]; + enforce(cursor.read(data).length == data.length, "Unexpected end of input"); + const name = parseString(assumeUnique(data)); + + auto next = TarHeader2.decode(cursor); + next.entrySize += (blockLen + data.length); + + switch (blk.typeflag) + { + case Typeflag.gnuLongname: + next.name = name; + break; + case Typeflag.gnuLonglink: + next.linkname = name; + break; + default: + assert(false); + } + + if (next.type == EntryType.directory && !next.name.empty && next.name[$ - 1] == '/') + next.name = next.name[0 .. $ - 1]; + + return next; + } } /// Splits long name into prefix and shorter name if it the name exceeds @@ -456,7 +624,7 @@ struct TarBox2(I) void popFront() { chunk = null; - scope(success) + scope (success) { written += chunk.length; } @@ -506,9 +674,9 @@ struct TarBox2(I) if (!entryChunks.empty) entryChunk = entryChunks.front; + // common fields TarHeader2 header = { name: entry.path, - attributes: entry.attributes, size: entry.size, mtime: entry.timeLastModified, type: entry.type, @@ -517,8 +685,43 @@ struct TarBox2(I) version (Posix) { + import core.sys.posix.grp; + import core.sys.posix.pwd; + import core.stdc.string : strlen; + import std.conv : octal; + + char[512] buf; + + header.mode = entry.attributes & octal!7777; header.uid = entry.ownerId; header.gid = entry.groupId; + + if (header.uid != 0) + { + passwd pwdbuf; + passwd* pwd; + if (getpwuid_r(header.uid, &pwdbuf, buf.ptr, buf.length, &pwd) == 0) + { + const len = min(strlen(pwd.pw_name), unameLen); + header.uname = pwd.pw_name[0 .. len].idup; + } + } + if (header.gid != 0) + { + group grpbuf; + group* grp; + if (getgrgid_r(header.gid, &grpbuf, buf.ptr, buf.length, &grp) == 0) + { + const len = min(strlen(grp.gr_name), unameLen); + header.gname = grp.gr_name[0 .. len].idup; + } + } + } + else version (Windows) + { + // default to mode 644 which is the most common on UNIX + header.mode = "0000644"; + // TODO: https://docs.microsoft.com/fr-fr/windows/win32/secauthz/finding-the-owner-of-a-file-object-in-c-- } const len = header.encode(buffer, chunk.length); @@ -604,7 +807,7 @@ struct TarBox2(I) { import std.algorithm : min; - const len = min (chunkSize - chunk.length, zeros); + const len = min(chunkSize - chunk.length, zeros); buffer[chunk.length .. chunk.length + len] = 0; chunk = buffer[0 .. chunk.length + len]; return len; @@ -784,7 +987,7 @@ private struct TarUnbox _input.ffw(dist); } - auto info = readHeaderBlock(); + auto info = TarHeader2.decode(_input); if (info.isNull) { @@ -800,7 +1003,7 @@ private struct TarUnbox // skipping empty directory while (!info.name.length && info.type == EntryType.directory) { - info = readHeaderBlock(); + info = TarHeader2.decode(_input); info.name = removePrefix(info.name, info.type); } } @@ -826,124 +1029,6 @@ private struct TarUnbox return name; } - - private TarEntryInfo readHeaderBlock() - { - TarHeader th; - _input.readValue(&th); - - const computed = th.unsignedChecksum(); - const checksum = parseOctalString(th.chksum); - - if (computed == 256 && checksum == 0) - { - // this is an empty header (only zeros) - // indicates end of archive - - // dfmt off - TarEntryInfo info = { - isNull: true, - }; - // dfmt on - return info; - } - - enforce( - checksum == computed, - format!"Invalid TAR checksum at 0x%08X\nExpected 0x%08x but found 0x%08x"( - _input.pos - 512 + th.chksum.offsetof, - computed, checksum) - ); - - switch (th.typeflag) - { - case Typeflag.normalNul: - case Typeflag.normal: - case Typeflag.hardLink: - case Typeflag.symLink: - case Typeflag.charSpecial: - case Typeflag.blockSpecial: - case Typeflag.directory: - case Typeflag.fifo: - case Typeflag.contiguousFile: - case Typeflag.posixExtended: - case Typeflag.extended: - return processHeader(&th); - case Typeflag.gnuLongname: - case Typeflag.gnuLonglink: - return processGnuLongHeader(&th); - default: - const prefix = parseString(th.prefix).idup; - const name = parseString(th.name).idup; - const msg = format!"Unknown TAR typeflag: '%s'\nWhen extracting \"%s\"."( - cast(char) th.typeflag, prefix ~ "/" ~ name - ); - throw new Exception(msg); - } - } - - private TarEntryInfo processHeader(scope TarHeader* th) - { - TarEntryInfo info = { - name: parseString(th.name).idup, - type: toEntryType(th.typeflag), - linkname: parseString(th.linkname).idup, - size: parseOctalString!size_t(th.size), - timeLastModified: SysTime(unixTimeToStdTime(parseOctalString!ulong(th.mtime))), - }; - info.entrySize = 512 + next512(info.size); - - version (Posix) - { - // tar mode contains stat.st_mode & 07777. - // we have to add the missing flags corresponding to file type - // (and by no way tar mode is meaningful on Windows) - const filetype = posixModeFileType(th.typeflag); - info.attributes = parseOctalString(th.mode) | filetype; - info.ownerId = parseOctalString(th.uid); - info.groupId = parseOctalString(th.gid); - } - - if (th.prefix[0] != '\0') - { - const prefix = parseString(th.prefix).idup; - info.name = prefix ~ "/" ~ info.name; - } - - version (Windows) - { - info.name = info.name.replace('\\', '/'); - } - - return info; - } - - private TarEntryInfo processGnuLongHeader(scope TarHeader* th) - { - const size = parseOctalString(th.size); - auto data = new char[next512(size)]; - enforce(_input.read(data).length == data.length, "Unexpected end of input"); - const name = parseString(assumeUnique(data)); - - auto next = readHeaderBlock(); - - switch (th.typeflag) - { - case Typeflag.gnuLongname: - next.name = name; - break; - case Typeflag.gnuLonglink: - next.linkname = name; - break; - default: - assert(false); - } - - if (next.type == EntryType.directory && !next.name.empty && next.name[$ - 1] == '/') - next.name = next.name[0 .. $ - 1]; - - return next; - } } static assert(isUnboxEntryRange!TarUnbox); @@ -975,9 +1060,9 @@ private class TarUnboxEntry : UnboxEntry private Cursor _input; private size_t _start; private size_t _end; - private TarEntryInfo _info; + private TarHeader2 _info; - this(Cursor input, TarEntryInfo info) + this(Cursor input, TarHeader2 info) { _input = input; _start = input.pos; @@ -1017,24 +1102,24 @@ private class TarUnboxEntry : UnboxEntry @property SysTime timeLastModified() { - return _info.timeLastModified; + return _info.mtime; } @property uint attributes() { - return _info.attributes; + return _info.mode; } version (Posix) { @property int ownerId() { - return _info.ownerId; + return _info.uid; } @property int groupId() { - return _info.groupId; + return _info.gid; } } From 79bfb75b4724f9f09269b25b2234c3495d9d84bd Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Sun, 6 Aug 2023 22:40:26 +0200 Subject: [PATCH 18/34] large cleanup of tar.d --- src/squiz_box/box/tar.d | 486 +++++++++------------------------------- 1 file changed, 107 insertions(+), 379 deletions(-) diff --git a/src/squiz_box/box/tar.d b/src/squiz_box/box/tar.d index 4c1b510..4269416 100644 --- a/src/squiz_box/box/tar.d +++ b/src/squiz_box/box/tar.d @@ -16,7 +16,7 @@ struct TarAlgo auto box(I)(I entries, size_t chunkSize = defaultChunkSize) if (isBoxEntryRange!I) { - return TarBox2!I(entries, chunkSize); + return TarBox!I(entries, chunkSize); } auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) @@ -35,7 +35,7 @@ struct TarGzAlgo auto box(I)(I entries, size_t chunkSize = defaultChunkSize) if (isBoxEntryRange!I) { - return TarBox2!I(entries, chunkSize).deflateGz(chunkSize); + return TarBox!I(entries, chunkSize).deflateGz(chunkSize); } auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) @@ -58,7 +58,7 @@ version (HaveSquizBzip2) auto box(I)(I entries, size_t chunkSize = defaultChunkSize) if (isBoxEntryRange!I) { - return TarBox2!I(entries, chunkSize).compressBzip2(chunkSize); + return TarBox!I(entries, chunkSize).compressBzip2(chunkSize); } auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) @@ -82,7 +82,7 @@ version (HaveSquizLzma) auto box(I)(I entries, size_t chunkSize = defaultChunkSize) if (isBoxEntryRange!I) { - return TarBox2!I(entries, chunkSize).compressXz(chunkSize); + return TarBox!I(entries, chunkSize).compressXz(chunkSize); } auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) @@ -103,7 +103,7 @@ version (HaveSquizLzma) auto boxTar(I)(I entries, size_t chunkSize = defaultChunkSize) if (isBoxEntryRange!I) { - return TarBox2!I(entries, chunkSize); + return TarBox!I(entries, chunkSize); } /// ditto @@ -172,6 +172,84 @@ enum prefixLen = 155; enum char[8] posixMagic = "ustar\x0000"; enum char[8] gnuMagic = "ustar \x00"; +enum Typeflag : ubyte +{ + normalNul = 0, + normal = '0', + hardLink = '1', + symLink = '2', + charSpecial = '3', + blockSpecial = '4', + directory = '5', + fifo = '6', + contiguousFile = '7', + posixExtended = 'g', + extended = 'x', + gnuLongname = 'L', + gnuLonglink = 'K', +} + +Typeflag toTypeflag(EntryType type) +{ + final switch (type) + { + case EntryType.regular: + return Typeflag.normal; + case EntryType.directory: + return Typeflag.directory; + case EntryType.symlink: + return Typeflag.symLink; + } +} + +EntryType toEntryType(Typeflag flag) +{ + switch (flag) + { + case Typeflag.directory: + return EntryType.directory; + case Typeflag.symLink: + return EntryType.symlink; + default: + return EntryType.regular; + } +} + +version (Posix) +{ + // stat.st_mode part corresponding to file type + uint posixModeFileType(Typeflag flag) + { + import std.conv : octal; + import std.format : format; + + switch (flag) + { + case Typeflag.normalNul: + case Typeflag.normal: + return octal!100_000; + case Typeflag.hardLink: + // is regular file right for hard links? + return octal!100_000; + case Typeflag.symLink: + return octal!120_000; + case Typeflag.charSpecial: + return octal!20_000; + case Typeflag.blockSpecial: + return octal!60_000; + case Typeflag.directory: + return octal!40_000; + case Typeflag.fifo: + return octal!10_000; + case Typeflag.contiguousFile: + // is regular file right for contiguous files? + return octal!100_000; + default: + throw new Exception(format!"Unexpected Tar entry type: '%s'"(cast(char) flag)); + } + } +} + struct BlockInfo { static struct Block @@ -344,7 +422,7 @@ size_t encodeLongGnu(ref ubyte[] buffer, size_t offset, string name, Typeflag ty return blockLen + l512; } -struct TarHeader2 +struct TarInfo { string name; uint mode; @@ -421,12 +499,12 @@ struct TarHeader2 return encoded + blk.encode(buffer[offset + encoded .. $]); } - static TarHeader2 decode(Cursor cursor) + static TarInfo decode(Cursor cursor) { auto blk = BlockInfo.decode(cursor); if (blk.isNull) { - TarHeader2 info = {entrySize: blockLen, + TarInfo info = {entrySize: blockLen, isNull: true,}; return info; } @@ -456,9 +534,9 @@ struct TarHeader2 } } - private static TarHeader2 decodeHeader(scope ref BlockInfo blk) + private static TarInfo decodeHeader(scope ref BlockInfo blk) { - TarHeader2 info = { + TarInfo info = { name: blk.name, mode: blk.mode, uid: blk.uid, @@ -498,13 +576,13 @@ struct TarHeader2 return info; } - private static TarHeader2 decodeGnuLongHeader(Cursor cursor, scope ref BlockInfo blk) + private static TarInfo decodeGnuLongHeader(Cursor cursor, scope ref BlockInfo blk) { auto data = new char[next512(blk.size)]; enforce(cursor.read(data).length == data.length, "Unexpected end of input"); const name = parseString(assumeUnique(data)); - auto next = TarHeader2.decode(cursor); + auto next = TarInfo.decode(cursor); next.entrySize += (blockLen + data.length); switch (blk.typeflag) @@ -580,7 +658,7 @@ unittest splitLongName(longPath).should == [longPrefix, longName]; } -struct TarBox2(I) +struct TarBox(I) { I entries; size_t chunkSize; @@ -675,7 +753,7 @@ struct TarBox2(I) entryChunk = entryChunks.front; // common fields - TarHeader2 header = { + TarInfo info = { name: entry.path, size: entry.size, mtime: entry.timeLastModified, @@ -692,39 +770,39 @@ struct TarBox2(I) char[512] buf; - header.mode = entry.attributes & octal!7777; - header.uid = entry.ownerId; - header.gid = entry.groupId; + info.mode = entry.attributes & octal!7777; + info.uid = entry.ownerId; + info.gid = entry.groupId; - if (header.uid != 0) + if (info.uid != 0) { passwd pwdbuf; passwd* pwd; - if (getpwuid_r(header.uid, &pwdbuf, buf.ptr, buf.length, &pwd) == 0) + if (getpwuid_r(info.uid, &pwdbuf, buf.ptr, buf.length, &pwd) == 0) { const len = min(strlen(pwd.pw_name), unameLen); - header.uname = pwd.pw_name[0 .. len].idup; + info.uname = pwd.pw_name[0 .. len].idup; } } - if (header.gid != 0) + if (info.gid != 0) { group grpbuf; group* grp; - if (getgrgid_r(header.gid, &grpbuf, buf.ptr, buf.length, &grp) == 0) + if (getgrgid_r(info.gid, &grpbuf, buf.ptr, buf.length, &grp) == 0) { const len = min(strlen(grp.gr_name), unameLen); - header.gname = grp.gr_name[0 .. len].idup; + info.gname = grp.gr_name[0 .. len].idup; } } } else version (Windows) { // default to mode 644 which is the most common on UNIX - header.mode = "0000644"; + info.mode = "0000644"; // TODO: https://docs.microsoft.com/fr-fr/windows/win32/secauthz/finding-the-owner-of-a-file-object-in-c-- } - const len = header.encode(buffer, chunk.length); + const len = info.encode(buffer, chunk.length); assert(buffer.length >= chunk.length + len); const chunkTo = min(chunk.length + len, chunkSize); @@ -822,126 +900,6 @@ struct TarBox2(I) } } -static assert(isByteRange!(TarBox2!(BoxEntry[]))); - -private struct TarBox(I) -{ - // init data - I entriesInput; - ubyte[] buffer; - - // current chunk (front data) - ubyte[] chunk; // data ready - ubyte[] avail; // space available in buffer (after chunk) - - // current entry being processed - BoxEntry entry; - ByteRange entryChunks; - - // footer is two empty blocks - size_t footer; - enum footerLen = 1024; - - this(I entries, size_t chunkSize) - { - enforce(chunkSize % 512 == 0, "chunk size must be a multiple of 512"); - entriesInput = entries; - buffer = new ubyte[chunkSize]; - avail = buffer; - popFront(); - } - - @property bool empty() - { - // handle .init - if (!buffer) - return true; - - // more files to be processed - if (!entriesInput.empty) - return false; - - // current entry not exhausted - if (hasEntryChunks()) - return false; - - // some unconsumed flying data - if (chunk.length) - return false; - - return true; - } - - @property ByteChunk front() - { - return chunk; - } - - void popFront() - { - if (!moreToRead()) - { - if (footer >= footerLen) - { - chunk = null; - } - else - { - import std.algorithm : min; - - const len = min(buffer.length, footerLen - footer); - buffer[0 .. len] = 0; - chunk = buffer[0 .. len]; - footer += len; - } - return; - } - - while (avail.length && moreToRead) - { - nextBlock(); - chunk = buffer[0 .. $ - avail.length]; - } - avail = buffer; - } - - private bool hasEntryChunks() - { - return entryChunks && !entryChunks.empty; - } - - private bool moreToRead() - { - return !entriesInput.empty || hasEntryChunks(); - } - - private void nextBlock() - in (avail.length >= 512) - { - if (!entry || !hasEntryChunks()) - { - enforce(!entriesInput.empty); - entry = entriesInput.front; - entriesInput.popFront(); - avail = TarHeader.fillWith(entry, avail); - entryChunks = entry.byChunk(512); - } - else - { - auto filled = entryChunks.front; - avail[0 .. filled.length] = filled; - avail = avail[filled.length .. $]; - entryChunks.popFront(); - if (entryChunks.empty) - { - const pad = avail.length % 512; - avail[0 .. pad] = 0; - avail = avail[pad .. $]; - } - } - } -} - static assert(isByteRange!(TarBox!(BoxEntry[]))); private struct TarUnbox @@ -987,7 +945,7 @@ private struct TarUnbox _input.ffw(dist); } - auto info = TarHeader2.decode(_input); + auto info = TarInfo.decode(_input); if (info.isNull) { @@ -1003,7 +961,7 @@ private struct TarUnbox // skipping empty directory while (!info.name.length && info.type == EntryType.directory) { - info = TarHeader2.decode(_input); + info = TarInfo.decode(_input); info.name = removePrefix(info.name, info.type); } } @@ -1033,26 +991,6 @@ private struct TarUnbox static assert(isUnboxEntryRange!TarUnbox); -struct TarEntryInfo -{ - string name; - string linkname; - EntryType type; - ulong size; - ulong entrySize; - SysTime timeLastModified; - uint attributes; - - version (Posix) - { - int ownerId; - int groupId; - } - - // marker for null header - bool isNull; -} - private class TarUnboxEntry : UnboxEntry { import std.stdio : File; @@ -1060,9 +998,9 @@ private class TarUnboxEntry : UnboxEntry private Cursor _input; private size_t _start; private size_t _end; - private TarHeader2 _info; + private TarInfo _info; - this(Cursor input, TarHeader2 info) + this(Cursor input, TarInfo info) { _input = input; _start = input.pos; @@ -1135,216 +1073,6 @@ private class TarUnboxEntry : UnboxEntry } } -private struct TarHeader -{ - // dfmt off - char [100] name; // 0 0 - char [8] mode; // 100 64 - char [8] uid; // 108 6C - char [8] gid; // 116 74 - char [12] size; // 124 7C - char [12] mtime; // 136 88 - char [8] chksum; // 148 94 - Typeflag typeflag; // 156 9C - char [100] linkname; // 157 9D - char [6] magic; // 257 101 - char [2] version_; // 263 107 - char [32] uname; // 265 109 - char [32] gname; // 297 129 - char [8] devmajor; // 329 149 - char [8] devminor; // 337 151 - char [155] prefix; // 345 159 - char [12] padding; // 500 1F4 - //dfmt on - - private static ubyte[] fillWith(ArchiveEntry file, ubyte[] block) - in (block.length >= 512) - { - import std.algorithm : min; - import std.string : toStringz; - - version (Posix) - { - char[512] buf; - } - - block[0 .. 512] = 0; - - TarHeader* th = cast(TarHeader*)(&block[0]); - - // prefix and name - const name = file.path; - const prefLen = name.length > 100 ? cast(ptrdiff_t) name.length - 100 : 0; - if (prefLen) - th.prefix[0 .. prefLen] = name[0 .. prefLen]; - th.name[0 .. name.length - prefLen] = name[prefLen .. $]; - - th.typeflag = toTypeflag(file.type); - - if (th.typeflag == Typeflag.symLink) - { - const lname = file.linkname; - const len = min(lname.length, cast(ptrdiff_t) th.linkname.length - 1); - th.linkname[0 .. len] = lname[0 .. len]; - } - - version (Posix) - { - import core.sys.posix.grp; - import core.sys.posix.pwd; - import core.stdc.string : strlen; - import std.conv : octal; - - const uid = file.ownerId; - const gid = file.groupId; - - toOctalString(file.attributes & octal!7777, th.mode[]); - toOctalString(uid, th.uid[]); - toOctalString(gid, th.gid[]); - - if (uid != 0) - { - passwd pwdbuf; - passwd* pwd; - enforce(getpwuid_r(uid, &pwdbuf, buf.ptr, buf.length, &pwd) == 0, "Could not read user name"); - const urlen = min(strlen(pwd.pw_name), th.uname.length); - th.uname[0 .. urlen] = pwd.pw_name[0 .. urlen]; - } - - if (gid != 0) - { - group grpbuf; - group* grp; - enforce(getgrgid_r(gid, &grpbuf, buf.ptr, buf.length, &grp) == 0, "Could not read group name"); - const grlen = min(strlen(grp.gr_name), th.gname.length); - th.gname[0 .. grlen] = grp.gr_name[0 .. grlen]; - } - } - else version (Windows) - { - // default to mode 644 which is the most common on UNIX - th.mode[0 .. 7] = "0000644"; - - // TODO: https://docs.microsoft.com/fr-fr/windows/win32/secauthz/finding-the-owner-of-a-file-object-in-c-- - } - - toOctalString(file.size, th.size[]); - const mtime = file.timeLastModified().toUnixTime!long(); - toOctalString(mtime, th.mtime[]); - - th.magic = "ustar\0"; - th.version_ = "00"; - - const chksum = th.unsignedChecksum(); - - toOctalString(chksum, th.chksum[]); - - return block[512 .. $]; - } - - private uint unsignedChecksum() - { - uint sum = 0; - sum += unsignedSum(name); - sum += unsignedSum(mode); - sum += unsignedSum(uid); - sum += unsignedSum(gid); - sum += unsignedSum(size); - sum += unsignedSum(mtime); - sum += 32 * 8; - sum += cast(uint) typeflag; - sum += unsignedSum(linkname); - sum += unsignedSum(magic); - sum += unsignedSum(version_); - sum += unsignedSum(uname); - sum += unsignedSum(gname); - sum += unsignedSum(devmajor); - sum += unsignedSum(devminor); - sum += unsignedSum(prefix); - return sum; - } -} - -static assert(TarHeader.sizeof == 512); - -private enum Typeflag : ubyte -{ - normalNul = 0, - normal = '0', - hardLink = '1', - symLink = '2', - charSpecial = '3', - blockSpecial = '4', - directory = '5', - fifo = '6', - contiguousFile = '7', - posixExtended = 'g', - extended = 'x', - gnuLongname = 'L', - gnuLonglink = 'K', -} - -Typeflag toTypeflag(EntryType type) -{ - final switch (type) - { - case EntryType.regular: - return Typeflag.normal; - case EntryType.directory: - return Typeflag.directory; - case EntryType.symlink: - return Typeflag.symLink; - } -} - -EntryType toEntryType(Typeflag flag) -{ - switch (flag) - { - case Typeflag.directory: - return EntryType.directory; - case Typeflag.symLink: - return EntryType.symlink; - default: - return EntryType.regular; - } -} - -version (Posix) -{ - // stat.st_mode part corresponding to file type - uint posixModeFileType(Typeflag flag) - { - import std.conv : octal; - import std.format : format; - - switch (flag) - { - case Typeflag.normalNul: - case Typeflag.normal: - return octal!100_000; - case Typeflag.hardLink: - // is regular file right for hard links? - return octal!100_000; - case Typeflag.symLink: - return octal!120_000; - case Typeflag.charSpecial: - return octal!20_000; - case Typeflag.blockSpecial: - return octal!60_000; - case Typeflag.directory: - return octal!40_000; - case Typeflag.fifo: - return octal!10_000; - case Typeflag.contiguousFile: - // is regular file right for contiguous files? - return octal!100_000; - default: - throw new Exception(format!"Unexpected Tar entry type: '%s'"(cast(char) flag)); - } - } -} - private uint unsignedSum(const(char)[] buf) { uint sum; From afb0a812571b047594cfe902043af04b2a92c100 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Sun, 6 Aug 2023 22:40:41 +0200 Subject: [PATCH 19/34] simplify bomb expression --- src/squiz_box/box/package.d | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/squiz_box/box/package.d b/src/squiz_box/box/package.d index f1bf517..1394cd6 100644 --- a/src/squiz_box/box/package.d +++ b/src/squiz_box/box/package.d @@ -270,7 +270,7 @@ interface ArchiveEntry import std.path : buildNormalizedPath, isAbsolute; import std.string : startsWith; - if (allowedSz != ulong.max && size > allowedSz) + if (size > allowedSz) return true; const p = path; From 4608e06e9d471539d462685a515a449ad376ecfe Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Sun, 6 Aug 2023 22:41:53 +0200 Subject: [PATCH 20/34] unzip network download test uses byChunk --- test/archive.d | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/test/archive.d b/test/archive.d index d6eac7f..8effab2 100644 --- a/test/archive.d +++ b/test/archive.d @@ -454,32 +454,26 @@ version (HaveSquizLzma) unittest { import std.algorithm; + import std.net.curl : byChunk; import std.file; - import std.net.curl; import std.path; - import std.range; import std.stdio; const url = "https://github.com/rtbo/squiz-box/archive/refs/tags/v0.2.1.zip"; - - auto file = buildPath(tempDir(), "squiz-box-0.2.1.zip"); auto dir = buildPath(tempDir(), "squiz-box-0.2.1"); - download(url, file); mkdirRecurse(dir); - - version (Posix) + scope (exit) { - scope (exit) - remove(file); - scope (exit) - rmdirRecurse(dir); + rmdirRecurse(dir); } - unboxZip(File(file, "rb"), Yes.removePrefix) + byChunk(url) + .unboxZip(Yes.removePrefix) .each!(e => e.extractTo(dir)); assert(isFile(buildPath(dir, "meson.build"))); + assert(isFile(buildPath(dir, "test", "archive.d"))); } @("Extract 7z") From ed64c6df64a36e91e63e40ca68d11c39c8691c20 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Mon, 7 Aug 2023 09:36:54 +0200 Subject: [PATCH 21/34] do not fail test without network --- test/archive.d | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/test/archive.d b/test/archive.d index 8effab2..0513b52 100644 --- a/test/archive.d +++ b/test/archive.d @@ -454,7 +454,7 @@ version (HaveSquizLzma) unittest { import std.algorithm; - import std.net.curl : byChunk; + import std.net.curl : byChunk, CurlException; import std.file; import std.path; import std.stdio; @@ -468,12 +468,17 @@ unittest rmdirRecurse(dir); } - byChunk(url) - .unboxZip(Yes.removePrefix) - .each!(e => e.extractTo(dir)); + try + { + byChunk(url) + .unboxZip(Yes.removePrefix) + .each!(e => e.extractTo(dir)); - assert(isFile(buildPath(dir, "meson.build"))); - assert(isFile(buildPath(dir, "test", "archive.d"))); + assert(isFile(buildPath(dir, "meson.build"))); + assert(isFile(buildPath(dir, "test", "archive.d"))); + } + catch (CurlException) + {} } @("Extract 7z") From 8d8a82b57955fa7ba9cf7ebf332d7741bffb0a24 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Tue, 8 Aug 2023 19:33:04 +0200 Subject: [PATCH 22/34] emptyByteRange --- src/squiz_box/squiz.d | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/squiz_box/squiz.d b/src/squiz_box/squiz.d index fdc754e..b448290 100644 --- a/src/squiz_box/squiz.d +++ b/src/squiz_box/squiz.d @@ -74,6 +74,20 @@ template isByteRange(BR) static assert(isByteRange!ByteRange); +private struct EmptyByteRange +{ + enum bool empty = true; + enum const(ubyte)[] front = []; + void popFront() + { + } +} + +static assert(isByteRange!EmptyByteRange); + +/// An empty range of bytes +enum emptyByteRange = EmptyByteRange(); + /// Exception thrown when inconsistent data is given to /// a decompression algorithm. /// I.e. the data was not compressed with the corresponding algorithm @@ -573,7 +587,7 @@ private struct Squiz(I, A, Flag!"endStream" endStream) const len = min(chunkBuffer.length - chunk.length, maxLen); stream.output = chunkBuffer[chunk.length .. chunk.length + len]; - const streamEnded = algo.process(stream, cast(Flag!"lastChunk") (input.empty && lastInput)); + const streamEnded = algo.process(stream, cast(Flag!"lastChunk")(input.empty && lastInput)); chunk = chunkBuffer[0 .. $ - stream.output.length]; maxLen -= len; @@ -1157,7 +1171,7 @@ struct Inflate assert( (windowBits == 0 && format == ZlibFormat.zlib) || (9 <= windowBits && windowBits <= 15), - "inconsistent windowBits" + "inconsistent windowBits" ); int wb = windowBits; final switch (format) @@ -1460,7 +1474,7 @@ version (HaveSquizBzip2) enforce( (action == BZ_RUN && res == BZ_RUN_OK) || (action == BZ_FINISH && res == BZ_FINISH_OK), - "Bzip2 compress failed with code: " ~ bzResultToString(res) + "Bzip2 compress failed with code: " ~ bzResultToString(res) ); return No.streamEnded; From acf4b8c0d2eea7b70f11538a81a08b85fc80a136 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Tue, 8 Aug 2023 19:35:27 +0200 Subject: [PATCH 23/34] BoxEntryInfo and infoEntry --- src/squiz_box/box/package.d | 124 ++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/src/squiz_box/box/package.d b/src/squiz_box/box/package.d index 1394cd6..3079758 100644 --- a/src/squiz_box/box/package.d +++ b/src/squiz_box/box/package.d @@ -571,3 +571,127 @@ class FileBoxEntry : BoxEntry return inputRangeObject(ByChunkImpl(File(filePath, "rb"), chunkSize)); } } + +struct BoxEntryInfo +{ + /// The archive mode this entry is for. + /// The path of the entry within the archive. + /// Should always be a relative path, and never go backward (..) + /// The directory separations are always '/' (forward slash) even on Windows + string path; + + /// The type of entry (directory, file, symlink) + EntryType type; + + /// If symlink, this is the path pointed to by the link (relative to the symlink). + /// Should be null for directories and regular file. + string linkname; + + /// The size of the entry in bytes (should be zero for directories and symlink) + /// This is the size of uncompressed data. + ulong size; + + /// The timeLastModified of the entry + SysTime timeLastModified; + + /// The file attributes (as returned std.file.getLinkAttributes) + uint attributes; + + version (Posix) + { + /// The owner id of the entry + int ownerId; + /// The group id of the entry + int groupId; + } +} + +class InfoBoxEntry : BoxEntry +{ + BoxEntryInfo info; + ByteRange data; + + this(BoxEntryInfo info, ByteRange data) + in (data is null || data.empty || info.type == EntryType.regular, "data can only be supplied for regular files") + { + this.info = info; + this.data = data; + } + + override @property EntryMode mode() + { + return EntryMode.creation; + } + + override @property string path() + { + return info.path; + } + + override @property EntryType type() + { + return info.type; + } + + override @property string linkname() + { + return info.linkname; + } + + override @property ulong size() + { + return info.size; + } + + override @property SysTime timeLastModified() + { + return info.timeLastModified; + } + + override @property uint attributes() + { + return info.attributes; + } + + version (Posix) + { + override @property int ownerId() + { + return info.ownerId; + } + + override @property int groupId() + { + return info.groupId; + } + } + + /// Return the data passed in the ctor. + /// chunkSize has no effect here + ByteRange byChunk(size_t chunkSize = 0) + { + if (data) + return data; + return inputRangeObject(emptyByteRange); + } +} + +/// Create a BoxEntry from the provided info. +/// This allows to create archives out of generated data, without any backing file on disk. +InfoBoxEntry infoEntry(I)(BoxEntryInfo info, I data) +if (isByteRange!I) +in (info.type == EntryType.regular || data.empty, "symlinks and directories can't have data") +{ + import std.datetime : Clock; + + if (info.timeLastModified == SysTime.init) + info.timeLastModified = Clock.currTime; + + return new InfoBoxEntry(info, inputRangeObject(data)); +} + +/// ditto +InfoBoxEntry infoEntry(BoxEntryInfo info) +{ + return infoEntry(info, inputRangeObject(emptyByteRange)); +} From 7c73055d0c27593d14cae1b97a6770d9126f8c41 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Tue, 8 Aug 2023 19:36:36 +0200 Subject: [PATCH 24/34] fix negative unix time --- src/squiz_box/box/tar.d | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/squiz_box/box/tar.d b/src/squiz_box/box/tar.d index 4269416..7b093aa 100644 --- a/src/squiz_box/box/tar.d +++ b/src/squiz_box/box/tar.d @@ -444,6 +444,7 @@ struct TarInfo // buffer can potentially be grown if too small size_t encode(ref ubyte[] buffer, size_t offset) { + import std.algorithm : max; import std.string : representation; if (isNull) @@ -486,7 +487,7 @@ struct TarInfo uid: uid, gid: gid, size: size, - mtime: mtime.toUnixTime(), + mtime: max(0, mtime.toUnixTime()), linkname: lk, typeflag: toTypeflag(type), magic: posixMagic, From 661d7770c1e899ca962c3642674a08ea16d37f12 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Tue, 8 Aug 2023 19:36:55 +0200 Subject: [PATCH 25/34] fix writing gnulong --- src/squiz_box/box/tar.d | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/squiz_box/box/tar.d b/src/squiz_box/box/tar.d index 7b093aa..b5055b2 100644 --- a/src/squiz_box/box/tar.d +++ b/src/squiz_box/box/tar.d @@ -411,13 +411,13 @@ size_t encodeLongGnu(ref ubyte[] buffer, size_t offset, string name, Typeflag ty BlockInfo gnu = { name: "././@LongLink", size: name.length, - typeflag: Typeflag.gnuLongname, + typeflag: typeflag, magic: gnuMagic, }; gnu.encode(buffer[offset .. $]); - buffer[blockLen .. blockLen + name.length] = name.representation; - buffer[blockLen + name.length .. blockLen + l512] = 0; + buffer[offset + blockLen .. offset + blockLen + name.length] = name.representation; + buffer[offset + blockLen + name.length .. offset + blockLen + l512] = 0; return blockLen + l512; } From 6eee394a2f6542c4dcc15929a007aa6038fc192d Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Tue, 8 Aug 2023 19:37:40 +0200 Subject: [PATCH 26/34] test tar read/write gnulong (#17) --- meson.build | 1 + test/archive.d | 23 --------------------- test/tar.d | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 23 deletions(-) create mode 100644 test/tar.d diff --git a/meson.build b/meson.build index 7b60b6e..066a3a8 100644 --- a/meson.build +++ b/meson.build @@ -74,6 +74,7 @@ if get_option('enable_test') 'test/archive.d', 'test/compress.d', 'test/main.d', + 'test/tar.d', 'test/util.d', ]) diff --git a/test/archive.d b/test/archive.d index 0513b52..bd31d6b 100644 --- a/test/archive.d +++ b/test/archive.d @@ -425,29 +425,6 @@ version (HaveSquizLzma) testExtractedFiles(dm, Yes.mode666); } - - // @("unbox gnulong tar #17") - // unittest - // { - // import std.net.curl : byChunkAsync; - // import std.algorithm : each; - // import std.file : mkdir; - // import std.range : inputRangeObject; - - // //const url = "https://ziglang.org/builds/zig-linux-x86_64-0.11.0-dev.4187+1ae839cd2.tar.xz"; - - // //const archiveBytes = byChunkAsync(url); - // const filename = testPath("data/zig-linux-x86_64-0.11.0-dev.4187+1ae839cd2.tar.xz"); - // const dm = DontDeleteMe("extraction_site", null); - // mkdir(dm.path); - - // auto algo = boxAlgo(filename); - - // auto entries = readBinaryFile(filename) - // .unbox(algo, Yes.removePrefix); - - // entries.each!((e) { stdout.writeln(e.path); e.extractTo(dm.path); }); - // } } @("Extract squiz-box.zip") diff --git a/test/tar.d b/test/tar.d new file mode 100644 index 0000000..c04334e --- /dev/null +++ b/test/tar.d @@ -0,0 +1,56 @@ +module test.tar; + +import squiz_box; + +import unit_threaded.assertions; + +import std.algorithm; +import std.array; +import std.conv; +import std.range; +import std.stdio; +import std.string; +import std.typecons; + +@("tar read/write gnulong #17") +unittest +{ + const content = cast(ByteChunk)("the content of the file".representation); + const filename = "long-path".repeat(55).join("/") ~ "/file.txt"; + const linkname = "long-path".repeat(55).join("/") ~ "/link.txt"; + + const entries = only( + infoEntry(BoxEntryInfo( + path: filename, + type: EntryType.regular, + size: content.length, + attributes: octal!"100644", + ), + only(content)), + infoEntry(BoxEntryInfo( + path: linkname, + type: EntryType.symlink, + linkname: filename, + attributes: octal!"100644", + ))) + .boxTar() + .unboxTar() + .map!(e => tuple(e.path, e.type, e.linkname, e.size, cast(ByteChunk)e.readContent())) + .array; + + entries.length.should == 2; + entries[0].should == tuple( + filename, + EntryType.regular, + cast(string)null, + content.length, + content, + ); + entries[1].should == tuple( + linkname, + EntryType.symlink, + filename, + ulong(0), + cast(ByteChunk)null, + ); +} From b6eed028e586ff94fa2608e927c50615fcec5283 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Tue, 8 Aug 2023 21:42:21 +0200 Subject: [PATCH 27/34] update meson in CI --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index be39c68..ed7bdb0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: os: [ ubuntu-latest, windows-latest ] dc: [ dmd-latest ] bt: [ debug ] - meson: [ '0.62.0' ] + meson: [ '1.2.0' ] runs-on: ${{ matrix.os }} From 3d80ff9c5acb8eafe5a29c2e5fa337dd830e8218 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Tue, 8 Aug 2023 22:07:40 +0200 Subject: [PATCH 28/34] remove dependency to unit-threaded Right now Dub support in meson is broken. See mesonbuild/meson#11798 --- meson.build | 7 +------ src/squiz_box/box/tar.d | 8 +++----- test/tar.d | 20 ++++++++++---------- 3 files changed, 14 insertions(+), 21 deletions(-) diff --git a/meson.build b/meson.build index 066a3a8..f2819da 100644 --- a/meson.build +++ b/meson.build @@ -65,11 +65,6 @@ squiz_dep = declare_dependency( ) if get_option('enable_test') - ut_assertions_dep = dependency('unit-threaded:assertions', - method: 'dub', - version: '2.0.3', - ) - squiz_test_src = squiz_src + files([ 'test/archive.d', 'test/compress.d', @@ -94,7 +89,7 @@ if get_option('enable_test') d_unittest: true, install: false, include_directories: squiz_inc, - dependencies: squiz_deps + [ut_assertions_dep], + dependencies: squiz_deps, d_module_versions: squiz_ver, ) diff --git a/src/squiz_box/box/tar.d b/src/squiz_box/box/tar.d index b5055b2..068842d 100644 --- a/src/squiz_box/box/tar.d +++ b/src/squiz_box/box/tar.d @@ -635,8 +635,6 @@ string[2] splitLongName(string name) @("tar.splitPrefixName") unittest { - import unit_threaded.assertions; - enum shortPath = "some/short/path"; enum veryLongPath = "some/very/long/long/long/long/long/long/long/long/long/long/long" ~ "/long/long/long/long/long/long/long/long/long/long/long/long/long/long/long/long" @@ -654,9 +652,9 @@ unittest static assert(longPath.length > 100); static assert(longPath.length < 155); - splitLongName(shortPath).should == [null, shortPath]; - splitLongName(veryLongPath).should == [null, veryLongPath]; - splitLongName(longPath).should == [longPrefix, longName]; + assert(splitLongName(shortPath) == [null, shortPath]); + assert(splitLongName(veryLongPath) == [null, veryLongPath]); + assert(splitLongName(longPath) == [longPrefix, longName]); } struct TarBox(I) diff --git a/test/tar.d b/test/tar.d index c04334e..62e0923 100644 --- a/test/tar.d +++ b/test/tar.d @@ -2,8 +2,6 @@ module test.tar; import squiz_box; -import unit_threaded.assertions; - import std.algorithm; import std.array; import std.conv; @@ -12,13 +10,14 @@ import std.stdio; import std.string; import std.typecons; -@("tar read/write gnulong #17") +@("read/write gnulong #17") unittest { const content = cast(ByteChunk)("the content of the file".representation); const filename = "long-path".repeat(55).join("/") ~ "/file.txt"; const linkname = "long-path".repeat(55).join("/") ~ "/link.txt"; + // dfmt off const entries = only( infoEntry(BoxEntryInfo( path: filename, @@ -37,20 +36,21 @@ unittest .unboxTar() .map!(e => tuple(e.path, e.type, e.linkname, e.size, cast(ByteChunk)e.readContent())) .array; + // dfmt on - entries.length.should == 2; - entries[0].should == tuple( + assert(entries.length == 2); + assert(entries[0] == tuple( filename, EntryType.regular, - cast(string)null, + cast(string) null, content.length, content, - ); - entries[1].should == tuple( + )); + assert(entries[1] == tuple( linkname, EntryType.symlink, filename, ulong(0), - cast(ByteChunk)null, - ); + cast(ByteChunk) null, + )); } From 275fabf14cda37c2cbbb85ed07298565a1185f06 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Tue, 8 Aug 2023 22:43:03 +0200 Subject: [PATCH 29/34] fix 7z code mistake picked-up by recent compiler --- src/squiz_box/box/seven_z/header.d | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/squiz_box/box/seven_z/header.d b/src/squiz_box/box/seven_z/header.d index 52005ed..aa05d42 100644 --- a/src/squiz_box/box/seven_z/header.d +++ b/src/squiz_box/box/seven_z/header.d @@ -193,7 +193,7 @@ struct Header auto unpacked = cursorByteRange(mainCursor, packSize) .squizMaxOut(algo, unpackSize) .join(); - assert(unpacked.length = unpackSize); + assert(unpacked.length == unpackSize); static if (false) { From 0de4a08efc16913902fc6061971bdce70080583b Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Tue, 8 Aug 2023 23:13:43 +0200 Subject: [PATCH 30/34] tar test read/write split prefix --- test/tar.d | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/test/tar.d b/test/tar.d index 62e0923..5cc19f5 100644 --- a/test/tar.d +++ b/test/tar.d @@ -10,6 +10,36 @@ import std.stdio; import std.string; import std.typecons; +enum blockLen = 512; +enum nameLen = 100; +enum unameLen = 32; +enum prefixLen = 155; + +enum char[8] posixMagic = "ustar\x0000"; +enum char[8] gnuMagic = "ustar \x00"; + +struct Block +{ + // dfmt off + char [nameLen] name; // 0 0 + char [8] mode; // 100 64 + char [8] uid; // 108 6C + char [8] gid; // 116 74 + char [12] size; // 124 7C + char [12] mtime; // 136 88 + char [8] chksum; // 148 94 + char typeflag; // 156 9C + char [nameLen] linkname; // 157 9D + char [8] magic; // 257 101 + char [unameLen] uname; // 265 109 + char [unameLen] gname; // 297 129 + char [8] devmajor; // 329 149 + char [8] devminor; // 337 151 + char [prefixLen] prefix; // 345 159 + char [12] padding; // 500 1F4 + //dfmt on +} + @("read/write gnulong #17") unittest { @@ -54,3 +84,52 @@ unittest cast(ByteChunk) null, )); } + +@("read/write split prefix") +unittest +{ + const content = cast(ByteChunk)("the content of the file".representation); + const filename = "long-path".repeat(11).join("/") ~ "/file.txt"; + + assert(filename.length > nameLen && filename.length < nameLen + prefixLen); + + // dfmt off + const tarData = only( + infoEntry(BoxEntryInfo( + path: filename, + type: EntryType.regular, + size: content.length, + attributes: octal!"100644", + ), + only(content)) + ) + .boxTar() + .join(); + // dfmt on + + // 0 file block + // 1 file data + // 2 footer (x2) + assert(tarData.length == 4 * blockLen); + + const(Block)* blk = cast(const(Block)*)&tarData[0]; + assert(blk.typeflag == '0'); + assert(blk.magic == posixMagic); + assert(tarData[1 * blockLen .. $].startsWith(content)); + + assert(tarData[2 * blockLen .. $].all!"a == 0"); + + const entries = only(tarData) + .unboxTar() + .map!(e => tuple(e.path, e.type, e.linkname, e.size, cast(ByteChunk) e.readContent())) + .array; + + assert(entries.length == 1); + assert(entries[0] == tuple( + filename, + EntryType.regular, + cast(string) null, + content.length, + content, + )); +} \ No newline at end of file From b36317145bf4c3308f2a0f5758aa33028c4c8e44 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Tue, 8 Aug 2023 23:14:05 +0200 Subject: [PATCH 31/34] tar fix split prefix encoding --- src/squiz_box/box/tar.d | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/squiz_box/box/tar.d b/src/squiz_box/box/tar.d index 068842d..51d6679 100644 --- a/src/squiz_box/box/tar.d +++ b/src/squiz_box/box/tar.d @@ -495,6 +495,7 @@ struct TarInfo gname: gname, devmajor: devmajor, devminor: devminor, + prefix: px, }; return encoded + blk.encode(buffer[offset + encoded .. $]); @@ -651,6 +652,7 @@ unittest static assert(veryLongPath.length > 255); static assert(longPath.length > 100); static assert(longPath.length < 155); + static assert(longPrefix ~ "/" ~ longName == longPath); assert(splitLongName(shortPath) == [null, shortPath]); assert(splitLongName(veryLongPath) == [null, veryLongPath]); From a2a0b9637969e327d38bd562b0e814697a8d6164 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Tue, 8 Aug 2023 23:14:30 +0200 Subject: [PATCH 32/34] enhance gnulong test --- test/tar.d | 50 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/test/tar.d b/test/tar.d index 5cc19f5..982adf4 100644 --- a/test/tar.d +++ b/test/tar.d @@ -47,8 +47,11 @@ unittest const filename = "long-path".repeat(55).join("/") ~ "/file.txt"; const linkname = "long-path".repeat(55).join("/") ~ "/link.txt"; + assert(filename.length > nameLen + prefixLen); + assert(linkname.length > nameLen + prefixLen); + // dfmt off - const entries = only( + const tarData = only( infoEntry(BoxEntryInfo( path: filename, type: EntryType.regular, @@ -63,10 +66,51 @@ unittest attributes: octal!"100644", ))) .boxTar() + .join(); + // dfmt on + + // 0 file name block + // 1 file name data (x2) + // 3 file block + // 4 file data + // 5 link name block + // 6 link name data (x2) + // 8 link linkname block + // 9 link linkname data (x2) + // 11 link block + // 12 footer (x2) + assert(tarData.length == 14 * blockLen); + + const(Block)* blk = cast(const(Block)*)&tarData[0]; + assert(blk.typeflag == 'L'); + assert(blk.magic == gnuMagic); + assert(tarData[blockLen .. $].startsWith(filename.representation)); + + blk = cast(const(Block)*)&tarData[3 * blockLen]; + assert(blk.typeflag == '0'); + assert(blk.magic == posixMagic); + assert(tarData[4 * blockLen .. $].startsWith(content)); + + blk = cast(const(Block)*)&tarData[5 * blockLen]; + assert(blk.typeflag == 'L'); + assert(blk.magic == gnuMagic); + assert(tarData[6 * blockLen .. $].startsWith(linkname.representation)); + + blk = cast(const(Block)*)&tarData[8 * blockLen]; + assert(blk.typeflag == 'K'); + assert(blk.magic == gnuMagic); + assert(tarData[9 * blockLen .. $].startsWith(filename.representation)); + + blk = cast(const(Block)*)&tarData[11 * blockLen]; + assert(blk.typeflag == '2'); + assert(blk.magic == posixMagic); + + assert(tarData[14 * blockLen .. $].all!"a == 0"); + + const entries = only(tarData) .unboxTar() - .map!(e => tuple(e.path, e.type, e.linkname, e.size, cast(ByteChunk)e.readContent())) + .map!(e => tuple(e.path, e.type, e.linkname, e.size, cast(ByteChunk) e.readContent())) .array; - // dfmt on assert(entries.length == 2); assert(entries[0] == tuple( From 546ba64baec9674a6576c9b70d824ccfa9ad2c25 Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Wed, 9 Aug 2023 00:47:20 +0200 Subject: [PATCH 33/34] test tar name 100 chars --- test/tar.d | 148 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 99 insertions(+), 49 deletions(-) diff --git a/test/tar.d b/test/tar.d index 982adf4..f70cfce 100644 --- a/test/tar.d +++ b/test/tar.d @@ -40,6 +40,105 @@ struct Block //dfmt on } +@("read/write name 100 chars") +unittest +{ + const content = cast(ByteChunk)("the content of the file".representation); + const filename = "long-path".repeat(9).join("/") ~ "/123456.txt"; + + assert(filename.length == nameLen); + + // dfmt off + const tarData = only( + infoEntry(BoxEntryInfo( + path: filename, + type: EntryType.regular, + size: content.length, + attributes: octal!"100644", + ), + only(content)) + ) + .boxTar() + .join(); + // dfmt on + + // 0 file block + // 1 file data + // 2 footer (x2) + assert(tarData.length == 4 * blockLen); + + const(Block)* blk = cast(const(Block)*)&tarData[0]; + assert(blk.typeflag == '0'); + assert(blk.magic == posixMagic); + assert(blk.name == filename); + assert(blk.prefix[0] == '\0'); + assert(tarData[1 * blockLen .. $].startsWith(content)); + + assert(tarData[2 * blockLen .. $].all!"a == 0"); + + const entries = only(tarData) + .unboxTar() + .map!(e => tuple(e.path, e.type, e.size, cast(ByteChunk) e.readContent())) + .array; + + assert(entries.length == 1); + assert(entries[0] == tuple( + filename, + EntryType.regular, + content.length, + content, + )); +} + +@("read/write split prefix") +unittest +{ + const content = cast(ByteChunk)("the content of the file".representation); + const filename = "long-path".repeat(11).join("/") ~ "/file.txt"; + + assert(filename.length > nameLen && filename.length < nameLen + prefixLen); + + // dfmt off + const tarData = only( + infoEntry(BoxEntryInfo( + path: filename, + type: EntryType.regular, + size: content.length, + attributes: octal!"100644", + ), + only(content)) + ) + .boxTar() + .join(); + // dfmt on + + // 0 file block + // 1 file data + // 2 footer (x2) + assert(tarData.length == 4 * blockLen); + + const(Block)* blk = cast(const(Block)*)&tarData[0]; + assert(blk.typeflag == '0'); + assert(blk.magic == posixMagic); + assert(blk.prefix[0 .. 9] == "long-path"); + assert(tarData[1 * blockLen .. $].startsWith(content)); + + assert(tarData[2 * blockLen .. $].all!"a == 0"); + + const entries = only(tarData) + .unboxTar() + .map!(e => tuple(e.path, e.type, e.size, cast(ByteChunk) e.readContent())) + .array; + + assert(entries.length == 1); + assert(entries[0] == tuple( + filename, + EntryType.regular, + content.length, + content, + )); +} + @("read/write gnulong #17") unittest { @@ -128,52 +227,3 @@ unittest cast(ByteChunk) null, )); } - -@("read/write split prefix") -unittest -{ - const content = cast(ByteChunk)("the content of the file".representation); - const filename = "long-path".repeat(11).join("/") ~ "/file.txt"; - - assert(filename.length > nameLen && filename.length < nameLen + prefixLen); - - // dfmt off - const tarData = only( - infoEntry(BoxEntryInfo( - path: filename, - type: EntryType.regular, - size: content.length, - attributes: octal!"100644", - ), - only(content)) - ) - .boxTar() - .join(); - // dfmt on - - // 0 file block - // 1 file data - // 2 footer (x2) - assert(tarData.length == 4 * blockLen); - - const(Block)* blk = cast(const(Block)*)&tarData[0]; - assert(blk.typeflag == '0'); - assert(blk.magic == posixMagic); - assert(tarData[1 * blockLen .. $].startsWith(content)); - - assert(tarData[2 * blockLen .. $].all!"a == 0"); - - const entries = only(tarData) - .unboxTar() - .map!(e => tuple(e.path, e.type, e.linkname, e.size, cast(ByteChunk) e.readContent())) - .array; - - assert(entries.length == 1); - assert(entries[0] == tuple( - filename, - EntryType.regular, - cast(string) null, - content.length, - content, - )); -} \ No newline at end of file From ae3c907f8201071604c4ff45ededf4070bc598ee Mon Sep 17 00:00:00 2001 From: Remi Thebault Date: Tue, 8 Aug 2023 23:39:36 +0200 Subject: [PATCH 34/34] Rework TarInfo semantics, fix permission bits --- .github/workflows/ci.yml | 2 +- src/squiz_box/box/tar.d | 118 +++++++++++++++++++++++++-------------- test/archive.d | 5 +- 3 files changed, 79 insertions(+), 46 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ed7bdb0..131d0a4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: os: [ ubuntu-latest, windows-latest ] dc: [ dmd-latest ] bt: [ debug ] - meson: [ '1.2.0' ] + meson: [ '0.62.2' ] runs-on: ${{ matrix.os }} diff --git a/src/squiz_box/box/tar.d b/src/squiz_box/box/tar.d index 51d6679..0efd347 100644 --- a/src/squiz_box/box/tar.d +++ b/src/squiz_box/box/tar.d @@ -215,11 +215,30 @@ EntryType toEntryType(Typeflag flag) } } -version (Posix) +/// Returns TAR permission bits from attributes (as per std.file.getAttributes) +uint toTarMode(uint attributes) { - // stat.st_mode part corresponding to file type - uint posixModeFileType(Typeflag flag) + import std.conv : octal; + + version(Posix) + { + return attributes & octal!"7777"; + } + else version(Windows) { + // windows attributes are irrelevant to permission bits + return octal!"644"; + } +} + +/// Returns attributes with the same semantics as std.file.getAttributes +/// flag is Block.typeflag and mode is octal translation of Block.mode +uint toAttributes(Typeflag flag, uint mode) +{ + version (Posix) + { + // tar mode contains stat.st_mode & 07777. + // we have to add the missing flags corresponding to file type import std.conv : octal; import std.format : format; @@ -227,29 +246,47 @@ version (Posix) { case Typeflag.normalNul: case Typeflag.normal: - return octal!100_000; + return mode | octal!100_000; case Typeflag.hardLink: // is regular file right for hard links? - return octal!100_000; + return mode | octal!100_000; case Typeflag.symLink: - return octal!120_000; + return mode | octal!120_000; case Typeflag.charSpecial: - return octal!20_000; + return mode | octal!20_000; case Typeflag.blockSpecial: - return octal!60_000; + return mode | octal!60_000; case Typeflag.directory: - return octal!40_000; + return mode | octal!40_000; case Typeflag.fifo: - return octal!10_000; + return mode | octal!10_000; case Typeflag.contiguousFile: // is regular file right for contiguous files? - return octal!100_000; + return mode | octal!100_000; default: throw new Exception(format!"Unexpected Tar entry type: '%s'"(cast(char) flag)); } } + else version (Windows) + { + // the same values as win32 GetFileAttributes + // (mode permission bits are irrelevant) + import core.sys.windows.winnt : FILE_ATTRIBUTE_DIRECTORY, FILE_ATTRIBUTE_NORMAL; + + switch (flag) + { + case Typeflag.normal: + return FILE_ATTRIBUTE_NORMAL; + case Typeflag.directory: + return FILE_ATTRIBUTE_DIRECTORY; + default: + // TODO: symbolic links in windows + return 0; + } + } } +/// BlockInfo contains info for one 512 byte TAR block struct BlockInfo { static struct Block @@ -422,12 +459,13 @@ size_t encodeLongGnu(ref ubyte[] buffer, size_t offset, string name, Typeflag ty return blockLen + l512; } +/// TarInfo contains info for one TAR header, which is one or more TAR blocks struct TarInfo { string name; - uint mode; - int uid; - int gid; + uint attributes; + int ownerId; + int groupId; size_t size; SysTime mtime; string linkname; @@ -483,9 +521,9 @@ struct TarInfo BlockInfo blk = { name: nm, - mode: mode, - uid: uid, - gid: gid, + mode: toTarMode(attributes), + uid: ownerId, + gid: groupId, size: size, mtime: max(0, mtime.toUnixTime()), linkname: lk, @@ -506,8 +544,12 @@ struct TarInfo auto blk = BlockInfo.decode(cursor); if (blk.isNull) { - TarInfo info = {entrySize: blockLen, - isNull: true,}; + // dfmt off + TarInfo info = { + entrySize: blockLen, + isNull: true, + }; + // dfmt on return info; } @@ -540,9 +582,9 @@ struct TarInfo { TarInfo info = { name: blk.name, - mode: blk.mode, - uid: blk.uid, - gid: blk.gid, + attributes: toAttributes(blk.typeflag, blk.mode), + ownerId: blk.uid, + groupId: blk.gid, size: blk.size, mtime: SysTime(unixTimeToStdTime(blk.mtime)), type: toEntryType(blk.typeflag), @@ -561,15 +603,7 @@ struct TarInfo info.name = blk.prefix ~ "/" ~ blk.name; } - version (Posix) - { - // tar mode contains stat.st_mode & 07777. - // we have to add the missing flags corresponding to file type - // (and by no way tar mode is meaningful on Windows) - const filetype = posixModeFileType(blk.typeflag); - info.mode |= filetype; - } - else version (Windows) + version (Windows) { info.name = info.name.replace('\\', '/'); info.linkname = info.linkname.replace('\\', '/'); @@ -756,6 +790,7 @@ struct TarBox(I) // common fields TarInfo info = { name: entry.path, + attributes: entry.attributes, size: entry.size, mtime: entry.timeLastModified, type: entry.type, @@ -771,25 +806,24 @@ struct TarBox(I) char[512] buf; - info.mode = entry.attributes & octal!7777; - info.uid = entry.ownerId; - info.gid = entry.groupId; + info.ownerId = entry.ownerId; + info.groupId = entry.groupId; - if (info.uid != 0) + if (info.ownerId != 0) { passwd pwdbuf; passwd* pwd; - if (getpwuid_r(info.uid, &pwdbuf, buf.ptr, buf.length, &pwd) == 0) + if (getpwuid_r(info.ownerId, &pwdbuf, buf.ptr, buf.length, &pwd) == 0) { const len = min(strlen(pwd.pw_name), unameLen); info.uname = pwd.pw_name[0 .. len].idup; } } - if (info.gid != 0) + if (info.groupId != 0) { group grpbuf; group* grp; - if (getgrgid_r(info.gid, &grpbuf, buf.ptr, buf.length, &grp) == 0) + if (getgrgid_r(info.groupId, &grpbuf, buf.ptr, buf.length, &grp) == 0) { const len = min(strlen(grp.gr_name), unameLen); info.gname = grp.gr_name[0 .. len].idup; @@ -798,8 +832,6 @@ struct TarBox(I) } else version (Windows) { - // default to mode 644 which is the most common on UNIX - info.mode = "0000644"; // TODO: https://docs.microsoft.com/fr-fr/windows/win32/secauthz/finding-the-owner-of-a-file-object-in-c-- } @@ -1046,19 +1078,19 @@ private class TarUnboxEntry : UnboxEntry @property uint attributes() { - return _info.mode; + return _info.attributes; } version (Posix) { @property int ownerId() { - return _info.uid; + return _info.ownerId; } @property int groupId() { - return _info.gid; + return _info.groupId; } } diff --git a/test/archive.d b/test/archive.d index bd31d6b..4d04efd 100644 --- a/test/archive.d +++ b/test/archive.d @@ -194,8 +194,9 @@ unittest } else { - const attr644 = 0; - const attr666 = 0; + import core.sys.windows.winnt : FILE_ATTRIBUTE_NORMAL; + const attr644 = FILE_ATTRIBUTE_NORMAL; + const attr666 = FILE_ATTRIBUTE_NORMAL; } const expectedEntries = [