diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index be39c68..131d0a4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: os: [ ubuntu-latest, windows-latest ] dc: [ dmd-latest ] bt: [ debug ] - meson: [ '0.62.0' ] + meson: [ '0.62.2' ] runs-on: ${{ matrix.os }} diff --git a/meson.build b/meson.build index 180c3c4..f2819da 100644 --- a/meson.build +++ b/meson.build @@ -69,10 +69,23 @@ if get_option('enable_test') 'test/archive.d', 'test/compress.d', 'test/main.d', + 'test/tar.d', 'test/util.d', ]) - squiz_test_exe = executable('squiz-test', squiz_test_src, + stupid_gen_exe = executable('stupid_gen', 'tools/stupid_gen.d', + d_import_dirs: include_directories('tools'), + ) + squiz_stupid = custom_target('dop_stupid', + capture: true, + output: 'stupid.d', + input: squiz_test_src, + command: [ + stupid_gen_exe, '@INPUT@', + ], + ) + + squiz_test_exe = executable('squiz-test', squiz_stupid, squiz_test_src, d_unittest: true, install: false, include_directories: squiz_inc, @@ -80,5 +93,7 @@ if get_option('enable_test') d_module_versions: squiz_ver, ) - test('unit tests', squiz_test_exe) + test('unit tests', squiz_test_exe, + timeout: 120, + ) endif \ No newline at end of file diff --git a/src/squiz_box/box/package.d b/src/squiz_box/box/package.d index f1bf517..3079758 100644 --- a/src/squiz_box/box/package.d +++ b/src/squiz_box/box/package.d @@ -270,7 +270,7 @@ interface ArchiveEntry import std.path : buildNormalizedPath, isAbsolute; import std.string : startsWith; - if (allowedSz != ulong.max && size > allowedSz) + if (size > allowedSz) return true; const p = path; @@ -571,3 +571,127 @@ class FileBoxEntry : BoxEntry return inputRangeObject(ByChunkImpl(File(filePath, "rb"), chunkSize)); } } + +struct BoxEntryInfo +{ + /// The archive mode this entry is for. + /// The path of the entry within the archive. + /// Should always be a relative path, and never go backward (..) + /// The directory separations are always '/' (forward slash) even on Windows + string path; + + /// The type of entry (directory, file, symlink) + EntryType type; + + /// If symlink, this is the path pointed to by the link (relative to the symlink). + /// Should be null for directories and regular file. + string linkname; + + /// The size of the entry in bytes (should be zero for directories and symlink) + /// This is the size of uncompressed data. 
+ ulong size; + + /// The timeLastModified of the entry + SysTime timeLastModified; + + /// The file attributes (as returned std.file.getLinkAttributes) + uint attributes; + + version (Posix) + { + /// The owner id of the entry + int ownerId; + /// The group id of the entry + int groupId; + } +} + +class InfoBoxEntry : BoxEntry +{ + BoxEntryInfo info; + ByteRange data; + + this(BoxEntryInfo info, ByteRange data) + in (data is null || data.empty || info.type == EntryType.regular, "data can only be supplied for regular files") + { + this.info = info; + this.data = data; + } + + override @property EntryMode mode() + { + return EntryMode.creation; + } + + override @property string path() + { + return info.path; + } + + override @property EntryType type() + { + return info.type; + } + + override @property string linkname() + { + return info.linkname; + } + + override @property ulong size() + { + return info.size; + } + + override @property SysTime timeLastModified() + { + return info.timeLastModified; + } + + override @property uint attributes() + { + return info.attributes; + } + + version (Posix) + { + override @property int ownerId() + { + return info.ownerId; + } + + override @property int groupId() + { + return info.groupId; + } + } + + /// Return the data passed in the ctor. + /// chunkSize has no effect here + ByteRange byChunk(size_t chunkSize = 0) + { + if (data) + return data; + return inputRangeObject(emptyByteRange); + } +} + +/// Create a BoxEntry from the provided info. +/// This allows to create archives out of generated data, without any backing file on disk. +InfoBoxEntry infoEntry(I)(BoxEntryInfo info, I data) +if (isByteRange!I) +in (info.type == EntryType.regular || data.empty, "symlinks and directories can't have data") +{ + import std.datetime : Clock; + + if (info.timeLastModified == SysTime.init) + info.timeLastModified = Clock.currTime; + + return new InfoBoxEntry(info, inputRangeObject(data)); +} + +/// ditto +InfoBoxEntry infoEntry(BoxEntryInfo info) +{ + return infoEntry(info, inputRangeObject(emptyByteRange)); +} diff --git a/src/squiz_box/box/seven_z/header.d b/src/squiz_box/box/seven_z/header.d index 52005ed..aa05d42 100644 --- a/src/squiz_box/box/seven_z/header.d +++ b/src/squiz_box/box/seven_z/header.d @@ -193,7 +193,7 @@ struct Header auto unpacked = cursorByteRange(mainCursor, packSize) .squizMaxOut(algo, unpackSize) .join(); - assert(unpacked.length = unpackSize); + assert(unpacked.length == unpackSize); static if (false) { diff --git a/src/squiz_box/box/tar.d b/src/squiz_box/box/tar.d index d338472..0efd347 100644 --- a/src/squiz_box/box/tar.d +++ b/src/squiz_box/box/tar.d @@ -15,12 +15,12 @@ struct TarAlgo { auto box(I)(I entries, size_t chunkSize = defaultChunkSize) if (isBoxEntryRange!I) - in (chunkSize >= 512 && chunkSize % 512 == 0) { return TarBox!I(entries, chunkSize); } - auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) if (isByteRange!I) + auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) + if (isByteRange!I) { auto dataInput = new ByteRangeCursor!I(input); return TarUnbox(dataInput, removePrefix); @@ -34,12 +34,12 @@ struct TarGzAlgo { auto box(I)(I entries, size_t chunkSize = defaultChunkSize) if (isBoxEntryRange!I) - in (chunkSize >= 512 && chunkSize % 512 == 0) { return TarBox!I(entries, chunkSize).deflateGz(chunkSize); } - auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) if (isByteRange!I) + auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) + 
if (isByteRange!I) { auto ii = input.inflateGz(); alias II = typeof(ii); @@ -57,12 +57,12 @@ version (HaveSquizBzip2) { auto box(I)(I entries, size_t chunkSize = defaultChunkSize) if (isBoxEntryRange!I) - in (chunkSize >= 512 && chunkSize % 512 == 0) { return TarBox!I(entries, chunkSize).compressBzip2(chunkSize); } - auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) if (isByteRange!I) + auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) + if (isByteRange!I) { auto ii = input.decompressBzip2(); alias II = typeof(ii); @@ -81,12 +81,12 @@ version (HaveSquizLzma) { auto box(I)(I entries, size_t chunkSize = defaultChunkSize) if (isBoxEntryRange!I) - in (chunkSize >= 512 && chunkSize % 512 == 0) { return TarBox!I(entries, chunkSize).compressXz(chunkSize); } - auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) if (isByteRange!I) + auto unbox(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) + if (isByteRange!I) { auto ii = input.decompressXz(); alias II = typeof(ii); @@ -100,10 +100,8 @@ version (HaveSquizLzma) /// Returns a `.tar`, `.tar.gz`, `.tar.bz2` or `.tar.xz` archive as a byte range /// corresponding to the entries in input. -/// chunkSize must be a multiple of 512. auto boxTar(I)(I entries, size_t chunkSize = defaultChunkSize) if (isBoxEntryRange!I) -in (chunkSize >= 512 && chunkSize % 512 == 0) { return TarBox!I(entries, chunkSize); } @@ -133,7 +131,8 @@ version (HaveSquizLzma) } /// Returns a range of entries from a `.tar`, `.tar.gz`, `.tar.bz2` or `.tar.xz` formatted byte range -auto unboxTar(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) if (isByteRange!I) +auto unboxTar(I)(I input, Flag!"removePrefix" removePrefix = No.removePrefix) + if (isByteRange!I) { auto dataInput = new ByteRangeCursor!I(input); return TarUnbox(dataInput, removePrefix); @@ -163,121 +162,774 @@ version (HaveSquizLzma) } } -private struct TarBox(I) +private: + +enum blockLen = 512; +enum nameLen = 100; +enum unameLen = 32; +enum prefixLen = 155; + +enum char[8] posixMagic = "ustar\x0000"; +enum char[8] gnuMagic = "ustar \x00"; + +enum Typeflag : ubyte { - // init data - I entriesInput; - ubyte[] buffer; + normalNul = 0, + normal = '0', + hardLink = '1', + symLink = '2', + charSpecial = '3', + blockSpecial = '4', + directory = '5', + fifo = '6', + contiguousFile = '7', + posixExtended = 'g', + extended = 'x', + gnuLongname = 'L', + gnuLonglink = 'K', +} - // current chunk (front data) - ubyte[] chunk; // data ready - ubyte[] avail; // space available in buffer (after chunk) +Typeflag toTypeflag(EntryType type) +{ + final switch (type) + { + case EntryType.regular: + return Typeflag.normal; + case EntryType.directory: + return Typeflag.directory; + case EntryType.symlink: + return Typeflag.symLink; + } +} - // current entry being processed - BoxEntry entry; - ByteRange entryChunks; +EntryType toEntryType(Typeflag flag) +{ + switch (flag) + { + case Typeflag.directory: + return EntryType.directory; + case Typeflag.symLink: + return EntryType.symlink; + default: + return EntryType.regular; + } +} - // footer is two empty blocks - size_t footer; - enum footerLen = 1024; +/// Returns TAR permission bits from attributes (as per std.file.getAttributes) +uint toTarMode(uint attributes) +{ + import std.conv : octal; - this(I entries, size_t chunkSize) + version(Posix) { - enforce(chunkSize % 512 == 0, "chunk size must be a multiple of 512"); - entriesInput = entries; - buffer = new ubyte[chunkSize]; - avail = buffer; - 
popFront(); + return attributes & octal!"7777"; } + else version(Windows) + { + // windows attributes are irrelevant to permission bits + return octal!"644"; + } +} - @property bool empty() +/// Returns attributes with the same semantics as std.file.getAttributes +/// flag is Block.typeflag and mode is octal translation of Block.mode +uint toAttributes(Typeflag flag, uint mode) +{ + version (Posix) { - // handle .init - if (!buffer) - return true; + // tar mode contains stat.st_mode & 07777. + // we have to add the missing flags corresponding to file type + import std.conv : octal; + import std.format : format; - // more files to be processed - if (!entriesInput.empty) - return false; + switch (flag) + { + case Typeflag.normalNul: + case Typeflag.normal: + return mode | octal!100_000; + case Typeflag.hardLink: + // is regular file right for hard links? + return mode | octal!100_000; + case Typeflag.symLink: + return mode | octal!120_000; + case Typeflag.charSpecial: + return mode | octal!20_000; + case Typeflag.blockSpecial: + return mode | octal!60_000; + case Typeflag.directory: + return mode | octal!40_000; + case Typeflag.fifo: + return mode | octal!10_000; + case Typeflag.contiguousFile: + // is regular file right for contiguous files? + return mode | octal!100_000; + default: + throw new Exception(format!"Unexpected Tar entry type: '%s'"(cast(char) flag)); + } + } + else version (Windows) + { + // the same values as win32 GetFileAttributes + // (mode permission bits are irrelevant) + import core.sys.windows.winnt : FILE_ATTRIBUTE_DIRECTORY, FILE_ATTRIBUTE_NORMAL; - // current entry not exhausted - if (hasEntryChunks()) - return false; + switch (flag) + { + case Typeflag.normal: + return FILE_ATTRIBUTE_NORMAL; + case Typeflag.directory: + return FILE_ATTRIBUTE_DIRECTORY; + default: + // TODO: symbolic links in windows + return 0; + } + } +} - // some unconsumed flying data - if (chunk.length) - return false; +/// BlockInfo contains info for one 512 byte TAR block +struct BlockInfo +{ + static struct Block + { + // dfmt off + char [nameLen] name; // 0 0 + char [8] mode; // 100 64 + char [8] uid; // 108 6C + char [8] gid; // 116 74 + char [12] size; // 124 7C + char [12] mtime; // 136 88 + char [8] chksum; // 148 94 + Typeflag typeflag; // 156 9C + char [nameLen] linkname; // 157 9D + char [8] magic; // 257 101 + char [unameLen] uname; // 265 109 + char [unameLen] gname; // 297 129 + char [8] devmajor; // 329 149 + char [8] devminor; // 337 151 + char [prefixLen] prefix; // 345 159 + char [12] padding; // 500 1F4 + //dfmt on + + private uint checksum() + { + uint sum = 0; + sum += unsignedSum(name); + sum += unsignedSum(mode); + sum += unsignedSum(uid); + sum += unsignedSum(gid); + sum += unsignedSum(size); + sum += unsignedSum(mtime); + sum += 32 * 8; + sum += cast(uint) typeflag; + sum += unsignedSum(linkname); + sum += unsignedSum(magic); + sum += unsignedSum(uname); + sum += unsignedSum(gname); + sum += unsignedSum(devmajor); + sum += unsignedSum(devminor); + sum += unsignedSum(prefix); + return sum; + } + } + + static assert(Block.sizeof == blockLen); - return true; + string name; + uint mode; + int uid; + int gid; + size_t size; + long mtime; + Typeflag typeflag; + string linkname; + char[8] magic; + string uname; + string gname; + int devmajor; + int devminor; + string prefix; + + bool isNull; + + size_t encode(scope ubyte[] buffer) const + in (buffer.length >= Block.sizeof) + in (name.length <= Block.name.sizeof) + in (linkname.length <= Block.linkname.sizeof) + in (uname.length 
<= Block.uname.sizeof) + in (gname.length <= Block.gname.sizeof) + in (prefix.length <= Block.prefix.sizeof) + { + buffer[0 .. blockLen] = 0; + if (isNull) + return blockLen; + + Block* blk = cast(Block*)&buffer[0]; + + blk.name[0 .. name.length] = name; + toOctalString(mode, blk.mode[]); + toOctalString(uid, blk.uid[]); + toOctalString(gid, blk.gid[]); + toOctalString(size, blk.size[]); + toOctalString(mtime, blk.mtime[]); + blk.typeflag = typeflag; + blk.linkname[0 .. linkname.length] = linkname; + blk.magic = magic; + blk.uname[0 .. uname.length] = uname; + blk.gname[0 .. gname.length] = gname; + toOctalString(devmajor, blk.devmajor[]); + toOctalString(devminor, blk.devminor[]); + blk.prefix[0 .. prefix.length] = prefix; + + const checksum = blk.checksum(); + toOctalString(checksum, blk.chksum[]); + + return blockLen; } - @property ByteChunk front() + static BlockInfo decode(Cursor cursor) { - return chunk; + Block blk = void; + cursor.readValue(&blk); + + const computed = blk.checksum(); + const checksum = parseOctalString!uint(blk.chksum); + if (computed == 256 && checksum == 0) + { + // this is an empty header (only zeros) + // indicates end of archive + + // dfmt off + BlockInfo info = { + isNull: true, + }; + // dfmt on + return info; + } + + enforce( + checksum == computed, + format!"Invalid TAR checksum at 0x%08X\nExpected 0x%08x but found 0x%08x"( + cursor.pos - blockLen + blk.chksum.offsetof, + computed, checksum) + ); + + BlockInfo info = { + name: parseString(blk.name).idup, + mode: parseOctalString!uint(blk.mode), + uid: parseOctalString!uint(blk.uid), + gid: parseOctalString!uint(blk.uid), + size: parseOctalString!size_t(blk.size), + mtime: parseOctalString!long(blk.mtime), + typeflag: blk.typeflag, + linkname: parseString(blk.linkname).idup, + magic: blk.magic, + uname: parseString(blk.uname).idup, + gname: parseString(blk.gname).idup, + devmajor: parseOctalString!int(blk.devmajor), + devminor: parseOctalString!int(blk.devminor), + prefix: parseString(blk.prefix).idup, + }; + + return info; } +} - void popFront() +void ensureLen(ref ubyte[] buffer, size_t len) +{ + if (buffer.length < len) + buffer.length = len; +} + +size_t encodeLongGnu(ref ubyte[] buffer, size_t offset, string name, Typeflag typeflag) +{ + const l512 = next512(name.length); + buffer.ensureLen(offset + blockLen + l512); + + BlockInfo gnu = { + name: "././@LongLink", + size: name.length, + typeflag: typeflag, + magic: gnuMagic, + }; + gnu.encode(buffer[offset .. $]); + + buffer[offset + blockLen .. offset + blockLen + name.length] = name.representation; + buffer[offset + blockLen + name.length .. offset + blockLen + l512] = 0; + + return blockLen + l512; +} + +/// TarInfo contains info for one TAR header, which is one or more TAR blocks +struct TarInfo +{ + string name; + uint attributes; + int ownerId; + int groupId; + size_t size; + SysTime mtime; + string linkname; + EntryType type; + string uname; + string gname; + int devmajor; + int devminor; + + size_t entrySize; + bool isNull; + + // encode this header in buffer, starting at offset + // buffer can potentially be grown if too small + size_t encode(ref ubyte[] buffer, size_t offset) { - if (!moreToRead()) + import std.algorithm : max; + import std.string : representation; + + if (isNull) + { + buffer.ensureLen(offset + blockLen); + buffer[offset .. 
offset + blockLen] = 0; + return blockLen; + } + + size_t encoded; + + string nm = name; + string lk = linkname; + string px; + if (nm.length > nameLen) { - if (footer >= footerLen) + const pn = splitLongName(nm); + if (pn[0] != null) { - chunk = null; + px = pn[0]; + nm = pn[1]; } else { - import std.algorithm : min; - - const len = min(buffer.length, footerLen - footer); - buffer[0 .. len] = 0; - chunk = buffer[0 .. len]; - footer += len; + encoded += buffer.encodeLongGnu(offset + encoded, nm, Typeflag.gnuLongname); + nm = null; } - return; + } + if (lk.length > nameLen) + { + encoded += buffer.encodeLongGnu(offset + encoded, lk, Typeflag.gnuLonglink); + lk = null; + } + + buffer.ensureLen(offset + encoded + blockLen); + + BlockInfo blk = { + name: nm, + mode: toTarMode(attributes), + uid: ownerId, + gid: groupId, + size: size, + mtime: max(0, mtime.toUnixTime()), + linkname: lk, + typeflag: toTypeflag(type), + magic: posixMagic, + uname: uname, + gname: gname, + devmajor: devmajor, + devminor: devminor, + prefix: px, + }; + + return encoded + blk.encode(buffer[offset + encoded .. $]); + } + + static TarInfo decode(Cursor cursor) + { + auto blk = BlockInfo.decode(cursor); + if (blk.isNull) + { + // dfmt off + TarInfo info = { + entrySize: blockLen, + isNull: true, + }; + // dfmt on + return info; + } + + switch (blk.typeflag) + { + case Typeflag.normalNul: + case Typeflag.normal: + case Typeflag.hardLink: + case Typeflag.symLink: + case Typeflag.charSpecial: + case Typeflag.blockSpecial: + case Typeflag.directory: + case Typeflag.fifo: + case Typeflag.contiguousFile: + case Typeflag.posixExtended: + case Typeflag.extended: + return decodeHeader(blk); + case Typeflag.gnuLongname: + case Typeflag.gnuLonglink: + return decodeGnuLongHeader(cursor, blk); + default: + const msg = format!"Unknown TAR typeflag: '%s'\nWhen extracting \"%s\"."( + cast(char) blk.typeflag, blk.prefix ~ "/" ~ blk.name + ); + throw new Exception(msg); + } + } + + private static TarInfo decodeHeader(scope ref BlockInfo blk) + { + TarInfo info = { + name: blk.name, + attributes: toAttributes(blk.typeflag, blk.mode), + ownerId: blk.uid, + groupId: blk.gid, + size: blk.size, + mtime: SysTime(unixTimeToStdTime(blk.mtime)), + type: toEntryType(blk.typeflag), + linkname: blk.linkname, + uname: blk.uname, + gname: blk.gname, + devmajor: blk.devmajor, + devminor: blk.devminor, + + entrySize: blockLen + next512(blk.size), + isNull: false, + }; + + if (blk.prefix.length != 0) + { + info.name = blk.prefix ~ "/" ~ blk.name; + } + + version (Windows) + { + info.name = info.name.replace('\\', '/'); + info.linkname = info.linkname.replace('\\', '/'); + } + + return info; + } + + private static TarInfo decodeGnuLongHeader(Cursor cursor, scope ref BlockInfo blk) + { + auto data = new char[next512(blk.size)]; + enforce(cursor.read(data).length == data.length, "Unexpected end of input"); + const name = parseString(assumeUnique(data)); + + auto next = TarInfo.decode(cursor); + next.entrySize += (blockLen + data.length); + + switch (blk.typeflag) + { + case Typeflag.gnuLongname: + next.name = name; + break; + case Typeflag.gnuLonglink: + next.linkname = name; + break; + default: + assert(false); + } + + if (next.type == EntryType.directory && !next.name.empty && next.name[$ - 1] == '/') + next.name = next.name[0 .. $ - 1]; + + return next; + } +} + +/// Splits long name into prefix and shorter name if it the name exceeds +/// the length of the tar header name field. 
+/// If the name is longer than prefix + name fields length, name is returned +/// unchanged. +/// On Windows, the path must be converted to Posix path (with '/' separator) +/// Returns: [prefix, name] +string[2] splitLongName(string name) +{ + if (name.length < nameLen) + return [null, name]; + if (name.length > nameLen + prefixLen) + return [null, name]; + + foreach (i; 0 .. name.length) + { + if (name[i] == '/') + { + const p = name[0 .. i]; + const n = name[i + 1 .. $]; + if (p.length <= prefixLen && n.length <= nameLen) + return [p, n]; + } + } + + return [null, name]; +} + +@("tar.splitPrefixName") +unittest +{ + enum shortPath = "some/short/path"; + enum veryLongPath = "some/very/long/long/long/long/long/long/long/long/long/long/long" + ~ "/long/long/long/long/long/long/long/long/long/long/long/long/long/long/long/long" + ~ "/long/long/long/long/long/long/long/long/long/long/long/long/long/long/long/long" + ~ "/long/long/long/long/long/long/long/long/long/long/long/long/long/long/long/long" + ~ "/long/long/long/long/long/long/long/long/long/path"; + + enum longPath = "some/long/long/long/long/long/long/long/long/long/long/long" + ~ "/long/long/long/long/long/long/long/long/long/long/long/long/long/long/path"; + enum longPrefix = "some/long/long/long/long/long/long"; + enum longName = "long/long/long/long/long/long/long/long/long/long/long/long/long/long/long" + ~ "/long/long/long/long/path"; + + static assert(veryLongPath.length > 255); + static assert(longPath.length > 100); + static assert(longPath.length < 155); + static assert(longPrefix ~ "/" ~ longName == longPath); + + assert(splitLongName(shortPath) == [null, shortPath]); + assert(splitLongName(veryLongPath) == [null, veryLongPath]); + assert(splitLongName(longPath) == [longPrefix, longName]); +} + +struct TarBox(I) +{ + I entries; + size_t chunkSize; + + size_t written; + ubyte[] buffer; + const(ubyte)[] chunk; + + // current entry being processed + ubyte[] remainHeader; + BoxEntry entry; + ByteRange entryChunks; + const(ubyte)[] entryChunk; + size_t padSize; + + // footer + size_t footerSize; + + this(I entries, size_t chunkSize) + { + import std.algorithm : max; + + this.entries = entries; + this.chunkSize = chunkSize; + this.buffer = new ubyte[max(512, chunkSize)]; + this.footerSize = blockLen * 2; + + popFront(); + } + + @property bool empty() + { + return chunk.length == 0; + } + + @property ByteChunk front() + { + return chunk; + } + + void popFront() + { + chunk = null; + scope (success) + { + written += chunk.length; } - while (avail.length && moreToRead) + while (!remainHeader.empty || padSize != 0 || hasEntryChunks || !entries.empty) { - nextBlock(); - chunk = buffer[0 .. 
$ - avail.length]; + if (nextRemainHeader()) + return; + + if (fillPad()) + return; + + if (nextEntryChunk()) + return; + + if (nextHeader()) + return; } - avail = buffer; + + footerSize -= fillZeros(footerSize); + } + + private size_t pos() + { + return written + chunk.length; } private bool hasEntryChunks() { - return entryChunks && !entryChunks.empty; + return (entryChunks && !entryChunks.empty) || !entryChunk.empty; } - private bool moreToRead() + private bool nextHeader() { - return !entriesInput.empty || hasEntryChunks(); + import std.algorithm : min; + + assert(chunk.length < chunkSize); + assert(remainHeader.empty); + assert(!hasEntryChunks); + + if (entries.empty) + return false; + + entry = entries.front; + entries.popFront(); + entryChunks = entry.byChunk(chunkSize); + if (!entryChunks.empty) + entryChunk = entryChunks.front; + + // common fields + TarInfo info = { + name: entry.path, + attributes: entry.attributes, + size: entry.size, + mtime: entry.timeLastModified, + type: entry.type, + linkname: entry.linkname, + }; + + version (Posix) + { + import core.sys.posix.grp; + import core.sys.posix.pwd; + import core.stdc.string : strlen; + import std.conv : octal; + + char[512] buf; + + info.ownerId = entry.ownerId; + info.groupId = entry.groupId; + + if (info.ownerId != 0) + { + passwd pwdbuf; + passwd* pwd; + if (getpwuid_r(info.ownerId, &pwdbuf, buf.ptr, buf.length, &pwd) == 0) + { + const len = min(strlen(pwd.pw_name), unameLen); + info.uname = pwd.pw_name[0 .. len].idup; + } + } + if (info.groupId != 0) + { + group grpbuf; + group* grp; + if (getgrgid_r(info.groupId, &grpbuf, buf.ptr, buf.length, &grp) == 0) + { + const len = min(strlen(grp.gr_name), unameLen); + info.gname = grp.gr_name[0 .. len].idup; + } + } + } + else version (Windows) + { + // TODO: https://docs.microsoft.com/fr-fr/windows/win32/secauthz/finding-the-owner-of-a-file-object-in-c-- + } + + const len = info.encode(buffer, chunk.length); + assert(buffer.length >= chunk.length + len); + + const chunkTo = min(chunk.length + len, chunkSize); + if (chunk.length + len > chunkSize) + remainHeader = buffer[chunkSize .. chunk.length + len]; + + chunk = buffer[0 .. chunkTo]; + return chunkTo == chunkSize; } - private void nextBlock() - in (avail.length >= 512) + // fill chunk with what remains of previous header (if any) + private bool nextRemainHeader() { - if (!entry || !hasEntryChunks()) + import std.algorithm : min; + + if (remainHeader.empty) + return false; + + if (chunk.empty && remainHeader.length > chunkSize) { - enforce(!entriesInput.empty); - entry = entriesInput.front; - entriesInput.popFront(); - avail = TarHeader.fillWith(entry, avail); - entryChunks = entry.byChunk(512); + chunk = remainHeader[0 .. chunkSize]; + remainHeader = remainHeader[chunkSize .. $]; + return true; } - else + + const len = min(chunkSize - chunk.length, remainHeader.length); + buffer[chunk.length .. chunk.length + len] = remainHeader[0 .. len]; + remainHeader = remainHeader[len .. $]; + chunk = buffer[0 .. chunk.length + len]; + + return chunk.length == chunkSize; + } + + // fill chunk with next chunk of current entry + private bool nextEntryChunk() + { + import std.algorithm : min; + + assert(chunk.length < chunkSize); + assert(padSize == 0); + assert(remainHeader.empty); + + while (hasEntryChunks() && chunk.length < chunkSize) { - auto filled = entryChunks.front; - avail[0 .. filled.length] = filled; - avail = avail[filled.length .. 
$]; - entryChunks.popFront(); - if (entryChunks.empty) + if (entryChunk.empty) { - const pad = avail.length % 512; - avail[0 .. pad] = 0; - avail = avail[pad .. $]; + entryChunks.popFront(); + if (!entryChunks.empty) + entryChunk = entryChunks.front; + + if (entryChunk.empty) + break; } + + if (chunk.empty && entryChunk.length >= chunkSize) + { + // can directly use entryChunk without copying + chunk = entryChunk[0 .. chunkSize]; + entryChunk = entryChunk[chunkSize .. $]; + break; + } + + // copy slice into buffer + const len = min(chunkSize - chunk.length, entryChunk.length); + buffer[chunk.length .. chunk.length + len] = entryChunk[0 .. len]; + chunk = buffer[0 .. chunk.length + len]; + entryChunk = entryChunk[len .. $]; } + + if (!hasEntryChunks()) + { + padSize = next512(pos) - pos; + padSize -= fillZeros(padSize); + } + + return chunk.length == chunkSize; + } + + size_t fillZeros(size_t zeros) + { + import std.algorithm : min; + + const len = min(chunkSize - chunk.length, zeros); + buffer[chunk.length .. chunk.length + len] = 0; + chunk = buffer[0 .. chunk.length + len]; + return len; + } + + bool fillPad() + { + if (padSize != 0) + padSize -= fillZeros(padSize); + + return chunk.length == chunkSize; } } @@ -289,7 +941,6 @@ private struct TarUnbox // current header data private size_t _next; - private ubyte[] _block; private UnboxEntry _entry; private Flag!"removePrefix" _removePrefix; private string _prefix; @@ -298,11 +949,9 @@ private struct TarUnbox { _input = input; _removePrefix = removePrefix; - _block = new ubyte[512]; - // file with zero bytes is a valid tar file if (!_input.eoi) - readHeaderBlock(); + popFront(); } @property bool empty() @@ -319,6 +968,8 @@ private struct TarUnbox { assert(_input.pos <= _next); + _entry = null; + if (_input.pos < _next) { // the current entry was not fully read, we move the stream forward @@ -326,116 +977,53 @@ private struct TarUnbox const dist = _next - _input.pos; _input.ffw(dist); } - readHeaderBlock(); - } - - private void readHeaderBlock() - { - import std.conv : to; - - enforce(_input.read(_block).length == 512, "Unexpected end of input"); - TarHeader* th = cast(TarHeader*) _block.ptr; + auto info = TarInfo.decode(_input); - const computed = th.unsignedChecksum(); - const checksum = parseOctalString(th.chksum); - - if (computed == 256 && checksum == 0) + if (info.isNull) { - // this is an empty header (only zeros) - // indicates end of archive - while (!_input.eoi) - { _input.ffw(512); - } - return; - } - - enforce( - checksum == computed, - "Invalid TAR checksum at 0x" ~ ( - _input.pos - 512 + th.chksum.offsetof) - .to!string(16) ~ - "\nExpected " ~ computed.to!string ~ " but found " ~ checksum.to!string, - ); - - if (th.typeflag == Typeflag.posixExtended || th.typeflag == Typeflag.extended) - { - // skipping extended Tar headers - const sz = next512(parseOctalString!size_t(th.size)); - _input.ffw(sz); - readHeaderBlock(); return; } - TarEntryInfo info; - info.path = (parseString(th.prefix) ~ parseString(th.name)).idup; - info.type = toEntryType(th.typeflag); - info.linkname = parseString(th.linkname).idup; - info.size = parseOctalString!size_t(th.size); - info.entrySize = 512 + next512(info.size); - info.timeLastModified = SysTime(unixTimeToStdTime(parseOctalString!ulong(th.mtime))); - version (Posix) - { - // tar mode contains stat.st_mode & 07777. 
- // we have to add the missing flags corresponding to file type - // (and by no way tar mode is meaningful on Windows) - const filetype = posixModeFileType(th.typeflag); - info.attributes = parseOctalString(th.mode) | filetype; - info.ownerId = parseOctalString(th.uid); - info.groupId = parseOctalString(th.gid); - } - - version (Windows) - info.path = info.path.replace('\\', '/'); - if (_removePrefix) { - import std.algorithm : min; - - const pref = enforce(entryPrefix(info.path, info.type), format!`"%s": no prefix to be removed`(info.path)); - - if (!_prefix) - _prefix = pref; - - enforce (_prefix == pref, format!`"%s": path prefix mismatch with "%s"`(info.path, _prefix)); - - const len = min(info.path.length, _prefix.length); - info.path = info.path[len .. $]; + info.name = removePrefix(info.name, info.type); // skipping empty directory - if (!info.path.length && info.type == EntryType.directory) + while (!info.name.length && info.type == EntryType.directory) { - _next = next512(_input.pos + info.size); - readHeaderBlock(); + info = TarInfo.decode(_input); + info.name = removePrefix(info.name, info.type); } } _entry = new TarUnboxEntry(_input, info); _next = next512(_input.pos + info.size); } -} -static assert(isUnboxEntryRange!TarUnbox); + private string removePrefix(string name, EntryType type) + { + import std.algorithm : min; -struct TarEntryInfo -{ - string path; - string linkname; - EntryType type; - ulong size; - ulong entrySize; - SysTime timeLastModified; - uint attributes; + const pref = enforce(entryPrefix(name, type), format!`"%s": no prefix to be removed`( + name)); - version (Posix) - { - int ownerId; - int groupId; + if (!_prefix) + _prefix = pref; + + enforce(_prefix == pref, format!`"%s": path prefix mismatch with "%s"`(name, _prefix)); + + const len = min(name.length, _prefix.length); + name = name[len .. $]; + + return name; } } +static assert(isUnboxEntryRange!TarUnbox); + private class TarUnboxEntry : UnboxEntry { import std.stdio : File; @@ -443,9 +1031,9 @@ private class TarUnboxEntry : UnboxEntry private Cursor _input; private size_t _start; private size_t _end; - private TarEntryInfo _info; + private TarInfo _info; - this(Cursor input, TarEntryInfo info) + this(Cursor input, TarInfo info) { _input = input; _start = input.pos; @@ -460,7 +1048,7 @@ private class TarUnboxEntry : UnboxEntry @property string path() { - return _info.path; + return _info.name; } @property EntryType type() @@ -485,7 +1073,7 @@ private class TarUnboxEntry : UnboxEntry @property SysTime timeLastModified() { - return _info.timeLastModified; + return _info.mtime; } @property uint attributes() @@ -518,214 +1106,6 @@ private class TarUnboxEntry : UnboxEntry } } -private struct TarHeader -{ - // dfmt off - char [100] name; // 0 0 - char [8] mode; // 100 64 - char [8] uid; // 108 6C - char [8] gid; // 116 74 - char [12] size; // 124 7C - char [12] mtime; // 136 88 - char [8] chksum; // 148 94 - Typeflag typeflag; // 156 9C - char [100] linkname; // 157 9D - char [6] magic; // 257 101 - char [2] version_; // 263 107 - char [32] uname; // 265 109 - char [32] gname; // 297 129 - char [8] devmajor; // 329 149 - char [8] devminor; // 337 151 - char [155] prefix; // 345 159 - char [12] padding; // 500 1F4 - //dfmt on - - private static ubyte[] fillWith(ArchiveEntry file, ubyte[] block) - in (block.length >= 512) - { - import std.algorithm : min; - import std.string : toStringz; - - version (Posix) - { - char[512] buf; - } - - block[0 .. 
512] = 0; - - TarHeader* th = cast(TarHeader*)(&block[0]); - - // prefix and name - const name = file.path; - const prefLen = name.length > 100 ? cast(ptrdiff_t) name.length - 100 : 0; - if (prefLen) - th.prefix[0 .. prefLen] = name[0 .. prefLen]; - th.name[0 .. name.length - prefLen] = name[prefLen .. $]; - - th.typeflag = toTypeflag(file.type); - - if (th.typeflag == Typeflag.symLink) - { - const lname = file.linkname; - const len = min(lname.length, cast(ptrdiff_t) th.linkname.length - 1); - th.linkname[0 .. len] = lname[0 .. len]; - } - - version (Posix) - { - import core.sys.posix.grp; - import core.sys.posix.pwd; - import core.stdc.string : strlen; - import std.conv : octal; - - const uid = file.ownerId; - const gid = file.groupId; - - toOctalString(file.attributes & octal!7777, th.mode[0 .. $ - 1]); - toOctalString(uid, th.uid[0 .. $ - 1]); - toOctalString(gid, th.gid[0 .. $ - 1]); - - if (uid != 0) - { - passwd pwdbuf; - passwd* pwd; - enforce(getpwuid_r(uid, &pwdbuf, buf.ptr, buf.length, &pwd) == 0, "Could not read user name"); - const urlen = min(strlen(pwd.pw_name), th.uname.length); - th.uname[0 .. urlen] = pwd.pw_name[0 .. urlen]; - } - - if (gid != 0) - { - group grpbuf; - group* grp; - enforce(getgrgid_r(gid, &grpbuf, buf.ptr, buf.length, &grp) == 0, "Could not read group name"); - const grlen = min(strlen(grp.gr_name), th.gname.length); - th.gname[0 .. grlen] = grp.gr_name[0 .. grlen]; - } - } - else version (Windows) - { - // default to mode 644 which is the most common on UNIX - th.mode[0 .. 7] = "0000644"; - - // TODO: https://docs.microsoft.com/fr-fr/windows/win32/secauthz/finding-the-owner-of-a-file-object-in-c-- - } - - toOctalString(file.size, th.size[0 .. $ - 1]); - const mtime = file.timeLastModified().toUnixTime!long(); - toOctalString(mtime, th.mtime[0 .. $ - 1]); - - th.magic = "ustar\0"; - th.version_ = "00"; - - const chksum = th.unsignedChecksum(); - - toOctalString(chksum, th.chksum[0 .. $ - 1]); - - return block[512 .. 
$]; - } - - private uint unsignedChecksum() - { - uint sum = 0; - sum += unsignedSum(name); - sum += unsignedSum(mode); - sum += unsignedSum(uid); - sum += unsignedSum(gid); - sum += unsignedSum(size); - sum += unsignedSum(mtime); - sum += 32 * 8; - sum += cast(uint) typeflag; - sum += unsignedSum(linkname); - sum += unsignedSum(magic); - sum += unsignedSum(version_); - sum += unsignedSum(uname); - sum += unsignedSum(gname); - sum += unsignedSum(devmajor); - sum += unsignedSum(devminor); - sum += unsignedSum(prefix); - return sum; - } -} - -static assert(TarHeader.sizeof == 512); - -private enum Typeflag : ubyte -{ - normalNul = 0, - normal = '0', - hardLink = '1', - symLink = '2', - charSpecial = '3', - blockSpecial = '4', - directory = '5', - fifo = '6', - contiguousFile = '7', - posixExtended = 'g', - extended = 'x', -} - -Typeflag toTypeflag(EntryType type) -{ - final switch (type) - { - case EntryType.regular: - return Typeflag.normal; - case EntryType.directory: - return Typeflag.directory; - case EntryType.symlink: - return Typeflag.symLink; - } -} - -EntryType toEntryType(Typeflag flag) -{ - switch (flag) - { - case Typeflag.directory: - return EntryType.directory; - case Typeflag.symLink: - return EntryType.symlink; - default: - return EntryType.regular; - } -} - -version (Posix) -{ - // stat.st_mode part corresponding to file type - uint posixModeFileType(Typeflag flag) - { - import std.conv : octal; - import std.format : format; - - switch (flag) - { - case Typeflag.normalNul: - case Typeflag.normal: - return octal!100_000; - case Typeflag.hardLink: - // is regular file right for hard links? - return octal!100_000; - case Typeflag.symLink: - return octal!120_000; - case Typeflag.charSpecial: - return octal!20_000; - case Typeflag.blockSpecial: - return octal!60_000; - case Typeflag.directory: - return octal!40_000; - case Typeflag.fifo: - return octal!10_000; - case Typeflag.contiguousFile: - // is regular file right for contiguous files? - return octal!100_000; - default: - throw new Exception(format!"Unexpected Tar entry type: '%s'"(cast(char) flag)); - } - } -} - private uint unsignedSum(const(char)[] buf) { uint sum; @@ -740,7 +1120,8 @@ private void toOctalString(T)(T val, char[] buf) { import std.format : sformat; - sformat(buf, "%0*o", buf.length, val); + sformat(buf[0 .. $ - 1], "%0*o", buf.length - 1, val); + buf[$ - 1] = '\0'; } private T parseOctalString(T = uint)(const(char)[] octal) @@ -759,12 +1140,13 @@ private T parseOctalString(T = uint)(const(char)[] octal) return parse!(T)(src, 8); } -private char[] parseString(char[] chars) +private inout(char)[] parseString(inout(char)[] chars) { - import core.stdc.string : strlen; - - const len = strlen(chars.ptr); - return chars[0 .. len]; + // function similar to strnlen, but operate on slices. + size_t count; + while (count < chars.length && chars[count] != '\0') + count++; + return chars[0 .. count]; } private size_t next512(size_t off) diff --git a/src/squiz_box/priv.d b/src/squiz_box/priv.d index f9cbe73..52ec2e4 100644 --- a/src/squiz_box/priv.d +++ b/src/squiz_box/priv.d @@ -63,7 +63,7 @@ interface Cursor /// Read T.sizeof data and returns it as a T. /// Similar to getValue!T but the value is passed as pointer to be filled in. /// Prefer this form for greater values (e.g. 
dozens of bytes) - void readValue(T)(T* val) if (!isDynamicArray!T) + void readValue(T)(scope T* val) if (!isDynamicArray!T) { import std.exception : enforce; @@ -75,7 +75,7 @@ interface Cursor T[] read(T)(T[] buffer) { - auto ptr = cast(ubyte)&buffer[0]; + auto ptr = cast(ubyte*)&buffer[0]; auto arr = ptr[0 .. buffer.length * T.sizeof]; auto res = read(arr); enforce(res.length % T.sizeof == 0, "Could not read aligned bytes for " ~ T.stringof); diff --git a/src/squiz_box/squiz.d b/src/squiz_box/squiz.d index fdc754e..b448290 100644 --- a/src/squiz_box/squiz.d +++ b/src/squiz_box/squiz.d @@ -74,6 +74,20 @@ template isByteRange(BR) static assert(isByteRange!ByteRange); +private struct EmptyByteRange +{ + enum bool empty = true; + enum const(ubyte)[] front = []; + void popFront() + { + } +} + +static assert(isByteRange!EmptyByteRange); + +/// An empty range of bytes +enum emptyByteRange = EmptyByteRange(); + /// Exception thrown when inconsistent data is given to /// a decompression algorithm. /// I.e. the data was not compressed with the corresponding algorithm @@ -573,7 +587,7 @@ private struct Squiz(I, A, Flag!"endStream" endStream) const len = min(chunkBuffer.length - chunk.length, maxLen); stream.output = chunkBuffer[chunk.length .. chunk.length + len]; - const streamEnded = algo.process(stream, cast(Flag!"lastChunk") (input.empty && lastInput)); + const streamEnded = algo.process(stream, cast(Flag!"lastChunk")(input.empty && lastInput)); chunk = chunkBuffer[0 .. $ - stream.output.length]; maxLen -= len; @@ -1157,7 +1171,7 @@ struct Inflate assert( (windowBits == 0 && format == ZlibFormat.zlib) || (9 <= windowBits && windowBits <= 15), - "inconsistent windowBits" + "inconsistent windowBits" ); int wb = windowBits; final switch (format) @@ -1460,7 +1474,7 @@ version (HaveSquizBzip2) enforce( (action == BZ_RUN && res == BZ_RUN_OK) || (action == BZ_FINISH && res == BZ_FINISH_OK), - "Bzip2 compress failed with code: " ~ bzResultToString(res) + "Bzip2 compress failed with code: " ~ bzResultToString(res) ); return No.streamEnded; diff --git a/test/archive.d b/test/archive.d index 9616684..4d04efd 100644 --- a/test/archive.d +++ b/test/archive.d @@ -3,9 +3,10 @@ module test.archive; import test.util; import squiz_box; -import std.typecons; import std.digest; import std.digest.sha; +import std.stdio; +import std.typecons; string[] filesForArchive() { @@ -24,6 +25,12 @@ void testTarArchiveContent(string archivePath, Flag!"testModes" testModes, Flag! import std.regex : matchFirst; import std.string : splitLines; + if (!findProgram("tar")) + { + stderr.writeln("tar not found: skipping assertions"); + return; + } + if (testModes) { const line1 = `^-rw-r--r-- .+ 7 .+ file1.txt$`; @@ -32,8 +39,20 @@ void testTarArchiveContent(string archivePath, Flag!"testModes" testModes, Flag! 
`^-rw-rw-rw- .+ 26 .+ folder.+chmod 666.txt$` : `^-rw-r--r-- .+ 26 .+ folder.+chmod 666.txt$`; - auto res = execute(["tar", "-tvf", archivePath]); + auto res = execute(["tar", "-tvf", archivePath], ["MM_CHARSET":"UTF-8"]); assert(res.status == 0); + + version (Windows) + { + import std.encoding : transcode; + + // some tar versions of windows use Latin1 encoding + dchar[] buf; + foreach (char c; res.output) + buf ~= cast(dchar)c; + transcode(buf, res.output); + } + const lines = res.output.splitLines(); assert(lines.length == 3); assert(matchFirst(lines[0], line1)); @@ -70,6 +89,12 @@ void testZipArchiveContent(string archivePath) import std.regex : matchFirst; import std.string : splitLines; + if (!findProgram("unzip")) + { + stderr.writeln("unzip not found: skipping assertions"); + return; + } + const line1 = `^\s*7\s.+file1.txt$`; const line2 = `^\s*3521\s.+file 2.txt$`; const line3 = `^\s*26\s.+folder/chmod 666.txt$`; @@ -82,25 +107,19 @@ void testZipArchiveContent(string archivePath) assert(matchFirst(lines[4], line2)); assert(matchFirst(lines[5], line3)); - const archiveShell = escapeShellFileName(archivePath); - auto sha1sumFile(string filename) { - const fileShell = escapeShellFileName(filename); - return executeShell("unzip -p " ~ archiveShell ~ " " ~ fileShell ~ " | sha1sum"); + return sha1sumProcessStdout(["unzip", "-p", archivePath, filename]); } - res = sha1sumFile("file1.txt"); - assert(res.status == 0); - assert(res.output.canFind("38505a984f71c07843a5f3e394ada2bf4c7b6abc")); + auto sha1 = sha1sumFile("file1.txt"); + assert(sha1 == "38505A984F71C07843A5F3E394ADA2BF4C7B6ABC"); - res = sha1sumFile("file 2.txt"); - assert(res.status == 0); - assert(res.output.canFind("01fa4c5c29a58449eef1665658c48c0d7829c45f")); + sha1 = sha1sumFile("file 2.txt"); + assert(sha1 == "01FA4C5C29A58449EEF1665658C48C0D7829C45F"); - res = sha1sumFile("folder/chmod 666.txt"); - assert(res.status == 0); - assert(res.output.canFind("3e31b8e6b2bbba1edfcfdca886e246c9e120bbe3")); + sha1 = sha1sumFile("folder/chmod 666.txt"); + assert(sha1 == "3E31B8E6B2BBBA1EDFCFDCA886E246C9E120BBE3"); } void testExtractedFiles(DM)(auto ref DM dm, Flag!"mode666" mode666) @@ -175,8 +194,9 @@ unittest } else { - const attr644 = 0; - const attr666 = 0; + import core.sys.windows.winnt : FILE_ATTRIBUTE_NORMAL; + const attr644 = FILE_ATTRIBUTE_NORMAL; + const attr666 = FILE_ATTRIBUTE_NORMAL; } const expectedEntries = [ @@ -412,32 +432,31 @@ version (HaveSquizLzma) unittest { import std.algorithm; + import std.net.curl : byChunk, CurlException; import std.file; - import std.net.curl; import std.path; - import std.range; import std.stdio; const url = "https://github.com/rtbo/squiz-box/archive/refs/tags/v0.2.1.zip"; - - auto file = buildPath(tempDir(), "squiz-box-0.2.1.zip"); auto dir = buildPath(tempDir(), "squiz-box-0.2.1"); - download(url, file); mkdirRecurse(dir); - - version (Posix) + scope (exit) { - scope (exit) - remove(file); - scope (exit) - rmdirRecurse(dir); + rmdirRecurse(dir); } - unboxZip(File(file, "rb"), Yes.removePrefix) - .each!(e => e.extractTo(dir)); + try + { + byChunk(url) + .unboxZip(Yes.removePrefix) + .each!(e => e.extractTo(dir)); - assert(isFile(buildPath(dir, "meson.build"))); + assert(isFile(buildPath(dir, "meson.build"))); + assert(isFile(buildPath(dir, "test", "archive.d"))); + } + catch (CurlException) + {} } @("Extract 7z") diff --git a/test/tar.d b/test/tar.d new file mode 100644 index 0000000..f70cfce --- /dev/null +++ b/test/tar.d @@ -0,0 +1,229 @@ +module test.tar; + +import squiz_box; + 
+import std.algorithm; +import std.array; +import std.conv; +import std.range; +import std.stdio; +import std.string; +import std.typecons; + +enum blockLen = 512; +enum nameLen = 100; +enum unameLen = 32; +enum prefixLen = 155; + +enum char[8] posixMagic = "ustar\x0000"; +enum char[8] gnuMagic = "ustar \x00"; + +struct Block +{ + // dfmt off + char [nameLen] name; // 0 0 + char [8] mode; // 100 64 + char [8] uid; // 108 6C + char [8] gid; // 116 74 + char [12] size; // 124 7C + char [12] mtime; // 136 88 + char [8] chksum; // 148 94 + char typeflag; // 156 9C + char [nameLen] linkname; // 157 9D + char [8] magic; // 257 101 + char [unameLen] uname; // 265 109 + char [unameLen] gname; // 297 129 + char [8] devmajor; // 329 149 + char [8] devminor; // 337 151 + char [prefixLen] prefix; // 345 159 + char [12] padding; // 500 1F4 + //dfmt on +} + +@("read/write name 100 chars") +unittest +{ + const content = cast(ByteChunk)("the content of the file".representation); + const filename = "long-path".repeat(9).join("/") ~ "/123456.txt"; + + assert(filename.length == nameLen); + + // dfmt off + const tarData = only( + infoEntry(BoxEntryInfo( + path: filename, + type: EntryType.regular, + size: content.length, + attributes: octal!"100644", + ), + only(content)) + ) + .boxTar() + .join(); + // dfmt on + + // 0 file block + // 1 file data + // 2 footer (x2) + assert(tarData.length == 4 * blockLen); + + const(Block)* blk = cast(const(Block)*)&tarData[0]; + assert(blk.typeflag == '0'); + assert(blk.magic == posixMagic); + assert(blk.name == filename); + assert(blk.prefix[0] == '\0'); + assert(tarData[1 * blockLen .. $].startsWith(content)); + + assert(tarData[2 * blockLen .. $].all!"a == 0"); + + const entries = only(tarData) + .unboxTar() + .map!(e => tuple(e.path, e.type, e.size, cast(ByteChunk) e.readContent())) + .array; + + assert(entries.length == 1); + assert(entries[0] == tuple( + filename, + EntryType.regular, + content.length, + content, + )); +} + +@("read/write split prefix") +unittest +{ + const content = cast(ByteChunk)("the content of the file".representation); + const filename = "long-path".repeat(11).join("/") ~ "/file.txt"; + + assert(filename.length > nameLen && filename.length < nameLen + prefixLen); + + // dfmt off + const tarData = only( + infoEntry(BoxEntryInfo( + path: filename, + type: EntryType.regular, + size: content.length, + attributes: octal!"100644", + ), + only(content)) + ) + .boxTar() + .join(); + // dfmt on + + // 0 file block + // 1 file data + // 2 footer (x2) + assert(tarData.length == 4 * blockLen); + + const(Block)* blk = cast(const(Block)*)&tarData[0]; + assert(blk.typeflag == '0'); + assert(blk.magic == posixMagic); + assert(blk.prefix[0 .. 9] == "long-path"); + assert(tarData[1 * blockLen .. $].startsWith(content)); + + assert(tarData[2 * blockLen .. 
$].all!"a == 0"); + + const entries = only(tarData) + .unboxTar() + .map!(e => tuple(e.path, e.type, e.size, cast(ByteChunk) e.readContent())) + .array; + + assert(entries.length == 1); + assert(entries[0] == tuple( + filename, + EntryType.regular, + content.length, + content, + )); +} + +@("read/write gnulong #17") +unittest +{ + const content = cast(ByteChunk)("the content of the file".representation); + const filename = "long-path".repeat(55).join("/") ~ "/file.txt"; + const linkname = "long-path".repeat(55).join("/") ~ "/link.txt"; + + assert(filename.length > nameLen + prefixLen); + assert(linkname.length > nameLen + prefixLen); + + // dfmt off + const tarData = only( + infoEntry(BoxEntryInfo( + path: filename, + type: EntryType.regular, + size: content.length, + attributes: octal!"100644", + ), + only(content)), + infoEntry(BoxEntryInfo( + path: linkname, + type: EntryType.symlink, + linkname: filename, + attributes: octal!"100644", + ))) + .boxTar() + .join(); + // dfmt on + + // 0 file name block + // 1 file name data (x2) + // 3 file block + // 4 file data + // 5 link name block + // 6 link name data (x2) + // 8 link linkname block + // 9 link linkname data (x2) + // 11 link block + // 12 footer (x2) + assert(tarData.length == 14 * blockLen); + + const(Block)* blk = cast(const(Block)*)&tarData[0]; + assert(blk.typeflag == 'L'); + assert(blk.magic == gnuMagic); + assert(tarData[blockLen .. $].startsWith(filename.representation)); + + blk = cast(const(Block)*)&tarData[3 * blockLen]; + assert(blk.typeflag == '0'); + assert(blk.magic == posixMagic); + assert(tarData[4 * blockLen .. $].startsWith(content)); + + blk = cast(const(Block)*)&tarData[5 * blockLen]; + assert(blk.typeflag == 'L'); + assert(blk.magic == gnuMagic); + assert(tarData[6 * blockLen .. $].startsWith(linkname.representation)); + + blk = cast(const(Block)*)&tarData[8 * blockLen]; + assert(blk.typeflag == 'K'); + assert(blk.magic == gnuMagic); + assert(tarData[9 * blockLen .. $].startsWith(filename.representation)); + + blk = cast(const(Block)*)&tarData[11 * blockLen]; + assert(blk.typeflag == '2'); + assert(blk.magic == posixMagic); + + assert(tarData[14 * blockLen .. $].all!"a == 0"); + + const entries = only(tarData) + .unboxTar() + .map!(e => tuple(e.path, e.type, e.linkname, e.size, cast(ByteChunk) e.readContent())) + .array; + + assert(entries.length == 2); + assert(entries[0] == tuple( + filename, + EntryType.regular, + cast(string) null, + content.length, + content, + )); + assert(entries[1] == tuple( + linkname, + EntryType.symlink, + filename, + ulong(0), + cast(ByteChunk) null, + )); +} diff --git a/test/util.d b/test/util.d index cc22cd0..909bda1 100644 --- a/test/util.d +++ b/test/util.d @@ -50,6 +50,51 @@ string testPath(Args...)(Args args) return buildNormalizedPath(__FILE_FULL_PATH__.dirName(), args); } +/// Find a program executable name in the system PATH and return its full path +string findProgram(in string name) +{ + import std.process : environment; + + version (Windows) + { + import std.algorithm : endsWith; + + const efn = name.endsWith(".exe") ? name : name ~ ".exe"; + } + else + { + const efn = name; + } + + return searchInEnvPath(environment["PATH"], efn); +} + +/// environment variable path separator +version (Posix) + enum envPathSep = ':'; +else version (Windows) + enum envPathSep = ';'; +else + static assert(false); + +/// Search for filename in the envPath variable content which can +/// contain multiple paths separated with sep depending on platform. 
+/// Returns: null if the file can't be found. +string searchInEnvPath(in string envPath, in string filename, in char sep = envPathSep) +{ + import std.algorithm : splitter; + import std.file : exists; + import std.path : buildPath; + + foreach (dir; splitter(envPath, sep)) + { + const filePath = buildPath(dir, filename); + if (exists(filePath)) + return filePath; + } + return null; +} + /// Defines a path in a temporary location /// and delete the file or directory (recursively) at that path when going out of scope. struct DeleteMe @@ -83,7 +128,7 @@ struct DeleteMe } // used in place of DeleteMe if needed to inspect the file after the test -struct Path +struct DontDeleteMe { this(string basename, string ext) { @@ -287,8 +332,7 @@ unittest assert(getSize(dm.path) == len); } -/// Generate potentially large but repetitive data constituted of the same phrase repeated -/// over and over until byteSize is written out. +/// Generate potentially very large amount of binary random data until byteSize is written out auto generateRandomData(size_t byteSize, uint seed = unpredictableSeed(), size_t chunkSize = 8192) { auto eng = Random(seed); diff --git a/tools/stupid.d.in b/tools/stupid.d.in new file mode 100644 index 0000000..bdc389e --- /dev/null +++ b/tools/stupid.d.in @@ -0,0 +1,331 @@ +// dfmt off +/* + This is a modified version of silly for Dopamine that allows integration out of DUB. + This file actually is a template used by stupid_gen +*/ +/* + * Silly is a test runner for the D programming language + * + * Report bugs and propose new features in project's repository: https://gitlab.com/AntonMeep/silly + */ + +/* SPDX-License-Identifier: ISC */ +/* Copyright (c) 2018-2019, Anton Fediushin */ + +module stupid; + +version(unittest): + +// static if(!__traits(compiles, () {static import dub_test_root;})) { +// static assert(false, "Couldn't find 'dub_test_root'. Make sure you are running tests with `dub test`"); +// } else { +// static import dub_test_root; +// } + +import core.time : Duration, MonoTime; +import std.ascii : newline; +import std.meta : AliasSeq; +import std.stdio : stdout; + +// TESTED MODULES HERE + +version (NoStupidMain) +{ +} +else +{ + void main() {} +} + +shared static this() { + import core.runtime : Runtime, UnitTestResult; + import std.getopt : getopt; + import std.parallelism : TaskPool, totalCPUs; + + Runtime.extendedModuleUnitTester = () { + bool verbose; + shared ulong passed, failed; + uint threads; + string include, exclude; + + auto args = Runtime.args; + auto getoptResult = args.getopt( + "no-colours", + "Disable colours", + &noColours, + "t|threads", + "Number of worker threads. 
0 to auto-detect (default)", + &threads, + "i|include", + "Run tests if their name matches specified regular expression", + &include, + "e|exclude", + "Skip tests if their name matches specified regular expression", + &exclude, + "v|verbose", + "Show verbose output (full stack traces, location and durations)", + &verbose, + ); + + if(getoptResult.helpWanted) { + import std.string : leftJustifier; + + stdout.writefln("Usage:%1$s\tdub test -- %1$s%1$sOptions:", newline); + + foreach(option; getoptResult.options) + stdout.writefln(" %s\t%s\t%s", option.optShort, option.optLong.leftJustifier(20), option.help); + + return UnitTestResult(0, 0, false, false); + } + + if(!threads) + threads = totalCPUs; + + Console.init; + + Test[] tests; + + // Test discovery + foreach(m; allModules) { + import std.traits : fullyQualifiedName; + static if(__traits(isModule, m)) { + alias module_ = m; + } else { + import std.meta : Alias; + // For cases when module contains member of the same name + alias module_ = Alias!(__traits(parent, m)); + } + + // Unittests in the module + foreach(test; __traits(getUnitTests, module_)) + tests ~= Test(fullyQualifiedName!test, getTestName!test, getTestLocation!test, &test); + + // Unittests in structs and classes + foreach(member; __traits(derivedMembers, module_)) + static if(__traits(compiles, __traits(getMember, module_, member)) && + __traits(compiles, __traits(isTemplate, __traits(getMember, module_, member))) && + !__traits(isTemplate, __traits(getMember, module_, member)) && + __traits(compiles, __traits(parent, __traits(getMember, module_, member))) && + __traits(isSame, __traits(parent, __traits(getMember, module_, member)), module_) && + __traits(compiles, __traits(getUnitTests, __traits(getMember, module_, member)))) + foreach(test; __traits(getUnitTests, __traits(getMember, module_, member))) + tests ~= Test(fullyQualifiedName!test, getTestName!test, getTestLocation!test, &test); + } + + auto started = MonoTime.currTime; + + with(new TaskPool(threads-1)) { + import core.atomic : atomicOp; + import std.regex : matchFirst; + + foreach(test; parallel(tests)) { + if((!include && !exclude) || + (include && !(test.fullName ~ " " ~ test.testName).matchFirst(include).empty) || + (exclude && (test.fullName ~ " " ~ test.testName).matchFirst(exclude).empty)) { + auto result = test.executeTest; + result.writeResult(verbose); + + atomicOp!"+="(result.succeed ? passed : failed, 1UL); + } + } + + finish(true); + } + + stdout.writeln; + stdout.writefln("%s: %s passed, %s failed in %d ms", + Console.emphasis("Summary"), + Console.colour(passed, Colour.ok), + Console.colour(failed, failed ? Colour.achtung : Colour.none), + (MonoTime.currTime - started).total!"msecs", + ); + + return UnitTestResult(passed + failed, passed, false, false); + }; +} + +void writeResult(TestResult result, in bool verbose) { + import std.format : formattedWrite; + import std.algorithm : canFind; + import std.range : drop; + import std.string : lastIndexOf, lineSplitter; + + auto writer = stdout.lockingTextWriter; + + writer.formattedWrite(" %s %s %s", + result.succeed + ? 
Console.colour("✓", Colour.ok) + : Console.colour("✗", Colour.achtung), + Console.emphasis(result.test.fullName[0..result.test.fullName.lastIndexOf('.')].truncateName(verbose)), + result.test.testName, + ); + + if(verbose) { + writer.formattedWrite(" (%.3f ms)", (cast(real) result.duration.total!"usecs") / 10.0f ^^ 3); + + if(result.test.location != TestLocation.init) { + writer.formattedWrite(" [%s:%d:%d]", + result.test.location.file, + result.test.location.line, + result.test.location.column); + } + } + + writer.put(newline); + + foreach(th; result.thrown) { + writer.formattedWrite(" %s thrown from %s on line %d: %s%s", + th.type, + th.file, + th.line, + th.message.lineSplitter.front, + newline, + ); + foreach(line; th.message.lineSplitter.drop(1)) + writer.formattedWrite(" %s%s", line, newline); + + writer.formattedWrite(" --- Stack trace ---%s", newline); + if(verbose) { + foreach(line; th.info) + writer.formattedWrite(" %s%s", line, newline); + } else { + for(size_t i = 0; i < th.info.length && !th.info[i].canFind(__FILE__); ++i) + writer.formattedWrite(" %s%s", th.info[i], newline); + } + } +} + +TestResult executeTest(Test test) { + import core.exception : AssertError, OutOfMemoryError; + auto ret = TestResult(test); + auto started = MonoTime.currTime; + + try { + scope(exit) ret.duration = MonoTime.currTime - started; + test.ptr(); + ret.succeed = true; + } catch(Throwable t) { + if(!(cast(Exception) t || cast(AssertError) t)) + throw t; + + foreach(th; t) { + immutable(string)[] trace; + try { + foreach(i; th.info) + trace ~= i.idup; + } catch(OutOfMemoryError) { // TODO: Actually fix a bug instead of this workaround + trace ~= " Failed to get stack trace, see https://gitlab.com/AntonMeep/silly/issues/31"; + } + + ret.thrown ~= Thrown(typeid(th).name, th.message.idup, th.file, th.line, trace); + } + } + + return ret; +} + +struct TestLocation { + string file; + size_t line, column; +} + +struct Test { + string fullName, + testName; + + TestLocation location; + + void function() ptr; +} + +struct TestResult { + Test test; + bool succeed; + Duration duration; + + immutable(Thrown)[] thrown; +} + +struct Thrown { + string type, + message, + file; + size_t line; + immutable(string)[] info; +} + +__gshared bool noColours; + +enum Colour { + none, + ok = 32, + achtung = 31, +} + +static struct Console { + static void init() { + if(noColours) { + return; + } else { + version(Posix) { + import core.sys.posix.unistd; + noColours = isatty(STDOUT_FILENO) == 0; + } else version(Windows) { + import core.sys.windows.winbase : GetStdHandle, STD_OUTPUT_HANDLE, INVALID_HANDLE_VALUE; + import core.sys.windows.wincon : SetConsoleOutputCP, GetConsoleMode, SetConsoleMode; + import core.sys.windows.windef : DWORD; + import core.sys.windows.winnls : CP_UTF8; + + SetConsoleOutputCP(CP_UTF8); + + auto hOut = GetStdHandle(STD_OUTPUT_HANDLE); + DWORD originalMode; + + // TODO: 4 stands for ENABLE_VIRTUAL_TERMINAL_PROCESSING which should be + // in druntime v2.082.0 + noColours = hOut == INVALID_HANDLE_VALUE || + !GetConsoleMode(hOut, &originalMode) || + !SetConsoleMode(hOut, originalMode | 4); + } + } + } + + static string colour(T)(T t, Colour c = Colour.none) { + import std.conv : text; + + return noColours ? text(t) : text("\033[", cast(int) c, "m", t, "\033[m"); + } + + static string emphasis(string s) { + return noColours ? 
s : "\033[1m" ~ s ~ "\033[m"; + } +} + +string getTestName(alias test)() { + string name = __traits(identifier, test); + + foreach(attribute; __traits(getAttributes, test)) { + static if(is(typeof(attribute) : string)) { + name = attribute; + break; + } + } + + return name; +} + +string truncateName(string s, bool verbose = false) { + import std.algorithm : max; + import std.string : indexOf; + return s.length > 30 && !verbose + ? s[max(s.indexOf('.', s.length - 30), s.length - 30) .. $] + : s; +} + +TestLocation getTestLocation(alias test)() { + // test if compiler is new enough for getLocation (since 2.088.0) + static if(is(typeof(__traits(getLocation, test)))) + return TestLocation(__traits(getLocation, test)); + else + return TestLocation.init; +} diff --git a/tools/stupid_gen.d b/tools/stupid_gen.d new file mode 100644 index 0000000..b8cb467 --- /dev/null +++ b/tools/stupid_gen.d @@ -0,0 +1,99 @@ +/// Discover unittests and generate stupid test driver +module tools.stupid_gen; + +import std.algorithm; +import std.array; +import std.getopt; +import std.file; +import std.path; +import std.stdio; +import std.string; + +/// return module name of the D file at filename +/// only if it contains "unittest" +string getUnittestMod(string filename) +{ + string mod; + auto file = File(filename, "r"); + foreach (l; file.byLine.map!(l => l.strip)) + { + // reasonable assumption about how module is defined + if (!mod && l.startsWith("module ") && l.endsWith(";")) + { + mod = l["module ".length .. $ - 1].strip().idup; + continue; + } + if (mod && l.canFind("unittest")) + { + return mod; + } + } + return null; +} + +int main(string[] args) +{ + string root = "."; + string[] exclusions; + + auto helpInfo = getopt(args, "root", &root, "exclude", &exclusions); + if (helpInfo.helpWanted) + { + defaultGetoptPrinter("Generate stupid test driver.", helpInfo.options); + return 0; + } + + string[] mods; + + string[] dFiles = args[1 .. $]; + if (args.length == 0) + { + dFiles = dirEntries(root, SpanMode.depth).filter!(f => f.name.endsWith(".d")) + .map!(e => e.name) + .array; + } + + outer: foreach (f; dFiles) + { + foreach (ex; exclusions) + { + if (f.canFind(ex)) + continue outer; + } + + const m = getUnittestMod(f); + if (m) + { + mods ~= m; + } + } + + mods = mods.sort().uniq().array; + + const tmplate = import("stupid.d.in"); + + foreach (inl; lineSplitter(tmplate)) + { + if (!inl.startsWith("// TESTED MODULES HERE")) + { + writeln(inl); + continue; + } + + foreach (m; mods) + { + writefln("import %s;", m); + } + writefln(""); + writefln("alias allModules = AliasSeq!("); + foreach (m; mods) + { + writefln(" %s,", m); + } + writefln(");"); + + } + writefln(""); + + return 0; +}