From 1f602c089b54ad543c8ad89add716994bbdecfa3 Mon Sep 17 00:00:00 2001 From: Doug Mayer Date: Wed, 15 Jan 2025 08:48:35 -0600 Subject: [PATCH 1/2] Add a contentsByteOffset to the header emitted on entry events. When emitting the header for an entry event, the contentsByteOffset property is added to the header object. This property is the byte offset of the start of the entry's file contents in the tar stream, after the header. This is useful when building an index of a tar's contents, for later seeking to a specific entry's contents in the tar stream. Headers can be variable length based on the tar implementation. --- extract.js | 3 ++- test/extract.js | 48 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/extract.js b/extract.js index 0ed9f82..e301070 100644 --- a/extract.js +++ b/extract.js @@ -169,8 +169,9 @@ class Extract extends Writable { this._stream = this._createStream() this._missing = this._header.size + const header = { ...this._header, contentsByteOffset: this._buffer.shifted } - this.emit('entry', this._header, this._stream, this._unlockBound) + this.emit('entry', header, this._stream, this._unlockBound) return true } diff --git a/test/extract.js b/test/extract.js index 26d20e7..9fddfc9 100644 --- a/test/extract.js +++ b/test/extract.js @@ -24,7 +24,8 @@ test('one-file', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null + pax: null, + contentsByteOffset: 512 }) stream.pipe(concat(function (data) { @@ -61,7 +62,8 @@ test('chunked-one-file', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null + pax: null, + contentsByteOffset: 512 }) stream.pipe(concat(function (data) { @@ -111,7 +113,8 @@ test('multi-file', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null + pax: null, + contentsByteOffset: 512 }) extract.on('entry', onfile2) @@ -135,7 +138,8 @@ test('multi-file', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null + pax: null, + contentsByteOffset: 1536 }) stream.pipe(concat(function (data) { @@ -178,7 +182,8 @@ test('chunked-multi-file', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null + pax: null, + contentsByteOffset: 512 }) extract.on('entry', onfile2) @@ -202,7 +207,8 @@ test('chunked-multi-file', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null + pax: null, + contentsByteOffset: 1536 }) stream.pipe(concat(function (data) { @@ -233,7 +239,8 @@ test('pax', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: { path: 'pax.txt', special: 'sauce' } + pax: { path: 'pax.txt', special: 'sauce' }, + contentsByteOffset: 1536 }) stream.pipe(concat(function (data) { @@ -337,7 +344,8 @@ test('long-name', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null + pax: null, + contentsByteOffset: 512 }) stream.pipe(concat(function (data) { @@ -374,7 +382,8 @@ test('unicode-bsd', function (t) { // can unpack a bsdtar unicoded tarball gname: 'staff', devmajor: 0, devminor: 0, - pax: { 'SCHILY.dev': '16777217', 'SCHILY.ino': '3599143', 'SCHILY.nlink': '1', atime: '1387589077', ctime: '1387588646', path: 'høllø.txt' } + pax: { 'SCHILY.dev': '16777217', 'SCHILY.ino': '3599143', 'SCHILY.nlink': '1', atime: '1387589077', ctime: '1387588646', path: 'høllø.txt' }, + contentsByteOffset: 1536 }) stream.pipe(concat(function (data) { @@ -411,7 +420,8 @@ test('unicode', function (t) { // can unpack a bsdtar unicoded tarball gname: 'staff', devmajor: 0, devminor: 0, - pax: { path: 'høstål.txt' } + pax: { path: 'høstål.txt' }, + contentsByteOffset: 1536 }) stream.pipe(concat(function (data) { @@ -528,7 +538,8 @@ test('base 256 size', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null + pax: null, + contentsByteOffset: 512 }) cb() }) @@ -561,7 +572,8 @@ test('latin-1', function (t) { // can unpack filenames encoded in latin-1 gname: 'root', devmajor: 0, devminor: 0, - pax: null + pax: null, + contentsByteOffset: 512 }) stream.pipe(concat(function (data) { @@ -618,7 +630,8 @@ test('gnu', function (t) { // can correctly unpack gnu-tar format gname: 'mygroup', devmajor: 0, devminor: 0, - pax: null + pax: null, + contentsByteOffset: 512 }) stream.pipe(concat(function (data) { @@ -659,7 +672,8 @@ test('gnu-incremental', function (t) { gname: 'mygroup', devmajor: 0, devminor: 0, - pax: null + pax: null, + contentsByteOffset: 512 }) stream.pipe(concat(function (data) { @@ -730,7 +744,8 @@ test('unknown format attempts to extract if allowed', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null + pax: null, + contentsByteOffset: 512 }) extract.on('entry', onfile2) @@ -754,7 +769,8 @@ test('unknown format attempts to extract if allowed', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null + pax: null, + contentsByteOffset: 1536 }) stream.pipe(concat(function (data) { From 00659645754049cf134d75dbecaddb4c3753f746 Mon Sep 17 00:00:00 2001 From: Doug Mayer Date: Wed, 15 Jan 2025 09:08:57 -0600 Subject: [PATCH 2/2] Rename contentsByteOffset to byteOffset and include in all headers. --- extract.js | 5 ++-- headers.js | 1 + test/extract.js | 66 +++++++++++++++++++++++++------------------------ 3 files changed, 38 insertions(+), 34 deletions(-) diff --git a/extract.js b/extract.js index e301070..6bed3d5 100644 --- a/extract.js +++ b/extract.js @@ -149,6 +149,8 @@ class Extract extends Writable { if (!this._header) return true + this._header.byteOffset = this._buffer.shifted + switch (this._header.type) { case 'gnu-long-path': case 'gnu-long-link-path': @@ -169,9 +171,8 @@ class Extract extends Writable { this._stream = this._createStream() this._missing = this._header.size - const header = { ...this._header, contentsByteOffset: this._buffer.shifted } - this.emit('entry', header, this._stream, this._unlockBound) + this.emit('entry', this._header, this._stream, this._unlockBound) return true } diff --git a/headers.js b/headers.js index aa1a6c4..c6ecf96 100644 --- a/headers.js +++ b/headers.js @@ -137,6 +137,7 @@ exports.decode = function decode (buf, filenameEncoding, allowUnknownFormat) { uid, gid, size, + byteOffset: 0, mtime: new Date(1000 * mtime), type, linkname, diff --git a/test/extract.js b/test/extract.js index 9fddfc9..8fc34d2 100644 --- a/test/extract.js +++ b/test/extract.js @@ -17,6 +17,7 @@ test('one-file', function (t) { uid: 501, gid: 20, size: 12, + byteOffset: 512, mtime: new Date(1387580181000), type: 'file', linkname: null, @@ -24,8 +25,7 @@ test('one-file', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null, - contentsByteOffset: 512 + pax: null }) stream.pipe(concat(function (data) { @@ -55,6 +55,7 @@ test('chunked-one-file', function (t) { uid: 501, gid: 20, size: 12, + byteOffset: 512, mtime: new Date(1387580181000), type: 'file', linkname: null, @@ -62,8 +63,7 @@ test('chunked-one-file', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null, - contentsByteOffset: 512 + pax: null }) stream.pipe(concat(function (data) { @@ -106,6 +106,7 @@ test('multi-file', function (t) { uid: 501, gid: 20, size: 12, + byteOffset: 512, mtime: new Date(1387580181000), type: 'file', linkname: null, @@ -113,8 +114,7 @@ test('multi-file', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null, - contentsByteOffset: 512 + pax: null }) extract.on('entry', onfile2) @@ -131,6 +131,7 @@ test('multi-file', function (t) { uid: 501, gid: 20, size: 12, + byteOffset: 1536, mtime: new Date(1387580181000), type: 'file', linkname: null, @@ -138,8 +139,7 @@ test('multi-file', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null, - contentsByteOffset: 1536 + pax: null }) stream.pipe(concat(function (data) { @@ -175,6 +175,7 @@ test('chunked-multi-file', function (t) { uid: 501, gid: 20, size: 12, + byteOffset: 512, mtime: new Date(1387580181000), type: 'file', linkname: null, @@ -182,8 +183,7 @@ test('chunked-multi-file', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null, - contentsByteOffset: 512 + pax: null }) extract.on('entry', onfile2) @@ -200,6 +200,7 @@ test('chunked-multi-file', function (t) { uid: 501, gid: 20, size: 12, + byteOffset: 1536, mtime: new Date(1387580181000), type: 'file', linkname: null, @@ -207,8 +208,7 @@ test('chunked-multi-file', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null, - contentsByteOffset: 1536 + pax: null }) stream.pipe(concat(function (data) { @@ -232,6 +232,7 @@ test('pax', function (t) { uid: 501, gid: 20, size: 12, + byteOffset: 1536, mtime: new Date(1387580181000), type: 'file', linkname: null, @@ -239,8 +240,7 @@ test('pax', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: { path: 'pax.txt', special: 'sauce' }, - contentsByteOffset: 1536 + pax: { path: 'pax.txt', special: 'sauce' } }) stream.pipe(concat(function (data) { @@ -278,6 +278,7 @@ test('types', function (t) { uid: 501, gid: 20, size: 0, + byteOffset: 512, mtime: new Date(1387580181000), type: 'directory', linkname: null, @@ -304,6 +305,7 @@ test('types', function (t) { uid: 501, gid: 20, size: 0, + byteOffset: 1024, mtime: new Date(1387580181000), type: 'symlink', linkname: 'directory', @@ -337,6 +339,7 @@ test('long-name', function (t) { uid: 501, gid: 20, size: 16, + byteOffset: 512, mtime: new Date(1387580181000), type: 'file', linkname: null, @@ -344,8 +347,7 @@ test('long-name', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null, - contentsByteOffset: 512 + pax: null }) stream.pipe(concat(function (data) { @@ -375,6 +377,7 @@ test('unicode-bsd', function (t) { // can unpack a bsdtar unicoded tarball uid: 501, gid: 20, size: 4, + byteOffset: 1536, mtime: new Date(1387588646000), type: 'file', linkname: null, @@ -382,8 +385,7 @@ test('unicode-bsd', function (t) { // can unpack a bsdtar unicoded tarball gname: 'staff', devmajor: 0, devminor: 0, - pax: { 'SCHILY.dev': '16777217', 'SCHILY.ino': '3599143', 'SCHILY.nlink': '1', atime: '1387589077', ctime: '1387588646', path: 'høllø.txt' }, - contentsByteOffset: 1536 + pax: { 'SCHILY.dev': '16777217', 'SCHILY.ino': '3599143', 'SCHILY.nlink': '1', atime: '1387589077', ctime: '1387588646', path: 'høllø.txt' } }) stream.pipe(concat(function (data) { @@ -413,6 +415,7 @@ test('unicode', function (t) { // can unpack a bsdtar unicoded tarball uid: 501, gid: 20, size: 8, + byteOffset: 1536, mtime: new Date(1387580181000), type: 'file', linkname: null, @@ -420,8 +423,7 @@ test('unicode', function (t) { // can unpack a bsdtar unicoded tarball gname: 'staff', devmajor: 0, devminor: 0, - pax: { path: 'høstål.txt' }, - contentsByteOffset: 1536 + pax: { path: 'høstål.txt' } }) stream.pipe(concat(function (data) { @@ -531,6 +533,7 @@ test('base 256 size', function (t) { uid: 501, gid: 20, size: 12, + byteOffset: 512, mtime: new Date(1387580181000), type: 'file', linkname: null, @@ -538,8 +541,7 @@ test('base 256 size', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null, - contentsByteOffset: 512 + pax: null }) cb() }) @@ -565,6 +567,7 @@ test('latin-1', function (t) { // can unpack filenames encoded in latin-1 uid: 0, gid: 0, size: 14, + byteOffset: 512, mtime: new Date(1495941034000), type: 'file', linkname: null, @@ -572,8 +575,7 @@ test('latin-1', function (t) { // can unpack filenames encoded in latin-1 gname: 'root', devmajor: 0, devminor: 0, - pax: null, - contentsByteOffset: 512 + pax: null }) stream.pipe(concat(function (data) { @@ -623,6 +625,7 @@ test('gnu', function (t) { // can correctly unpack gnu-tar format uid: 12345, gid: 67890, size: 14, + byteOffset: 512, mtime: new Date(1559239869000), type: 'file', linkname: null, @@ -630,8 +633,7 @@ test('gnu', function (t) { // can correctly unpack gnu-tar format gname: 'mygroup', devmajor: 0, devminor: 0, - pax: null, - contentsByteOffset: 512 + pax: null }) stream.pipe(concat(function (data) { @@ -665,6 +667,7 @@ test('gnu-incremental', function (t) { uid: 12345, gid: 67890, size: 14, + byteOffset: 512, mtime: new Date(1559239869000), type: 'file', linkname: null, @@ -672,8 +675,7 @@ test('gnu-incremental', function (t) { gname: 'mygroup', devmajor: 0, devminor: 0, - pax: null, - contentsByteOffset: 512 + pax: null }) stream.pipe(concat(function (data) { @@ -737,6 +739,7 @@ test('unknown format attempts to extract if allowed', function (t) { uid: 501, gid: 20, size: 12, + byteOffset: 512, mtime: new Date(1387580181000), type: 'file', linkname: null, @@ -744,8 +747,7 @@ test('unknown format attempts to extract if allowed', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null, - contentsByteOffset: 512 + pax: null }) extract.on('entry', onfile2) @@ -762,6 +764,7 @@ test('unknown format attempts to extract if allowed', function (t) { uid: 501, gid: 20, size: 12, + byteOffset: 1536, mtime: new Date(1387580181000), type: 'file', linkname: null, @@ -769,8 +772,7 @@ test('unknown format attempts to extract if allowed', function (t) { gname: 'staff', devmajor: 0, devminor: 0, - pax: null, - contentsByteOffset: 1536 + pax: null }) stream.pipe(concat(function (data) {