From bbea1f63857e75082745c2b249857d35052acea2 Mon Sep 17 00:00:00 2001 From: Matteo Cargnelutti Date: Fri, 22 Sep 2023 17:39:49 -0400 Subject: [PATCH] Add `Scoop.targetUrlResolved` As discussed w/ @rebeccacremona: This new property helps keep track of redirects. It is updated during the non-web content detection phase of the capture process (`Scoop.#detectAndCaptureNonWebContent()`) and appears in `Scoop.summary()` / `--json-summary-output`. --- Scoop.js | 10 ++++++++++ Scoop.test.js | 1 + 2 files changed, 11 insertions(+) diff --git a/Scoop.js b/Scoop.js index 5603626b..7fa20879 100644 --- a/Scoop.js +++ b/Scoop.js @@ -70,6 +70,13 @@ export class Scoop { */ url = '' + /** + * URL to capture, resolved to account for redirects. + * Populated during non-web content detection step. + * @type {string} + */ + targetUrlResolved = '' + /** * Is the target url a web page? * Assumed `true` until detected otherwise. @@ -186,6 +193,7 @@ export class Scoop { this.options = filterOptions(options) this.blocklist = this.options.blocklist.map(castBlocklistMatcher) this.url = this.filterUrl(url) + this.targetUrlResolved = this.url // Logging setup (level, output formatting) logPrefix.reg(this.log) @@ -675,6 +683,7 @@ export class Scoop { headRequestTimeMs = after - before + this.targetUrlResolved = headRequest.url contentType = headRequest.headers.get('Content-Type') contentLength = headRequest.headers.get('Content-Length') } catch (err) { @@ -1413,6 +1422,7 @@ export class Scoop { state: this.state, states: Object.keys(Scoop.states), // So summary.states[summary.state] = 'NAME-OF-STATE' targetUrl: this.url, + targetUrlResolved: this.targetUrlResolved, targetUrlIsWebPage: this.targetUrlIsWebPage, targetUrlContentType: this.targetUrlContentType, startedAt: this.startedAt, diff --git a/Scoop.test.js b/Scoop.test.js index 06b10893..b075112c 100644 --- a/Scoop.test.js +++ b/Scoop.test.js @@ -93,6 +93,7 @@ await test('Scoop - capture of a web page.', async (t) => { const summary = await 
capture.summary() assert(summary) assert.equal(summary.targetUrl, capture.url) + assert.equal(summary.targetUrlResolved, capture.targetUrlResolved) assert.equal(summary.targetUrlContentType, 'text/html; charset=UTF-8') assert.equal(summary.state, Scoop.states.COMPLETE) assert.equal(summary.exchangeUrls.length, capture.exchanges.length)