From 49e3c6614f8863323dfcbbeeb8fdf13af87c1e76 Mon Sep 17 00:00:00 2001 From: Roberto Manzella Date: Mon, 23 Apr 2018 13:42:01 +0100 Subject: [PATCH] Add exclude option --- index.js | 1 + package.json | 1 + src/puppeteer_utils.js | 6 +++++- yarn.lock | 5 +++++ 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/index.js b/index.js index c0d9f91a..ab01addd 100644 --- a/index.js +++ b/index.js @@ -19,6 +19,7 @@ const defaultOptions = { destination: null, concurrency: 4, include: ["/"], + exclude: [], userAgent: "ReactSnap", // 4 params below will be refactored to one: `puppeteer: {}` // https://github.com/stereobooster/react-snap/issues/120 diff --git a/package.json b/package.json index 8358c99f..69ed4068 100644 --- a/package.json +++ b/package.json @@ -13,6 +13,7 @@ "clean-css": "4.2.1", "express": "4.17.1", "express-history-api-fallback": "2.2.1", + "glob-to-regexp": "^0.4.1", "highland": "2.13.4", "html-minifier": "4.0.0", "minimalcss": "0.8.2", diff --git a/src/puppeteer_utils.js b/src/puppeteer_utils.js index 820cded0..ae218a68 100644 --- a/src/puppeteer_utils.js +++ b/src/puppeteer_utils.js @@ -1,6 +1,8 @@ const puppeteer = require("puppeteer"); const _ = require("highland"); const url = require("url"); +const glob = require("glob-to-regexp"); +// @ts-ignore const mapStackTrace = require("sourcemapped-stacktrace-node").default; const path = require("path"); const fs = require("fs"); @@ -140,6 +142,7 @@ const crawl = async opt => { publicPath, sourceDir } = opt; + const exclude = options.exclude.map(g => glob(g, { extended: true, globstar: true})); let shuttingDown = false; let streamClosed = false; @@ -173,7 +176,7 @@ const crawl = async opt => { * @returns {void} */ const addToQueue = newUrl => { - const { hostname, search, hash, port } = url.parse(newUrl); + const { hostname, search, hash, port, pathname } = url.parse(newUrl); newUrl = newUrl.replace(`${search || ""}${hash || ""}`, ""); // Ensures that only link on the same port are crawled @@ -184,6 +187,7 @@ const crawl = async opt => { // Port can be null, therefore we need the null check const isOnAppPort = port && port.toString() === options.port.toString(); + if (exclude.filter(regex => regex.test(pathname)).length > 0) return; if (hostname === "localhost" && isOnAppPort && !uniqueUrls.has(newUrl) && !streamClosed) { uniqueUrls.add(newUrl); enqued++; diff --git a/yarn.lock b/yarn.lock index 5342afe6..70d2c119 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1453,6 +1453,11 @@ getpass@^0.1.1: dependencies: assert-plus "^1.0.0" +glob-to-regexp@^0.4.1: + version "0.4.1" + resolved "https://registry.yarnpkg.com/glob-to-regexp/-/glob-to-regexp-0.4.1.tgz#c75297087c851b9a578bd217dd59a92f59fe546e" + integrity sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw== + glob@^7.0.5, glob@^7.1.1, glob@^7.1.2: version "7.1.2" resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.2.tgz#c19c9df9a028702d678612384a6552404c636d15"