From de44cfbca26c330b113a6cbf1b6920e10f0ee731 Mon Sep 17 00:00:00 2001
From: Chris Malloy
Date: Mon, 17 Jul 2023 06:23:28 -0300
Subject: [PATCH] Cache failed scrapes

---
 src/main/java/jasper/component/WebScraper.java | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/main/java/jasper/component/WebScraper.java b/src/main/java/jasper/component/WebScraper.java
index aaf5573e..ac6b342c 100644
--- a/src/main/java/jasper/component/WebScraper.java
+++ b/src/main/java/jasper/component/WebScraper.java
@@ -668,13 +668,16 @@ public void scrape(String url) {
 	public void scrapeAsync(String url) {
 		if (isBlank(url)) return;
 		if (exists(url)) return;
+		var web = new Web();
+		web.setUrl(url);
+		webRepository.save(web);
 		scrapeLater.add(url);
 	}
 
 	@Scheduled(fixedDelay = 300)
 	public void drainAsyncScrape() {
 		scrapeLater.drainTo(scraping);
-		for (var url : scraping) scrape(url);
+		for (var url : scraping) fetch(url);
 		scraping.clear();
 	}
 
@@ -682,7 +685,7 @@ public void drainAsyncScrape() {
 	public Web fetch(String url) {
 		url = fixUrl(url);
 		var maybeWeb = webRepository.findById(url);
-		if (maybeWeb.isPresent() && maybeWeb.get().getData() != null) return maybeWeb.get();
+		if (maybeWeb.isPresent()) return maybeWeb.get();
 		List scrapeMore = List.of();
 		try {
 			var web = doScrape(url);