Skip to content

Commit

Permalink
Cache failed scrapes
Browse files: browse the repository at this point in the history
  • Loading branch information
cjmalloy committed Jul 17, 2023
1 parent 6425e58 commit de44cfb
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions src/main/java/jasper/component/WebScraper.java
Original file line number Diff line number Diff line change
Expand Up @@ -668,21 +668,24 @@ public void scrape(String url) {
/**
 * Queues a URL to be scraped later instead of scraping it in the calling thread.
 *
 * <p>Does nothing when {@code url} is blank or when {@code exists(url)} reports true.
 * Otherwise a {@code Web} entry carrying only the URL is saved through
 * {@code webRepository}, and the URL is added to {@code scrapeLater}.
 *
 * @param url the address to queue; blank values are ignored
 */
public void scrapeAsync(String url) {
    // Short-circuit keeps the original order: blank check first, then the existence lookup.
    if (isBlank(url) || exists(url)) {
        return;
    }
    // Persist a URL-only entry before queueing.
    // NOTE(review): presumably this makes exists(url) true for repeat calls — confirm.
    var placeholder = new Web();
    placeholder.setUrl(url);
    webRepository.save(placeholder);
    scrapeLater.add(url);
}

/**
 * Scheduled task that processes the URLs queued in {@code scrapeLater}.
 *
 * <p>Transfers queued URLs from {@code scrapeLater} into {@code scraping}, handles each
 * URL in that batch, then clears {@code scraping}.
 *
 * <p>NOTE(review): the schedule is {@code fixedDelay = 300}; presumably milliseconds if this
 * is Spring's {@code @Scheduled} — confirm.
 */
@Scheduled(fixedDelay = 300)
public void drainAsyncScrape() {
// Move the pending URLs into the working collection for this run.
// NOTE(review): assumes scrapeLater is a BlockingQueue-style queue — confirm.
scrapeLater.drainTo(scraping);
// NOTE(review): this listing comes from a diff view and shows two loops over the same batch.
// Presumably the scrape(url) loop is the removed line and the fetch(url) loop is its
// replacement; as written, every URL would be handled twice — confirm against the real file.
for (var url : scraping) scrape(url);
for (var url : scraping) fetch(url);
// Empty the working collection so the next run starts from a clean batch.
scraping.clear();
}

@Timed(value = "jasper.webscrape")
public Web fetch(String url) {
url = fixUrl(url);
var maybeWeb = webRepository.findById(url);
if (maybeWeb.isPresent() && maybeWeb.get().getData() != null) return maybeWeb.get();
if (maybeWeb.isPresent()) return maybeWeb.get();
List<String> scrapeMore = List.of();
try {
var web = doScrape(url);
Expand Down

0 comments on commit de44cfb

Please sign in to comment.