Skip to content

Commit

Permalink
Avoid parsing RSS entry that already exists
Browse files: browse the repository at this point in the history
  • Loading branch information
cjmalloy committed Jul 17, 2023
1 parent c67e367 commit 88702f0
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions src/main/java/jasper/component/RssParser.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -125,6 +125,10 @@ public void scrape(Ref feed) throws IOException, FeedException {
Ref ref;
try {
ref = parseEntry(feed, config, entry, feedImage);
} catch (AlreadyExistsException e) {
logger.debug("Skipping RSS entry in feed {} which already exists. {} {}",
feed.getTitle(), entry.getTitle(), entry.getLink());
continue;
} catch (Exception e) {
logger.error("Error processing entry", e);
continue;
Expand Down Expand Up @@ -159,6 +163,9 @@ private Ref parseEntry(Ref feed, Feed config, SyndEntry entry, Map<String, Objec
if (config.isStripQuery() && l.contains("?")) {
l = l.substring(0, l.indexOf("?"));
}
if (refRepository.existsByUrlAndOrigin(l, feed.getOrigin())) {
throw new AlreadyExistsException();
}
try {
var web = webScraper.web(l);
if (web != null && config.isScrapeWebpage()) {
Expand Down

0 comments on commit 88702f0

Please sign in to comment.