diff --git a/src/main/java/org/codelibs/fess/app/service/FailureUrlService.java b/src/main/java/org/codelibs/fess/app/service/FailureUrlService.java index d05fd59f7..45ff6d790 100644 --- a/src/main/java/org/codelibs/fess/app/service/FailureUrlService.java +++ b/src/main/java/org/codelibs/fess/app/service/FailureUrlService.java @@ -128,9 +128,9 @@ public void deleteByConfigId(final String configId) { }); } - public void store(final CrawlingConfig crawlingConfig, final String errorName, final String url, final Throwable e) { + public FailureUrl store(final CrawlingConfig crawlingConfig, final String errorName, final String url, final Throwable e) { if (e instanceof ContainerNotAvailableException) { - return; + return null; } final FailureUrlBhv bhv = ComponentUtil.getComponent(FailureUrlBhv.class); @@ -160,6 +160,7 @@ public void store(final CrawlingConfig crawlingConfig, final String errorName, f bhv.insertOrUpdate(failureUrl, op -> { op.setRefreshPolicy(Constants.TRUE); }); + return failureUrl; } private String getStackTrace(final Throwable t) { diff --git a/src/main/java/org/codelibs/fess/helper/CrawlerLogHelper.java b/src/main/java/org/codelibs/fess/helper/CrawlerLogHelper.java index 5f61c919f..d0ef0a190 100644 --- a/src/main/java/org/codelibs/fess/helper/CrawlerLogHelper.java +++ b/src/main/java/org/codelibs/fess/helper/CrawlerLogHelper.java @@ -20,10 +20,12 @@ import org.codelibs.fess.app.service.FailureUrlService; import org.codelibs.fess.crawler.CrawlerContext; import org.codelibs.fess.crawler.entity.UrlQueue; +import org.codelibs.fess.crawler.exception.CrawlingAccessException; import org.codelibs.fess.crawler.exception.MultipleCrawlingAccessException; import org.codelibs.fess.crawler.helper.impl.LogHelperImpl; import org.codelibs.fess.crawler.log.LogType; import org.codelibs.fess.es.config.exentity.CrawlingConfig; +import org.codelibs.fess.es.config.exentity.FailureUrl; import org.codelibs.fess.exception.ContainerNotAvailableException; import org.codelibs.fess.helper.CrawlerStatsHelper.StatsAction; import org.codelibs.fess.util.ComponentUtil; @@ -84,25 +86,30 @@ protected void processFinishedCrawling(final Object... objs) { @Override protected void processCrawlingAccessException(final Object... objs) { + String failureUrlId = "?"; + final CrawlerContext crawlerContext = (CrawlerContext) objs[0]; + final UrlQueue urlQueue = (UrlQueue) objs[1]; + final CrawlingAccessException cae = (CrawlingAccessException) objs[2]; try { - final CrawlerContext crawlerContext = (CrawlerContext) objs[0]; - final UrlQueue urlQueue = (UrlQueue) objs[1]; - Throwable e = (Throwable) objs[2]; - if (e instanceof MultipleCrawlingAccessException) { - final Throwable[] causes = ((MultipleCrawlingAccessException) e).getCauses(); + Throwable t = cae; + if (t instanceof MultipleCrawlingAccessException mcae) { + final Throwable[] causes = mcae.getCauses(); if (causes.length > 0) { - e = causes[causes.length - 1]; + t = causes[causes.length - 1]; } } String errorName; - final Throwable cause = e.getCause(); + final Throwable cause = t.getCause(); if (cause != null) { errorName = cause.getClass().getCanonicalName(); } else { - errorName = e.getClass().getCanonicalName(); + errorName = t.getClass().getCanonicalName(); + } + FailureUrl failureUrl = storeFailureUrl(crawlerContext, urlQueue, errorName, t); + if (failureUrl != null) { + failureUrlId = failureUrl.getId(); } - storeFailureUrl(crawlerContext, urlQueue, errorName, e); } catch (final ContainerNotAvailableException e) { if (logger.isDebugEnabled()) { logger.debug("container was destroyed."); @@ -118,10 +125,17 @@ protected void processCrawlingAccessException(final Object... objs) { logger.warn("Failed to store a failure url.", e); } - super.processCrawlingAccessException(objs); - if (objs.length > 1 && objs[1] instanceof final UrlQueue urlQueue) { - ComponentUtil.getCrawlerStatsHelper().record(urlQueue, StatsAction.ACCESS_EXCEPTION); + if (cae.isDebugEnabled()) { + logger.debug("[{}] Crawling Access Exception at {}", failureUrlId, urlQueue.getUrl(), cae); + } else if (cae.isInfoEnabled()) { + logger.info("[{}] {}", failureUrlId, cae.getMessage()); + } else if (cae.isWarnEnabled()) { + logger.warn("[{}] Crawling Access Exception at {}", failureUrlId, urlQueue.getUrl(), cae); + } else if (cae.isErrorEnabled()) { + logger.error("[{}] Crawling Access Exception at {}", failureUrlId, urlQueue.getUrl(), cae); } + + ComponentUtil.getCrawlerStatsHelper().record(urlQueue, StatsAction.ACCESS_EXCEPTION); } @Override @@ -153,14 +167,14 @@ protected void processCrawlingException(final Object... objs) { } } - protected void storeFailureUrl(final CrawlerContext crawlerContext, final UrlQueue urlQueue, final String errorName, + protected FailureUrl storeFailureUrl(final CrawlerContext crawlerContext, final UrlQueue urlQueue, final String errorName, final Throwable e) { final CrawlingConfig crawlingConfig = getCrawlingConfig(crawlerContext.getSessionId()); final String url = urlQueue.getUrl(); final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class); - failureUrlService.store(crawlingConfig, errorName, url, e); + return failureUrlService.store(crawlingConfig, errorName, url, e); } protected CrawlingConfig getCrawlingConfig(final String sessionCountId) {