Skip to content

Commit

Permalink
fix #2823 Log Failure URL ID in CrawlingAccessException in fess.log
Browse files Browse the repository at this point in the history
  • Loading branch information
marevol committed Jun 24, 2024
1 parent a26e133 commit a26cf43
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,9 @@ public void deleteByConfigId(final String configId) {
});
}

public void store(final CrawlingConfig crawlingConfig, final String errorName, final String url, final Throwable e) {
public FailureUrl store(final CrawlingConfig crawlingConfig, final String errorName, final String url, final Throwable e) {
if (e instanceof ContainerNotAvailableException) {
return;
return null;
}

final FailureUrlBhv bhv = ComponentUtil.getComponent(FailureUrlBhv.class);
Expand Down Expand Up @@ -160,6 +160,7 @@ public void store(final CrawlingConfig crawlingConfig, final String errorName, f
bhv.insertOrUpdate(failureUrl, op -> {
op.setRefreshPolicy(Constants.TRUE);
});
return failureUrl;
}

private String getStackTrace(final Throwable t) {
Expand Down
42 changes: 28 additions & 14 deletions src/main/java/org/codelibs/fess/helper/CrawlerLogHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@
import org.codelibs.fess.app.service.FailureUrlService;
import org.codelibs.fess.crawler.CrawlerContext;
import org.codelibs.fess.crawler.entity.UrlQueue;
import org.codelibs.fess.crawler.exception.CrawlingAccessException;
import org.codelibs.fess.crawler.exception.MultipleCrawlingAccessException;
import org.codelibs.fess.crawler.helper.impl.LogHelperImpl;
import org.codelibs.fess.crawler.log.LogType;
import org.codelibs.fess.es.config.exentity.CrawlingConfig;
import org.codelibs.fess.es.config.exentity.FailureUrl;
import org.codelibs.fess.exception.ContainerNotAvailableException;
import org.codelibs.fess.helper.CrawlerStatsHelper.StatsAction;
import org.codelibs.fess.util.ComponentUtil;
Expand Down Expand Up @@ -84,25 +86,30 @@ protected void processFinishedCrawling(final Object... objs) {

@Override
protected void processCrawlingAccessException(final Object... objs) {
String failureUrlId = "?";
final CrawlerContext crawlerContext = (CrawlerContext) objs[0];
final UrlQueue<?> urlQueue = (UrlQueue<?>) objs[1];
final CrawlingAccessException cae = (CrawlingAccessException) objs[2];
try {
final CrawlerContext crawlerContext = (CrawlerContext) objs[0];
final UrlQueue<?> urlQueue = (UrlQueue<?>) objs[1];
Throwable e = (Throwable) objs[2];
if (e instanceof MultipleCrawlingAccessException) {
final Throwable[] causes = ((MultipleCrawlingAccessException) e).getCauses();
Throwable t = cae;
if (t instanceof MultipleCrawlingAccessException mcae) {
final Throwable[] causes = mcae.getCauses();
if (causes.length > 0) {
e = causes[causes.length - 1];
t = causes[causes.length - 1];
}
}

String errorName;
final Throwable cause = e.getCause();
final Throwable cause = t.getCause();
if (cause != null) {
errorName = cause.getClass().getCanonicalName();
} else {
errorName = e.getClass().getCanonicalName();
errorName = t.getClass().getCanonicalName();
}
FailureUrl failureUrl = storeFailureUrl(crawlerContext, urlQueue, errorName, t);
if (failureUrl != null) {
failureUrlId = failureUrl.getId();
}
storeFailureUrl(crawlerContext, urlQueue, errorName, e);
} catch (final ContainerNotAvailableException e) {
if (logger.isDebugEnabled()) {
logger.debug("container was destroyed.");
Expand All @@ -118,10 +125,17 @@ protected void processCrawlingAccessException(final Object... objs) {
logger.warn("Failed to store a failure url.", e);
}

super.processCrawlingAccessException(objs);
if (objs.length > 1 && objs[1] instanceof final UrlQueue<?> urlQueue) {
ComponentUtil.getCrawlerStatsHelper().record(urlQueue, StatsAction.ACCESS_EXCEPTION);
if (cae.isDebugEnabled()) {
logger.debug("[{}] Crawling Access Exception at {}", failureUrlId, urlQueue.getUrl(), cae);
} else if (cae.isInfoEnabled()) {
logger.info("[{}] {}", failureUrlId, cae.getMessage());
} else if (cae.isWarnEnabled()) {
logger.warn("[{}] Crawling Access Exception at {}", failureUrlId, urlQueue.getUrl(), cae);
} else if (cae.isErrorEnabled()) {
logger.error("[{}] Crawling Access Exception at {}", failureUrlId, urlQueue.getUrl(), cae);
}

ComponentUtil.getCrawlerStatsHelper().record(urlQueue, StatsAction.ACCESS_EXCEPTION);
}

@Override
Expand Down Expand Up @@ -153,14 +167,14 @@ protected void processCrawlingException(final Object... objs) {
}
}

protected void storeFailureUrl(final CrawlerContext crawlerContext, final UrlQueue<?> urlQueue, final String errorName,
protected FailureUrl storeFailureUrl(final CrawlerContext crawlerContext, final UrlQueue<?> urlQueue, final String errorName,
final Throwable e) {

final CrawlingConfig crawlingConfig = getCrawlingConfig(crawlerContext.getSessionId());
final String url = urlQueue.getUrl();

final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class);
failureUrlService.store(crawlingConfig, errorName, url, e);
return failureUrlService.store(crawlingConfig, errorName, url, e);
}

protected CrawlingConfig getCrawlingConfig(final String sessionCountId) {
Expand Down

0 comments on commit a26cf43

Please sign in to comment.