Skip to content

Commit

Permalink
#2857 replace with fess-crawler-opensearch
Browse files: browse the repository at this point in the history
  • Loading branch information
marevol committed Nov 7, 2024
1 parent 8ca1138 commit 9d3f26a
Show file tree
Hide file tree
Showing 26 changed files with 106 additions and 105 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1441,7 +1441,7 @@
</dependency>
<dependency>
<groupId>org.codelibs.fess</groupId>
<artifactId>fess-crawler-es</artifactId>
<artifactId>fess-crawler-opensearch</artifactId>
<version>${crawler.version}</version>
</dependency>
<dependency>
Expand Down
20 changes: 10 additions & 10 deletions src/main/java/org/codelibs/fess/app/web/admin/AdminAction.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import org.codelibs.fess.app.web.admin.dict.AdminDictAction;
import org.codelibs.fess.app.web.admin.duplicatehost.AdminDuplicatehostAction;
import org.codelibs.fess.app.web.admin.elevateword.AdminElevatewordAction;
import org.codelibs.fess.app.web.admin.esreq.AdminEsreqAction;
import org.codelibs.fess.app.web.admin.failureurl.AdminFailureurlAction;
import org.codelibs.fess.app.web.admin.fileauth.AdminFileauthAction;
import org.codelibs.fess.app.web.admin.fileconfig.AdminFileconfigAction;
Expand All @@ -46,6 +45,7 @@
import org.codelibs.fess.app.web.admin.role.AdminRoleAction;
import org.codelibs.fess.app.web.admin.scheduler.AdminSchedulerAction;
import org.codelibs.fess.app.web.admin.searchlog.AdminSearchlogAction;
import org.codelibs.fess.app.web.admin.sereq.AdminSereqAction;
import org.codelibs.fess.app.web.admin.storage.AdminStorageAction;
import org.codelibs.fess.app.web.admin.suggest.AdminSuggestAction;
import org.codelibs.fess.app.web.admin.systeminfo.AdminSysteminfoAction;
Expand Down Expand Up @@ -142,8 +142,8 @@ protected String getActionRole() {
AdminBackupAction.ROLE + VIEW, //
AdminMaintenanceAction.ROLE, //
AdminMaintenanceAction.ROLE + VIEW, //
AdminEsreqAction.ROLE, //
AdminEsreqAction.ROLE + VIEW, //
AdminSereqAction.ROLE, //
AdminSereqAction.ROLE + VIEW, //
AdminDashboardAction.ROLE, //
AdminDashboardAction.ROLE + VIEW, //
AdminWizardAction.ROLE, //
Expand Down Expand Up @@ -216,8 +216,8 @@ protected String getActionRole() {
AdminBackupAction.ROLE + VIEW, //
AdminMaintenanceAction.ROLE, //
AdminMaintenanceAction.ROLE + VIEW, //
AdminEsreqAction.ROLE, //
AdminEsreqAction.ROLE + VIEW, //
AdminSereqAction.ROLE, //
AdminSereqAction.ROLE + VIEW, //
AdminDashboardAction.ROLE, //
AdminDashboardAction.ROLE + VIEW, //
AdminWizardAction.ROLE, //
Expand Down Expand Up @@ -290,8 +290,8 @@ protected String getActionRole() {
AdminBackupAction.ROLE + VIEW, //
AdminMaintenanceAction.ROLE, //
AdminMaintenanceAction.ROLE + VIEW, //
AdminEsreqAction.ROLE, //
AdminEsreqAction.ROLE + VIEW,//
AdminSereqAction.ROLE, //
AdminSereqAction.ROLE + VIEW,//

})
public HtmlResponse index() {
Expand Down Expand Up @@ -413,8 +413,8 @@ public static Class<? extends FessAdminAction> getAdminActionClass(final FessUse
if (user.hasRoles(getActionRoles(AdminMaintenanceAction.ROLE))) {
return AdminMaintenanceAction.class;
}
if (user.hasRoles(getActionRoles(AdminEsreqAction.ROLE))) {
return AdminEsreqAction.class;
if (user.hasRoles(getActionRoles(AdminSereqAction.ROLE))) {
return AdminSereqAction.class;
}
return null;
}
Expand All @@ -423,4 +423,4 @@ private static String[] getActionRoles(final String role) {
return new String[] { role, role + VIEW };
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.app.web.admin.esreq;
package org.codelibs.fess.app.web.admin.sereq;

import java.io.BufferedReader;
import java.io.File;
Expand Down Expand Up @@ -42,16 +42,16 @@
/**
* @author shinsuke
*/
public class AdminEsreqAction extends FessAdminAction {
public class AdminSereqAction extends FessAdminAction {

public static final String ROLE = "admin-esreq";
public static final String ROLE = "admin-sereq";

private static final Logger logger = LogManager.getLogger(AdminEsreqAction.class);
private static final Logger logger = LogManager.getLogger(AdminSereqAction.class);

@Override
protected void setupHtmlData(final ActionRuntime runtime) {
super.setupHtmlData(runtime);
runtime.registerData("helpLink", systemHelper.getHelpLink(fessConfig.getOnlineHelpNameEsreq()));
runtime.registerData("helpLink", systemHelper.getHelpLink(fessConfig.getOnlineHelpNameSereq()));
}

@Override
Expand Down Expand Up @@ -95,7 +95,7 @@ public ActionResponse upload(final UploadForm form) {
throwValidationError(messages -> messages.addErrorsInvalidHeaderForRequestFile(GLOBAL, msg), () -> asListHtml(this::saveToken));
} else {
try (final CurlResponse response = curlRequest.body(buf.toString()).execute()) {
final File tempFile = ComponentUtil.getSystemHelper().createTempFile("esreq_", ".json");
final File tempFile = ComponentUtil.getSystemHelper().createTempFile("sereq_", ".json");
try (final InputStream in = response.getContentAsStream()) {
CopyUtil.copy(in, tempFile);
} catch (final Exception e1) {
Expand Down Expand Up @@ -160,7 +160,7 @@ private HtmlResponse asListHtml(final Runnable runnable) {
if (runnable != null) {
runnable.run();
}
return asHtml(path_AdminEsreq_AdminEsreqJsp).useForm(UploadForm.class);
return asHtml(path_AdminSereq_AdminSereqJsp).useForm(UploadForm.class);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.app.web.admin.esreq;
package org.codelibs.fess.app.web.admin.sereq;

import org.lastaflute.web.ruts.multipart.MultipartFormFile;
import org.lastaflute.web.validation.Required;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.fess.crawler.entity.EsUrlQueue;
import org.codelibs.fess.crawler.service.impl.EsUrlQueueService;
import org.codelibs.fess.crawler.util.EsCrawlerConfig;
import org.codelibs.fess.crawler.entity.OpenSearchUrlQueue;
import org.codelibs.fess.crawler.service.impl.OpenSearchUrlQueueService;
import org.codelibs.fess.crawler.util.OpenSearchCrawlerConfig;
import org.codelibs.fess.helper.CrawlingConfigHelper;
import org.codelibs.fess.opensearch.config.exentity.CrawlingConfig;
import org.codelibs.fess.opensearch.config.exentity.CrawlingConfig.ConfigName;
Expand All @@ -33,21 +33,21 @@
import org.opensearch.search.sort.SortBuilders;
import org.opensearch.search.sort.SortOrder;

public class FessUrlQueueService extends EsUrlQueueService {
public class FessUrlQueueService extends OpenSearchUrlQueueService {
private static final Logger logger = LogManager.getLogger(FessUrlQueueService.class);

public FessUrlQueueService(final EsCrawlerConfig crawlerConfig) {
public FessUrlQueueService(final OpenSearchCrawlerConfig crawlerConfig) {
super(crawlerConfig);
}

@Override
protected List<EsUrlQueue> fetchUrlQueueList(final String sessionId) {
protected List<OpenSearchUrlQueue> fetchUrlQueueList(final String sessionId) {
final CrawlingConfigHelper crawlingConfigHelper = ComponentUtil.getCrawlingConfigHelper();
final CrawlingConfig crawlingConfig = crawlingConfigHelper.get(sessionId);
final Map<String, String> configParams = crawlingConfig.getConfigParameterMap(ConfigName.CONFIG);
final String crawlOrder = configParams.getOrDefault(CrawlingConfig.Param.Config.CRAWL_ORDER, "sequential");
if ("random".equals(crawlOrder)) {
return getList(EsUrlQueue.class, sessionId,
return getList(OpenSearchUrlQueue.class, sessionId,
QueryBuilders.functionScoreQuery(QueryBuilders.matchAllQuery(),
new FunctionScoreQueryBuilder.FilterFunctionBuilder[] { new FunctionScoreQueryBuilder.FilterFunctionBuilder(
new RandomScoreFunctionBuilder().seed(sessionId.hashCode())) }),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import org.codelibs.fess.util.ComponentUtil;

public class FessCrawlerConfig extends EsCrawlerConfig {
public class FessCrawlerConfig extends OpenSearchCrawlerConfig {

@Override
public String getQueueIndex() {
Expand Down
1 change: 1 addition & 0 deletions src/main/java/org/codelibs/fess/helper/PluginHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ protected boolean isExcludedName(final ArtifactType artifactType, final String n
|| "fess-crawler-db-h2".equals(name)//
|| "fess-crawler-db-mysql".equals(name)//
|| "fess-crawler-es".equals(name)//
|| "fess-crawler-opensearch".equals(name)//
|| "fess-crawler-lasta".equals(name)//
|| "fess-crawler-parent".equals(name)//
|| "fess-crawler-playwright".equals(name)//
Expand Down
16 changes: 8 additions & 8 deletions src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@
import org.codelibs.fess.crawler.CrawlerContext;
import org.codelibs.fess.crawler.CrawlerStatus;
import org.codelibs.fess.crawler.interval.FessIntervalController;
import org.codelibs.fess.crawler.service.impl.EsDataService;
import org.codelibs.fess.crawler.service.impl.EsUrlFilterService;
import org.codelibs.fess.crawler.service.impl.EsUrlQueueService;
import org.codelibs.fess.crawler.service.impl.OpenSearchDataService;
import org.codelibs.fess.crawler.service.impl.OpenSearchUrlFilterService;
import org.codelibs.fess.crawler.service.impl.OpenSearchUrlQueueService;
import org.codelibs.fess.indexer.IndexUpdater;
import org.codelibs.fess.opensearch.config.exbhv.BoostDocumentRuleBhv;
import org.codelibs.fess.opensearch.config.exentity.BoostDocumentRule;
Expand Down Expand Up @@ -142,7 +142,7 @@ protected void doCrawl(final String sessionId, final List<WebConfig> webConfigLi
if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Config.CLEANUP_ALL))) {
deleteCrawlData(sid);
} else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Config.CLEANUP_URL_FILTERS))) {
final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
final OpenSearchUrlFilterService urlFilterService = ComponentUtil.getComponent(OpenSearchUrlFilterService.class);
try {
urlFilterService.delete(sid);
} catch (final Exception e) {
Expand Down Expand Up @@ -268,7 +268,7 @@ protected void doCrawl(final String sessionId, final List<WebConfig> webConfigLi
if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Config.CLEANUP_ALL))) {
deleteCrawlData(sid);
} else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Config.CLEANUP_URL_FILTERS))) {
final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
final OpenSearchUrlFilterService urlFilterService = ComponentUtil.getComponent(OpenSearchUrlFilterService.class);
try {
urlFilterService.delete(sid);
} catch (final Exception e) {
Expand Down Expand Up @@ -479,9 +479,9 @@ protected List<BoostDocumentRule> getAvailableBoostDocumentRuleList() {
}

protected void deleteCrawlData(final String sid) {
final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
final EsUrlQueueService urlQueueService = ComponentUtil.getComponent(EsUrlQueueService.class);
final EsDataService dataService = ComponentUtil.getComponent(EsDataService.class);
final OpenSearchUrlFilterService urlFilterService = ComponentUtil.getComponent(OpenSearchUrlFilterService.class);
final OpenSearchUrlQueueService urlQueueService = ComponentUtil.getComponent(OpenSearchUrlQueueService.class);
final OpenSearchDataService dataService = ComponentUtil.getComponent(OpenSearchDataService.class);

try {
// clear url filter
Expand Down
42 changes: 21 additions & 21 deletions src/main/java/org/codelibs/fess/indexer/IndexUpdater.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@
import org.codelibs.fess.crawler.Crawler;
import org.codelibs.fess.crawler.entity.AccessResult;
import org.codelibs.fess.crawler.entity.AccessResultData;
import org.codelibs.fess.crawler.entity.EsAccessResult;
import org.codelibs.fess.crawler.entity.EsUrlQueue;
import org.codelibs.fess.crawler.entity.OpenSearchAccessResult;
import org.codelibs.fess.crawler.entity.OpenSearchUrlQueue;
import org.codelibs.fess.crawler.service.DataService;
import org.codelibs.fess.crawler.service.UrlFilterService;
import org.codelibs.fess.crawler.service.UrlQueueService;
import org.codelibs.fess.crawler.service.impl.EsDataService;
import org.codelibs.fess.crawler.service.impl.OpenSearchDataService;
import org.codelibs.fess.crawler.transformer.Transformer;
import org.codelibs.fess.crawler.util.EsResultList;
import org.codelibs.fess.crawler.util.OpenSearchResultList;
import org.codelibs.fess.exception.ContainerNotAvailableException;
import org.codelibs.fess.exception.FessSystemException;
import org.codelibs.fess.helper.IndexingHelper;
Expand Down Expand Up @@ -70,10 +70,10 @@ public class IndexUpdater extends Thread {
protected SearchEngineClient searchEngineClient;

@Resource
protected DataService<EsAccessResult> dataService;
protected DataService<OpenSearchAccessResult> dataService;

@Resource
protected UrlQueueService<EsUrlQueue> urlQueueService;
protected UrlQueueService<OpenSearchUrlQueue> urlQueueService;

@Resource
protected UrlFilterService urlFilterService;
Expand Down Expand Up @@ -176,17 +176,17 @@ public void run() {
try {
final Consumer<SearchRequestBuilder> cb = builder -> {
final QueryBuilder queryBuilder =
QueryBuilders.boolQuery().filter(QueryBuilders.termsQuery(EsAccessResult.SESSION_ID, sessionIdList))
.filter(QueryBuilders.termQuery(EsAccessResult.STATUS, org.codelibs.fess.crawler.Constants.OK_STATUS));
QueryBuilders.boolQuery().filter(QueryBuilders.termsQuery(OpenSearchAccessResult.SESSION_ID, sessionIdList)).filter(
QueryBuilders.termQuery(OpenSearchAccessResult.STATUS, org.codelibs.fess.crawler.Constants.OK_STATUS));
builder.setQuery(queryBuilder);
builder.setFrom(0);
final int maxDocumentCacheSize = fessConfig.getIndexerWebfsMaxDocumentCacheSizeAsInteger();
builder.setSize(maxDocumentCacheSize <= 0 ? 1 : maxDocumentCacheSize);
builder.addSort(EsAccessResult.CREATE_TIME, SortOrder.ASC);
builder.addSort(OpenSearchAccessResult.CREATE_TIME, SortOrder.ASC);
};

final DocList docList = new DocList();
final List<EsAccessResult> accessResultList = new ArrayList<>();
final List<OpenSearchAccessResult> accessResultList = new ArrayList<>();

long updateTime = systemHelper.getCurrentTimeAsLong();
int errorCount = 0;
Expand Down Expand Up @@ -219,13 +219,13 @@ public void run() {

updateTime = systemHelper.getCurrentTimeAsLong();

List<EsAccessResult> arList = getAccessResultList(cb, cleanupTime);
List<OpenSearchAccessResult> arList = getAccessResultList(cb, cleanupTime);
if (arList.isEmpty()) {
emptyListCount++;
} else {
emptyListCount = 0; // reset
}
long hitCount = ((EsResultList<EsAccessResult>) arList).getTotalHits();
long hitCount = ((OpenSearchResultList<OpenSearchAccessResult>) arList).getTotalHits();
while (hitCount > 0) {
if (arList.isEmpty()) {
ThreadUtil.sleep(fessConfig.getIndexerWebfsCommitMarginTimeAsInteger().longValue());
Expand All @@ -235,7 +235,7 @@ public void run() {
cleanupTime = cleanupAccessResults(accessResultList);
}
arList = getAccessResultList(cb, cleanupTime);
hitCount = ((EsResultList<EsAccessResult>) arList).getTotalHits();
hitCount = ((OpenSearchResultList<OpenSearchAccessResult>) arList).getTotalHits();
}
if (!docList.isEmpty()) {
indexingHelper.sendDocuments(searchEngineClient, docList);
Expand Down Expand Up @@ -320,11 +320,11 @@ public void run() {

}

private void processAccessResults(final DocList docList, final List<EsAccessResult> accessResultList,
final List<EsAccessResult> arList) {
private void processAccessResults(final DocList docList, final List<OpenSearchAccessResult> accessResultList,
final List<OpenSearchAccessResult> arList) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final long maxDocumentRequestSize = Long.parseLong(fessConfig.getIndexerWebfsMaxDocumentRequestSize());
for (final EsAccessResult accessResult : arList) {
for (final OpenSearchAccessResult accessResult : arList) {
if (logger.isDebugEnabled()) {
logger.debug("Indexing {}", accessResult.getUrl());
}
Expand Down Expand Up @@ -395,7 +395,7 @@ private void processAccessResults(final DocList docList, final List<EsAccessResu
}
}

private AccessResultData<?> getAccessResultData(final EsAccessResult accessResult) {
private AccessResultData<?> getAccessResultData(final OpenSearchAccessResult accessResult) {
try {
return accessResult.getAccessResultData();
} catch (final Exception e) {
Expand Down Expand Up @@ -483,7 +483,7 @@ protected void addFavoriteCountField(final Map<String, Object> map) {
}
}

private long cleanupAccessResults(final List<EsAccessResult> accessResultList) {
private long cleanupAccessResults(final List<OpenSearchAccessResult> accessResultList) {
if (!accessResultList.isEmpty()) {
final long execTime = systemHelper.getCurrentTimeAsLong();
final int size = accessResultList.size();
Expand All @@ -498,12 +498,12 @@ private long cleanupAccessResults(final List<EsAccessResult> accessResultList) {
return -1;
}

private List<EsAccessResult> getAccessResultList(final Consumer<SearchRequestBuilder> cb, final long cleanupTime) {
private List<OpenSearchAccessResult> getAccessResultList(final Consumer<SearchRequestBuilder> cb, final long cleanupTime) {
if (logger.isDebugEnabled()) {
logger.debug("Getting documents in IndexUpdater queue.");
}
final long execTime = systemHelper.getCurrentTimeAsLong();
final List<EsAccessResult> arList = ((EsDataService) dataService).getAccessResultList(cb);
final List<OpenSearchAccessResult> arList = ((OpenSearchDataService) dataService).getAccessResultList(cb);
final FessConfig fessConfig = ComponentUtil.getFessConfig();
if (!arList.isEmpty()) {
final long commitMarginTime = fessConfig.getIndexerWebfsCommitMarginTimeAsInteger().longValue();
Expand All @@ -513,7 +513,7 @@ private List<EsAccessResult> getAccessResultList(final Consumer<SearchRequestBui
}
}
}
final long totalHits = ((EsResultList<EsAccessResult>) arList).getTotalHits();
final long totalHits = ((OpenSearchResultList<OpenSearchAccessResult>) arList).getTotalHits();
if (logger.isInfoEnabled()) {
final StringBuilder buf = new StringBuilder(100);
buf.append("Processing ");
Expand Down
Loading

0 comments on commit 9d3f26a

Please sign in to comment.