Skip to content

Commit

Permalink
fix #2819 add ProtocolHelper for URL protocol management and update r…
Browse files Browse the repository at this point in the history
…elated classes
  • Loading branch information
marevol committed Jun 17, 2024
1 parent 8b2adce commit 10c84ab
Show file tree
Hide file tree
Showing 7 changed files with 207 additions and 5 deletions.
86 changes: 86 additions & 0 deletions src/main/java/org/codelibs/fess/helper/ProtocolHelper.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Copyright 2012-2024 CodeLibs Project and the Others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.helper;

import static org.codelibs.core.stream.StreamUtil.split;
import static org.codelibs.core.stream.StreamUtil.stream;

import java.util.Arrays;

import javax.annotation.PostConstruct;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;

/**
 * Keeps the URL protocol prefixes (for example {@code "http:"} or {@code "smb:"}) that are
 * accepted for web and file crawling targets.
 *
 * <p>The initial prefixes are read from {@link FessConfig} in {@link #init()}. Additional
 * protocols can be registered afterwards with {@link #addWebProtocol(String)} and
 * {@link #addFileProtocol(String)}. Every stored entry is the protocol name followed by a
 * colon.</p>
 */
public class ProtocolHelper {
    private static final Logger logger = LogManager.getLogger(ProtocolHelper.class);

    /** Prefixes (protocol name plus ":") accepted for web URLs. */
    protected String[] webProtocols = StringUtil.EMPTY_STRINGS;

    /** Prefixes (protocol name plus ":") accepted for file URLs. */
    protected String[] fileProtocols = StringUtil.EMPTY_STRINGS;

    /**
     * Loads the web and file protocol prefixes from the current {@link FessConfig}.
     *
     * <p>Each configured value is a comma-separated list; blank entries are dropped, the
     * remaining entries are trimmed and suffixed with {@code ":"}.</p>
     */
    @PostConstruct
    public void init() {
        final FessConfig fessConfig = ComponentUtil.getFessConfig();
        webProtocols = parseProtocols(fessConfig.getCrawlerWebProtocols());
        fileProtocols = parseProtocols(fessConfig.getCrawlerFileProtocols());
        if (logger.isDebugEnabled()) {
            logger.debug("web protocols: {}", Arrays.toString(webProtocols));
            logger.debug("file protocols: {}", Arrays.toString(fileProtocols));
        }
    }

    /**
     * Returns the web protocol prefixes.
     *
     * @return the internal array itself (not a copy); callers should treat it as read-only
     */
    public String[] getWebProtocols() {
        return webProtocols;
    }

    /**
     * Returns the file protocol prefixes.
     *
     * @return the internal array itself (not a copy); callers should treat it as read-only
     */
    public String[] getFileProtocols() {
        return fileProtocols;
    }

    /**
     * Checks whether the URL starts with one of the registered web protocol prefixes.
     *
     * @param url the URL to check; {@code null} is treated as not valid
     * @return {@code true} if the URL starts with a registered web prefix
     */
    public boolean isValidWebProtocol(final String url) {
        return startsWithAny(url, webProtocols);
    }

    /**
     * Checks whether the URL starts with one of the registered file protocol prefixes.
     *
     * @param url the URL to check; {@code null} is treated as not valid
     * @return {@code true} if the URL starts with a registered file prefix
     */
    public boolean isValidFileProtocol(final String url) {
        return startsWithAny(url, fileProtocols);
    }

    /**
     * Registers an additional web protocol unless it is already present.
     *
     * @param protocol the protocol name without the trailing colon (for example {@code "httpx"});
     *        surrounding whitespace is removed, and a {@code null} or blank value is ignored
     */
    public void addWebProtocol(final String protocol) {
        if (StringUtil.isBlank(protocol)) {
            // A blank name would otherwise register an unusable prefix such as "null:" or " :".
            logger.warn("Ignoring blank web protocol.");
            return;
        }
        final String prefix = toPrefix(protocol);
        if (containsPrefix(webProtocols, prefix)) {
            logger.debug("web protocols contains {}.", protocol);
            return;
        }
        webProtocols = append(webProtocols, prefix);
    }

    /**
     * Registers an additional file protocol unless it is already present.
     *
     * @param protocol the protocol name without the trailing colon (for example {@code "smbx"});
     *        surrounding whitespace is removed, and a {@code null} or blank value is ignored
     */
    public void addFileProtocol(final String protocol) {
        if (StringUtil.isBlank(protocol)) {
            // A blank name would otherwise register an unusable prefix such as "null:" or " :".
            logger.warn("Ignoring blank file protocol.");
            return;
        }
        final String prefix = toPrefix(protocol);
        if (containsPrefix(fileProtocols, prefix)) {
            logger.debug("file protocols contains {}.", protocol);
            return;
        }
        fileProtocols = append(fileProtocols, prefix);
    }

    /** Turns a comma-separated protocol list into trimmed, colon-suffixed prefixes. */
    private static String[] parseProtocols(final String value) {
        return split(value, ",").get(s -> s.filter(StringUtil::isNotBlank).map(ProtocolHelper::toPrefix).toArray(String[]::new));
    }

    /** Builds the stored prefix form of a protocol name: trimmed name followed by ":". */
    private static String toPrefix(final String protocol) {
        return protocol.trim() + ":";
    }

    /** Returns true if the (non-null) URL starts with any of the given prefixes. */
    private static boolean startsWithAny(final String url, final String[] prefixes) {
        if (url == null) {
            return false;
        }
        return stream(prefixes).get(s -> s.anyMatch(p -> url.startsWith(p)));
    }

    /** Returns true if the given prefix is already contained in the array. */
    private static boolean containsPrefix(final String[] prefixes, final String prefix) {
        return stream(prefixes).get(s -> s.anyMatch(p -> p.equals(prefix)));
    }

    /** Returns a new array holding all existing prefixes followed by the new one. */
    private static String[] append(final String[] prefixes, final String prefix) {
        final String[] extended = Arrays.copyOf(prefixes, prefixes.length + 1);
        extended[extended.length - 1] = prefix;
        return extended;
    }
}
5 changes: 3 additions & 2 deletions src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ protected void doCrawl(final String sessionId, final List<WebConfig> webConfigLi

final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final ProtocolHelper protocolHelper = ComponentUtil.getProtocolHelper();

final long startTime = systemHelper.getCurrentTimeAsLong();

Expand Down Expand Up @@ -154,7 +155,7 @@ protected void doCrawl(final String sessionId, final List<WebConfig> webConfigLi

// set urls
split(urlsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(urlValue -> {
if (!urlValue.startsWith("#") && fessConfig.isValidCrawlerWebProtocol(urlValue)) {
if (!urlValue.startsWith("#") && protocolHelper.isValidWebProtocol(urlValue)) {
final String u = duplicateHostHelper.convert(urlValue);
crawler.addUrl(u);
if (logger.isInfoEnabled()) {
Expand Down Expand Up @@ -280,7 +281,7 @@ protected void doCrawl(final String sessionId, final List<WebConfig> webConfigLi
split(pathsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(urlValue -> {
if (!urlValue.startsWith("#")) {
final String u;
if (!fessConfig.isValidCrawlerFileProtocol(urlValue)) {
if (!protocolHelper.isValidFileProtocol(urlValue)) {
if (urlValue.startsWith("/")) {
u = "file:" + urlValue;
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1190,22 +1190,26 @@ default boolean isLdapAdminEnabled(final String username) {

String getCrawlerWebProtocols();

/**
 * Parses {@link #getCrawlerWebProtocols()} as a comma-separated list and returns the
 * non-blank entries, each trimmed and suffixed with {@code ":"}.
 *
 * @return the web protocol prefixes built from the configured value
 * @deprecated use {@code ProtocolHelper#getWebProtocols()} instead.
 */
@Deprecated
default String[] getCrawlerWebProtocolsAsArray() {
return split(getCrawlerWebProtocols(), ",")
.get(stream -> stream.filter(StringUtil::isNotBlank).map(s -> s.trim() + ":").toArray(n -> new String[n]));
}

/**
 * Checks whether the URL starts with one of the configured web protocol prefixes.
 *
 * @param url the URL to check
 * @return {@code true} if the URL starts with a configured web protocol prefix
 * @deprecated use {@code ProtocolHelper#isValidWebProtocol(String)} instead.
 */
@Deprecated
default boolean isValidCrawlerWebProtocol(final String url) {
return stream(getCrawlerWebProtocolsAsArray()).get(stream -> stream.anyMatch(s -> url.startsWith(s)));
}

String getCrawlerFileProtocols();

/**
 * Parses {@link #getCrawlerFileProtocols()} as a comma-separated list and returns the
 * non-blank entries, each trimmed and suffixed with {@code ":"}.
 *
 * @return the file protocol prefixes built from the configured value
 * @deprecated use {@code ProtocolHelper#getFileProtocols()} instead.
 */
@Deprecated
default String[] getCrawlerFileProtocolsAsArray() {
return split(getCrawlerFileProtocols(), ",")
.get(stream -> stream.filter(StringUtil::isNotBlank).map(s -> s.trim() + ":").toArray(n -> new String[n]));
}

/**
 * Checks whether the URL starts with one of the configured file protocol prefixes.
 *
 * @param url the URL to check
 * @return {@code true} if the URL starts with a configured file protocol prefix
 * @deprecated use {@code ProtocolHelper#isValidFileProtocol(String)} instead.
 */
@Deprecated
default boolean isValidCrawlerFileProtocol(final String url) {
return stream(getCrawlerFileProtocolsAsArray()).get(stream -> stream.anyMatch(s -> url.startsWith(s)));
}
Expand Down
7 changes: 7 additions & 0 deletions src/main/java/org/codelibs/fess/util/ComponentUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
import org.codelibs.fess.helper.PluginHelper;
import org.codelibs.fess.helper.PopularWordHelper;
import org.codelibs.fess.helper.ProcessHelper;
import org.codelibs.fess.helper.ProtocolHelper;
import org.codelibs.fess.helper.QueryHelper;
import org.codelibs.fess.helper.RelatedContentHelper;
import org.codelibs.fess.helper.RelatedQueryHelper;
Expand Down Expand Up @@ -216,6 +217,8 @@ public final class ComponentUtil {

private static final String RANK_FUSION_PROCESSOR = "rankFusionProcessor";

/** Component name used to look up the ProtocolHelper (same name as its component entry in fess.xml). */
private static final String PROTOCOL_HELPER = "protocolHelper";

private static IndexingHelper indexingHelper;

private static CrawlingConfigHelper crawlingConfigHelper;
Expand Down Expand Up @@ -521,6 +524,10 @@ public static RankFusionProcessor getRankFusionProcessor() {
return getComponent(RANK_FUSION_PROCESSOR);
}

/**
 * Looks up the ProtocolHelper component by its registered name ({@code PROTOCOL_HELPER}).
 *
 * @return the ProtocolHelper component
 */
public static ProtocolHelper getProtocolHelper() {
return getComponent(PROTOCOL_HELPER);
}

@SuppressWarnings("unchecked")
public static <T> T getComponent(final Class<T> clazz) {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ public class UriTypeValidator implements ConstraintValidator<UriType, String> {
@Override
public void initialize(final UriType uriType) {
protocols = switch (uriType.protocolType()) {
case WEB -> ComponentUtil.getFessConfig().getCrawlerWebProtocolsAsArray();
case FILE -> ComponentUtil.getFessConfig().getCrawlerFileProtocolsAsArray();
case WEB -> ComponentUtil.getProtocolHelper().getWebProtocols();
case FILE -> ComponentUtil.getProtocolHelper().getFileProtocols();
default -> throw new ConstraintDefinitionException("protocolType is emtpy.");
};
}
Expand Down
4 changes: 3 additions & 1 deletion src/main/resources/fess.xml
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@
</component>
<component name="pathMappingHelper" class="org.codelibs.fess.helper.PathMappingHelper">
</component>
<component name="permissionHelper" class="org.codelibs.fess.helper.PermissionHelper">
</component>
<component name="processHelper" class="org.codelibs.fess.helper.ProcessHelper">
</component>
<component name="permissionHelper" class="org.codelibs.fess.helper.PermissionHelper">
<component name="protocolHelper" class="org.codelibs.fess.helper.ProtocolHelper">
</component>
<component name="sambaHelper" class="org.codelibs.fess.helper.SambaHelper">
</component>
Expand Down
102 changes: 102 additions & 0 deletions src/test/java/org/codelibs/fess/helper/ProtocolHelperTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
* Copyright 2012-2024 CodeLibs Project and the Others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.helper;

import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.unit.UnitFessTestCase;
import org.codelibs.fess.util.ComponentUtil;

/**
 * Tests for {@link ProtocolHelper}: initial protocols loaded from the configuration and
 * protocols registered afterwards through the add methods.
 */
public class ProtocolHelperTest extends UnitFessTestCase {

    /** Adding a new web protocol extends only the web list and is ignored the second time. */
    public void test_add_httpx() {
        final ProtocolHelper helper = createInitializedHelper();
        assertProtocols(helper.getWebProtocols(), "http:", "https:");
        assertProtocols(helper.getFileProtocols(), "file:", "smb:");

        assertFalse(helper.isValidWebProtocol("httpx://test"));

        helper.addWebProtocol("httpx");
        assertProtocols(helper.getWebProtocols(), "http:", "https:", "httpx:");
        assertProtocols(helper.getFileProtocols(), "file:", "smb:");

        assertTrue(helper.isValidWebProtocol("httpx://test"));

        // registering the same protocol again must not change anything
        helper.addWebProtocol("httpx");
        assertEquals(3, helper.getWebProtocols().length);
        assertEquals(2, helper.getFileProtocols().length);
    }

    /** Adding a new file protocol extends only the file list and is ignored the second time. */
    public void test_add_smbx() {
        final ProtocolHelper helper = createInitializedHelper();
        assertProtocols(helper.getWebProtocols(), "http:", "https:");
        assertProtocols(helper.getFileProtocols(), "file:", "smb:");

        assertFalse(helper.isValidFileProtocol("smbx://test"));

        helper.addFileProtocol("smbx");
        assertProtocols(helper.getWebProtocols(), "http:", "https:");
        assertProtocols(helper.getFileProtocols(), "file:", "smb:", "smbx:");

        assertTrue(helper.isValidFileProtocol("smbx://test"));

        // registering the same protocol again must not change anything
        helper.addFileProtocol("smbx");
        assertEquals(2, helper.getWebProtocols().length);
        assertEquals(3, helper.getFileProtocols().length);
    }

    /** Installs a config with web "http,https" and file "file,smb", then returns an initialized helper. */
    private ProtocolHelper createInitializedHelper() {
        ComponentUtil.setFessConfig(new FessConfig.SimpleImpl() {
            @Override
            public String getCrawlerWebProtocols() {
                return "http,https";
            }

            @Override
            public String getCrawlerFileProtocols() {
                return "file,smb";
            }
        });

        final ProtocolHelper helper = new ProtocolHelper();
        helper.init();
        return helper;
    }

    /** Asserts the array length first and then every element in order. */
    private void assertProtocols(final String[] actual, final String... expected) {
        assertEquals(expected.length, actual.length);
        for (int i = 0; i < expected.length; i++) {
            assertEquals(expected[i], actual[i]);
        }
    }
}

0 comments on commit 10c84ab

Please sign in to comment.