diff --git a/src/main/java/org/codelibs/fess/helper/ProtocolHelper.java b/src/main/java/org/codelibs/fess/helper/ProtocolHelper.java
new file mode 100644
index 000000000..ae86271c0
--- /dev/null
+++ b/src/main/java/org/codelibs/fess/helper/ProtocolHelper.java
@@ -0,0 +1,143 @@
+/*
+ * Copyright 2012-2024 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.helper;
+
+import static org.codelibs.core.stream.StreamUtil.split;
+import static org.codelibs.core.stream.StreamUtil.stream;
+
+import java.util.Arrays;
+
+import javax.annotation.PostConstruct;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.codelibs.core.lang.StringUtil;
+import org.codelibs.fess.mylasta.direction.FessConfig;
+import org.codelibs.fess.util.ComponentUtil;
+
+public class ProtocolHelper {
+    private static final Logger logger = LogManager.getLogger(ProtocolHelper.class);
+
+    /** Web URL prefixes as built by init() and addWebProtocol(), each of the form "name:". */
+    protected String[] webProtocols = StringUtil.EMPTY_STRINGS;
+
+    /** File URL prefixes as built by init() and addFileProtocol(), each of the form "name:". */
+    protected String[] fileProtocols = StringUtil.EMPTY_STRINGS;
+
+    /**
+     * Loads the web and file protocol prefixes from the comma-separated
+     * crawler protocol settings of {@link FessConfig}.
+     */
+    @PostConstruct
+    public void init() {
+        final FessConfig fessConfig = ComponentUtil.getFessConfig();
+        webProtocols = toPrefixes(fessConfig.getCrawlerWebProtocols());
+        fileProtocols = toPrefixes(fessConfig.getCrawlerFileProtocols());
+        if (logger.isDebugEnabled()) {
+            logger.debug("web protocols: {}", Arrays.toString(webProtocols));
+            logger.debug("file protocols: {}", Arrays.toString(fileProtocols));
+        }
+    }
+
+    /**
+     * Returns the registered web protocol prefixes.
+     *
+     * @return the array held by this helper (not a copy); treat it as read-only
+     */
+    public String[] getWebProtocols() {
+        return webProtocols;
+    }
+
+    /**
+     * Returns the registered file protocol prefixes.
+     *
+     * @return the array held by this helper (not a copy); treat it as read-only
+     */
+    public String[] getFileProtocols() {
+        return fileProtocols;
+    }
+
+    /**
+     * Checks whether the URL starts with one of the registered web protocol prefixes.
+     *
+     * @param url the URL to check; {@code null} is treated as not valid
+     * @return {@code true} if the URL starts with a registered web prefix
+     */
+    public boolean isValidWebProtocol(final String url) {
+        return startsWithAny(url, webProtocols);
+    }
+
+    /**
+     * Checks whether the URL starts with one of the registered file protocol prefixes.
+     *
+     * @param url the URL to check; {@code null} is treated as not valid
+     * @return {@code true} if the URL starts with a registered file prefix
+     */
+    public boolean isValidFileProtocol(final String url) {
+        return startsWithAny(url, fileProtocols);
+    }
+
+    /**
+     * Registers an additional web protocol. Blank names and names that are
+     * already registered are ignored. The update is not synchronized.
+     *
+     * @param protocol the protocol name without the trailing colon, e.g. {@code "httpx"}
+     */
+    public void addWebProtocol(final String protocol) {
+        webProtocols = appendPrefix(webProtocols, protocol, "web");
+    }
+
+    /**
+     * Registers an additional file protocol. Blank names and names that are
+     * already registered are ignored. The update is not synchronized.
+     *
+     * @param protocol the protocol name without the trailing colon, e.g. {@code "smbx"}
+     */
+    public void addFileProtocol(final String protocol) {
+        fileProtocols = appendPrefix(fileProtocols, protocol, "file");
+    }
+
+    /** Splits a comma-separated protocol list into trimmed "name:" prefixes, skipping blank entries. */
+    private static String[] toPrefixes(final String protocols) {
+        return split(protocols, ",")
+                .get(stream -> stream.filter(StringUtil::isNotBlank).map(s -> s.trim() + ":").toArray(n -> new String[n]));
+    }
+
+    /** Returns true when url is non-null and starts with any of the given prefixes. */
+    private static boolean startsWithAny(final String url, final String[] prefixes) {
+        if (url == null) {
+            return false;
+        }
+        return stream(prefixes).get(stream -> stream.anyMatch(s -> url.startsWith(s)));
+    }
+
+    /** Returns prefixes extended by "protocol:", or the same array when protocol is blank or already present. */
+    private static String[] appendPrefix(final String[] prefixes, final String protocol, final String type) {
+        if (StringUtil.isBlank(protocol)) {
+            // A null or blank name would otherwise be stored as a meaningless prefix such as "null:" or ":".
+            logger.debug("{} protocol is blank.", type);
+            return prefixes;
+        }
+        // Trim so that added names are normalized the same way as the configured ones.
+        final String prefix = protocol.trim() + ":";
+        if (stream(prefixes).get(stream -> stream.anyMatch(s -> s.equals(prefix)))) {
+            logger.debug("{} protocols contains {}.", type, protocol);
+            return prefixes;
+        }
+        final String[] extended = Arrays.copyOf(prefixes, prefixes.length + 1);
+        extended[prefixes.length] = prefix;
+        return extended;
+    }
+}
diff --git a/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java b/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java index 8e264aaea..e9849e57c 100644 --- a/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java +++ b/src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java @@ -93,6 +93,7 @@ protected void doCrawl(final String sessionId, final List webConfigLi final SystemHelper systemHelper = ComponentUtil.getSystemHelper(); final FessConfig fessConfig = ComponentUtil.getFessConfig(); + final
ProtocolHelper protocolHelper = ComponentUtil.getProtocolHelper(); final long startTime = systemHelper.getCurrentTimeAsLong(); @@ -154,7 +155,7 @@ protected void doCrawl(final String sessionId, final List webConfigLi // set urls split(urlsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(urlValue -> { - if (!urlValue.startsWith("#") && fessConfig.isValidCrawlerWebProtocol(urlValue)) { + if (!urlValue.startsWith("#") && protocolHelper.isValidWebProtocol(urlValue)) { final String u = duplicateHostHelper.convert(urlValue); crawler.addUrl(u); if (logger.isInfoEnabled()) { @@ -280,7 +281,7 @@ protected void doCrawl(final String sessionId, final List webConfigLi split(pathsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(urlValue -> { if (!urlValue.startsWith("#")) { final String u; - if (!fessConfig.isValidCrawlerFileProtocol(urlValue)) { + if (!protocolHelper.isValidFileProtocol(urlValue)) { if (urlValue.startsWith("/")) { u = "file:" + urlValue; } else { diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java index dcfc274d8..c23ddeedf 100644 --- a/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java +++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java @@ -1190,22 +1190,26 @@ default boolean isLdapAdminEnabled(final String username) { String getCrawlerWebProtocols(); + @Deprecated default String[] getCrawlerWebProtocolsAsArray() { return split(getCrawlerWebProtocols(), ",") .get(stream -> stream.filter(StringUtil::isNotBlank).map(s -> s.trim() + ":").toArray(n -> new String[n])); } + @Deprecated default boolean isValidCrawlerWebProtocol(final String url) { return stream(getCrawlerWebProtocolsAsArray()).get(stream -> stream.anyMatch(s -> url.startsWith(s))); } String getCrawlerFileProtocols(); + @Deprecated default String[] 
getCrawlerFileProtocolsAsArray() { return split(getCrawlerFileProtocols(), ",") .get(stream -> stream.filter(StringUtil::isNotBlank).map(s -> s.trim() + ":").toArray(n -> new String[n])); } + @Deprecated default boolean isValidCrawlerFileProtocol(final String url) { return stream(getCrawlerFileProtocolsAsArray()).get(stream -> stream.anyMatch(s -> url.startsWith(s))); } diff --git a/src/main/java/org/codelibs/fess/util/ComponentUtil.java b/src/main/java/org/codelibs/fess/util/ComponentUtil.java index e2ef4287a..6a87dcf59 100644 --- a/src/main/java/org/codelibs/fess/util/ComponentUtil.java +++ b/src/main/java/org/codelibs/fess/util/ComponentUtil.java @@ -58,6 +58,7 @@ import org.codelibs.fess.helper.PluginHelper; import org.codelibs.fess.helper.PopularWordHelper; import org.codelibs.fess.helper.ProcessHelper; +import org.codelibs.fess.helper.ProtocolHelper; import org.codelibs.fess.helper.QueryHelper; import org.codelibs.fess.helper.RelatedContentHelper; import org.codelibs.fess.helper.RelatedQueryHelper; @@ -216,6 +217,8 @@ public final class ComponentUtil { private static final String RANK_FUSION_PROCESSOR = "rankFusionProcessor"; + private static final String PROTOCOL_HELPER = "protocolHelper"; + private static IndexingHelper indexingHelper; private static CrawlingConfigHelper crawlingConfigHelper; @@ -521,6 +524,10 @@ public static RankFusionProcessor getRankFusionProcessor() { return getComponent(RANK_FUSION_PROCESSOR); } + public static ProtocolHelper getProtocolHelper() { + return getComponent(PROTOCOL_HELPER); + } + @SuppressWarnings("unchecked") public static T getComponent(final Class clazz) { try { diff --git a/src/main/java/org/codelibs/fess/validation/UriTypeValidator.java b/src/main/java/org/codelibs/fess/validation/UriTypeValidator.java index b5e63f518..67fb2cf05 100644 --- a/src/main/java/org/codelibs/fess/validation/UriTypeValidator.java +++ b/src/main/java/org/codelibs/fess/validation/UriTypeValidator.java @@ -28,8 +28,8 @@ public class 
UriTypeValidator implements ConstraintValidator { @Override public void initialize(final UriType uriType) { protocols = switch (uriType.protocolType()) { - case WEB -> ComponentUtil.getFessConfig().getCrawlerWebProtocolsAsArray(); - case FILE -> ComponentUtil.getFessConfig().getCrawlerFileProtocolsAsArray(); + case WEB -> ComponentUtil.getProtocolHelper().getWebProtocols(); + case FILE -> ComponentUtil.getProtocolHelper().getFileProtocols(); default -> throw new ConstraintDefinitionException("protocolType is emtpy."); }; } diff --git a/src/main/resources/fess.xml b/src/main/resources/fess.xml index 4c1d7753f..310b2b420 100644 --- a/src/main/resources/fess.xml +++ b/src/main/resources/fess.xml @@ -36,9 +36,11 @@ + + - +
diff --git a/src/test/java/org/codelibs/fess/helper/ProtocolHelperTest.java b/src/test/java/org/codelibs/fess/helper/ProtocolHelperTest.java
new file mode 100644
index 000000000..ba7812690
--- /dev/null
+++ b/src/test/java/org/codelibs/fess/helper/ProtocolHelperTest.java
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2012-2024 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.helper;
+
+import org.codelibs.fess.mylasta.direction.FessConfig;
+import org.codelibs.fess.unit.UnitFessTestCase;
+import org.codelibs.fess.util.ComponentUtil;
+
+public class ProtocolHelperTest extends UnitFessTestCase {
+    /**
+     * Registering a new web protocol extends only the web list, makes matching
+     * URLs valid, and registering it a second time changes nothing.
+     */
+    public void test_add_httpx() {
+        final ProtocolHelper helper = newHelper();
+        assertPrefixes(helper.getWebProtocols(), "http:", "https:");
+        assertPrefixes(helper.getFileProtocols(), "file:", "smb:");
+
+        assertFalse(helper.isValidWebProtocol("httpx://test"));
+
+        helper.addWebProtocol("httpx");
+        assertPrefixes(helper.getWebProtocols(), "http:", "https:", "httpx:");
+        assertPrefixes(helper.getFileProtocols(), "file:", "smb:");
+
+        assertTrue(helper.isValidWebProtocol("httpx://test"));
+
+        helper.addWebProtocol("httpx");
+        assertEquals(3, helper.getWebProtocols().length);
+        assertEquals(2, helper.getFileProtocols().length);
+    }
+
+    /**
+     * Registering a new file protocol extends only the file list, makes matching
+     * URLs valid, and registering it a second time changes nothing.
+     */
+    public void test_add_smbx() {
+        final ProtocolHelper helper = newHelper();
+        assertPrefixes(helper.getWebProtocols(), "http:", "https:");
+        assertPrefixes(helper.getFileProtocols(), "file:", "smb:");
+
+        assertFalse(helper.isValidFileProtocol("smbx://test"));
+
+        helper.addFileProtocol("smbx");
+        assertPrefixes(helper.getWebProtocols(), "http:", "https:");
+        assertPrefixes(helper.getFileProtocols(), "file:", "smb:", "smbx:");
+
+        assertTrue(helper.isValidFileProtocol("smbx://test"));
+
+        helper.addFileProtocol("smbx");
+        assertEquals(2, helper.getWebProtocols().length);
+        assertEquals(3, helper.getFileProtocols().length);
+    }
+
+    /** Installs a config with "http,https" and "file,smb", then returns a helper initialized from it. */
+    private ProtocolHelper newHelper() {
+        ComponentUtil.setFessConfig(new FessConfig.SimpleImpl() {
+            @Override
+            public String getCrawlerWebProtocols() {
+                return "http,https";
+            }
+
+            @Override
+            public String getCrawlerFileProtocols() {
+                return "file,smb";
+            }
+        });
+        final ProtocolHelper helper = new ProtocolHelper();
+        helper.init();
+        return helper;
+    }
+
+    /** Asserts that the array holds exactly the expected prefixes, in order. */
+    private void assertPrefixes(final String[] actual, final String... expected) {
+        assertEquals(expected.length, actual.length);
+        for (int i = 0; i < expected.length; i++) {
+            assertEquals(expected[i], actual[i]);
+        }
+    }
+}