From 3db0428419051fd11cfbd1c4ceabf1018cbad6f2 Mon Sep 17 00:00:00 2001 From: guqing <38999863+guqing@users.noreply.github.com> Date: Mon, 16 Dec 2024 16:39:31 +0800 Subject: [PATCH] fix: links in the post could be relative links (#47) --- .../run/halo/feed/RelativeLinkProcessor.java | 121 ++++++++++++++++++ .../java/run/halo/feed/RssXmlBuilder.java | 7 + app/src/test/java/run/halo/feed/RSS2Test.java | 4 +- .../halo/feed/RelativeLinkProcessorTest.java | 39 ++++++ 4 files changed, 169 insertions(+), 2 deletions(-) create mode 100644 app/src/main/java/run/halo/feed/RelativeLinkProcessor.java create mode 100644 app/src/test/java/run/halo/feed/RelativeLinkProcessorTest.java diff --git a/app/src/main/java/run/halo/feed/RelativeLinkProcessor.java b/app/src/main/java/run/halo/feed/RelativeLinkProcessor.java new file mode 100644 index 0000000..9a79305 --- /dev/null +++ b/app/src/main/java/run/halo/feed/RelativeLinkProcessor.java @@ -0,0 +1,121 @@ +package run.halo.feed; + +import com.google.common.base.Throwables; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.springframework.util.Assert; +import org.springframework.web.util.UriComponentsBuilder; +import org.springframework.web.util.UriUtils; +import run.halo.app.core.attachment.ThumbnailSize; +import run.halo.app.infra.utils.PathUtils; +import run.halo.feed.telemetry.TelemetryEndpoint; + +import java.net.URI; +import java.nio.charset.StandardCharsets; + +@Slf4j +public class RelativeLinkProcessor { + private final URI externalUri; + + public RelativeLinkProcessor(String externalUrl) { + Assert.notNull(externalUrl, "External URL must not be null"); + this.externalUri = URI.create(externalUrl); + } + + public String processForHtml(String html) { + try { + return doProcessForHtml(html); + } catch (Throwable e) { + log.warn("Failed to process relative links for HTML", Throwables.getRootCause(e)); + } + return html; + } + + private String doProcessForHtml(String html) { + var document = Jsoup.parse(html); + + // Process all links + var links = document.select("a[href]"); + processElementAttr(links, "href", false); + // process all images + var images = document.select("img[src]"); + processElementAttr(images, "src", true); + // video/audio source src + var sources = document.select("source[src]"); + processElementAttr(sources, "src", false); + // video src + var videos = document.select("video[src]"); + processElementAttr(videos, "src", false); + // link href + var linksHref = document.select("link[href]"); + processElementAttr(linksHref, "href", false); + // script src + var scripts = document.select("script[src]"); + processElementAttr(scripts, "src", false); + // iframe src + var iframes = document.select("iframe[src]"); + processElementAttr(iframes, "src", false); + // frame src + var frames = document.select("frame[src]"); + processElementAttr(frames, "src", false); + // embed src + var embeds = document.select("embed[src]"); + processElementAttr(embeds, "src", false); + + return document.body().html(); +// var outputHtml = document.body().html(); +// return StringEscapeUtils.unescapeHtml4(outputHtml); + } + + private void processElementAttr(Elements elements, String attrKey, boolean canThumb) { + for (Element link : elements) { + String src = link.attr(attrKey); + if (canThumb && isNotTelemetryLink(src)) { + var thumb = genThumbUrl(src, ThumbnailSize.M); + var absoluteUrl = processLink(thumb); + link.attr(attrKey, absoluteUrl); + } else { + var absoluteUrl = processLink(src); + link.attr(attrKey, absoluteUrl); + } + } + } + + boolean isNotTelemetryLink(String uri) { + return uri != null && !uri.contains(TelemetryEndpoint.TELEMETRY_PATH); + } + + private String genThumbUrl(String url, ThumbnailSize size) { + return processLink("/apis/api.storage.halo.run/v1alpha1/thumbnails/-/via-uri?uri=" + + UriUtils.encode(url, StandardCharsets.UTF_8) + + "&size=" + size.name().toLowerCase() + ); + } + + private String processLink(String link) { + if (StringUtils.isBlank(link) || PathUtils.isAbsoluteUri(link)) { + return link; + } + var contextPath = StringUtils.defaultIfBlank(externalUri.getPath(), "/"); + var linkUri = UriComponentsBuilder.fromUriString(URI.create(link).toASCIIString()) + .build(true); + var builder = UriComponentsBuilder.fromUriString(externalUri.toString()); + if (shouldAppendPath(contextPath, link)) { + builder.pathSegment(linkUri.getPathSegments().toArray(new String[0])); + } else { + builder.replacePath(linkUri.getPath()); + } + return builder.query(linkUri.getQuery()) + .fragment(linkUri.getFragment()) + .build(true) + .toUri() + .toString(); + } + + private static boolean shouldAppendPath(String contextPath, String link) { + return !"/".equals(contextPath) && !link.startsWith(contextPath); + } +} diff --git a/app/src/main/java/run/halo/feed/RssXmlBuilder.java b/app/src/main/java/run/halo/feed/RssXmlBuilder.java index 58d05b0..f555634 100644 --- a/app/src/main/java/run/halo/feed/RssXmlBuilder.java +++ b/app/src/main/java/run/halo/feed/RssXmlBuilder.java @@ -152,6 +152,13 @@ private void createItemElementToChannel(Element channel, RSS2.Item item) { itemElement.addElement("link").addText(item.getLink()); var description = Optional.of(getDescriptionWithTelemetry(item)) + .map(content -> { + if (externalUrl != null) { + return new RelativeLinkProcessor(externalUrl) + .processForHtml(content); + } + return content; + }) .map(XmlCharUtils::removeInvalidXmlChar) .orElseThrow(); itemElement.addElement("description").addCDATA(description); diff --git a/app/src/test/java/run/halo/feed/RSS2Test.java b/app/src/test/java/run/halo/feed/RSS2Test.java index c350485..34271d5 100644 --- a/app/src/test/java/run/halo/feed/RSS2Test.java +++ b/app/src/test/java/run/halo/feed/RSS2Test.java @@ -142,7 +142,7 @@ void invalidCharTest() { RSS2.Item.builder() .title("title1") .description(""" -

并且会保留处理后的图片以供后面的访问。

+

&并且会保留处理后的图片以供后面的访问。

""") .link("link1") .pubDate(Instant.EPOCH) @@ -175,7 +175,7 @@ void invalidCharTest() { link1 - 并且会保留处理后的图片以供后面的访问。

]]> + &并且会保留处理后的图片以供后面的访问。

]]>
guid1 Thu, 1 Jan 1970 00:00:00 GMT diff --git a/app/src/test/java/run/halo/feed/RelativeLinkProcessorTest.java b/app/src/test/java/run/halo/feed/RelativeLinkProcessorTest.java new file mode 100644 index 0000000..06f58d4 --- /dev/null +++ b/app/src/test/java/run/halo/feed/RelativeLinkProcessorTest.java @@ -0,0 +1,39 @@ +package run.halo.feed; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Tests for {@link RelativeLinkProcessor}. + * + * @author guqing + * @since 1.4.1 + */ +class RelativeLinkProcessorTest { + private final RelativeLinkProcessor linkProcessor = + new RelativeLinkProcessor("http://localhost:8090"); + + @Test + void textContent() { + var content = "hello world"; + var processed = linkProcessor.processForHtml(content); + assertThat(processed).isEqualTo(content); + } + + @Test + void testProcessForHtmlIncludeATag() { + var content = "hello"; + var processed = linkProcessor.processForHtml(content); + assertThat(processed).isEqualTo("hello"); + } + + @Test + void processForHtmlIncludeImgTag() { + var content = ""; + var processed = linkProcessor.processForHtml(content); + assertThat(processed).isEqualTo( + ""); + } +}