Skip to content

Commit

Permalink
fix: links in the post could be relative links (#47)
Browse files Browse the repository at this point in the history
  • Loading branch information
guqing authored Dec 16, 2024
1 parent 80b1922 commit 3db0428
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 2 deletions.
121 changes: 121 additions & 0 deletions app/src/main/java/run/halo/feed/RelativeLinkProcessor.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
package run.halo.feed;

import com.google.common.base.Throwables;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.util.Assert;
import org.springframework.web.util.UriComponentsBuilder;
import org.springframework.web.util.UriUtils;
import run.halo.app.core.attachment.ThumbnailSize;
import run.halo.app.infra.utils.PathUtils;
import run.halo.feed.telemetry.TelemetryEndpoint;

import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.Locale;

/**
 * Rewrites relative links inside an HTML fragment into links rooted at a configured
 * external URL.
 *
 * <p>Image sources are additionally routed through the thumbnail endpoint, unless they
 * point at the telemetry path. Links that are already absolute are left untouched.
 */
@Slf4j
public class RelativeLinkProcessor {
    /** Path of the thumbnail endpoint that image sources are routed through. */
    private static final String THUMBNAIL_ENDPOINT =
        "/apis/api.storage.halo.run/v1alpha1/thumbnails/-/via-uri";

    /** Elements whose {@code href} attribute is rewritten. */
    private static final String HREF_SELECTOR = "a[href], link[href]";

    /** Non-image elements whose {@code src} attribute is rewritten. */
    private static final String SRC_SELECTOR = "source[src], video[src], audio[src], "
        + "script[src], iframe[src], frame[src], embed[src]";

    private final URI externalUri;

    /**
     * Creates a processor that resolves relative links against {@code externalUrl}.
     *
     * @param externalUrl the base URL; must not be {@code null}
     * @throws IllegalArgumentException if {@code externalUrl} is {@code null} or is not
     *     a syntactically valid URI
     */
    public RelativeLinkProcessor(String externalUrl) {
        Assert.notNull(externalUrl, "External URL must not be null");
        this.externalUri = URI.create(externalUrl);
    }

    /**
     * Rewrites the relative links in {@code html}.
     *
     * <p>This is best-effort: if processing fails for any reason the failure is logged
     * and the input is returned unchanged.
     *
     * @param html the HTML fragment to process; blank or {@code null} input is returned
     *     as-is
     * @return the processed fragment, or the original input when processing failed
     */
    public String processForHtml(String html) {
        if (StringUtils.isBlank(html)) {
            return html;
        }
        try {
            return doProcessForHtml(html);
        } catch (Throwable e) {
            log.warn("Failed to process relative links for HTML", Throwables.getRootCause(e));
        }
        return html;
    }

    /** Parses the fragment, rewrites every supported link attribute and serializes it. */
    private String doProcessForHtml(String html) {
        // Parse as a body fragment: a full-document parse moves leading <link>/<script>
        // elements into <head>, and they would then be missing from body().html().
        var document = Jsoup.parseBodyFragment(html);

        processElementAttr(document.select(HREF_SELECTOR), "href", false);
        processElementAttr(document.select("img[src]"), "src", true);
        processElementAttr(document.select(SRC_SELECTOR), "src", false);

        return document.body().html();
    }

    /**
     * Rewrites {@code attrKey} on each element.
     *
     * @param elements the elements to update
     * @param attrKey the attribute holding the link
     * @param canThumb whether the link may be routed through the thumbnail endpoint
     */
    private void processElementAttr(Elements elements, String attrKey, boolean canThumb) {
        for (Element element : elements) {
            var original = element.attr(attrKey);
            try {
                var rewritten = canThumb && isNotTelemetryLink(original)
                    ? genThumbUrl(original, ThumbnailSize.M)
                    : processLink(original);
                element.attr(attrKey, rewritten);
            } catch (RuntimeException e) {
                // A single malformed link must not abort processing of the whole
                // document; leave this attribute as it was and continue.
                log.debug("Skipping unprocessable {} value [{}]", attrKey, original, e);
            }
        }
    }

    /**
     * Tells whether {@code uri} does not contain the telemetry path.
     *
     * @param uri the link to check
     * @return {@code true} if {@code uri} is non-null and does not contain the telemetry
     *     path
     */
    boolean isNotTelemetryLink(String uri) {
        return uri != null && !uri.contains(TelemetryEndpoint.TELEMETRY_PATH);
    }

    /** Builds the absolute thumbnail-endpoint URL for {@code url} at the given size. */
    private String genThumbUrl(String url, ThumbnailSize size) {
        return processLink(THUMBNAIL_ENDPOINT
            + "?uri=" + UriUtils.encode(url, StandardCharsets.UTF_8)
            // Locale.ROOT keeps the lower-casing independent of the JVM default locale.
            + "&size=" + size.name().toLowerCase(Locale.ROOT));
    }

    /**
     * Resolves {@code link} against the external URL.
     *
     * @param link the link to resolve
     * @return {@code link} unchanged when it is blank or already absolute; otherwise the
     *     link rooted at the external URL
     * @throws IllegalArgumentException if {@code link} is not a syntactically valid URI
     */
    private String processLink(String link) {
        if (StringUtils.isBlank(link) || PathUtils.isAbsoluteUri(link)) {
            return link;
        }
        if (link.startsWith("//")) {
            // Network-path reference: it already names its own host, so only the
            // scheme is missing. Resolving it as a path would replace that host.
            var scheme = externalUri.getScheme();
            return scheme == null ? link : scheme + ":" + link;
        }
        var contextPath = StringUtils.defaultIfBlank(externalUri.getPath(), "/");
        var linkUri = UriComponentsBuilder.fromUriString(URI.create(link).toASCIIString())
            .build(true);
        var builder = UriComponentsBuilder.fromUriString(externalUri.toString());
        if (shouldAppendPath(contextPath, linkUri.getPath())) {
            builder.pathSegment(linkUri.getPathSegments().toArray(new String[0]));
        } else {
            builder.replacePath(linkUri.getPath());
        }
        return builder.query(linkUri.getQuery())
            .fragment(linkUri.getFragment())
            .build(true)
            .toUri()
            .toString();
    }

    /**
     * Decides whether the link's path must be appended to the context path rather than
     * replace it.
     *
     * @param contextPath the path of the external URL, {@code "/"} when there is none
     * @param linkPath the path component of the link, possibly {@code null}
     * @return {@code true} if there is a non-root context path and the link is not
     *     already located under it
     */
    private static boolean shouldAppendPath(String contextPath, String linkPath) {
        var base = StringUtils.removeEnd(contextPath, "/");
        if (base.isEmpty()) {
            // Root context: nothing to preserve.
            return false;
        }
        if (linkPath == null) {
            return true;
        }
        // Compare on a segment boundary so that "/blog" does not match "/blogging".
        return !(linkPath.equals(base) || linkPath.startsWith(base + "/"));
    }
}
7 changes: 7 additions & 0 deletions app/src/main/java/run/halo/feed/RssXmlBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,13 @@ private void createItemElementToChannel(Element channel, RSS2.Item item) {
itemElement.addElement("link").addText(item.getLink());

var description = Optional.of(getDescriptionWithTelemetry(item))
.map(content -> {
if (externalUrl != null) {
return new RelativeLinkProcessor(externalUrl)
.processForHtml(content);
}
return content;
})
.map(XmlCharUtils::removeInvalidXmlChar)
.orElseThrow();
itemElement.addElement("description").addCDATA(description);
Expand Down
4 changes: 2 additions & 2 deletions app/src/test/java/run/halo/feed/RSS2Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ void invalidCharTest() {
RSS2.Item.builder()
.title("title1")
.description("""
<p>并且会保留处理后的图片以供后面的访问。</p>
<p>&并且会保留处理后的图片以供后面的访问。</p>
""")
.link("link1")
.pubDate(Instant.EPOCH)
Expand Down Expand Up @@ -175,7 +175,7 @@ void invalidCharTest() {
</title>
<link>link1</link>
<description>
<![CDATA[<p>并且会保留处理后的图片以供后面的访问。</p>]]>
<![CDATA[<p>&并且会保留处理后的图片以供后面的访问。</p>]]>
</description>
<guid isPermaLink="false">guid1</guid>
<pubDate>Thu, 1 Jan 1970 00:00:00 GMT</pubDate>
Expand Down
39 changes: 39 additions & 0 deletions app/src/test/java/run/halo/feed/RelativeLinkProcessorTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package run.halo.feed;

import org.junit.jupiter.api.Test;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Unit tests for {@link RelativeLinkProcessor#processForHtml(String)}, run against the
 * external URL {@code http://localhost:8090}.
 *
 * @author guqing
 * @since 1.4.1
 */
class RelativeLinkProcessorTest {
    private final RelativeLinkProcessor linkProcessor =
        new RelativeLinkProcessor("http://localhost:8090");

    /** Plain text without any markup is returned unchanged. */
    @Test
    void textContent() {
        var plainText = "hello world";

        assertThat(linkProcessor.processForHtml(plainText)).isEqualTo(plainText);
    }

    /** A root-relative anchor {@code href} is prefixed with the external URL. */
    @Test
    void testProcessForHtmlIncludeATag() {
        var expected = "<a href=\"http://localhost:8090/hello\">hello</a>";

        var actual = linkProcessor.processForHtml("<a href=\"/hello\">hello</a>");

        assertThat(actual).isEqualTo(expected);
    }

    /** An image {@code src} is rewritten to an absolute thumbnail-endpoint URL. */
    @Test
    void processForHtmlIncludeImgTag() {
        var expected = "<img src=\"http://localhost:8090/apis/api.storage.halo"
            + ".run/v1alpha1/thumbnails/-/via-uri?uri=%2Fhello.jpg&amp;size=m\">";

        var actual = linkProcessor.processForHtml("<img src=\"/hello.jpg\"/>");

        assertThat(actual).isEqualTo(expected);
    }
}

0 comments on commit 3db0428

Please sign in to comment.