Skip to content

Commit

Permalink
Webscraper: Support srcset
Browse files: browse the repository at this point in the history
  • Loading branch information
cjmalloy committed Jul 17, 2023
1 parent 0b59a75 commit 8fdbe8c
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 2 deletions.
1 change: 1 addition & 0 deletions src/main/java/jasper/component/Sanitizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ public class Sanitizer {
};

private final String[] IMG_ATTRS = {
"srcset",
"style",
"width",
"height"
Expand Down
28 changes: 26 additions & 2 deletions src/main/java/jasper/component/WebScraper.java
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,21 @@ public Ref web(String url) throws IOException, URISyntaxException {
self.scrapeAsync(src);
addPluginUrl(result, "plugin/image", getImage(src));
addThumbnailUrl(result, getThumbnail(src));
} else {
} else if (image.hasAttr("data-srcset")){
var srcset = image.absUrl("data-srcset").split(",");
var src = srcset[srcset.length - 1].split(" ")[0];
self.scrapeAsync(src);
addPluginUrl(result, "plugin/image", getImage(src));
addThumbnailUrl(result, getThumbnail(src));
image.parent().remove();
} else if (image.hasAttr("srcset")){
var srcset = image.absUrl("srcset").split(",");
var src = srcset[srcset.length - 1].split(" ")[0];
self.scrapeAsync(src);
addPluginUrl(result, "plugin/image", getImage(src));
addThumbnailUrl(result, getThumbnail(src));
image.parent().remove();
} else if (image.hasAttr("src")){
var src = image.absUrl("src");
self.scrapeAsync(src);
addPluginUrl(result, "plugin/image", getImage(src));
Expand All @@ -419,7 +433,17 @@ public Ref web(String url) throws IOException, URISyntaxException {
var src = thumbnail.absUrl("href");
self.scrapeAsync(src);
addThumbnailUrl(result, getThumbnail(src));
} else if (thumbnail.hasAttr("src")) {
} else if (thumbnail.hasAttr("data-srcset")){
var srcset = thumbnail.absUrl("data-srcset").split(",");
var src = srcset[srcset.length - 1].split(" ")[0];
self.scrapeAsync(src);
addThumbnailUrl(result, getThumbnail(src));
} else if (thumbnail.hasAttr("srcset")){
var srcset = thumbnail.absUrl("srcset").split(",");
var src = srcset[srcset.length - 1].split(" ")[0];
self.scrapeAsync(src);
addThumbnailUrl(result, getThumbnail(src));
} else if (thumbnail.hasAttr("src")){
var src = thumbnail.absUrl("src");
self.scrapeAsync(src);
addThumbnailUrl(result, getThumbnail(src));
Expand Down

0 comments on commit 8fdbe8c

Please sign in to comment.