Skip to content

Commit

Permalink
Merge pull request #60 from alecsmrekar/decode-html
Browse files Browse the repository at this point in the history
Decode the HTML before loading static assets
  • Loading branch information
jeremyandrews authored Dec 14, 2023
2 parents d8c2078 + 5026074 commit a749ad4
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 2 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ license = "Apache-2.0"

[dependencies]
goose = { version = "0.17", default-features = false }
html-escape = "0.2"
http = "0.2"
log = "0.4"
rand = "0.8"
Expand Down
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -993,7 +993,7 @@ pub async fn get_src_elements(user: &mut GooseUser, html: &str) -> Vec<String> {
// @TODO: parse HTML5 srcset= also
let src_elements = Regex::new(r#"(?i)src="(.*?)""#).unwrap();
let mut elements: Vec<String> = Vec::new();
for url in src_elements.captures_iter(html) {
for url in src_elements.captures_iter(html_escape::decode_html_entities(html).as_ref()) {
if valid_local_uri(user, &url[1]) {
elements.push(url[1].to_string());
}
Expand All @@ -1010,7 +1010,7 @@ pub async fn get_css_elements(user: &mut GooseUser, html: &str) -> Vec<String> {
// <foo> is the URL to local css assets.
let css = Regex::new(r#"(?i)href="(.*?\.css.*?)""#).unwrap();
let mut elements: Vec<String> = Vec::new();
for url in css.captures_iter(html) {
for url in css.captures_iter(html_escape::decode_html_entities(html).as_ref()) {
if valid_local_uri(user, &url[1]) {
elements.push(url[1].to_string());
}
Expand Down
53 changes: 53 additions & 0 deletions tests/parse.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
use gumdrop::Options;
use httpmock::{Method::GET, MockServer};

use goose::config::GooseConfiguration;
use goose::goose::get_base_url;
use goose::metrics::GooseCoordinatedOmissionMitigation::Disabled;
use goose::prelude::*;
use goose_eggs::load_static_elements;

#[tokio::test]
// Serves a page whose script URLs contain both an HTML-encoded (`&amp;`) and a
// plain (`&`) query-string separator, then verifies that loading the page's
// static elements requests each script exactly once with both query
// parameters intact.
async fn test_html_decoding() {
    // Stand up the mock server first; the page fixture below is independent of it.
    let server = MockServer::start();

    // Expected request for the script whose URL is written with `&amp;` in the page.
    let encoded_script_mock = server.mock(|when, then| {
        when.method(GET)
            .path("/test1.js")
            .query_param("foo", "1")
            .query_param("bar", "2");
        then.status(200).body("test");
    });
    // Expected request for the script whose URL is written with a bare `&`.
    let plain_script_mock = server.mock(|when, then| {
        when.method(GET)
            .path("/test2.js")
            .query_param("foo", "1")
            .query_param("bar", "2");
        then.status(200).body("test");
    });

    // Page fixture: two <script> tags that differ only in how `&` is written.
    let page: &str = r#"
<!DOCTYPE html>
<head>
<!-- Check that encoded paths are decoded properly -->
<script type="text/javascript" src="/test1.js?foo=1&amp;bar=2"></script>
<!-- Check that decoded paths still work -->
<script type="text/javascript" src="/test2.js?foo=1&bar=2"></script>
<title>Title 1234ABCD</title>
</head>
<body>
<p>Test text on the page.</p>
</body>
"#;

    // Build a default Goose configuration (no CLI arguments) with coordinated
    // omission mitigation disabled, and a user pointed at the mock server.
    let no_args: Vec<&str> = Vec::new();
    let mut goose_config = GooseConfiguration::parse_args_default(&no_args).unwrap();
    goose_config.co_mitigation = Some(Disabled);
    let server_url = get_base_url(Some(server.base_url()), None, None).unwrap();
    let mut user =
        GooseUser::new(0, String::new(), server_url, &goose_config, 0, None).unwrap();

    load_static_elements(&mut user, page).await;

    // Each script endpoint must have been requested exactly once.
    assert_eq!(encoded_script_mock.hits(), 1);
    assert_eq!(plain_script_mock.hits(), 1);
}

0 comments on commit a749ad4

Please sign in to comment.