Skip to content

Commit

Permalink
Merge pull request #55 from gjtorikian/all-protocols
Browse files Browse the repository at this point in the history
Add support for :all protocols
  • Loading branch information
gjtorikian authored Jun 7, 2024
2 parents bae6c24 + 02a4144 commit 660fc24
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 17 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ attributes: {

# URL handling protocols to allow in specific attributes. By default, no
# protocols are allowed. Use :relative in place of a protocol if you want
# to allow relative URLs sans protocol. Set an attribute's value to `:all`
# (the bare symbol, not inside an array) to allow any protocol.
protocols: {
"a" => { "href" => ["http", "https", "mailto", :relative] },
"img" => { "href" => ["http", "https"] },
Expand Down
33 changes: 20 additions & 13 deletions ext/selma/src/sanitizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,20 +211,23 @@ impl SelmaSanitizer {
}
Some(protocol_list) => protocol_list.push(allowed_protocol.to_string()),
}
} else if allowed_protocol.is_kind_of(class::symbol())
&& allowed_protocol.inspect() == ":relative"
{
match protocol_list {
None => {
protocol_sanitizers.insert(
attr_name.to_string(),
vec!["#".to_string(), "/".to_string()],
);
}
Some(protocol_list) => {
protocol_list.push("#".to_string());
protocol_list.push("/".to_string());
} else if allowed_protocol.is_kind_of(class::symbol()) {
let protocol_config = allowed_protocol.inspect();
if protocol_config == ":relative" {
match protocol_list {
None => {
protocol_sanitizers.insert(
attr_name.to_string(),
vec!["#".to_string(), "/".to_string()],
);
}
Some(protocol_list) => {
protocol_list.push("#".to_string());
protocol_list.push("/".to_string());
}
}
} else if protocol_config == ":all" {
protocol_sanitizers.insert(attr_name.to_string(), vec!["all".to_string()]);
}
}
}
Expand Down Expand Up @@ -388,6 +391,10 @@ impl SelmaSanitizer {
}

fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &String) -> bool {
if protocols_allowed.contains(&"all".to_string()) {
return true;
}

// FIXME: is there a more idiomatic way to do this?
let mut pos: usize = 0;
let mut chars = attr_val.chars();
Expand Down
7 changes: 6 additions & 1 deletion lib/selma/sanitizer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,12 @@ def allow_class(element, *klass)
end

# Registers the URL protocols permitted for +attr+ on +element+.
#
# +protos+ may be a single protocol value or an Array of them. A single
# value is wrapped in an Array before being forwarded to
# +set_allowed_protocols+. The `:all` wildcard is only accepted as a bare
# value; it is rejected when it appears inside an Array so that it cannot be
# mixed with explicit protocols.
#
# Raises ArgumentError if +protos+ is an Array that includes `:all`.
def allow_protocol(element, attr, protos)
  if protos.is_a?(Array)
    # Validate only caller-supplied arrays; a bare `:all` must not be
    # wrapped before this check or it would always be rejected.
    raise ArgumentError, "`:all` must be passed outside of an array" if protos.include?(:all)
  else
    protos = [protos]
  end

  set_allowed_protocols(element, attr, protos)
end

Expand Down
2 changes: 1 addition & 1 deletion lib/selma/sanitizer/config/default.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ module Config

# URL handling protocols to allow in specific attributes. By default, no
# protocols are allowed. Use :relative in place of a protocol if you want
# to allow relative URLs sans protocol. Set an attribute's value to `:all`
# (the bare symbol, not inside an array) to allow any protocol.
protocols: {},

# An Array of element names whose contents will be removed. The contents
Expand Down
2 changes: 1 addition & 1 deletion lib/selma/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# frozen_string_literal: true

module Selma
  # Version string for Selma. Assigned exactly once so that loading this
  # file does not emit an "already initialized constant" warning.
  VERSION = "0.3.0"
end
13 changes: 13 additions & 0 deletions test/selma_maliciousness_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -154,4 +154,17 @@ def test_that_it_raises_on_handle_text_returning_non_string
Selma::Rewriter.new(sanitizer: nil, handlers: [GarbageTextOptions.new]).rewrite(frag)
end
end

# `:all` supplied inside an array (rather than as a bare symbol) is expected
# to surface as an ArgumentError when the document is rewritten.
def test_sanitizer_expects_all_as_symbol
  config = {
    elements: ["a"],
    attributes: { "a" => ["href"] },
    protocols: { "a" => { "href" => [:all] } },
  }
  markup = "<a href='https://google.com'>wow!</a>"
  sanitizer = Selma::Sanitizer.new(config)

  assert_raises(ArgumentError) do
    rewriter = Selma::Rewriter.new(sanitizer: sanitizer)
    rewriter.rewrite(markup)
  end
end
end
18 changes: 18 additions & 0 deletions test/selma_sanitizer_elements_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,24 @@ def test_should_allow_relative_urls_containing_colons_when_the_colon_is_part_of_
assert_equal("<a>Footnote 1</a>", Selma::Rewriter.new(sanitizer: sanitizer).rewrite(input))
end

# With `href` configured as the bare `:all` symbol, links using relative,
# http, https, ftp and ssh URLs are expected to come back unchanged.
def test_should_allow_all_protocols_if_asked
  config = {
    elements: ["a"],
    attributes: { "a" => ["href"] },
    protocols: { "a" => { "href" => :all } },
  }

  markup = <<~HTML
    <a href="/foo/bar">Link</a>
    <a href="http://wow.com/foo/bar">Link</a>
    <a href="https://wow.com/foo/bar">Link</a>
    <a href="ftp://wow.com/foo/bar">Link</a>
    <a href="ssh://127.0.0.1">Link</a>
  HTML

  sanitizer = Selma::Sanitizer.new(config)
  rewritten = Selma::Rewriter.new(sanitizer: sanitizer).rewrite(markup)

  assert_equal(markup, rewritten)
end

def test_should_remove_the_contents_of_filtered_nodes_when_remove_contents_is_true
sanitizer = Selma::Sanitizer.new({ remove_contents: true })

Expand Down

0 comments on commit 660fc24

Please sign in to comment.