diff --git a/README.md b/README.md index 0728e38..0fe05df 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ attributes: { # URL handling protocols to allow in specific attributes. By default, no # protocols are allowed. Use :relative in place of a protocol if you want -# to allow relative URLs sans protocol. +# to allow relative URLs sans protocol. Set to `:all` to allow any protocol. protocols: { "a" => { "href" => ["http", "https", "mailto", :relative] }, "img" => { "href" => ["http", "https"] }, diff --git a/ext/selma/src/sanitizer.rs b/ext/selma/src/sanitizer.rs index 2ec89f1..2f67c90 100644 --- a/ext/selma/src/sanitizer.rs +++ b/ext/selma/src/sanitizer.rs @@ -211,20 +211,23 @@ impl SelmaSanitizer { } Some(protocol_list) => protocol_list.push(allowed_protocol.to_string()), } - } else if allowed_protocol.is_kind_of(class::symbol()) - && allowed_protocol.inspect() == ":relative" - { - match protocol_list { - None => { - protocol_sanitizers.insert( - attr_name.to_string(), - vec!["#".to_string(), "/".to_string()], - ); - } - Some(protocol_list) => { - protocol_list.push("#".to_string()); - protocol_list.push("/".to_string()); + } else if allowed_protocol.is_kind_of(class::symbol()) { + let protocol_config = allowed_protocol.inspect(); + if protocol_config == ":relative" { + match protocol_list { + None => { + protocol_sanitizers.insert( + attr_name.to_string(), + vec!["#".to_string(), "/".to_string()], + ); + } + Some(protocol_list) => { + protocol_list.push("#".to_string()); + protocol_list.push("/".to_string()); + } } + } else if protocol_config == ":all" { + protocol_sanitizers.insert(attr_name.to_string(), vec!["all".to_string()]); } } } @@ -388,6 +391,10 @@ impl SelmaSanitizer { } fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &String) -> bool { + if protocols_allowed.contains(&"all".to_string()) { + return true; + } + // FIXME: is there a more idiomatic way to do this? let mut pos: usize = 0; let mut chars = attr_val.chars(); diff --git a/lib/selma/sanitizer.rb b/lib/selma/sanitizer.rb index d802ae7..db53984 100644 --- a/lib/selma/sanitizer.rb +++ b/lib/selma/sanitizer.rb @@ -66,7 +66,12 @@ def allow_class(element, *klass) end def allow_protocol(element, attr, protos) - protos = [protos] unless protos.is_a?(Array) + if protos.is_a?(Array) + raise ArgumentError, "`:all` must be passed outside of an array" if protos.include?(:all) + else + protos = [protos] + end + set_allowed_protocols(element, attr, protos) end diff --git a/lib/selma/sanitizer/config/default.rb b/lib/selma/sanitizer/config/default.rb index 9b55bfa..b8cacd9 100644 --- a/lib/selma/sanitizer/config/default.rb +++ b/lib/selma/sanitizer/config/default.rb @@ -28,7 +28,7 @@ module Config # URL handling protocols to allow in specific attributes. By default, no # protocols are allowed. Use :relative in place of a protocol if you want - # to allow relative URLs sans protocol. + # to allow relative URLs sans protocol. Set to `:all` to allow any protocol. protocols: {}, # An Array of element names whose contents will be removed. The contents diff --git a/test/selma_maliciousness_test.rb b/test/selma_maliciousness_test.rb index e4bb58f..fb74206 100644 --- a/test/selma_maliciousness_test.rb +++ b/test/selma_maliciousness_test.rb @@ -154,4 +154,17 @@ def test_that_it_raises_on_handle_text_returning_non_string Selma::Rewriter.new(sanitizer: nil, handlers: [GarbageTextOptions.new]).rewrite(frag) end end + + def test_sanitizer_expects_all_as_symbol + html = "wow!" + sanitizer = Selma::Sanitizer.new({ + elements: ["a"], + attributes: { "a" => ["href"] }, + protocols: { "a" => { "href" => [:all] } }, + }) + + assert_raises(ArgumentError) do + Selma::Rewriter.new(sanitizer: sanitizer).rewrite(html) + end + end end diff --git a/test/selma_sanitizer_elements_test.rb b/test/selma_sanitizer_elements_test.rb index d5878a7..34e0a2c 100644 --- a/test/selma_sanitizer_elements_test.rb +++ b/test/selma_sanitizer_elements_test.rb @@ -252,6 +252,24 @@ def test_should_allow_relative_urls_containing_colons_when_the_colon_is_part_of_ assert_equal("Footnote 1", Selma::Rewriter.new(sanitizer: sanitizer).rewrite(input)) end + def test_should_allow_all_protocols_if_asked + input = <<~HTML + Link + Link + Link + Link + Link + HTML + + sanitizer = Selma::Sanitizer.new({ + elements: ["a"], + attributes: { "a" => ["href"] }, + protocols: { "a" => { "href" => :all } }, + }) + + assert_equal(input, Selma::Rewriter.new(sanitizer: sanitizer).rewrite(input)) + end + def test_should_remove_the_contents_of_filtered_nodes_when_remove_contents_is_true sanitizer = Selma::Sanitizer.new({ remove_contents: true })