Skip to content

Commit

Permalink
Conservative normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
c960657 committed Aug 3, 2023
1 parent 9bfcdfb commit e9a1542
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 2 deletions.
17 changes: 15 additions & 2 deletions lib/http/uri.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,18 @@ class URI
# URI scheme name for HTTPS.
#
# @private
HTTPS_SCHEME = "https"

# Matches runs of characters that are percent-encoded during normalization:
# U+0000..U+0020 (control characters and space) and everything from
# U+007F (DEL) up to U+10FFFF, the last Unicode code point. Printable
# ASCII (U+0021..U+007E), including "%", is deliberately left untouched so
# existing escape sequences are preserved.
#
# @private
PERCENT_ENCODE = /[\x00-\x20\u007F-\u{10FFFF}]+/.freeze

# Conservative URI normalizer.
#
# Parses the argument with HTTP::URI.parse and builds a new HTTP::URI from
# it. Scheme, authority and fragment are taken from the parsed URI's
# `normalized_*` readers. Path and query are only minimally rewritten, as
# described inline below.
#
# Each hash key is given exactly once: duplicated keys in a hash literal
# are silently overwritten by the last occurrence (and trigger a Ruby
# warning), so the superseded `uri.normalized_path` / `uri.query` entries
# have been dropped.
#
# @private
NORMALIZER = lambda do |uri|
  uri = HTTP::URI.parse uri

  HTTP::URI.new(
    :scheme => uri.normalized_scheme,
    :authority => uri.normalized_authority,
    # An empty path becomes "/"; otherwise the path goes through
    # Addressable::URI.normalize_path and then has every run matching
    # PERCENT_ENCODE percent-encoded.
    :path => uri.path.empty? ? "/" : percent_encode(Addressable::URI.normalize_path(uri.path), PERCENT_ENCODE),
    # percent_encode passes nil through, so a URI without a query keeps a
    # nil query.
    :query => percent_encode(uri.query, PERCENT_ENCODE),
    :fragment => uri.normalized_fragment
  )
end
Expand All @@ -71,6 +74,16 @@ def self.form_encode(form_values, sort = false)
Addressable::URI.form_encode(form_values, sort)
end

# Replace every substring matched by +pattern+ with its percent-encoded
# form: the match is transcoded to UTF-8 and each resulting byte is
# written as "%XX" using upper-case hexadecimal digits.
#
# @param [String, nil] string raw string; nil is passed through unchanged
# @param [Regexp] pattern regular expression matching the characters to percent-encode
#
# @return [String, nil] encoded copy of the string, or nil when given nil
def self.percent_encode(string, pattern)
  return nil if string.nil?

  string.gsub(pattern) do |match|
    match.encode(Encoding::UTF_8).each_byte.map { |byte| format("%%%02X", byte) }.join
  end
end

# Creates an HTTP::URI instance from the given options
#
# @param [Hash, Addressable::URI] options_or_uri
Expand Down
86 changes: 86 additions & 0 deletions spec/lib/http/uri_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,92 @@
subject(:https_uri) { described_class.parse(example_https_uri_string) }
subject(:ipv6_uri) { described_class.parse(example_ipv6_uri_string) }

describe "NORMALIZER" do
  # Runs a raw URI string through the normalizer under test.
  def normalize(uri)
    HTTP::URI::NORMALIZER.call(uri)
  end

  # -- scheme and host --------------------------------------------------

  it "lower-cases scheme" do
    expect(normalize("HttP://example.com").scheme).to eq("http")
  end

  it "lower-cases hostname" do
    expect(normalize("http://EXAMPLE.com").host).to eq("example.com")
  end

  it "decodes percent-encoded hostname" do
    expect(normalize("http://ex%61mple.com").host).to eq("example.com")
  end

  it "removes trailing period in hostname" do
    expect(normalize("http://example.com.").host).to eq("example.com")
  end

  it "IDN-encodes non-ASCII hostname" do
    expect(normalize("http://exämple.com").host).to eq("xn--exmple-cua.com")
  end

  # -- path ---------------------------------------------------------------

  it "ensures path is not empty" do
    expect(normalize("http://example.com").path).to eq("/")
  end

  it "preserves double slashes in path" do
    expect(normalize("http://example.com//a///b").path).to eq("//a///b")
  end

  it "resolves single-dot segments in path" do
    expect(normalize("http://example.com/a/./b").path).to eq("/a/b")
  end

  it "resolves double-dot segments in path" do
    expect(normalize("http://example.com/a/b/../c").path).to eq("/a/c")
  end

  it "resolves leading double-dot segments in path" do
    expect(normalize("http://example.com/../a/b").path).to eq("/a/b")
  end

  it "percent-encodes control characters in path" do
    expect(normalize("http://example.com/\x00\x7F\n").path).to eq("/%00%7F%0A")
  end

  it "percent-encodes space in path" do
    expect(normalize("http://example.com/a b").path).to eq("/a%20b")
  end

  it "percent-encodes non-ASCII characters in path" do
    expect(normalize("http://example.com/キョ").path).to eq("/%E3%82%AD%E3%83%A7")
  end

  it "does not percent-encode non-special characters in path" do
    expect(normalize("http://example.com/~.-_!$&()*,;=:@{}").path).to eq("/~.-_!$&()*,;=:@{}")
  end

  it "preserves escape sequences in path" do
    expect(normalize("http://example.com/%41").path).to eq("/%41")
  end

  # -- query --------------------------------------------------------------

  it "allows no query" do
    expect(normalize("http://example.com").query).to be_nil
  end

  it "percent-encodes control characters in query" do
    expect(normalize("http://example.com/?\x00\x7F\n").query).to eq("%00%7F%0A")
  end

  it "percent-encodes space in query" do
    expect(normalize("http://example.com/?a b").query).to eq("a%20b")
  end

  it "percent-encodes non-ASCII characters in query" do
    expect(normalize("http://example.com?キョ").query).to eq("%E3%82%AD%E3%83%A7")
  end

  it "does not percent-encode non-special characters in query" do
    expect(normalize("http://example.com/?~.-_!$&()*,;=:@{}?").query).to eq("~.-_!$&()*,;=:@{}?")
  end

  it "preserves escape sequences in query" do
    expect(normalize("http://example.com/?%41").query).to eq("%41")
  end
end

it "knows URI schemes" do
expect(http_uri.scheme).to eq "http"
expect(https_uri.scheme).to eq "https"
Expand Down

0 comments on commit e9a1542

Please sign in to comment.