Skip to content

Commit

Permalink
Cache header normalization to reduce object allocation (#789)
Browse files Browse the repository at this point in the history
Co-authored-by: Alexey Zapparov <[email protected]>
  • Loading branch information
alexcwatt and ixti authored Aug 26, 2024
1 parent b74b16c commit 470203f
Show file tree
Hide file tree
Showing 9 changed files with 161 additions and 70 deletions.
4 changes: 4 additions & 0 deletions .rubocop/rspec.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
RSpec/ExampleLength:
CountAsOne:
- array
- hash
- heredoc
- method_call

RSpec/MultipleExpectations:
Max: 5
24 changes: 0 additions & 24 deletions .rubocop_todo.yml
Original file line number Diff line number Diff line change
Expand Up @@ -267,30 +267,6 @@ RSpec/InstanceVariable:
RSpec/MessageSpies:
EnforcedStyle: receive

# Offense count: 74
# Configuration parameters: Max.
RSpec/MultipleExpectations:
Exclude:
- 'spec/lib/http/client_spec.rb'
- 'spec/lib/http/connection_spec.rb'
- 'spec/lib/http/features/auto_deflate_spec.rb'
- 'spec/lib/http/headers_spec.rb'
- 'spec/lib/http/options/body_spec.rb'
- 'spec/lib/http/options/features_spec.rb'
- 'spec/lib/http/options/form_spec.rb'
- 'spec/lib/http/options/headers_spec.rb'
- 'spec/lib/http/options/json_spec.rb'
- 'spec/lib/http/options/merge_spec.rb'
- 'spec/lib/http/options/proxy_spec.rb'
- 'spec/lib/http/redirector_spec.rb'
- 'spec/lib/http/response/body_spec.rb'
- 'spec/lib/http/response/parser_spec.rb'
- 'spec/lib/http/retriable/delay_calculator_spec.rb'
- 'spec/lib/http/retriable/performer_spec.rb'
- 'spec/lib/http/uri_spec.rb'
- 'spec/lib/http_spec.rb'
- 'spec/support/http_handling_shared.rb'

# Offense count: 9
# Configuration parameters: AllowSubject, Max.
RSpec/MultipleMemoizedHelpers:
Expand Down
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ group :test do

gem "rspec", "~> 3.10"
gem "rspec-its"
gem "rspec-memory"

gem "yardstick"
end
Expand Down
67 changes: 27 additions & 40 deletions lib/http/headers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

require "http/errors"
require "http/headers/mixin"
require "http/headers/normalizer"
require "http/headers/known"

module HTTP
Expand All @@ -12,12 +13,32 @@ class Headers
extend Forwardable
include Enumerable

# Matches HTTP header names when in "Canonical-Http-Format"
CANONICAL_NAME_RE = /\A[A-Z][a-z]*(?:-[A-Z][a-z]*)*\z/
class << self
# Coerces given `object` into Headers.
#
# @raise [Error] if object can't be coerced
# @param [#to_hash, #to_h, #to_a] object
# @return [Headers]
def coerce(object)
unless object.is_a? self
object = case
when object.respond_to?(:to_hash) then object.to_hash
when object.respond_to?(:to_h) then object.to_h
when object.respond_to?(:to_a) then object.to_a
else raise Error, "Can't coerce #{object.inspect} to Headers"
end
end

headers = new
object.each { |k, v| headers.add k, v }
headers
end
alias [] coerce

# Matches valid header field name according to RFC.
# @see http://tools.ietf.org/html/rfc7230#section-3.2
COMPLIANT_NAME_RE = /\A[A-Za-z0-9!#$%&'*+\-.^_`|~]+\z/
def normalizer
@normalizer ||= Headers::Normalizer.new
end
end

# Class constructor.
def initialize
Expand Down Expand Up @@ -194,45 +215,11 @@ def merge(other)
dup.tap { |dupped| dupped.merge! other }
end

class << self
# Coerces given `object` into Headers.
#
# @raise [Error] if object can't be coerced
# @param [#to_hash, #to_h, #to_a] object
# @return [Headers]
def coerce(object)
unless object.is_a? self
object = case
when object.respond_to?(:to_hash) then object.to_hash
when object.respond_to?(:to_h) then object.to_h
when object.respond_to?(:to_a) then object.to_a
else raise Error, "Can't coerce #{object.inspect} to Headers"
end
end

headers = new
object.each { |k, v| headers.add k, v }
headers
end
alias [] coerce
end

private

# Transforms `name` to canonical HTTP header capitalization
#
# @param [String] name
# @raise [HeaderError] if normalized name does not
# match {HEADER_NAME_RE}
# @return [String] canonical HTTP header name
def normalize_header(name)
return name if CANONICAL_NAME_RE.match?(name)

normalized = name.split(/[\-_]/).each(&:capitalize!).join("-")

return normalized if COMPLIANT_NAME_RE.match?(normalized)

raise HeaderError, "Invalid HTTP header field name: #{name.inspect}"
self.class.normalizer.call(name)
end

# Ensures there is no new line character in the header value
Expand Down
69 changes: 69 additions & 0 deletions lib/http/headers/normalizer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# frozen_string_literal: true

module HTTP
class Headers
class Normalizer
# Matches HTTP header names when in "Canonical-Http-Format"
CANONICAL_NAME_RE = /\A[A-Z][a-z]*(?:-[A-Z][a-z]*)*\z/

# Matches valid header field name according to RFC.
# @see http://tools.ietf.org/html/rfc7230#section-3.2
COMPLIANT_NAME_RE = /\A[A-Za-z0-9!#$%&'*+\-.^_`|~]+\z/

NAME_PARTS_SEPARATOR_RE = /[\-_]/

# @private
# Normalized header names cache
class Cache
MAX_SIZE = 200

def initialize
@store = {}
end

def get(key)
@store[key]
end
alias [] get

def set(key, value)
# Maintain cache size
@store.delete(@store.each_key.first) while MAX_SIZE <= @store.size

@store[key] = value
end
alias []= set
end

def initialize
@cache = Cache.new
end

# Transforms `name` to canonical HTTP header capitalization
def call(name)
name = -name.to_s
value = (@cache[name] ||= -normalize_header(name))

value.dup
end

private

# Transforms `name` to canonical HTTP header capitalization
#
# @param [String] name
# @raise [HeaderError] if normalized name does not
# match {COMPLIANT_NAME_RE}
# @return [String] canonical HTTP header name
def normalize_header(name)
return name if CANONICAL_NAME_RE.match?(name)

normalized = name.split(NAME_PARTS_SEPARATOR_RE).each(&:capitalize!).join("-")

return normalized if COMPLIANT_NAME_RE.match?(normalized)

raise HeaderError, "Invalid HTTP header field name: #{name.inspect}"
end
end
end
end
52 changes: 52 additions & 0 deletions spec/lib/http/headers/normalizer_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# frozen_string_literal: true

RSpec.describe HTTP::Headers::Normalizer do
subject(:normalizer) { described_class.new }

include_context RSpec::Memory

describe "#call" do
it "normalizes the header" do
expect(normalizer.call("content_type")).to eq "Content-Type"
end

it "returns a non-frozen string" do
expect(normalizer.call("content_type")).not_to be_frozen
end

it "evicts the oldest item when cache is full" do
max_headers = (1..described_class::Cache::MAX_SIZE).map { |i| "Header#{i}" }
max_headers.each { |header| normalizer.call(header) }
normalizer.call("New-Header")
cache_store = normalizer.instance_variable_get(:@cache).instance_variable_get(:@store)
expect(cache_store.keys).to eq(max_headers[1..] + ["New-Header"])
end

it "retuns mutable strings" do
normalized_headers = Array.new(3) { normalizer.call("content_type") }

expect(normalized_headers)
.to satisfy { |arr| arr.uniq.size == 1 }
.and(satisfy { |arr| arr.map(&:object_id).uniq.size == normalized_headers.size })
.and(satisfy { |arr| arr.none?(&:frozen?) })
end

it "allocates minimal memory for normalization of the same header" do
normalizer.call("accept") # XXX: Ensure normalizer is pre-allocated

# On first call it is expected to allocate during normalization
expect { normalizer.call("content_type") }.to limit_allocations(
Array => 1,
MatchData => 1,
String => 6
)

# On subsequent call it is expected to only allocate copy of a cached string
expect { normalizer.call("content_type") }.to limit_allocations(
Array => 0,
MatchData => 0,
String => 1
)
end
end
end
11 changes: 6 additions & 5 deletions spec/lib/http/redirector_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,13 @@ def redirect_response(status, location, set_cookie = {})
expect(req_cookie).to eq request_cookies.shift
hops.shift
end

expect(res.to_s).to eq "bar"
cookies = res.cookies.cookies.to_h { |c| [c.name, c.value] }
expect(cookies["foo"]).to eq "42"
expect(cookies["bar"]).to eq "53"
expect(cookies["baz"]).to eq "65"
expect(cookies["deleted"]).to eq nil
expect(res.cookies.cookies.to_h { |c| [c.name, c.value] }).to eq({
"foo" => "42",
"bar" => "53",
"baz" => "65"
})
end

it "returns original cookies in response" do
Expand Down
2 changes: 1 addition & 1 deletion spec/lib/http/retriable/performer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def response(**options)
end

describe "should_retry option" do
it "decides if the request should be retried" do
it "decides if the request should be retried" do # rubocop:disable RSpec/MultipleExpectations
retry_proc = proc do |req, err, res, attempt|
expect(req).to eq request
if res
Expand Down
1 change: 1 addition & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

require "http"
require "rspec/its"
require "rspec/memory"
require "support/capture_warning"
require "support/fakeio"

Expand Down

0 comments on commit 470203f

Please sign in to comment.