Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Apply explicit chunked encoding #95

Merged
merged 13 commits into from
Feb 7, 2024
Merged
11 changes: 0 additions & 11 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,3 @@ source 'https://rubygems.org'

# Specify your gem's dependencies in zipline.gemspec
gemspec

group :development, :test do
gem 'rspec', '~> 3'
gem 'fog-aws'
gem 'activesupport'
gem 'actionpack'
gem 'aws-sdk-s3'
gem 'carrierwave'
gem 'paperclip'
gem 'rake'
end
37 changes: 34 additions & 3 deletions lib/zipline.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
require 'content_disposition'
require "zipline/version"
require 'zipline/version'
require 'zip_tricks'
require "zipline/zip_generator"
require 'zipline/zip_generator'
require 'zipline/chunked_body'
require 'zipline/tempfile_body'

# class MyController < ApplicationController
# include Zipline
Expand All @@ -18,7 +20,36 @@ def zipline(files, zipname = 'zipline.zip', **kwargs_for_new)
headers['Content-Type'] = Mime::Type.lookup_by_extension('zip').to_s
response.sending_file = true
response.cache_control[:public] ||= false
self.response_body = zip_generator

# Disables Rack::ETag if it is enabled (prevent buffering)
# see https://github.com/rack/rack/issues/1619#issuecomment-606315714
self.response.headers['Last-Modified'] = Time.now.httpdate

if request.get_header("HTTP_VERSION") == "HTTP/1.0"
# If HTTP/1.0 is used it is not possible to stream, and if that happens it usually will be
# unclear why buffering is happening. Some info in the log is the least one can do.
logger.warn { "The downstream HTTP proxy/LB insists on HTTP/1.0 protocol, ZIP response will be buffered." } if logger

# If we use Rack::ContentLength it would run through our ZIP block twice - once to calculate the content length
# of the response, and once - to serve. We can trade performance for disk space and buffer the response into a Tempfile
# since we are already buffering.
tempfile_body = TempfileBody.new(request.env, zip_generator)
headers["Content-Length"] = tempfile_body.size.to_s
headers["X-Zipline-Output"] = "buffered"
self.response_body = tempfile_body
else
# Disable buffering for both nginx and Google Load Balancer, see
# https://cloud.google.com/appengine/docs/flexible/how-requests-are-handled?tab=python#x-accel-buffering
response.headers["X-Accel-Buffering"] = "no"

# Make sure Rack::ContentLength does not try to compute a content length,
# and remove the one already set
headers.delete("Content-Length")

# and send out in chunked encoding
headers["Transfer-Encoding"] = "chunked"
headers["X-Zipline-Output"] = "streamed"
self.response_body = Chunked.new(zip_generator)
end
end
end
31 changes: 31 additions & 0 deletions lib/zipline/chunked_body.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
module Zipline
# A body wrapper that emits chunked responses, creating valid
# "Transfer-Encoding: chunked" HTTP response body. This is copied from Rack::Chunked::Body,
# because Rack is not going to include that class after version 3.x
# Rails has a substitute class for this inside ActionController::Streaming,
# but that module is a private constant in the Rails codebase, and is thus
# considered "private" from the Rails standpoint. It is not that much code to
# carry, so we copy it into our code.
class Chunked
TERM = "\r\n"
TAIL = "0#{TERM}"

def initialize(body)
@body = body
end

# For each string yielded by the response body, yield
# the element in chunked encoding - and finish off with a terminator
def each
term = TERM
@body.each do |chunk|
size = chunk.bytesize
next if size == 0

yield [size.to_s(16), term, chunk.b, term].join
end
yield TAIL
yield term
end
end
end
52 changes: 52 additions & 0 deletions lib/zipline/tempfile_body.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
module Zipline
# Contains a file handle which can be closed once the response finishes sending.
# It supports `to_path` so that `Rack::Sendfile` can intercept it
class TempfileBody
TEMPFILE_NAME_PREFIX = "zipline-tf-body-"
attr_reader :tempfile

# @param body[#each] the enumerable that yields bytes, usually a `RackBody`.
# The `body` will be read in full immediately and closed.
def initialize(env, body)
@tempfile = Tempfile.new(TEMPFILE_NAME_PREFIX)
# Rack::TempfileReaper calls close! on tempfiles which get buffered
# We wil assume that it works fine with Rack::Sendfile (i.e. the path
# to the file getting served gets used before we unlink the tempfile)
env['rack.tempfiles'] ||= []
env['rack.tempfiles'] << @tempfile

@tempfile.binmode

body.each { |bytes| @tempfile << bytes }
body.close if body.respond_to?(:close)

@tempfile.flush
end

# Returns the size of the contained `Tempfile` so that a correct
# Content-Length header can be set
#
# @return [Integer]
def size
@tempfile.size
end

# Returns the path to the `Tempfile`, so that Rack::Sendfile can send this response
# using the downstream webserver
#
# @return [String]
def to_path
@tempfile.to_path
end

# Stream the file's contents if `Rack::Sendfile` isn't present.
#
# @return [void]
def each
@tempfile.rewind
while chunk = @tempfile.read(16384)
yield chunk
end
end
end
end
39 changes: 8 additions & 31 deletions lib/zipline/zip_generator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,41 +3,18 @@
module Zipline
class ZipGenerator
# takes an array of pairs [[uploader, filename], ... ]
def initialize(files, **kwargs_for_new)
@files = files
@kwargs_for_new = kwargs_for_new
end

#this is supposed to be streamed!
def to_s
throw "stop!"
def initialize(files, **kwargs_for_streamer)
# Use RackBody as it has buffering built-in in zip_tricks 5.x+
@body = ZipTricks::RackBody.new(**kwargs_for_streamer) do |streamer|
files.each do |file, name, options = {}|
handle_file(streamer, file, name.to_s, options)
end
end
end

def each(&block)
return to_enum(:each) unless block_given?

fake_io_writer = ZipTricks::BlockWrite.new(&block)
# ZipTricks outputs lots of strings in rapid succession, and with
# servers it can be beneficial to avoid doing too many tiny writes so that
# the number of syscalls is minimized. See https://github.com/WeTransfer/zip_tricks/issues/78
# There is a built-in facility for this in ZipTricks which can be used to implement
# some cheap buffering here (it exists both in version 4 and version 5). The buffer is really
# tiny and roughly equal to the medium Linux socket buffer size (16 KB). Although output
# will be not so immediate with this buffering the overall performance will be better,
# especially with multiple clients being serviced at the same time.
# Note that the WriteBuffer writes the same, retained String object - but the contents
# of that object changes between calls. This should work fine with servers where the
# contents of the string gets written to a socket immediately before the execution inside
# the WriteBuffer resumes), but if the strings get retained somewhere - like in an Array -
# this might pose a problem. Unlikely that it will be an issue here though.
write_buffer_size = 16 * 1024
write_buffer = ZipTricks::WriteBuffer.new(fake_io_writer, write_buffer_size)
ZipTricks::Streamer.open(write_buffer, **@kwargs_for_new) do |streamer|
@files.each do |file, name, options = {}|
handle_file(streamer, file, name.to_s, options)
end
end
write_buffer.flush! # for any remaining writes
@body.each(&block)
end

def handle_file(streamer, file, name, options)
Expand Down
87 changes: 0 additions & 87 deletions spec/lib/zipline/zip_generator_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -185,91 +185,4 @@ def create_filename
end
end
end

describe '.write_file' do
let(:file) { StringIO.new('passthrough') }

context 'when passing an ActiveStorage::Filename object as filename' do
let(:filename) { ActiveStorage::Filename.new('test') }

let(:generator) do
Zipline::ZipGenerator.new([[file, filename]])
end

it 'passes a string as filename to ZipTricks' do
allow(file).to receive(:url).and_return('fakeurl')
expect_any_instance_of(ZipTricks::Streamer).to receive(:write_deflated_file)
.with('test')
generator.each { |_| 'Test' }
end
end
end

describe 'passing an options hash' do
let(:file) { StringIO.new('passthrough') }

context 'with optional arguments' do
let(:mtime) { 1.day.ago }
let(:generator) do
Zipline::ZipGenerator.new([[file, 'test', modification_time: mtime]])
end

it 'passes the options hash through handle_file' do
expect(generator).to receive(:handle_file)
.with(anything, anything, anything, { modification_time: mtime })
generator.each { |_| 'Test' }
end

it 'passes the options hash to ZipTricks as kwargs' do
allow(file).to receive(:url).and_return('fakeurl')
expect_any_instance_of(ZipTricks::Streamer).to receive(:write_deflated_file)
.with(anything, modification_time: mtime)
generator.each { |_| 'Test' }
end
end

context 'without optional arguments' do
let(:generator) do
Zipline::ZipGenerator.new([[file, 'test']])
end

it 'passes the options hash through handle_file' do
expect(generator).to receive(:handle_file)
.with(anything, anything, anything, {})
generator.each { |_| 'Test' }
end

it 'passes the options hash to ZipTricks as kwargs' do
allow(file).to receive(:url).and_return('fakeurl')
expect_any_instance_of(ZipTricks::Streamer).to receive(:write_deflated_file)
.with(anything)
generator.each { |_| 'Test' }
end
end

context 'with extra invalid options' do
let(:mtime) { 1.day.ago }
let(:generator) do
Zipline::ZipGenerator.new([[file, 'test', modification_time: mtime, extra: 'invalid']])
end

it 'passes the whole options hash through handle_file' do
expect(generator).to receive(:handle_file)
.with(anything, anything, anything, { modification_time: mtime, extra: 'invalid' })
generator.each { |_| 'Test' }
end

it 'only passes the kwargs to ZipTricks that it expects (i.e., :modification_time)' do
allow(file).to receive(:url).and_return('fakeurl')
expect_any_instance_of(ZipTricks::Streamer).to receive(:write_deflated_file)
.with(anything, modification_time: mtime)
generator.each { |_| 'Test' }
end
end
end
it 'passes along constructor options to ZipTricks streamer' do
expect(ZipTricks::Streamer).to receive(:open).with(anything, { :some => 'options' })
generator = Zipline::ZipGenerator.new([file, 'somefile'], :some => 'options')
generator.each { |_| 'Test' }
end
end
43 changes: 24 additions & 19 deletions spec/lib/zipline/zipline_spec.rb
Original file line number Diff line number Diff line change
@@ -1,29 +1,34 @@
require 'spec_helper'
require 'ostruct'
require 'action_controller'

describe Zipline do
before { Fog.mock! }

let (:undertest) {
class TestZipline
class FakeController < ActionController::Base
include Zipline
def download_zip
files = [
[StringIO.new("File content goes here"), "one.txt"],
[StringIO.new("Some other content goes here"), "two.txt"]
]
zipline(files, 'myfiles.zip', auto_rename_duplicate_filenames: false)
end
end

attr_accessor :headers
attr_accessor :response
attr_accessor :response_body
def initialize
@headers = {}
@response = OpenStruct.new(:cache_control => {}, :headers => {} )
end
include Zipline
end
return TestZipline.new()
}
it 'passes keyword parameters to ZipTricks::Streamer' do
fake_rack_env = {
"HTTP_VERSION" => "HTTP/1.0",
"REQUEST_METHOD" => "GET",
"SCRIPT_NAME" => "",
"PATH_INFO" => "/download",
"QUERY_STRING" => "",
"SERVER_NAME" => "host.example",
"rack.input" => StringIO.new,
}
expect(ZipTricks::Streamer).to receive(:new).with(anything, auto_rename_duplicate_filenames: false).and_call_original

status, headers, body = FakeController.action(:download_zip).call(fake_rack_env)

it 'passes arguments along' do
expect(Zipline::ZipGenerator).to receive(:new)
.with(['some', 'fake', 'files'], { some: 'options' })
undertest.zipline(['some', 'fake', 'files'], 'myfiles.zip', some: 'options')
expect(undertest.headers['Content-Disposition']).to eq("attachment; filename=\"myfiles.zip\"; filename*=UTF-8''myfiles.zip")
expect(headers['Content-Disposition']).to eq("attachment; filename=\"myfiles.zip\"; filename*=UTF-8''myfiles.zip")
end
end
13 changes: 6 additions & 7 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,16 @@
require 'fog-aws'
require 'carrierwave'

Dir["#{File.expand_path('..', __FILE__)}/support/**/*.rb"].each { |f| require f }
Dir["#{File.expand_path('..', __FILE__)}/support/**/*.rb"].sort.each { |f| require f }

CarrierWave.configure do |config|
config.fog_provider = 'fog/aws'
config.fog_credentials = {
provider: 'AWS',
aws_access_key_id: 'dummy',
aws_secret_access_key: 'data',
region: 'us-west-2',
}

provider: 'AWS',
aws_access_key_id: 'dummy',
aws_secret_access_key: 'data',
region: 'us-west-2',
}
end

RSpec.configure do |config|
Expand Down
11 changes: 9 additions & 2 deletions zipline.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,15 @@ Gem::Specification.new do |gem|

gem.add_dependency 'actionpack', ['>= 6.0', '< 8.0']
gem.add_dependency 'content_disposition', '~> 1.0'
gem.add_dependency 'zip_tricks', ['>= 4.2.1', '< 6.0']
gem.add_dependency 'zip_tricks', ['~> 4.8', '< 6'] # Minimum to 4.8.3 which is the last-released MIT version

gem.add_development_dependency 'rspec', '~> 3'
gem.add_development_dependency 'fog-aws'
gem.add_development_dependency 'aws-sdk-s3'
gem.add_development_dependency 'carrierwave'
gem.add_development_dependency 'paperclip'
gem.add_development_dependency 'rake'

# https://github.com/rspec/rspec-mocks/issues/1457
gem.add_development_dependency 'rspec-mocks', '3.10.2'
gem.add_development_dependency 'rspec-mocks', '~> 3.12'
end
Loading