From 7b4464b6a235997b7084d41701a673551fc2cf48 Mon Sep 17 00:00:00 2001 From: marocchino Date: Thu, 7 Jan 2021 06:52:10 +0900 Subject: [PATCH] Use webrick's escape instead of encode_www_form_component MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` irb(main):001:0> require 'webrick' => true irb(main):002:0> URI.parse(URI.encode_www_form_component("http://example.com/path?query=あああ")) => #<URI::Generic http%3A%2F%2Fexample.com%2Fpath%3Fquery%3D%E3%81%82%E3%81%82%E3%81%82> irb(main):003:0> URI.parse(WEBrick::HTTPUtils.escape("http://example.com/path?query=あああ")) => #<URI::HTTP http://example.com/path?query=%E3%81%82%E3%81%82%E3%81%82> ``` --- .gitlab-ci.yml | 4 ++++ lib/validate_website/crawl.rb | 2 +- lib/validate_website/static_link.rb | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1aca237..d3ca86e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -33,6 +33,10 @@ test:2.7: extends: .tests image: 'ruby:2.7' +test:3.0: + extends: .tests + image: 'ruby:3.0' + test:jruby: extends: .tests image: 'jruby:9.2.12-jre' diff --git a/lib/validate_website/crawl.rb b/lib/validate_website/crawl.rb index c015109..0cff182 100644 --- a/lib/validate_website/crawl.rb +++ b/lib/validate_website/crawl.rb @@ -46,7 +46,7 @@ def extract_imgs_from_page(page) page.doc.search('//img[@src]').reduce(Set[]) do |result, elem| u = elem.attributes['src'].content - result << page.to_absolute(URI.parse(URI.encode_www_form_component(u))) + result << page.to_absolute(URI.parse(WEBrick::HTTPUtils.escape(u))) end end diff --git a/lib/validate_website/static_link.rb b/lib/validate_website/static_link.rb index a3399f7..5dfe259 100644 --- a/lib/validate_website/static_link.rb +++ b/lib/validate_website/static_link.rb @@ -8,7 +8,7 @@ # rubocop:disable Metrics/BlockLength StaticLink = Struct.new(:link, :site) do def link_uri - @link_uri = URI.parse(URI.encode_www_form_component(link)) + @link_uri = URI.parse(WEBrick::HTTPUtils.escape(link)) @link_uri = URI.join(site, @link_uri) if @link_uri.host.nil? @link_uri end