From 165ba8081fc0fb816f315d359553b32c85b2a910 Mon Sep 17 00:00:00 2001 From: takundao71 Date: Sat, 3 Aug 2019 19:41:17 +0900 Subject: [PATCH] =?UTF-8?q?proxy=E3=81=AE=E8=AA=8D=E8=A8=BC=E5=87=A6?= =?UTF-8?q?=E7=90=86=E3=82=92=E4=BB=A5=E4=B8=8B=E3=81=AEPR=E3=81=8B?= =?UTF-8?q?=E3=82=89=E5=8F=96=E5=BE=97=20https://github.com/chriskite/anem?= =?UTF-8?q?one/pull/93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/anemone/core.rb | 10 +++++++--- lib/anemone/http.rb | 46 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/lib/anemone/core.rb b/lib/anemone/core.rb index 33c844ec..e966912b 100644 --- a/lib/anemone/core.rb +++ b/lib/anemone/core.rb @@ -50,10 +50,14 @@ class Core :accept_cookies => false, # skip any link with a query string? e.g. http://foo.com/?u=user :skip_query_strings => false, - # proxy server hostname + # proxy server hostname :proxy_host => nil, # proxy server port number :proxy_port => false, + # proxy server username + :proxy_user => nil, + # proxy server password + :proxy_pass => nil, # HTTP read timeout in seconds :read_timeout => nil, # Crawl subdomains? @@ -293,7 +297,7 @@ def too_deep?(from_page) false end end - + # # Returns +true+ if *link* should not be visited because # it has a query string and +skip_query_strings+ is true. @@ -311,4 +315,4 @@ def skip_link?(link) end end -end +end \ No newline at end of file diff --git a/lib/anemone/http.rb b/lib/anemone/http.rb index 3e6a298f..62f3b3c7 100644 --- a/lib/anemone/http.rb +++ b/lib/anemone/http.rb @@ -56,7 +56,7 @@ def fetch_pages(url, referer = nil, depth = nil) # The maximum number of redirects to follow # def redirect_limit - @opts[:redirect_limit] || REDIRECT_LIMIT + @opts["redirect_limit"] || REDIRECT_LIMIT end # @@ -64,35 +64,48 @@ def redirect_limit # or nil if no such option is set # def user_agent - @opts[:user_agent] + @opts["user_agent"] end # # Does this HTTP client accept cookies from the server? # def accept_cookies? - @opts[:accept_cookies] + @opts["accept_cookies"] end # # The proxy address string # def proxy_host - @opts[:proxy_host] + @opts["proxy_host"] end # # The proxy port # def proxy_port - @opts[:proxy_port] + @opts["proxy_port"] + end + + # + # The proxy username + # + def proxy_user + @opts["proxy_user"] + end + # + # The proxy password + # + def proxy_pass + @opts["proxy_pass"] end # # HTTP read timeout in seconds # def read_timeout - @opts[:read_timeout] + @opts["read_timeout"] end private @@ -132,11 +145,24 @@ def get_response(url, referer = nil) retries = 0 begin start = Time.now() + # + # proxy with authentication + proxy = Net::HTTP::Proxy(proxy_host, proxy_port, proxy_user, proxy_pass) unless (proxy_user.blank? || proxy_pass.blank?) + # # format request req = Net::HTTP::Get.new(full_path, opts) + # # HTTP Basic authentication req.basic_auth url.user, url.password if url.user - response = connection(url).request(req) + + if proxy.present? + response = proxy.start(url.host,url.port, :use_ssl => url.scheme == 'https') do |http| + http.request(req) + end + else + response = connection(url).request(req) + end + finish = Time.now() response_time = ((finish - start) * 1000).round @cookie_store.merge!(response['Set-Cookie']) if accept_cookies? @@ -160,7 +186,7 @@ def connection(url) end def refresh_connection(url) - http = Net::HTTP.new(url.host, url.port, proxy_host, proxy_port) + http = Net::HTTP.new(url.host, url.port, proxy_host, proxy_port, proxy_user, proxy_pass) http.read_timeout = read_timeout if !!read_timeout @@ -169,7 +195,7 @@ def refresh_connection(url) http.verify_mode = OpenSSL::SSL::VERIFY_NONE end - @connections[url.host][url.port] = http.start + @connections[url.host][url.port] = http.start end def verbose? @@ -184,4 +210,4 @@ def allowed?(to_url, from_url) end end -end +end \ No newline at end of file