From a461d93148b7568a35b97dd7be0de993f36c0382 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Thu, 15 Aug 2019 17:02:08 +0300 Subject: [PATCH] Various improvements Removed ------- * `rack` as dependency * Unused `config.ru` Changed ------- * Default path for cache is now in `./tmp` directory instead of `./config` Added ----- * Support of Rails 6 Development ----------- * [EditorConfig](https://editorconfig.org/) added * [RuboCop](https://github.com/rubocop-hq/rubocop) added and offenses fixed * RuboCop task added for Travis CI * Drop support for Ruby <= 2.2 (they even will not get security patches) * Ignore built gems for git --- .editorconfig | 9 +++ .gitignore | 1 + .rubocop.yml | 14 ++++ .ruby-version | 1 - .travis.yml | 16 +++- Gemfile | 2 + README.md | 23 +++--- config.ru | 7 -- lib/tasks/voight_kampff.rake | 10 ++- lib/voight_kampff.rb | 12 ++- lib/voight_kampff/engine.rb | 5 +- lib/voight_kampff/methods.rb | 19 +++-- lib/voight_kampff/rack_request.rb | 6 +- lib/voight_kampff/test.rb | 77 +++++++++++-------- .../controllers/replicants_controller_spec.rb | 5 +- .../app/controllers/replicants_controller.rb | 13 ++-- spec/internal/config/routes.rb | 2 + spec/lib/voight_kampff/rack_request_spec.rb | 8 +- spec/lib/voight_kampff/test_spec.rb | 26 +++++-- spec/lib/voight_kampff_spec.rb | 6 +- spec/spec_helper.rb | 7 +- spec/support/humans.rb | 4 +- spec/support/replicants.rb | 4 +- {config => tmp}/crawler-user-agents.json | 0 voight_kampff.gemspec | 31 ++++---- 25 files changed, 197 insertions(+), 111 deletions(-) create mode 100644 .editorconfig create mode 100644 .rubocop.yml delete mode 100644 .ruby-version delete mode 100644 config.ru rename {config => tmp}/crawler-user-agents.json (100%) diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..c6c8b36 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,9 @@ +root = true + +[*] +indent_style = space +indent_size = 2 +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true diff --git a/.gitignore b/.gitignore index 76f634c..8d18ff7 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ Gemfile.lock *.rbc coverage doc +*.gem diff --git a/.rubocop.yml b/.rubocop.yml new file mode 100644 index 0000000..f9e522f --- /dev/null +++ b/.rubocop.yml @@ -0,0 +1,14 @@ +AllCops: + TargetRubyVersion: 2.4 + NewCops: enable + +Layout/MultilineMethodCallIndentation: + EnforcedStyle: indented + +Metrics/BlockLength: + Exclude: + - spec/**/* + +Metrics/LineLength: + Exclude: + - spec/support/* diff --git a/.ruby-version b/.ruby-version deleted file mode 100644 index 57cf282..0000000 --- a/.ruby-version +++ /dev/null @@ -1 +0,0 @@ -2.6.5 diff --git a/.travis.yml b/.travis.yml index caec4a4..a1a6031 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,17 @@ language: ruby + rvm: - - 2.4.9 - - 2.5.7 - - 2.6.5 -script: bundle exec rspec + - 2.4 + - 2.5 + - 2.6 + before_install: + - gem update --system # fixes Travis CI error: NoMethodError: undefined method `spec' for nil:NilClass - gem install bundler + +cache: bundler + +script: + - bundle exec rspec + - bundle exec rubocop diff --git a/Gemfile b/Gemfile index fa75df1..7f4f5e9 100644 --- a/Gemfile +++ b/Gemfile @@ -1,3 +1,5 @@ +# frozen_string_literal: true + source 'https://rubygems.org' gemspec diff --git a/README.md b/README.md index 2785240..1e62d15 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Configuration A JSON file is used to match [user agent strings](http://simplyfast.info/browser) to a list of known bots. -If you'd like to use an [updated list](https://github.com/monperrus/crawler-user-agents) or make your own customizations, run `rake voight_kampff:import_user_agents`. This will download a `crawler-user-agents.json` file into the `./config` directory. +If you'd like to use an [updated list](https://github.com/monperrus/crawler-user-agents) or make your own customizations, run `rake voight_kampff:import_user_agents`. This will download a `crawler-user-agents.json` file into the `./tmp` directory. __Note:__ The pattern entries in the JSON file are evaluated as [regular expressions](http://en.wikipedia.org/wiki/Regular_expression). @@ -23,21 +23,24 @@ Usage ----- There are three ways to use Voight-Kampff -1. Through Rack::Request such as in your [Ruby on Rails](http://rubyonrails.org) controllers: - `request.bot?` +1. Through `Rack::Request` in your app such as [Ruby on Rails](http://rubyonrails.org): + ```ruby + request.bot? + ``` -2. Through the `VoightKampff` module: +2. Through the `VoightKampff` module: `VoightKampff.bot? 'your user agent string'` -3. Through a `VoightKampff::Test` instance: +3. Through a `VoightKampff::Test` instance: `VoightKampff::Test.new('your user agent string').bot?` -All of the above examples accept `human?` and `bot?` methods. All of these methods will return `true` or `false`. +All of the above examples accept `human?` and `bot?` methods. +All of these methods will return `true` or `false`. Upgrading to version 1.0 ------------------------ -Version 1.0 uses a new source for a list of bot user agent strings since the old source was no longer maintained. This new source, unfortuately, does not include as much detail. Therefore the following methods have been deprecated: +Version 1.0 uses a new source for a list of bot user agent strings since the old source was no longer maintained. This new source, unfortunately, does not include as much detail. Therefore the following methods have been deprecated: - `#browser?` - `#checker?` - `#downloader?` @@ -51,10 +54,10 @@ Also, the gem no longer extends `ActionDispatch::Request` instead it extends `Ra FAQ --- -__Q:__ __What's with the name?__ +__Q:__ __What's with the name?__ __A:__ It's the [machine in Blade Runner](https://en.wikipedia.org/wiki/Blade_Runner#Voight-Kampff_machine) that is used to test whether someone is a human or a replicant. -__Q:__ __I've found a bot that isn't being matched__ +__Q:__ __I've found a bot that isn't being matched__ __A:__ The list is being pulled from [github.com/monperrus/crawler-user-agents](https://github.com/monperrus/crawler-user-agents). If you'd like to have entries added to the list, please create a pull request with that project. Once that pull request is merged, feel free to create an issue here and I'll release a new gem version with the updated list. In the meantime you can always run `rake voight_kampff:import_user_agents` on your project to get that updated list. @@ -67,7 +70,7 @@ Thanks to [github.com/monperrus/crawler-user-agents](https://github.com/monperru Contributing ------------ -PR without tests will not get merged, Make sure you write tests for api and rails app. +PR without tests will not get merged, Make sure you write tests for API and Rails app. Feel free to ask for help, if you do not know how to write a determined test. Running Tests? diff --git a/config.ru b/config.ru deleted file mode 100644 index d49744b..0000000 --- a/config.ru +++ /dev/null @@ -1,7 +0,0 @@ -require 'rubygems' -require 'bundler' - -Bundler.require :default, :development - -Combustion.initialize! :action_controller -run Combustion::Application diff --git a/lib/tasks/voight_kampff.rake b/lib/tasks/voight_kampff.rake index e7d6836..520fa23 100644 --- a/lib/tasks/voight_kampff.rake +++ b/lib/tasks/voight_kampff.rake @@ -1,6 +1,8 @@ +# frozen_string_literal: true + namespace :voight_kampff do desc 'Import a new crawler-user-agents.json file' - task :import_user_agents, :url do |t, args| + task :import_user_agents, :url do |_t, args| args.with_defaults url: 'https://raw.githubusercontent.com/monperrus/crawler-user-agents/master/crawler-user-agents.json' require 'net/http' @@ -9,8 +11,10 @@ namespace :voight_kampff do contents = Net::HTTP.get(uri) if contents.present? - file = File.open('./config/crawler-user-agents.json', 'w') - file.write(contents.force_encoding(Encoding::UTF_8)) + File.write( + './tmp/crawler-user-agents.json', + contents.force_encoding(Encoding::UTF_8) + ) else puts "voight_kampff:import_user_agents - empty file received from #{uri}" end diff --git a/lib/voight_kampff.rb b/lib/voight_kampff.rb index 1446322..9678a2e 100644 --- a/lib/voight_kampff.rb +++ b/lib/voight_kampff.rb @@ -1,17 +1,15 @@ -require 'json' +# frozen_string_literal: true require 'voight_kampff/test' require 'voight_kampff/methods' require 'voight_kampff/rack_request' if defined?(Rack::Request) require 'voight_kampff/engine' if defined?(Rails::Engine) +# Class helper methods module VoightKampff - class << self - def root - require 'pathname' - Pathname.new File.expand_path '..', File.dirname(__FILE__) - end + ROOT = File.expand_path '..', __dir__ + class << self def human?(user_agent_string) test(user_agent_string).human? end @@ -19,7 +17,7 @@ def human?(user_agent_string) def bot?(user_agent_string) test(user_agent_string).bot? end - alias :replicant? :bot? + alias replicant? bot? private diff --git a/lib/voight_kampff/engine.rb b/lib/voight_kampff/engine.rb index 6f8f96f..1bd2402 100644 --- a/lib/voight_kampff/engine.rb +++ b/lib/voight_kampff/engine.rb @@ -1,10 +1,13 @@ +# frozen_string_literal: true + module VoightKampff + # Integration with Rails class Engine < Rails::Engine rake_tasks do load 'tasks/voight_kampff.rake' end - initializer :add_voight_kampff_methods do |app| + initializer :add_voight_kampff_methods do |_app| ActionDispatch::Request.class_eval do include VoightKampff::Methods end diff --git a/lib/voight_kampff/methods.rb b/lib/voight_kampff/methods.rb index 67e4e0e..c932b24 100644 --- a/lib/voight_kampff/methods.rb +++ b/lib/voight_kampff/methods.rb @@ -1,10 +1,15 @@ -module VoightKampff::Methods - def human? - VoightKampff::Test.new(user_agent).human? - end +# frozen_string_literal: true + +module VoightKampff + # Helper for Rack::Request + module Methods + extend Forwardable + def_delegators :voight_kampff, :human?, :bot?, :replicant? + + private - def bot? - VoightKampff::Test.new(user_agent).bot? + def voight_kampff + VoightKampff::Test.new(user_agent) + end end - alias :replicant? :bot? end diff --git a/lib/voight_kampff/rack_request.rb b/lib/voight_kampff/rack_request.rb index a7e1e00..26f42fc 100644 --- a/lib/voight_kampff/rack_request.rb +++ b/lib/voight_kampff/rack_request.rb @@ -1,4 +1,4 @@ +# frozen_string_literal: true + # Reopen the Rack::Request class to add bot detection methods -Rack::Request.class_eval do - include VoightKampff::Methods -end +Rack::Request.include VoightKampff::Methods if defined?(Rack::Request) diff --git a/lib/voight_kampff/test.rb b/lib/voight_kampff/test.rb index 5897098..44ef505 100644 --- a/lib/voight_kampff/test.rb +++ b/lib/voight_kampff/test.rb @@ -1,7 +1,48 @@ +# frozen_string_literal: true + module VoightKampff + # Test User-Agent against Voight-Kampff class Test CRAWLERS_FILENAME = 'crawler-user-agents.json' + class << self + def crawlers + @crawlers ||= JSON.parse File.read preferred_path + end + + def crawler_regexp + @crawler_regexp ||= begin + # NOTE: This is admittedly a bit convoluted + # but the performance gains make it worthwhile + crawler_patterns = + crawlers.map.with_index do |crawler, index| + "(?#{crawler['pattern']})" + end.join('|') + crawler_patterns = "(#{crawler_patterns})" + Regexp.new(crawler_patterns, Regexp::IGNORECASE) + end + end + + def lookup_paths + # These paths should be orderd by priority + [ + (Rails.root if defined? Rails), + Dir.pwd, + VoightKampff::ROOT + ] + end + + def preferred_path + lookup_paths.find do |base_path| + next unless base_path + + path = File.join base_path, 'tmp', CRAWLERS_FILENAME + + break path if File.exist? path + end + end + end + attr_accessor :user_agent_string def initialize(user_agent_string) @@ -19,42 +60,16 @@ def human? def bot? !human? end - alias :replicant? :bot? + alias replicant? bot? private - def lookup_paths - # These paths should be orderd by priority - base_paths = [] - base_paths << Rails.root if defined? Rails - base_paths << VoightKampff.root - - base_paths.map { |p| p.join('config', CRAWLERS_FILENAME) } - end - - def preferred_path - lookup_paths.find { |path| File.exists? path } - end - def matching_crawler - if match = crawler_regexp.match(@user_agent_string) - index = match.names.first.sub(/match/, '').to_i - crawlers[index] - end - end - - def crawler_regexp - @@crawler_regexp ||= begin - # NOTE: This is admittedly a bit convoluted but the performance gains make it worthwhile - index = -1 - crawler_patterns = crawlers.map{|c| index += 1; "(?#{c["pattern"]})" }.join("|") - crawler_patterns = "(#{crawler_patterns})" - Regexp.new(crawler_patterns, Regexp::IGNORECASE) - end - end + match = self.class.crawler_regexp.match(@user_agent_string) + return unless match - def crawlers - @@crawlers ||= JSON.load(File.open(preferred_path, 'r')) + index = match.names.first.sub(/match/, '').to_i + self.class.crawlers[index] end end end diff --git a/spec/controllers/replicants_controller_spec.rb b/spec/controllers/replicants_controller_spec.rb index 502df1d..8524b2b 100644 --- a/spec/controllers/replicants_controller_spec.rb +++ b/spec/controllers/replicants_controller_spec.rb @@ -1,9 +1,12 @@ +# frozen_string_literal: true + require 'spec_helper' describe ReplicantsController, type: :controller do let(:user_agent_string) { '' } before do - expect_any_instance_of(ActionController::TestRequest).to receive(:user_agent).and_return user_agent_string + expect_any_instance_of(ActionController::TestRequest) + .to receive(:user_agent).and_return user_agent_string get :index end diff --git a/spec/internal/app/controllers/replicants_controller.rb b/spec/internal/app/controllers/replicants_controller.rb index 5da67eb..93b5975 100644 --- a/spec/internal/app/controllers/replicants_controller.rb +++ b/spec/internal/app/controllers/replicants_controller.rb @@ -1,12 +1,15 @@ +# frozen_string_literal: true + class ReplicantsController < ActionController::Base def index header = "Replicants:\n===========\n" - status, content = if request.bot? - [200, '- Rick Deckard'] - else - [403, 'No replicants here'] - end + status, content = + if request.bot? + [200, '- Rick Deckard'] + else + [403, 'No replicants here'] + end render plain: header + content, status: status end diff --git a/spec/internal/config/routes.rb b/spec/internal/config/routes.rb index ea33d34..c1869af 100644 --- a/spec/internal/config/routes.rb +++ b/spec/internal/config/routes.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + Rails.application.routes.draw do resources :replicants, only: :index root to: 'replicants#index' diff --git a/spec/lib/voight_kampff/rack_request_spec.rb b/spec/lib/voight_kampff/rack_request_spec.rb index 73174f4..0978cd5 100644 --- a/spec/lib/voight_kampff/rack_request_spec.rb +++ b/spec/lib/voight_kampff/rack_request_spec.rb @@ -1,10 +1,14 @@ +# frozen_string_literal: true + require 'spec_helper' describe Rack::Request do - let(:user_agent_string) { } - let(:env) { {'HTTP_USER_AGENT' => user_agent_string} } + let(:user_agent_string) {} + let(:env) { { 'HTTP_USER_AGENT' => user_agent_string } } subject { Rack::Request.new(env) } + require_relative '../../../lib/voight_kampff/rack_request' + it { expect(subject).to respond_to :human? } it { expect(subject).to respond_to :bot? } it { expect(subject).to respond_to :replicant? } diff --git a/spec/lib/voight_kampff/test_spec.rb b/spec/lib/voight_kampff/test_spec.rb index e1e9d63..6b0916d 100644 --- a/spec/lib/voight_kampff/test_spec.rb +++ b/spec/lib/voight_kampff/test_spec.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'spec_helper' describe VoightKampff::Test do @@ -27,14 +29,22 @@ end context 'after the first run' do - before { VoightKampff::Test.new('anything').bot? } - - it 'is fast' do - expect( - Benchmark.realtime do - 20.times { VoightKampff::Test.new('anything').bot? } - end - ).to be < 0.003 + def time_of_run + Benchmark.realtime { VoightKampff::Test.new('anything').bot? } + end + + before do + VoightKampff::Test.instance_variable_set(:@crawler_regexp, nil) + end + + let(:number_of_runs) { 20 } + + times_faster = 2 + + it "is at least #{times_faster} times faster" do + expect(time_of_run / times_faster).to be > ( + (1..number_of_runs).map { time_of_run }.sum / number_of_runs + ) end end end diff --git a/spec/lib/voight_kampff_spec.rb b/spec/lib/voight_kampff_spec.rb index 0638348..721e862 100644 --- a/spec/lib/voight_kampff_spec.rb +++ b/spec/lib/voight_kampff_spec.rb @@ -1,9 +1,11 @@ +# frozen_string_literal: true + require 'spec_helper' describe VoightKampff do subject { VoightKampff } - HUMANS.each do |name, ua_string| + HUMANS.each do |_name, ua_string| context "when user agent is #{ua_string}" do let(:user_agent_string) { ua_string } @@ -14,7 +16,7 @@ end end - REPLICANTS.each do |name, ua_string| + REPLICANTS.each do |_name, ua_string| context "when user agent is #{ua_string}" do let(:user_agent_string) { ua_string } diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 9140cfa..aeed5a5 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,11 +1,14 @@ +# frozen_string_literal: true + require 'bundler/setup' +require 'pry-byebug' require 'combustion' -require 'voight_kampff' +require_relative '../lib/voight_kampff' Combustion.initialize! :action_controller require 'rspec/rails' -Dir['./spec/support/**/*.rb'].each { |f| require f } +Dir['./spec/support/**/*.rb'].sort.each { |f| require f } RSpec.configure do |config| end diff --git a/spec/support/humans.rb b/spec/support/humans.rb index c2e5654..31d43bf 100644 --- a/spec/support/humans.rb +++ b/spec/support/humans.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + HUMANS = { 'Unknown' => nil, # for the moment we're treating a blank user agent string as not a bot 'Chrome' => 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36', @@ -6,4 +8,4 @@ 'Internet Explorer' => 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko', 'Chrome Mobile' => 'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19', 'Safari for iOS' => 'Mozilla/5.0 (iPad; CPU OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53' -} +}.freeze diff --git a/spec/support/replicants.rb b/spec/support/replicants.rb index 3885d4d..1df2b74 100644 --- a/spec/support/replicants.rb +++ b/spec/support/replicants.rb @@ -1,5 +1,7 @@ +# frozen_string_literal: true + REPLICANTS = { 'Googlebot' => 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', 'Bingbot' => 'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)', 'Yahoo! Slurp' => 'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)' -} +}.freeze diff --git a/config/crawler-user-agents.json b/tmp/crawler-user-agents.json similarity index 100% rename from config/crawler-user-agents.json rename to tmp/crawler-user-agents.json diff --git a/voight_kampff.gemspec b/voight_kampff.gemspec index 342dc67..801a911 100644 --- a/voight_kampff.gemspec +++ b/voight_kampff.gemspec @@ -1,29 +1,30 @@ -# -*- encoding: utf-8 -*- -$:.unshift File.expand_path('../lib', __FILE__) -require 'voight_kampff/version' +# frozen_string_literal: true + +require_relative 'lib/voight_kampff/version' Gem::Specification.new do |s| s.name = 'voight_kampff' - s.summary = "Voight-Kampff bot detection" + s.summary = 'Voight-Kampff bot detection' s.description = 'Voight-Kampff detects bots, spiders, crawlers and replicants' - s.licenses = ['MIT'] + s.licenses = ['MIT'] - s.author = "Adam Crownoble" - s.email = "adam@codenoble.com" - s.homepage = "https://github.com/biola/Voight-Kampff" + s.author = 'Adam Crownoble' + s.email = 'adam@codenoble.com' + s.homepage = 'https://github.com/biola/Voight-Kampff' - # so that rubygems does not uses the actual object - s.version = VoightKampff::VERSION.dup - s.platform = Gem::Platform::RUBY.dup + s.version = VoightKampff::VERSION s.files = `git ls-files`.split("\n") s.files.reject! { |fn| fn.match(/\.travis.yml/) } - s.test_files = `git ls-files -- {tests}/**/*`.split("\n") + s.test_files = `git ls-files -- spec/**/*`.split("\n") s.require_path = 'lib' - s.add_dependency 'rack', ['>= 1.4', '< 3.0'] + s.required_ruby_version = '>= 2.4' s.add_development_dependency 'combustion', '~> 1.1' - s.add_development_dependency 'rails', '~> 5.2' - s.add_development_dependency 'rspec-rails', '~> 3.8' + s.add_development_dependency 'pry-byebug', '~> 3.7' + s.add_development_dependency 'rack', ['>= 1.4', '< 3.0'] + s.add_development_dependency 'rails', '>= 5.2', '< 7' + s.add_development_dependency 'rspec-rails', '~> 5.0' + s.add_development_dependency 'rubocop', '~> 0.89.0' end