From 22ddef28321b7a1d678f5d0bedba68ed6c08a968 Mon Sep 17 00:00:00 2001 From: iris Date: Wed, 19 Feb 2025 14:19:39 +0300 Subject: [PATCH 1/5] benchmark --- .rspec | 1 + Gemfile | 4 ++ Gemfile.lock | 23 +++++++ app/services/trips_importer.rb | 40 ++++++++++++ case_study.md | 18 ++++++ lib/tasks/utils.rake | 35 ++--------- spec/fixtures/files/example.json | 32 ++++++++++ spec/rails_helper.rb | 68 ++++++++++++++++++++ spec/services/trips_importer_spec.rb | 50 +++++++++++++++ spec/spec_helper.rb | 94 ++++++++++++++++++++++++++++ 10 files changed, 336 insertions(+), 29 deletions(-) create mode 100644 .rspec create mode 100644 app/services/trips_importer.rb create mode 100644 case_study.md create mode 100644 spec/fixtures/files/example.json create mode 100644 spec/rails_helper.rb create mode 100644 spec/services/trips_importer_spec.rb create mode 100644 spec/spec_helper.rb diff --git a/.rspec b/.rspec new file mode 100644 index 00000000..c99d2e73 --- /dev/null +++ b/.rspec @@ -0,0 +1 @@ +--require spec_helper diff --git a/Gemfile b/Gemfile index 34074dfd..e467ba32 100644 --- a/Gemfile +++ b/Gemfile @@ -12,3 +12,7 @@ gem 'rack-mini-profiler' # Windows does not include zoneinfo files, so bundle the tzinfo-data gem gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby] + +group :development, :test do + gem 'rspec-rails', '~> 7.0.0' +end diff --git a/Gemfile.lock b/Gemfile.lock index a9ddd818..749cdb2f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -82,9 +82,11 @@ GEM connection_pool (2.5.0) crass (1.0.6) date (3.4.1) + diff-lcs (1.6.0) drb (2.2.1) erubi (1.13.1) ffi (1.17.1-arm64-darwin) + ffi (1.17.1-x86_64-linux-gnu) globalid (1.2.1) activesupport (>= 6.1) i18n (1.14.7) @@ -122,6 +124,8 @@ GEM nio4r (2.7.4) nokogiri (1.18.2-arm64-darwin) racc (~> 1.4) + nokogiri (1.18.2-x86_64-linux-gnu) + racc (~> 1.4) pg (1.5.9) pp (0.6.2) prettyprint @@ -179,6 +183,23 @@ GEM psych (>= 4.0.0) reline (0.6.0) io-console (~> 0.5) + rspec-core (3.13.3) + rspec-support (~> 3.13.0) + rspec-expectations (3.13.3) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.13.0) + rspec-mocks (3.13.2) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.13.0) + rspec-rails (7.0.2) + actionpack (>= 7.0) + activesupport (>= 7.0) + railties (>= 7.0) + rspec-core (~> 3.13) + rspec-expectations (~> 3.13) + rspec-mocks (~> 3.13) + rspec-support (~> 3.13) + rspec-support (3.13.2) securerandom (0.4.1) stringio (3.1.2) thor (1.3.2) @@ -195,6 +216,7 @@ GEM PLATFORMS arm64-darwin-24 + x86_64-linux DEPENDENCIES bootsnap @@ -203,6 +225,7 @@ DEPENDENCIES puma rack-mini-profiler rails (~> 8.0.1) + rspec-rails (~> 7.0.0) tzinfo-data RUBY VERSION diff --git a/app/services/trips_importer.rb b/app/services/trips_importer.rb new file mode 100644 index 00000000..32f3d896 --- /dev/null +++ b/app/services/trips_importer.rb @@ -0,0 +1,40 @@ +class TripsImporter + attr_reader :file_name + + def initialize(file_name) + @file_name = file_name + end + + def call + json = JSON.parse(File.read(file_name)) + + ActiveRecord::Base.transaction do + City.delete_all + Bus.delete_all + Service.delete_all + Trip.delete_all + ActiveRecord::Base.connection.execute('delete from buses_services;') + + json.each do |trip| + from = City.find_or_create_by(name: trip['from']) + to = City.find_or_create_by(name: trip['to']) + services = [] + trip['bus']['services'].each do |service| + s = Service.find_or_create_by(name: service) + services << s + end + bus = Bus.find_or_create_by(number: trip['bus']['number']) + bus.update(model: trip['bus']['model'], services: services) + + Trip.create!( + from: from, + to: to, + bus: bus, + start_time: trip['start_time'], + duration_minutes: trip['duration_minutes'], + price_cents: trip['price_cents'], + ) + end + end + end +end diff --git a/case_study.md b/case_study.md new file mode 100644 index 00000000..c4c31766 --- /dev/null +++ b/case_study.md @@ -0,0 +1,18 @@ +# Case-study оптимизации + +## Актуальная проблема +1. Файл с данными `large.json` (100K трипов) загружается больше 12 минут. В то время как бюджет на загрузку <= 1 минуты. +2. Страница с данными загружается за ~11 секунд. Видно, что выполняется очень много запросов к БД. + +## Формирование метрики +Для файла `small.json` данные загружаются за 12.00s и 154MB +Для файла `medium.json` данные загружаются за 81.85s и 197MB +Для файла `large.json` данные загружаются за 735.55s и 391MB + +Я буду проверять время загрузки сначала для 1K, потом 10K, 100K трипов. + +## Гарантия корректности работы оптимизированной программы +Я вынесла логику из таски в сервисе TripsImporter и написала rspec тест. + + + diff --git a/lib/tasks/utils.rake b/lib/tasks/utils.rake index 540fe871..c975116b 100644 --- a/lib/tasks/utils.rake +++ b/lib/tasks/utils.rake @@ -1,34 +1,11 @@ # Наивная загрузка данных из json-файла в БД # rake reload_json[fixtures/small.json] task :reload_json, [:file_name] => :environment do |_task, args| - json = JSON.parse(File.read(args.file_name)) - - ActiveRecord::Base.transaction do - City.delete_all - Bus.delete_all - Service.delete_all - Trip.delete_all - ActiveRecord::Base.connection.execute('delete from buses_services;') - - json.each do |trip| - from = City.find_or_create_by(name: trip['from']) - to = City.find_or_create_by(name: trip['to']) - services = [] - trip['bus']['services'].each do |service| - s = Service.find_or_create_by(name: service) - services << s - end - bus = Bus.find_or_create_by(number: trip['bus']['number']) - bus.update(model: trip['bus']['model'], services: services) - - Trip.create!( - from: from, - to: to, - bus: bus, - start_time: trip['start_time'], - duration_minutes: trip['duration_minutes'], - price_cents: trip['price_cents'], - ) - end + time = Benchmark.realtime do + TripsImporter.new(args.file_name).call end + puts "Finish in #{time.round(2)}" + + memory = `ps -o rss= -p #{Process.pid}`.to_i / 1024 + puts "MEMORY USAGE: %d MB" % (memory) end diff --git a/spec/fixtures/files/example.json b/spec/fixtures/files/example.json new file mode 100644 index 00000000..45756f9e --- /dev/null +++ b/spec/fixtures/files/example.json @@ -0,0 +1,32 @@ +[ + { + "bus": { + "model": "Икарус", + "number": "123", + "services": [ + "Туалет", + "WiFi" + ] + }, + "duration_minutes": 168, + "from": "Москва", + "price_cents": 474, + "start_time": "11:00", + "to": "Самара" + }, + { + "bus": { + "model": "Икарус", + "number": "123", + "services": [ + "Туалет", + "WiFi" + ] + }, + "duration_minutes": 37, + "from": "Самара", + "price_cents": 173, + "start_time": "17:30", + "to": "Москва" + } +] diff --git a/spec/rails_helper.rb b/spec/rails_helper.rb new file mode 100644 index 00000000..0cd9a904 --- /dev/null +++ b/spec/rails_helper.rb @@ -0,0 +1,68 @@ +# This file is copied to spec/ when you run 'rails generate rspec:install' +require 'spec_helper' +ENV['RAILS_ENV'] ||= 'test' +require_relative '../config/environment' +# Prevent database truncation if the environment is production +abort("The Rails environment is running in production mode!") if Rails.env.production? +# Uncomment the line below in case you have `--require rails_helper` in the `.rspec` file +# that will avoid rails generators crashing because migrations haven't been run yet +# return unless Rails.env.test? +require 'rspec/rails' +# Add additional requires below this line. Rails is not loaded until this point! + +# Requires supporting ruby files with custom matchers and macros, etc, in +# spec/support/ and its subdirectories. Files matching `spec/**/*_spec.rb` are +# run as spec files by default. This means that files in spec/support that end +# in _spec.rb will both be required and run as specs, causing the specs to be +# run twice. It is recommended that you do not name files matching this glob to +# end with _spec.rb. You can configure this pattern with the --pattern +# option on the command line or in ~/.rspec, .rspec or `.rspec-local`. +# +# The following line is provided for convenience purposes. It has the downside +# of increasing the boot-up time by auto-requiring all files in the support +# directory. Alternatively, in the individual `*_spec.rb` files, manually +# require only the support files necessary. +# +# Rails.root.glob('spec/support/**/*.rb').sort_by(&:to_s).each { |f| require f } + +# Checks for pending migrations and applies them before tests are run. +# If you are not using ActiveRecord, you can remove these lines. +begin + ActiveRecord::Migration.maintain_test_schema! +rescue ActiveRecord::PendingMigrationError => e + abort e.to_s.strip +end +RSpec.configure do |config| + # Remove this line if you're not using ActiveRecord or ActiveRecord fixtures + config.fixture_paths = [ + Rails.root.join('spec/fixtures') + ] + + # If you're not using ActiveRecord, or you'd prefer not to run each of your + # examples within a transaction, remove the following line or assign false + # instead of true. + config.use_transactional_fixtures = true + + # You can uncomment this line to turn off ActiveRecord support entirely. + # config.use_active_record = false + + # RSpec Rails can automatically mix in different behaviours to your tests + # based on their file location, for example enabling you to call `get` and + # `post` in specs under `spec/controllers`. + # + # You can disable this behaviour by removing the line below, and instead + # explicitly tag your specs with their type, e.g.: + # + # RSpec.describe UsersController, type: :controller do + # # ... + # end + # + # The different available types are documented in the features, such as in + # https://rspec.info/features/7-0/rspec-rails + config.infer_spec_type_from_file_location! + + # Filter lines from Rails gems in backtraces. + config.filter_rails_from_backtrace! + # arbitrary gems may also be filtered via: + # config.filter_gems_from_backtrace("gem name") +end diff --git a/spec/services/trips_importer_spec.rb b/spec/services/trips_importer_spec.rb new file mode 100644 index 00000000..22f9ec47 --- /dev/null +++ b/spec/services/trips_importer_spec.rb @@ -0,0 +1,50 @@ +require 'rails_helper' + +RSpec.describe TripsImporter do + describe '#call' do + let(:file_name) { Rails.root.join("spec/fixtures/files/example.json") } + subject { TripsImporter.new(file_name).call } + + it 'creates cities' do + expect {subject}.to change{City.count}.by(2) + + expect(City.find_by(name: 'Самара')).to be_present + expect(City.find_by(name: 'Москва')).to be_present + end + + it 'creates services' do + expect {subject}.to change{Service.count}.by(2) + + expect(Service.find_by(name: 'WiFi')).to be_present + expect(Service.find_by(name: 'Туалет')).to be_present + end + + it 'creates buses and associates services' do + expect {subject}.to change{Bus.count}.by(2) + + bus = Bus.find_by(number: '123', model: 'Икарус') + expect(bus).to be_present + expect(bus.services.map(&:name)).to match_array(['WiFi', 'Туалет']) + end + + it 'creates trips with correct attributes' do + expect {subject}.to change{Trip.count}.by(2) + + trip1 = Trip.first + expect(trip1.from.name).to eq('Москва') + expect(trip1.to.name).to eq('Самара') + expect(trip1.bus.number).to eq('123') + expect(trip1.start_time).to eq('11:00') + expect(trip1.duration_minutes).to eq(168) + expect(trip1.price_cents).to eq(474) + + trip2 = Trip.second + expect(trip2.from.name).to eq('Самара') + expect(trip2.to.name).to eq('Москва') + expect(trip2.bus.number).to eq('123') + expect(trip2.start_time).to eq('17:30') + expect(trip2.duration_minutes).to eq(37) + expect(trip2.price_cents).to eq(173) + end + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 100644 index 00000000..327b58ea --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1,94 @@ +# This file was generated by the `rails generate rspec:install` command. Conventionally, all +# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`. +# The generated `.rspec` file contains `--require spec_helper` which will cause +# this file to always be loaded, without a need to explicitly require it in any +# files. +# +# Given that it is always loaded, you are encouraged to keep this file as +# light-weight as possible. Requiring heavyweight dependencies from this file +# will add to the boot time of your test suite on EVERY test run, even for an +# individual file that may not need all of that loaded. Instead, consider making +# a separate helper file that requires the additional dependencies and performs +# the additional setup, and require it from the spec files that actually need +# it. +# +# See https://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration +RSpec.configure do |config| + # rspec-expectations config goes here. You can use an alternate + # assertion/expectation library such as wrong or the stdlib/minitest + # assertions if you prefer. + config.expect_with :rspec do |expectations| + # This option will default to `true` in RSpec 4. It makes the `description` + # and `failure_message` of custom matchers include text for helper methods + # defined using `chain`, e.g.: + # be_bigger_than(2).and_smaller_than(4).description + # # => "be bigger than 2 and smaller than 4" + # ...rather than: + # # => "be bigger than 2" + expectations.include_chain_clauses_in_custom_matcher_descriptions = true + end + + # rspec-mocks config goes here. You can use an alternate test double + # library (such as bogus or mocha) by changing the `mock_with` option here. + config.mock_with :rspec do |mocks| + # Prevents you from mocking or stubbing a method that does not exist on + # a real object. This is generally recommended, and will default to + # `true` in RSpec 4. + mocks.verify_partial_doubles = true + end + + # This option will default to `:apply_to_host_groups` in RSpec 4 (and will + # have no way to turn it off -- the option exists only for backwards + # compatibility in RSpec 3). It causes shared context metadata to be + # inherited by the metadata hash of host groups and examples, rather than + # triggering implicit auto-inclusion in groups with matching metadata. + config.shared_context_metadata_behavior = :apply_to_host_groups + +# The settings below are suggested to provide a good initial experience +# with RSpec, but feel free to customize to your heart's content. +=begin + # This allows you to limit a spec run to individual examples or groups + # you care about by tagging them with `:focus` metadata. When nothing + # is tagged with `:focus`, all examples get run. RSpec also provides + # aliases for `it`, `describe`, and `context` that include `:focus` + # metadata: `fit`, `fdescribe` and `fcontext`, respectively. + config.filter_run_when_matching :focus + + # Allows RSpec to persist some state between runs in order to support + # the `--only-failures` and `--next-failure` CLI options. We recommend + # you configure your source control system to ignore this file. + config.example_status_persistence_file_path = "spec/examples.txt" + + # Limits the available syntax to the non-monkey patched syntax that is + # recommended. For more details, see: + # https://rspec.info/features/3-12/rspec-core/configuration/zero-monkey-patching-mode/ + config.disable_monkey_patching! + + # Many RSpec users commonly either run the entire suite or an individual + # file, and it's useful to allow more verbose output when running an + # individual spec file. + if config.files_to_run.one? + # Use the documentation formatter for detailed output, + # unless a formatter has already been configured + # (e.g. via a command-line flag). + config.default_formatter = "doc" + end + + # Print the 10 slowest examples and example groups at the + # end of the spec run, to help surface which specs are running + # particularly slow. + config.profile_examples = 10 + + # Run specs in random order to surface order dependencies. If you find an + # order dependency and want to debug it, you can fix the order by providing + # the seed, which is printed after each run. + # --seed 1234 + config.order = :random + + # Seed global randomization in this process using the `--seed` CLI option. + # Setting this allows you to use `--seed` to deterministically reproduce + # test failures related to randomization by passing the same `--seed` value + # as the one that triggered the failure. + Kernel.srand config.seed +=end +end From 8d309312f5b530f1b036158a9309130e95b38631 Mon Sep 17 00:00:00 2001 From: iris Date: Tue, 25 Feb 2025 13:02:20 +0300 Subject: [PATCH 2/5] rewrite TripsImporter --- Gemfile | 7 +++++-- Gemfile.lock | 3 +++ app/models/bus.rb | 3 ++- app/models/buses_service.rb | 4 ++++ app/models/service.rb | 3 ++- app/services/trips_importer.rb | 37 +++++++++++++++++++++++----------- case_study.md | 9 +++++++-- 7 files changed, 48 insertions(+), 18 deletions(-) create mode 100644 app/models/buses_service.rb diff --git a/Gemfile b/Gemfile index e467ba32..f4356fbc 100644 --- a/Gemfile +++ b/Gemfile @@ -4,12 +4,15 @@ git_source(:github) { |repo| "https://github.com/#{repo}.git" } ruby file: '.ruby-version' gem 'rails', '~> 8.0.1' -gem 'pg' -gem 'puma' + gem 'listen' +gem 'activerecord-import' gem 'bootsnap' +gem 'pg' +gem 'puma' gem 'rack-mini-profiler' + # Windows does not include zoneinfo files, so bundle the tzinfo-data gem gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby] diff --git a/Gemfile.lock b/Gemfile.lock index 749cdb2f..a0651ab5 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -53,6 +53,8 @@ GEM activemodel (= 8.0.1) activesupport (= 8.0.1) timeout (>= 0.4.0) + activerecord-import (2.1.0) + activerecord (>= 4.2) activestorage (8.0.1) actionpack (= 8.0.1) activejob (= 8.0.1) @@ -219,6 +221,7 @@ PLATFORMS x86_64-linux DEPENDENCIES + activerecord-import bootsnap listen pg diff --git a/app/models/bus.rb b/app/models/bus.rb index 1dcc54cb..d97de31f 100644 --- a/app/models/bus.rb +++ b/app/models/bus.rb @@ -13,7 +13,8 @@ class Bus < ApplicationRecord ].freeze has_many :trips - has_and_belongs_to_many :services, join_table: :buses_services + has_many :buses_services + has_many :services, through: :buses_services validates :number, presence: true, uniqueness: true validates :model, inclusion: { in: MODELS } diff --git a/app/models/buses_service.rb b/app/models/buses_service.rb new file mode 100644 index 00000000..6219d44e --- /dev/null +++ b/app/models/buses_service.rb @@ -0,0 +1,4 @@ +class BusesService < ApplicationRecord + belongs_to :bus + belongs_to :service +end diff --git a/app/models/service.rb b/app/models/service.rb index 9cbb2a32..1781543c 100644 --- a/app/models/service.rb +++ b/app/models/service.rb @@ -12,7 +12,8 @@ class Service < ApplicationRecord 'Можно не печатать билет', ].freeze - has_and_belongs_to_many :buses, join_table: :buses_services + has_many :buses_services + has_many :buses, through: :buses_services validates :name, presence: true validates :name, inclusion: { in: SERVICES } diff --git a/app/services/trips_importer.rb b/app/services/trips_importer.rb index 32f3d896..cedd687f 100644 --- a/app/services/trips_importer.rb +++ b/app/services/trips_importer.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + class TripsImporter attr_reader :file_name @@ -15,26 +17,37 @@ def call Trip.delete_all ActiveRecord::Base.connection.execute('delete from buses_services;') + cities = {} + buses = {} + services = {} + buses_services = {} + trips = [] + json.each do |trip| - from = City.find_or_create_by(name: trip['from']) - to = City.find_or_create_by(name: trip['to']) - services = [] + cities[trip['from']] ||= City.new(name: trip['from']) + cities[trip['to']] ||= City.new(name: trip['to']) + bus = buses[trip['bus']['number']] ||= Bus.new(number: trip['bus']['number'], model: trip['bus']['model']) + trip['bus']['services'].each do |service| - s = Service.find_or_create_by(name: service) - services << s + bus_service = services[service] ||= Service.new(name: service) + buses_services[[bus, bus_service]] ||= BusesService.new(bus: bus, service: bus_service) end - bus = Bus.find_or_create_by(number: trip['bus']['number']) - bus.update(model: trip['bus']['model'], services: services) - Trip.create!( - from: from, - to: to, - bus: bus, + trips << Trip.new( + from: cities[trip['from']], + to: cities[trip['to']], + bus: buses[trip['bus']['number']], start_time: trip['start_time'], duration_minutes: trip['duration_minutes'], - price_cents: trip['price_cents'], + price_cents: trip['price_cents'] ) end + + City.import!(cities.values) + Bus.import!(buses.values) + Service.import!(services.values) + BusesService.import!(buses_services.values) + Trip.import!(trips) end end end diff --git a/case_study.md b/case_study.md index c4c31766..c4077d6a 100644 --- a/case_study.md +++ b/case_study.md @@ -2,7 +2,7 @@ ## Актуальная проблема 1. Файл с данными `large.json` (100K трипов) загружается больше 12 минут. В то время как бюджет на загрузку <= 1 минуты. -2. Страница с данными загружается за ~11 секунд. Видно, что выполняется очень много запросов к БД. +2. Страница с данными загружается за ~17 секунд. Видно, что выполняется очень много запросов к БД. ## Формирование метрики Для файла `small.json` данные загружаются за 12.00s и 154MB @@ -15,4 +15,9 @@ Я вынесла логику из таски в сервисе TripsImporter и написала rspec тест. - +## Проблема 1 +Долгий иморт +Переписала TripsImporter с использованием Activerecord-Import, время загрузки изменилось +для файла `small.json` данные загружаются за 1.69s +для файла `medium.json` данные загружаются за 4.1s +для файла `large.json` данные загружаются за 13.8s From 694dfcb03118ce749ae48b5d2b164428807d7039 Mon Sep 17 00:00:00 2001 From: iris Date: Tue, 25 Feb 2025 18:20:58 +0300 Subject: [PATCH 3/5] optimize N+1 --- Gemfile | 4 ++++ Gemfile.lock | 5 +++++ app/controllers/trips_controller.rb | 2 +- case_study.md | 7 +++++++ config/environments/development.rb | 9 +++++++++ config/environments/test.rb | 6 ++++++ spec/controllers/trips_controller_spec.rb | 24 +++++++++++++++++++++++ 7 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 spec/controllers/trips_controller_spec.rb diff --git a/Gemfile b/Gemfile index f4356fbc..61de8f74 100644 --- a/Gemfile +++ b/Gemfile @@ -19,3 +19,7 @@ gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby] group :development, :test do gem 'rspec-rails', '~> 7.0.0' end + +group :development do + gem 'bullet' +end diff --git a/Gemfile.lock b/Gemfile.lock index a0651ab5..f95cb2df 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -80,6 +80,9 @@ GEM bootsnap (1.18.4) msgpack (~> 1.2) builder (3.3.0) + bullet (8.0.1) + activesupport (>= 3.0.0) + uniform_notifier (~> 1.11) concurrent-ruby (1.3.5) connection_pool (2.5.0) crass (1.0.6) @@ -208,6 +211,7 @@ GEM timeout (0.4.3) tzinfo (2.0.6) concurrent-ruby (~> 1.0) + uniform_notifier (1.16.0) uri (1.0.2) useragent (0.16.11) websocket-driver (0.7.7) @@ -223,6 +227,7 @@ PLATFORMS DEPENDENCIES activerecord-import bootsnap + bullet listen pg puma diff --git a/app/controllers/trips_controller.rb b/app/controllers/trips_controller.rb index acb38be2..ab6dcbb4 100644 --- a/app/controllers/trips_controller.rb +++ b/app/controllers/trips_controller.rb @@ -2,6 +2,6 @@ class TripsController < ApplicationController def index @from = City.find_by_name!(params[:from]) @to = City.find_by_name!(params[:to]) - @trips = Trip.where(from: @from, to: @to).order(:start_time) + @trips = Trip.where(from: @from, to: @to).order(:start_time).includes([bus: :services]) end end diff --git a/case_study.md b/case_study.md index c4077d6a..6435c548 100644 --- a/case_study.md +++ b/case_study.md @@ -21,3 +21,10 @@ для файла `small.json` данные загружаются за 1.69s для файла `medium.json` данные загружаются за 4.1s для файла `large.json` данные загружаются за 13.8s + +## Проблема 2 +Долгая загрузка страницы +rack-mini-profiler показывает, что страница грузится минимум 17s +`Rendering: trips/index.html.erb 9023.1 +30.4 1012 sql 1318.8` +Добавляю gem bullet. Он показывает, что надо добавить в запрос `.includes[:bus]` и `.includes[:services]` После добавления includes время загрузки уменьшилось до 9s, и больше нет 1000+ запросов sql из одного места. + diff --git a/config/environments/development.rb b/config/environments/development.rb index bc3f8142..59d84671 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -1,4 +1,13 @@ Rails.application.configure do + config.after_initialize do + Bullet.enable = true + Bullet.alert = true + Bullet.bullet_logger = true + Bullet.console = true + Bullet.rails_logger = true + Bullet.add_footer = true + end + # Settings specified here will take precedence over those in config/application.rb. # In the development environment your application's code is reloaded on diff --git a/config/environments/test.rb b/config/environments/test.rb index 0a38fd3c..bc5971ab 100644 --- a/config/environments/test.rb +++ b/config/environments/test.rb @@ -1,4 +1,10 @@ Rails.application.configure do + config.after_initialize do + Bullet.enable = true + Bullet.bullet_logger = true + Bullet.raise = true # raise an error if n+1 query occurs + end + # Settings specified here will take precedence over those in config/application.rb. # The test environment is used exclusively to run your application's diff --git a/spec/controllers/trips_controller_spec.rb b/spec/controllers/trips_controller_spec.rb new file mode 100644 index 00000000..470cd549 --- /dev/null +++ b/spec/controllers/trips_controller_spec.rb @@ -0,0 +1,24 @@ +require 'rails_helper' + +RSpec.describe TripsController, type: :controller do + describe 'GET #index' do + let!(:city_from) { create(:city, name: 'Самара') } + let!(:city_to) { create(:city, name: 'Москва') } + let!(:trip) { create(:trip, from: city_from, to: city_to, start_time: 1.day.from_now) } + let!(:other_trip) { create(:trip, from: create(:city), to: create(:city)) } + + describe 'response' do + before do + get :index, params: { from: 'Самара', to: 'Москва' } + end + + it 'returns http success', :aggregate_failures do + expect(response).to have_http_status(:success) + expect(assigns(:from)).to eq(city_from) + expect(assigns(:to)).to eq(city_to) + expect(assigns(:trips)).to eq([trip]) + expect(response).to render_template(:index) + end + end + end +end From 70297a2442bb097d180275c8231e4be68b04c28e Mon Sep 17 00:00:00 2001 From: iris Date: Mon, 10 Mar 2025 11:19:57 +0300 Subject: [PATCH 4/5] add pghero --- Gemfile | 5 +++ Gemfile.lock | 24 ++++++++++++ app/views/trips/_delimiter.html.erb | 1 - app/views/trips/_service.html.erb | 1 - app/views/trips/_services.html.erb | 6 --- app/views/trips/_trip.html.erb | 5 --- app/views/trips/index.html.erb | 18 +++++++-- case_study.md | 20 +++++++++- config/application.rb | 3 ++ config/routes.rb | 2 + ...0250310075553_create_pghero_query_stats.rb | 15 ++++++++ ...0250310080020_create_pghero_space_stats.rb | 13 +++++++ .../20250310080218_remove_unused_indexes.rb | 5 +++ ...0310083455_add_composite_index_to_trips.rb | 7 ++++ ...50310091912_add_index_to_buses_servises.rb | 7 ++++ db/schema.rb | 38 ++++++++++++++----- 16 files changed, 142 insertions(+), 28 deletions(-) delete mode 100644 app/views/trips/_delimiter.html.erb delete mode 100644 app/views/trips/_service.html.erb delete mode 100644 app/views/trips/_services.html.erb delete mode 100644 app/views/trips/_trip.html.erb create mode 100644 db/migrate/20250310075553_create_pghero_query_stats.rb create mode 100644 db/migrate/20250310080020_create_pghero_space_stats.rb create mode 100644 db/migrate/20250310080218_remove_unused_indexes.rb create mode 100644 db/migrate/20250310083455_add_composite_index_to_trips.rb create mode 100644 db/migrate/20250310091912_add_index_to_buses_servises.rb diff --git a/Gemfile b/Gemfile index 61de8f74..60147ee8 100644 --- a/Gemfile +++ b/Gemfile @@ -9,8 +9,13 @@ gem 'listen' gem 'activerecord-import' gem 'bootsnap' gem 'pg' +gem 'pghero' +gem "pg_query", ">= 2" gem 'puma' gem 'rack-mini-profiler' +gem 'sprockets', '~> 4.0' +gem 'sprockets-rails' +gem 'strong_migrations' # Windows does not include zoneinfo files, so bundle the tzinfo-data gem diff --git a/Gemfile.lock b/Gemfile.lock index f95cb2df..3289194c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -94,6 +94,12 @@ GEM ffi (1.17.1-x86_64-linux-gnu) globalid (1.2.1) activesupport (>= 6.1) + google-protobuf (4.29.3-arm64-darwin) + bigdecimal + rake (>= 13) + google-protobuf (4.29.3-x86_64-linux) + bigdecimal + rake (>= 13) i18n (1.14.7) concurrent-ruby (~> 1.0) io-console (0.8.0) @@ -132,6 +138,10 @@ GEM nokogiri (1.18.2-x86_64-linux-gnu) racc (~> 1.4) pg (1.5.9) + pg_query (6.0.0) + google-protobuf (>= 3.25.3) + pghero (3.6.1) + activerecord (>= 6.1) pp (0.6.2) prettyprint prettyprint (0.2.0) @@ -206,7 +216,16 @@ GEM rspec-support (~> 3.13) rspec-support (3.13.2) securerandom (0.4.1) + sprockets (4.2.1) + concurrent-ruby (~> 1.0) + rack (>= 2.2.4, < 4) + sprockets-rails (3.5.2) + actionpack (>= 6.1) + activesupport (>= 6.1) + sprockets (>= 3.0.0) stringio (3.1.2) + strong_migrations (2.2.0) + activerecord (>= 7) thor (1.3.2) timeout (0.4.3) tzinfo (2.0.6) @@ -230,10 +249,15 @@ DEPENDENCIES bullet listen pg + pg_query (>= 2) + pghero puma rack-mini-profiler rails (~> 8.0.1) rspec-rails (~> 7.0.0) + sprockets (~> 4.0) + sprockets-rails + strong_migrations tzinfo-data RUBY VERSION diff --git a/app/views/trips/_delimiter.html.erb b/app/views/trips/_delimiter.html.erb deleted file mode 100644 index 3f845ad0..00000000 --- a/app/views/trips/_delimiter.html.erb +++ /dev/null @@ -1 +0,0 @@ -==================================================== diff --git a/app/views/trips/_service.html.erb b/app/views/trips/_service.html.erb deleted file mode 100644 index 178ea8c0..00000000 --- a/app/views/trips/_service.html.erb +++ /dev/null @@ -1 +0,0 @@ -
  • <%= "#{service.name}" %>
  • diff --git a/app/views/trips/_services.html.erb b/app/views/trips/_services.html.erb deleted file mode 100644 index 2de639fc..00000000 --- a/app/views/trips/_services.html.erb +++ /dev/null @@ -1,6 +0,0 @@ -
  • Сервисы в автобусе:
  • -
      - <% services.each do |service| %> - <%= render "service", service: service %> - <% end %> -
    diff --git a/app/views/trips/_trip.html.erb b/app/views/trips/_trip.html.erb deleted file mode 100644 index fa1de9aa..00000000 --- a/app/views/trips/_trip.html.erb +++ /dev/null @@ -1,5 +0,0 @@ -
  • <%= "Отправление: #{trip.start_time}" %>
  • -
  • <%= "Прибытие: #{(Time.parse(trip.start_time) + trip.duration_minutes.minutes).strftime('%H:%M')}" %>
  • -
  • <%= "В пути: #{trip.duration_minutes / 60}ч. #{trip.duration_minutes % 60}мин." %>
  • -
  • <%= "Цена: #{trip.price_cents / 100}р. #{trip.price_cents % 100}коп." %>
  • -
  • <%= "Автобус: #{trip.bus.model} №#{trip.bus.number}" %>
  • diff --git a/app/views/trips/index.html.erb b/app/views/trips/index.html.erb index a60bce41..90dbbd31 100644 --- a/app/views/trips/index.html.erb +++ b/app/views/trips/index.html.erb @@ -7,10 +7,20 @@ <% @trips.each do |trip| %>
      - <%= render "trip", trip: trip %> - <% if trip.bus.services.present? %> - <%= render "services", services: trip.bus.services %> +
    • <%= "Отправление: #{trip.start_time}" %>
    • +
    • <%= "Прибытие: #{(Time.parse(trip.start_time) + trip.duration_minutes.minutes).strftime('%H:%M')}" %>
    • +
    • <%= "В пути: #{trip.duration_minutes / 60}ч. #{trip.duration_minutes % 60}мин." %>
    • +
    • <%= "Цена: #{trip.price_cents / 100}р. #{trip.price_cents % 100}коп." %>
    • +
    • <%= "Автобус: #{trip.bus.model} №#{trip.bus.number}" %>
    • + <% services = trip.bus.services %> + <% if services.present? %> +
    • Сервисы в автобусе:
    • +
        + <% services.each do |service| %> +
      • <%= "#{service.name}" %>
      • + <% end %> +
      <% end %>
    - <%= render "delimiter" %> + ==================================================== <% end %> diff --git a/case_study.md b/case_study.md index 6435c548..691514bf 100644 --- a/case_study.md +++ b/case_study.md @@ -26,5 +26,21 @@ Долгая загрузка страницы rack-mini-profiler показывает, что страница грузится минимум 17s `Rendering: trips/index.html.erb 9023.1 +30.4 1012 sql 1318.8` -Добавляю gem bullet. Он показывает, что надо добавить в запрос `.includes[:bus]` и `.includes[:services]` После добавления includes время загрузки уменьшилось до 9s, и больше нет 1000+ запросов sql из одного места. - +- Добавляю gem bullet. Он показывает, что надо добавить в запрос `.includes[:bus]` и `.includes[:services]` После добавления includes время загрузки уменьшилось до 9s, и больше нет 1000+ запросов sql из одного места. + + - Убрала паршалы из вью, загрузка сократилась до 6-7s + +## Проблема 3 +Долгая загрузка страницы. Изучаю страницу при помощи pghero. +Вкладка Overview показывает No long running queries. Не предлагает никаких индексов. +На вкладке Queries: +22% времени тратится на запрос `SELECT "trips".* FROM "trips" WHERE "trips"."from_id" = $1 AND "trips"."to_id" = $2 ORDER BY "trips"."start_time" ASC` +Предлагается +`CREATE INDEX CONCURRENTLY ON trips (from_id, to_id)` +И 18% на `SELECT COUNT(*) FROM "trips" WHERE "trips"."from_id" = $1 AND "trips"."to_id" = $2` +Предлагается то же самое. +Добавляю индексы. +После добавления индексов процент снизился до 17% и 14% соответственно, скорость загрузки сократилась до 5837ms + +Так же pghero предлагает добавить индекс `CREATE INDEX CONCURRENTLY ON buses_services (bus_id)` +Хотя запрос `SELECT "buses_services".* FROM "buses_services" WHERE ....` pfybvftn 3% diff --git a/config/application.rb b/config/application.rb index f714e62d..b630c72f 100644 --- a/config/application.rb +++ b/config/application.rb @@ -10,6 +10,9 @@ module Task4 class Application < Rails::Application # Initialize configuration defaults for originally generated Rails version. config.load_defaults 8.0 + config.assets.enabled = true + config.assets.compile = true + config.assets.precompile += %w( pghero/application.css pghero/application.js ) # Settings in config/environments/* take precedence over those specified here. # Application configuration can go into files in config/initializers diff --git a/config/routes.rb b/config/routes.rb index 0bbefa7a..f2784ed4 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -1,4 +1,6 @@ Rails.application.routes.draw do # For details on the DSL available within this file, see http://guides.rubyonrails.org/routing.html get "автобусы/:from/:to" => "trips#index" + + mount PgHero::Engine, at: "pghero" end diff --git a/db/migrate/20250310075553_create_pghero_query_stats.rb b/db/migrate/20250310075553_create_pghero_query_stats.rb new file mode 100644 index 00000000..74aaaa9a --- /dev/null +++ b/db/migrate/20250310075553_create_pghero_query_stats.rb @@ -0,0 +1,15 @@ +class CreatePgheroQueryStats < ActiveRecord::Migration[8.0] + def change + create_table :pghero_query_stats do |t| + t.text :database + t.text :user + t.text :query + t.integer :query_hash, limit: 8 + t.float :total_time + t.integer :calls, limit: 8 + t.timestamp :captured_at + end + + add_index :pghero_query_stats, [:database, :captured_at] + end +end diff --git a/db/migrate/20250310080020_create_pghero_space_stats.rb b/db/migrate/20250310080020_create_pghero_space_stats.rb new file mode 100644 index 00000000..5592db37 --- /dev/null +++ b/db/migrate/20250310080020_create_pghero_space_stats.rb @@ -0,0 +1,13 @@ +class CreatePgheroSpaceStats < ActiveRecord::Migration[8.0] + def change + create_table :pghero_space_stats do |t| + t.text :database + t.text :schema + t.text :relation + t.integer :size, limit: 8 + t.timestamp :captured_at + end + + add_index :pghero_space_stats, [:database, :captured_at] + end +end diff --git a/db/migrate/20250310080218_remove_unused_indexes.rb b/db/migrate/20250310080218_remove_unused_indexes.rb new file mode 100644 index 00000000..62f7b10f --- /dev/null +++ b/db/migrate/20250310080218_remove_unused_indexes.rb @@ -0,0 +1,5 @@ +class RemoveUnusedIndexes < ActiveRecord::Migration[8.0] + def change + remove_index :pghero_space_stats, name: "index_pghero_space_stats_on_database_and_captured_at" + end +end diff --git a/db/migrate/20250310083455_add_composite_index_to_trips.rb b/db/migrate/20250310083455_add_composite_index_to_trips.rb new file mode 100644 index 00000000..d317c97c --- /dev/null +++ b/db/migrate/20250310083455_add_composite_index_to_trips.rb @@ -0,0 +1,7 @@ +class AddCompositeIndexToTrips < ActiveRecord::Migration[8.0] + disable_ddl_transaction! + + def change + add_index :trips, [:from_id, :to_id], algorithm: :concurrently + end +end diff --git a/db/migrate/20250310091912_add_index_to_buses_servises.rb b/db/migrate/20250310091912_add_index_to_buses_servises.rb new file mode 100644 index 00000000..d6270960 --- /dev/null +++ b/db/migrate/20250310091912_add_index_to_buses_servises.rb @@ -0,0 +1,7 @@ +class AddIndexToBusesServises < ActiveRecord::Migration[8.0] + disable_ddl_transaction! + + def change + add_index :buses_services, :bus_id, algorithm: :concurrently + end +end diff --git a/db/schema.rb b/db/schema.rb index f6921e45..574e76ac 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -2,18 +2,18 @@ # of editing this file, please use the migrations feature of Active Record to # incrementally modify your database, and then regenerate this schema definition. # -# Note that this schema.rb definition is the authoritative source for your -# database schema. If you need to create the application database on another -# system, you should be using db:schema:load, not running all the migrations -# from scratch. The latter is a flawed and unsustainable approach (the more migrations -# you'll amass, the slower it'll run and the greater likelihood for issues). +# This file is the source Rails uses to define your schema when running `bin/rails +# db:schema:load`. When creating a new database, `bin/rails db:schema:load` tends to +# be faster and is potentially less error prone than running all of your +# migrations from scratch. Old migrations may fail to apply correctly if those +# migrations use external dependencies or application code. # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2019_03_30_193044) do - +ActiveRecord::Schema[8.0].define(version: 2025_03_10_091912) do # These are extensions that must be enabled in order to support this database - enable_extension "plpgsql" + enable_extension "pg_catalog.plpgsql" + enable_extension "pg_stat_statements" create_table "buses", force: :cascade do |t| t.string "number" @@ -23,12 +23,32 @@ create_table "buses_services", force: :cascade do |t| t.integer "bus_id" t.integer "service_id" + t.index ["bus_id"], name: "index_buses_services_on_bus_id" end create_table "cities", force: :cascade do |t| t.string "name" end + create_table "pghero_query_stats", force: :cascade do |t| + t.text "database" + t.text "user" + t.text "query" + t.bigint "query_hash" + t.float "total_time" + t.bigint "calls" + t.datetime "captured_at", precision: nil + t.index ["database", "captured_at"], name: "index_pghero_query_stats_on_database_and_captured_at" + end + + create_table "pghero_space_stats", force: :cascade do |t| + t.text "database" + t.text "schema" + t.text "relation" + t.bigint "size" + t.datetime "captured_at", precision: nil + end + create_table "services", force: :cascade do |t| t.string "name" end @@ -40,6 +60,6 @@ t.integer "duration_minutes" t.integer "price_cents" t.integer "bus_id" + t.index ["from_id", "to_id"], name: "index_trips_on_from_id_and_to_id" end - end From 9d280954586d721d4511be0696b476274e0426aa Mon Sep 17 00:00:00 2001 From: iris Date: Mon, 10 Mar 2025 13:03:08 +0300 Subject: [PATCH 5/5] add eager_load --- app/controllers/trips_controller.rb | 2 +- case_study.md | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/app/controllers/trips_controller.rb b/app/controllers/trips_controller.rb index ab6dcbb4..fa994da9 100644 --- a/app/controllers/trips_controller.rb +++ b/app/controllers/trips_controller.rb @@ -2,6 +2,6 @@ class TripsController < ApplicationController def index @from = City.find_by_name!(params[:from]) @to = City.find_by_name!(params[:to]) - @trips = Trip.where(from: @from, to: @to).order(:start_time).includes([bus: :services]) + @trips = Trip.where(from: @from, to: @to).order(:start_time).eager_load([bus: :services]) end end diff --git a/case_study.md b/case_study.md index 691514bf..c8c6a89d 100644 --- a/case_study.md +++ b/case_study.md @@ -26,7 +26,7 @@ Долгая загрузка страницы rack-mini-profiler показывает, что страница грузится минимум 17s `Rendering: trips/index.html.erb 9023.1 +30.4 1012 sql 1318.8` -- Добавляю gem bullet. Он показывает, что надо добавить в запрос `.includes[:bus]` и `.includes[:services]` После добавления includes время загрузки уменьшилось до 9s, и больше нет 1000+ запросов sql из одного места. +- Добавляю gem bullet. Он показывает, что надо добавить в запрос `.includes[:bus]` и `.includes[:services]` После добавления includes время загрузки уменьшилось до 9s, и больше нет 1000+ запросов sql из одного места. Осталось 7 запросов. - Убрала паршалы из вью, загрузка сократилась до 6-7s @@ -43,4 +43,10 @@ rack-mini-profiler показывает, что страница грузитс После добавления индексов процент снизился до 17% и 14% соответственно, скорость загрузки сократилась до 5837ms Так же pghero предлагает добавить индекс `CREATE INDEX CONCURRENTLY ON buses_services (bus_id)` -Хотя запрос `SELECT "buses_services".* FROM "buses_services" WHERE ....` pfybvftn 3% +Хотя запрос `SELECT "buses_services".* FROM "buses_services" WHERE ....` занимает 3% + +## Проблема 4 +rack-mini-profiler показывает что запросы на buses, buses_services и services выполняются отдельно. +Предполагаю что надо заменить в контроллере includes на eager_load, чтобы избавиться от этого. Теперь вместо 7 запросов - 4 и страница загружается за 1439ms + +Менее 1,5 секунд уже приемлемое время, а что еще оптимизировать я с имеющимися инструментами не обнаружила.